On Fri, Jan 20, 2017 at 09:53:17PM +0000, Christopherson, Sean J wrote:
Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com> wrote:
> Move PCMD's to a backing storage file in order to give more control
> to do swapping and discarding.
>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com>
> ---
> drivers/platform/x86/intel_sgx.h | 5 ++++-
> drivers/platform/x86/intel_sgx_ioctl.c | 12 +++++++++++
> drivers/platform/x86/intel_sgx_page_cache.c | 19 ++++++++++++++++--
> drivers/platform/x86/intel_sgx_util.c | 30 ++++++++++++++++++++++++++++
> drivers/platform/x86/intel_sgx_vma.c | 31
> +++++++++++++++++++++-------- 5 files changed, 86 insertions(+), 11
> deletions(-)
>
> diff --git a/drivers/platform/x86/intel_sgx.h
> b/drivers/platform/x86/intel_sgx.h index ed9e8e6..1d03606 100644
> --- a/drivers/platform/x86/intel_sgx.h
> +++ b/drivers/platform/x86/intel_sgx.h
> @@ -115,7 +115,6 @@ struct sgx_encl_page {
> struct list_head load_list;
> struct sgx_va_page *va_page;
> unsigned int va_offset;
> - struct sgx_pcmd pcmd;
> };
>
> struct sgx_tgid_ctx {
> @@ -141,6 +140,7 @@ struct sgx_encl {
> struct task_struct *owner;
> struct mm_struct *mm;
> struct file *backing;
> + struct file *pcmd;
> struct list_head load_list;
> struct kref refcount;
> unsigned long base;
> @@ -198,6 +198,9 @@ void sgx_put_epc_page(void *epc_page_vaddr);
> struct page *sgx_get_backing(struct sgx_encl *encl,
> struct sgx_encl_page *entry);
> void sgx_put_backing(struct page *backing, bool write);
> +struct page *sgx_get_pcmd(struct sgx_encl *encl,
> + struct sgx_encl_page *entry);
> +void sgx_put_pcmd(struct page *pcmd_page, bool write);
> void sgx_insert_pte(struct sgx_encl *encl,
> struct sgx_encl_page *encl_page,
> struct sgx_epc_page *epc_page,
> diff --git a/drivers/platform/x86/intel_sgx_ioctl.c
> b/drivers/platform/x86/intel_sgx_ioctl.c index b78c552..ce6a020 100644
> --- a/drivers/platform/x86/intel_sgx_ioctl.c
> +++ b/drivers/platform/x86/intel_sgx_ioctl.c
> @@ -480,6 +480,7 @@ static long sgx_ioc_enclave_create(struct file *filep,
> unsigned int cmd, struct vm_area_struct *vma;
> void *secs_vaddr = NULL;
> struct file *backing;
> + struct file *pcmd;
> long ret;
>
> secs = kzalloc(sizeof(*secs), GFP_KERNEL);
> @@ -504,9 +505,19 @@ static long sgx_ioc_enclave_create(struct file *filep,
> unsigned int cmd, goto out;
> }
>
> + pcmd = shmem_file_setup("dev/sgx",
> + ((secs->size >> PAGE_SHIFT) + 1) * 128,
> + VM_NORESERVE);
> + if (IS_ERR(pcmd)) {
> + fput(backing);
> + ret = PTR_ERR(pcmd);
> + goto out;
> + }
> +
> encl = kzalloc(sizeof(*encl), GFP_KERNEL);
> if (!encl) {
> fput(backing);
> + fput(pcmd);
> ret = -ENOMEM;
> goto out;
> }
> @@ -525,6 +536,7 @@ static long sgx_ioc_enclave_create(struct file *filep,
> unsigned int cmd, encl->base = secs->base;
> encl->size = secs->size;
> encl->backing = backing;
> + encl->pcmd = pcmd;
>
> secs_epc = sgx_alloc_page(encl->tgid_ctx, 0);
> if (IS_ERR(secs_epc)) {
> diff --git a/drivers/platform/x86/intel_sgx_page_cache.c
> b/drivers/platform/x86/intel_sgx_page_cache.c index d073057..56000fb 100644
> --- a/drivers/platform/x86/intel_sgx_page_cache.c
> +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> @@ -237,10 +237,14 @@ static int __sgx_ewb(struct sgx_encl *encl,
> {
> struct sgx_page_info pginfo;
> struct page *backing;
> + struct page *pcmd;
> + unsigned long pcmd_offset;
> void *epc;
> void *va;
> int ret;
>
> + pcmd_offset = ((encl_page->addr >> PAGE_SHIFT) & 31) * 128;
> +
> backing = sgx_get_backing(encl, encl_page);
> if (IS_ERR(backing)) {
> ret = PTR_ERR(backing);
> @@ -249,21 +253,32 @@ static int __sgx_ewb(struct sgx_encl *encl,
> return ret;
> }
>
> + pcmd = sgx_get_pcmd(encl, encl_page);
> + if (IS_ERR(pcmd)) {
> + ret = PTR_ERR(pcmd);
> + sgx_warn(encl, "pinning the pcmd page for EWB failed with
> %d\n",
> + ret);
> + goto out;
> + }
> +
> epc = sgx_get_epc_page(encl_page->epc_page);
> va = sgx_get_epc_page(encl_page->va_page->epc_page);
>
> pginfo.srcpge = (unsigned long)kmap_atomic(backing);
> - pginfo.pcmd = (unsigned long)&encl_page->pcmd;
> + pginfo.pcmd = (unsigned long)kmap_atomic(pcmd) + pcmd_offset;
> pginfo.linaddr = 0;
> pginfo.secs = 0;
> ret = __ewb(&pginfo, epc,
> (void *)((unsigned long)va + encl_page->va_offset));
> + kunmap_atomic((void *)(unsigned long)(pginfo.pcmd - pcmd_offset));
> kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
>
> sgx_put_epc_page(va);
> sgx_put_epc_page(epc);
> - sgx_put_backing(backing, true);
> + sgx_put_pcmd(pcmd, true);
>
> +out:
> + sgx_put_backing(backing, true);
> return ret;
> }
>
> diff --git a/drivers/platform/x86/intel_sgx_util.c
> b/drivers/platform/x86/intel_sgx_util.c index 2c390c5..40f5839 100644
> --- a/drivers/platform/x86/intel_sgx_util.c
> +++ b/drivers/platform/x86/intel_sgx_util.c
> @@ -105,6 +105,33 @@ void sgx_put_backing(struct page *backing_page, bool
> write) put_page(backing_page);
> }
>
> +struct page *sgx_get_pcmd(struct sgx_encl *encl,
> + struct sgx_encl_page *entry)
> +{
> + struct page *pcmd;
> + struct inode *inode;
> + struct address_space *mapping;
> + gfp_t gfpmask;
> + pgoff_t index;
> +
> + inode = encl->pcmd->f_path.dentry->d_inode;
> + mapping = inode->i_mapping;
> + gfpmask = mapping_gfp_mask(mapping);
> + /* 32 PCMD's per page */
> + index = (entry->addr - encl->base) >> (PAGE_SHIFT + 5);
> + pcmd = shmem_read_mapping_page_gfp(mapping, index, gfpmask);
> +
> + return pcmd;
> +}
> +
> +void sgx_put_pcmd(struct page *pcmd_page, bool write)
> +{
> + if (write)
> + set_page_dirty(pcmd_page);
> +
> + put_page(pcmd_page);
> +}
> +
sgx_get/put_pcmd are nearly identical copies of sgx_get/put_backing,
the bulk of the logic should be put into common functions.
Sure, that would make sense.
> struct vm_area_struct *sgx_find_vma(struct sgx_encl *encl,
unsigned long
> addr) {
> struct vm_area_struct *vma;
> @@ -245,5 +272,8 @@ void sgx_encl_release(struct kref *ref)
> if (encl->backing)
> fput(encl->backing);
>
> + if (encl->pcmd)
> + fput(encl->pcmd);
> +
> kfree(encl);
> }
> diff --git a/drivers/platform/x86/intel_sgx_vma.c
> b/drivers/platform/x86/intel_sgx_vma.c index 73ec032..3171348 100644
> --- a/drivers/platform/x86/intel_sgx_vma.c
> +++ b/drivers/platform/x86/intel_sgx_vma.c
> @@ -99,12 +99,16 @@ static int sgx_eldu(struct sgx_encl *encl,
> bool is_secs)
> {
> struct page *backing;
> + struct page *pcmd;
> + unsigned long pcmd_offset;
> struct sgx_page_info pginfo;
> void *secs_ptr = NULL;
> void *epc_ptr;
> void *va_ptr;
> int ret;
>
> + pcmd_offset = ((encl_page->addr >> PAGE_SHIFT) & 31) * 128;
> +
> backing = sgx_get_backing(encl, encl_page);
> if (IS_ERR(backing)) {
> ret = PTR_ERR(backing);
> @@ -113,22 +117,36 @@ static int sgx_eldu(struct sgx_encl *encl,
> return ret;
> }
>
> + pcmd = sgx_get_pcmd(encl, encl_page);
> + if (IS_ERR(pcmd)) {
> + ret = PTR_ERR(pcmd);
> + sgx_warn(encl, "pinning the pcmd page for EWB failed with
> %d\n",
> + ret);
> + goto out;
> + }
> +
> if (!is_secs)
> secs_ptr = sgx_get_epc_page(encl->secs_page.epc_page);
> - pginfo.secs = (unsigned long)secs_ptr;
>
> epc_ptr = sgx_get_epc_page(epc_page);
> va_ptr = sgx_get_epc_page(encl_page->va_page->epc_page);
> pginfo.srcpge = (unsigned long)kmap_atomic(backing);
>
> + pginfo.srcpge = (unsigned long)kmap_atomic(backing);
Duplicate call to kmap_atomic, this results in missed #PFs.
Whoops this must have happened when I rebased these patches to the
latest master. Thanks for spotting this.
> + pginfo.pcmd = (unsigned long)kmap_atomic(pcmd) + pcmd_offset;
> pginfo.linaddr = is_secs ? 0 : encl_page->addr;
> - pginfo.pcmd = (unsigned long)&encl_page->pcmd;
> + pginfo.secs = (unsigned long)secs_ptr;
>
> ret = __eldu((unsigned long)&pginfo,
> (unsigned long)epc_ptr,
> (unsigned long)va_ptr +
> encl_page->va_offset);
> + if (ret) {
> + sgx_err(encl, "ELDU returned %d\n", ret);
> + ret = -EFAULT;
> + }
>
> + kunmap_atomic((void *)(unsigned long)(pginfo.pcmd - pcmd_offset));
> kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
> sgx_put_epc_page(va_ptr);
> sgx_put_epc_page(epc_ptr);
> @@ -136,13 +154,10 @@ static int sgx_eldu(struct sgx_encl *encl,
> if (!is_secs)
> sgx_put_epc_page(secs_ptr);
>
> - sgx_put_backing(backing, false);
> -
> - if (ret) {
> - sgx_err(encl, "ELDU returned %d\n", ret);
> - return -EFAULT;
> - }
> + sgx_put_pcmd(pcmd, true);
>
> +out:
> + sgx_put_backing(backing, 0);
> return 0;
Need to return 'ret' instead of 0.
> }
>
I'm still not a fan of using shmem for PCMD storage, but I'm fine with
this approach for now as rudimentary performance tests showed no
difference between the shmem approach and allocating PCMDs on-demand
in regular kernel memory.
With a bit reorganization the kernel memory consumption for each enclave
page starts to become acceptable (4 qwords):
struct sgx_encl_page {
union {
unsigned long addr;
unsigned int flags;
};
union {
/* loaded */
struct {
struct sgx_epc_page *epc_page;
struct list_head load_list;
}; /* 64 bytes */
/* swapped */
struct {
struct sgx_va_page *va_page;
unsigned int va_offset;
}; /* 60 bytes */
};
};
For me more important is that SGX takes away minimum amount of system
resources from other (non-SGX) tasks than the SGX performance itself.
/Jarkko