A set of patches to prevent crashes in SGX enclaves under heavy memory
pressure:
 
 SGX uses normal RAM allocated from special shmem files as backing storage
 when it runs out of SGX memory (EPC).  The code was overly aggressive when
 freeing shmem pages and was inadvertently freeing perfectly good data.
 This resulted in failures in the SGX instructions used to swap data back
 into SGX memory.
 
 This turned out to be really hard to trigger in mainline.  It was
 originally encountered testing the out-of-tree "SGX2" patches, but later
 reproduced on mainline.
 
 Fix the data loss by being more careful about truncating pages out of
 the backing storage and more judiciously setting pages dirty.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmKLqcgACgkQaDWVMHDJ
 krA7rA//ZgNgOTzCp/jdntz2KSp9MPhwaSJg0MUnsa7wt0T/3sPXaEAu9wgSZod7
 xqxH17LKUc27SyALtPrkvm68aVZ/Z0Nhq2gDndspXd/Zcl/CD/Cy+GI+ZpdNoYhz
 Fuqiq1TrszzzqBksgiEal9S874+jum2uWqYBMHB45ODp+E7F479Zm42hI3dSp1VN
 6n5zOi5u+unHgDRQ/rwMovu2XU61ZXrycqkbZvu4P4tRbEUH+EhAMKG2RyZOB2V9
 XNqr1vBJ122CWMIxcdzEUEofPFFwVEtC9jK+rdgUW1ZYAPJDjVvcnXx7dpA9PHLb
 DytBSWyeISllJKbea1pIMsdCT/IE4I3s0US2ZA3Ru7YAMgUIi+IGu++JJ2dWdDvx
 GoJz6yBVw4r6cl7kLUfbtIUPsJLYkEMpTM4XODsxMwzd2/Jdbe2UfQskzEn9Auvc
 1qGRspu/3VbqE5WFz5Npd94+B+8BOo7kKLcizBHqmX8U2PBkMnhRatxDMCu8frfL
 DlrjosgUgMYQRkEp3Zugo33O8F2EAE0T1I9g7N4sullX0jGnFifjgiPipnWcnIB9
 RnF5NHdrTMPwqhvzz+3o1yJgve56juZxESqn1khEIQEqgUtxFaEnrmYzdLlVkoGg
 XbuY7TNp1hDC3s9OHeiCL2oUaSmyh0eKCokLiAuWowVzbuU69BU=
 =pTAC
 -----END PGP SIGNATURE-----

Merge tag 'x86_sgx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SGX updates from Dave Hansen:
 "A set of patches to prevent crashes in SGX enclaves under heavy memory
  pressure:

  SGX uses normal RAM allocated from special shmem files as backing
  storage when it runs out of SGX memory (EPC). The code was overly
  aggressive when freeing shmem pages and was inadvertently freeing
  perfectly good data. This resulted in failures in the SGX instructions
  used to swap data back into SGX memory.

  This turned out to be really hard to trigger in mainline. It was
  originally encountered testing the out-of-tree "SGX2" patches, but
  later reproduced on mainline.

  Fix the data loss by being more careful about truncating pages out of
  the backing storage and more judiciously setting pages dirty"

* tag 'x86_sgx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sgx: Ensure no data in PCMD page after truncate
  x86/sgx: Fix race between reclaimer and page fault handler
  x86/sgx: Obtain backing storage page with enclave mutex held
  x86/sgx: Mark PCMD page as dirty when modifying contents
  x86/sgx: Disconnect backing page references from dirty status
This commit is contained in:
Linus Torvalds 2022-05-23 20:34:58 -07:00
commit cfe1cb014b
3 changed files with 115 additions and 15 deletions

View File

@@ -12,6 +12,92 @@
#include "encls.h"
#include "sgx.h"
#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
/*
* 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
* determine the page index associated with the first PCMD entry
* within a PCMD page.
*/
#define PCMD_FIRST_MASK GENMASK(4, 0)
/**
* reclaimer_writing_to_pcmd() - Query if any enclave page associated with
* a PCMD page is in process of being reclaimed.
* @encl: Enclave to which PCMD page belongs
* @start_addr: Address of enclave page using first entry within the PCMD page
*
* When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
* stored. The PCMD data of a reclaimed enclave page contains enough
* information for the processor to verify the page at the time
* it is loaded back into the Enclave Page Cache (EPC).
*
* The backing storage to which enclave pages are reclaimed is laid out as
* follows:
* Encrypted enclave pages:SECS page:PCMD pages
*
* Each PCMD page contains the PCMD metadata of
* PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
*
* A PCMD page can only be truncated if it is (a) empty, and (b) not in the
* process of getting data (and thus soon being non-empty). (b) is tested with
* a check if an enclave page sharing the PCMD page is in the process of being
* reclaimed.
*
* The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
* intends to reclaim that enclave page - it means that the PCMD page
* associated with that enclave page is about to get some data and thus
* even if the PCMD page is empty, it should not be truncated.
*
* Context: Enclave mutex (&sgx_encl->lock) must be held.
* Return: 1 if the reclaimer is about to write to the PCMD page
* 0 if the reclaimer has no intention to write to the PCMD page
*/
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
unsigned long start_addr)
{
int reclaimed = 0;
int i;
/*
* PCMD_FIRST_MASK is based on number of PCMD entries within
* PCMD page being 32.
*/
BUILD_BUG_ON(PCMDS_PER_PAGE != 32);
/*
* Walk every enclave page whose PCMD entry shares the PCMD page
* that starts at start_addr; the pages are contiguous in the
* enclave's address space, one per PAGE_SIZE.
*/
for (i = 0; i < PCMDS_PER_PAGE; i++) {
struct sgx_encl_page *entry;
unsigned long addr;
addr = start_addr + i * PAGE_SIZE;
/*
* Stop when reaching the SECS page - it does not
* have a page_array entry and its reclaim is
* started and completed with enclave mutex held so
* it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
* flag.
*/
if (addr == encl->base + encl->size)
break;
/* No enclave page at this address: nothing can be reclaimed here. */
entry = xa_load(&encl->page_array, PFN_DOWN(addr));
if (!entry)
continue;
/*
* VA page slot ID uses same bit as the flag so it is important
* to ensure that the page is not already in backing store.
* (entry->epc_page non-NULL means the page is still resident in
* EPC, so desc holds the flag rather than a VA slot ID.)
*/
if (entry->epc_page &&
(entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
reclaimed = 1;
break;
}
}
return reclaimed;
}
/*
* Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
* follow right after the EPC data in the backing storage. In addition to the
@@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
struct sgx_encl *encl = encl_page->encl;
pgoff_t page_index, page_pcmd_off;
unsigned long pcmd_first_page;
struct sgx_pageinfo pginfo;
struct sgx_backing b;
bool pcmd_page_empty;
@@ -58,6 +145,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
else
page_index = PFN_DOWN(encl->size);
/*
* Address of enclave page using the first entry within the PCMD page.
*/
pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
ret = sgx_encl_get_backing(encl, page_index, &b);
@@ -84,6 +176,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
}
memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
set_page_dirty(b.pcmd);
/*
* The area for the PCMD in the page was zeroed above. Check if the
@@ -94,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
kunmap_atomic(pcmd_page);
kunmap_atomic((void *)(unsigned long)pginfo.contents);
sgx_encl_put_backing(&b, false);
get_page(b.pcmd);
sgx_encl_put_backing(&b);
sgx_encl_truncate_backing_page(encl, page_index);
if (pcmd_page_empty)
if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
pcmd_page = kmap_atomic(b.pcmd);
if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
pr_warn("PCMD page not empty after truncate.\n");
kunmap_atomic(pcmd_page);
}
put_page(b.pcmd);
return ret;
}
@@ -645,15 +746,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
/**
* sgx_encl_put_backing() - Unpin the backing storage
* @backing: data for accessing backing storage for the page
* @do_write: mark pages dirty
*/
void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write)
void sgx_encl_put_backing(struct sgx_backing *backing)
{
if (do_write) {
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);
}
put_page(backing->pcmd);
put_page(backing->contents);
}

View File

@@ -107,7 +107,7 @@ void sgx_encl_release(struct kref *ref);
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing);
void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);

View File

@@ -191,6 +191,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
backing->pcmd_offset;
ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);
kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
@@ -308,6 +310,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_ewb(epc_page, backing);
encl_page->epc_page = NULL;
encl->secs_child_cnt--;
sgx_encl_put_backing(backing);
if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
@@ -320,7 +323,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
sgx_encl_put_backing(&secs_backing, true);
sgx_encl_put_backing(&secs_backing);
}
out:
@@ -379,11 +382,14 @@ static void sgx_reclaim_pages(void)
goto skip;
page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
if (ret)
goto skip;
mutex_lock(&encl_page->encl->lock);
ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
if (ret) {
mutex_unlock(&encl_page->encl->lock);
goto skip;
}
encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
mutex_unlock(&encl_page->encl->lock);
continue;
@@ -411,7 +417,6 @@ skip:
encl_page = epc_page->owner;
sgx_reclaimer_write(epc_page, &backing[i]);
sgx_encl_put_backing(&backing[i], true);
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;