xen/privcmd: improve performance of MMAPBATCH_V2

Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, despite the pages
being non-consecutive GFNs.

xen_remap_foreign_mfn_array() is added which maps an array of GFNs
(instead of a consecutive range of GFNs).

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.

Migrate times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
This commit is contained in:
David Vrabel 2015-03-11 14:49:57 +00:00
parent 628c28eefd
commit 4e8c0c8c4b
5 changed files with 249 additions and 80 deletions

View File

@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
static __read_mostly int xen_events_irq = -1; static __read_mostly int xen_events_irq = -1;
int xen_remap_domain_mfn_range(struct vm_area_struct *vma, int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t mfn, int nr, xen_pfn_t *mfn, int nr,
pgprot_t prot, unsigned domid, int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages) struct page **pages)
{ {
return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
prot, domid, pages); prot, domid, pages);
} }
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
/* Not used by XENFEAT_auto_translated guests. */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,

View File

@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
#define REMAP_BATCH_SIZE 16 #define REMAP_BATCH_SIZE 16
struct remap_data { struct remap_data {
unsigned long mfn; xen_pfn_t *mfn;
bool contiguous;
pgprot_t prot; pgprot_t prot;
struct mmu_update *mmu_update; struct mmu_update *mmu_update;
}; };
@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data) unsigned long addr, void *data)
{ {
struct remap_data *rmd = data; struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
/* If we have a contigious range, just update the mfn itself,
else update pointer to be "next mfn". */
if (rmd->contiguous)
(*rmd->mfn)++;
else
rmd->mfn++;
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte); rmd->mmu_update->val = pte_val_ma(pte);
@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
return 0; return 0;
} }
int xen_remap_domain_mfn_range(struct vm_area_struct *vma, static int do_remap_mfn(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t mfn, int nr, xen_pfn_t *mfn, int nr,
pgprot_t prot, unsigned domid, int *err_ptr, pgprot_t prot,
struct page **pages) unsigned domid,
struct page **pages)
{ {
int err = 0;
struct remap_data rmd; struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE]; struct mmu_update mmu_update[REMAP_BATCH_SIZE];
int batch;
unsigned long range; unsigned long range;
int err = 0; int mapped = 0;
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) { if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH #ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */ /* We need to update the local page tables and the xen HAP */
return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot, return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
domid, pages); prot, domid, pages);
#else #else
return -EINVAL; return -EINVAL;
#endif #endif
@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
rmd.mfn = mfn; rmd.mfn = mfn;
rmd.prot = prot; rmd.prot = prot;
/* We use the err_ptr to indicate if there we are doing a contigious
* mapping or a discontigious mapping. */
rmd.contiguous = !err_ptr;
while (nr) { while (nr) {
batch = min(REMAP_BATCH_SIZE, nr); int index = 0;
int done = 0;
int batch = min(REMAP_BATCH_SIZE, nr);
int batch_left = batch;
range = (unsigned long)batch << PAGE_SHIFT; range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update; rmd.mmu_update = mmu_update;
@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
if (err) if (err)
goto out; goto out;
err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); /* We record the error for each page that gives an error, but
if (err < 0) * continue mapping until the whole set is done */
goto out; do {
int i;
err = HYPERVISOR_mmu_update(&mmu_update[index],
batch_left, &done, domid);
/*
* @err_ptr may be the same buffer as @mfn, so
* only clear it after each chunk of @mfn is
* used.
*/
if (err_ptr) {
for (i = index; i < index + done; i++)
err_ptr[i] = 0;
}
if (err < 0) {
if (!err_ptr)
goto out;
err_ptr[i] = err;
done++; /* Skip failed frame. */
} else
mapped += done;
batch_left -= done;
index += done;
} while (batch_left);
nr -= batch; nr -= batch;
addr += range; addr += range;
if (err_ptr)
err_ptr += batch;
} }
err = 0;
out: out:
xen_flush_tlb_all(); xen_flush_tlb_all();
return err; return err < 0 ? err : mapped;
}
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
} }
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid, struct page **pages)
{
/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
* and the consequences later is quite hard to detect what the actual
* cause of "wrong memory was mapped in".
*/
BUG_ON(err_ptr == NULL);
return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
/* Returns: 0 success */ /* Returns: 0 success */
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages) int numpgs, struct page **pages)

View File

@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
return ret; return ret;
} }
/*
* Similar to traverse_pages, but use each page as a "block" of
* data to be processed as one unit.
*/
static int traverse_pages_block(unsigned nelem, size_t size,
struct list_head *pos,
int (*fn)(void *data, int nr, void *state),
void *state)
{
void *pagedata;
unsigned pageidx;
int ret = 0;
BUG_ON(size > PAGE_SIZE);
pageidx = PAGE_SIZE;
while (nelem) {
int nr = (PAGE_SIZE/size);
struct page *page;
if (nr > nelem)
nr = nelem;
pos = pos->next;
page = list_entry(pos, struct page, lru);
pagedata = page_address(page);
ret = (*fn)(pagedata, nr, state);
if (ret)
break;
nelem -= nr;
}
return ret;
}
struct mmap_mfn_state { struct mmap_mfn_state {
unsigned long va; unsigned long va;
struct vm_area_struct *vma; struct vm_area_struct *vma;
@ -274,39 +308,25 @@ struct mmap_batch_state {
/* auto translated dom0 note: if domU being created is PV, then mfn is /* auto translated dom0 note: if domU being created is PV, then mfn is
* mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
*/ */
static int mmap_batch_fn(void *data, void *state) static int mmap_batch_fn(void *data, int nr, void *state)
{ {
xen_pfn_t *mfnp = data; xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state; struct mmap_batch_state *st = state;
struct vm_area_struct *vma = st->vma; struct vm_area_struct *vma = st->vma;
struct page **pages = vma->vm_private_data; struct page **pages = vma->vm_private_data;
struct page *cur_page = NULL; struct page **cur_pages = NULL;
int ret; int ret;
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
cur_page = pages[st->index++]; cur_pages = &pages[st->index];
ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, BUG_ON(nr < 0);
st->vma->vm_page_prot, st->domain, ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
&cur_page); (int *)mfnp, st->vma->vm_page_prot,
st->domain, cur_pages);
/* Store error code for second pass. */ /* Adjust the global_error? */
if (st->version == 1) { if (ret != nr) {
if (ret < 0) {
/*
* V1 encodes the error codes in the 32bit top nibble of the
* mfn (with its known limitations vis-a-vis 64 bit callers).
*/
*mfnp |= (ret == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
}
} else { /* st->version == 2 */
*((int *) mfnp) = ret;
}
/* And see if it affects the global_error. */
if (ret < 0) {
if (ret == -ENOENT) if (ret == -ENOENT)
st->global_error = -ENOENT; st->global_error = -ENOENT;
else { else {
@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
st->global_error = 1; st->global_error = 1;
} }
} }
st->va += PAGE_SIZE; st->va += PAGE_SIZE * nr;
st->index += nr;
return 0; return 0;
} }
static int mmap_return_errors(void *data, void *state) static int mmap_return_error(int err, struct mmap_batch_state *st)
{ {
struct mmap_batch_state *st = state; int ret;
if (st->version == 1) { if (st->version == 1) {
xen_pfn_t mfnp = *((xen_pfn_t *) data); if (err) {
if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) xen_pfn_t mfn;
return __put_user(mfnp, st->user_mfn++);
else ret = get_user(mfn, st->user_mfn);
if (ret < 0)
return ret;
/*
* V1 encodes the error codes in the 32bit top
* nibble of the mfn (with its known
* limitations vis-a-vis 64 bit callers).
*/
mfn |= (err == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
return __put_user(mfn, st->user_mfn++);
} else
st->user_mfn++; st->user_mfn++;
} else { /* st->version == 2 */ } else { /* st->version == 2 */
int err = *((int *) data);
if (err) if (err)
return __put_user(err, st->user_err++); return __put_user(err, st->user_err++);
else else
@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
return 0; return 0;
} }
static int mmap_return_errors(void *data, int nr, void *state)
{
struct mmap_batch_state *st = state;
int *errs = data;
int i;
int ret;
for (i = 0; i < nr; i++) {
ret = mmap_return_error(errs[i], st);
if (ret < 0)
return ret;
}
return 0;
}
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
* the vma with the page info to use later. * the vma with the page info to use later.
* Returns: 0 if success, otherwise -errno * Returns: 0 if success, otherwise -errno
@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
state.version = version; state.version = version;
/* mmap_batch_fn guarantees ret == 0 */ /* mmap_batch_fn guarantees ret == 0 */
BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state)); &pagelist, mmap_batch_fn, &state));
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
/* Write back errors in second pass. */ /* Write back errors in second pass. */
state.user_mfn = (xen_pfn_t *)m.arr; state.user_mfn = (xen_pfn_t *)m.arr;
state.user_err = m.err; state.user_err = m.err;
ret = traverse_pages(m.num, sizeof(xen_pfn_t), ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state); &pagelist, mmap_return_errors, &state);
} else } else
ret = 0; ret = 0;

View File

@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
} }
struct remap_data { struct remap_data {
xen_pfn_t fgmfn; /* foreign domain's gmfn */ xen_pfn_t *fgmfn; /* foreign domain's gmfn */
pgprot_t prot; pgprot_t prot;
domid_t domid; domid_t domid;
struct vm_area_struct *vma; struct vm_area_struct *vma;
int index; int index;
struct page **pages; struct page **pages;
struct xen_remap_mfn_info *info; struct xen_remap_mfn_info *info;
int *err_ptr;
int mapped;
}; };
static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
int rc; int rc;
rc = map_foreign_page(pfn, info->fgmfn, info->domid); rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
if (rc < 0) *info->err_ptr++ = rc;
return rc; if (!rc) {
set_pte_at(info->vma->vm_mm, addr, ptep, pte); set_pte_at(info->vma->vm_mm, addr, ptep, pte);
info->mapped++;
}
info->fgmfn++;
return 0; return 0;
} }
int xen_xlate_remap_gfn_range(struct vm_area_struct *vma, int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t gfn, int nr, xen_pfn_t *mfn, int nr,
pgprot_t prot, unsigned domid, int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages) struct page **pages)
{ {
int err; int err;
struct remap_data data; struct remap_data data;
unsigned long range = nr << PAGE_SHIFT;
/* TBD: Batching, current sole caller only does page at a time */ /* Kept here for the purpose of making sure code doesn't break
if (nr > 1) x86 PVOPS */
return -EINVAL; BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
data.fgmfn = gfn; data.fgmfn = mfn;
data.prot = prot; data.prot = prot;
data.domid = domid; data.domid = domid;
data.vma = vma; data.vma = vma;
data.index = 0;
data.pages = pages; data.pages = pages;
err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, data.index = 0;
data.err_ptr = err_ptr;
data.mapped = 0;
err = apply_to_page_range(vma->vm_mm, addr, range,
remap_pte_fn, &data); remap_pte_fn, &data);
return err; return err < 0 ? err : data.mapped;
} }
EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range); EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages) int nr, struct page **pages)

View File

@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
struct vm_area_struct; struct vm_area_struct;
/*
* xen_remap_domain_mfn_array() - map an array of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: Array of GFNs to map
* @nr: Number entries in the GFN array
* @err_ptr: Returns per-GFN error status.
* @prot: page protection mask
* @domid: Domain owning the pages
* @pages: Array of pages if this domain has an auto-translated physmap
*
* @gfn and @err_ptr may point to the same buffer, the GFNs will be
* overwritten by the error codes after they are mapped.
*
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
/* xen_remap_domain_mfn_range() - map a range of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: First GFN to map.
* @nr: Number frames to map
* @prot: page protection mask
* @domid: Domain owning the pages
* @pages: Array of pages if this domain has an auto-translated physmap
*
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
int xen_remap_domain_mfn_range(struct vm_area_struct *vma, int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t mfn, int nr, xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid, pgprot_t prot, unsigned domid,
struct page **pages); struct page **pages);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages); int numpgs, struct page **pages);
int xen_xlate_remap_gfn_range(struct vm_area_struct *vma, int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t gfn, int nr, xen_pfn_t *gfn, int nr,
pgprot_t prot, int *err_ptr, pgprot_t prot,
unsigned domid, unsigned domid,
struct page **pages); struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,