KVM: arm64: Add kvm_pgtable_stage2_split()
Add a new stage2 function, kvm_pgtable_stage2_split(), for splitting a
range of huge pages. This will be used for eager-splitting huge pages
into PAGE_SIZE pages. The goal is to avoid having to split huge pages
on write-protection faults, and instead use this function to do it
ahead of time for large ranges (e.g., all guest memory in 1G chunks at
a time).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20230426172330.1439644-7-ricarkol@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
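For illustration, here is a minimal sketch of how a caller might use the new API to eagerly split a range in 1G chunks, refilling a pre-allocated memory cache before each chunk. The eager_split_range() helper and the cache capacity of 513 pages (enough to fully split one 1G block with a 4K granule) are assumptions made for this sketch, not part of the patch; locking and cache teardown are elided.

/* Hypothetical caller: eagerly split [addr, addr + size) in 1G chunks. */
static int eager_split_range(struct kvm_pgtable *pgt,
                             struct kvm_mmu_memory_cache *cache,
                             u64 addr, u64 size)
{
        u64 next, end = addr + size;
        int ret;

        for (; addr < end; addr = next) {
                /* Stop each chunk at the next 1G boundary (or at the end). */
                next = min(end, ALIGN(addr + 1, SZ_1G));

                /*
                 * 513 pages is enough to fully split a 1G block with a
                 * 4K granule (assumption); the split only consumes what
                 * it actually needs.
                 */
                ret = kvm_mmu_topup_memory_cache(cache, 513);
                if (ret)
                        return ret;

                ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
                if (ret)
                        return ret;
        }

        return 0;
}

Note that stage2_split_walker() below recovers the kvm_s2_mmu from the cache with container_of(), so the cache passed here is expected to be the mmu's split_page_cache rather than an arbitrary one.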
commit 8f5a3eb751 (parent 2f440b72e8)
arch/arm64/include/asm/kvm_pgtable.h
@@ -671,6 +671,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
 */
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
 *                              to PAGE_SIZE guest pages.
 * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
 * @addr:  Intermediate physical address from which to split.
 * @size:  Size of the range.
 * @mc:    Cache of pre-allocated and zeroed memory from which to allocate
 *         page-table pages.
 *
 * The function tries to split any level 1 or 2 entry that overlaps
 * with the input range (given by @addr and @size).
 *
 * Return: 0 on success, negative error code on failure. Note that
 * kvm_pgtable_stage2_split() is best effort: it tries to break as many
 * blocks in the input range as allowed by the capacity of @mc.
 */
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
                             struct kvm_mmu_memory_cache *mc);

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:   Page-table structure initialised by kvm_pgtable_*_init().
arch/arm64/kvm/hyp/pgtable.c
@@ -1276,6 +1276,109 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
        return pgtable;
}

/*
 * Get the number of page-tables needed to replace a block with a
 * fully populated tree up to the PTE entries. Note that @level is
 * interpreted as in "level @level entry".
 */
static int stage2_block_get_nr_page_tables(u32 level)
{
        switch (level) {
        case 1:
                return PTRS_PER_PTE + 1;
        case 2:
                return 1;
        case 3:
                return 0;
        default:
                WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
                             level >= KVM_PGTABLE_MAX_LEVELS);
                return -EINVAL;
        }
}
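/*
 * Worked example (assuming a 4K granule, i.e. PTRS_PER_PTE == 512, for
 * illustration):
 *
 *  - a level 1 block (1GiB) split down to PAGE_SIZE needs one level 2
 *    table plus 512 level 3 tables: PTRS_PER_PTE + 1 = 513 pages;
 *  - a level 2 block (2MiB) needs a single level 3 table;
 *  - a level 3 entry already maps a single page, so nothing is needed.
 */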

static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
                               enum kvm_pgtable_walk_flags visit)
{
        struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
        struct kvm_mmu_memory_cache *mc = ctx->arg;
        struct kvm_s2_mmu *mmu;
        kvm_pte_t pte = ctx->old, new, *childp;
        enum kvm_pgtable_prot prot;
        u32 level = ctx->level;
        bool force_pte;
        int nr_pages;
        u64 phys;

        /* No huge-pages exist at the last level */
        if (level == KVM_PGTABLE_MAX_LEVELS - 1)
                return 0;

        /* We only split valid block mappings */
        if (!kvm_pte_valid(pte))
                return 0;

        nr_pages = stage2_block_get_nr_page_tables(level);
        if (nr_pages < 0)
                return nr_pages;

        if (mc->nobjs >= nr_pages) {
                /* Build a tree mapped down to the PTE granularity. */
                force_pte = true;
        } else {
                /*
                 * Don't force PTEs, so create_unlinked() below does
                 * not populate the tree up to the PTE level. The
                 * consequence is that the call will require a single
                 * page of level 2 entries at level 1, or a single
                 * page of PTEs at level 2. If we are at level 1, the
                 * PTEs will be created recursively.
                 */
                force_pte = false;
                nr_pages = 1;
        }

        if (mc->nobjs < nr_pages)
                return -ENOMEM;

        mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
        phys = kvm_pte_to_phys(pte);
        prot = kvm_pgtable_stage2_pte_prot(pte);

        childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
                                                    level, prot, mc, force_pte);
        if (IS_ERR(childp))
                return PTR_ERR(childp);

        if (!stage2_try_break_pte(ctx, mmu)) {
                kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
                mm_ops->put_page(childp);
                return -EAGAIN;
        }

        /*
         * Note, the contents of the page table are guaranteed to be made
         * visible before the new PTE is assigned because stage2_make_pte()
         * writes the PTE using smp_store_release().
         */
        new = kvm_init_table_pte(childp, mm_ops);
        stage2_make_pte(ctx, new);
        dsb(ishst);
        return 0;
}
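/*
 * Concrete illustration of the fallback path above (again assuming a
 * 4K granule): if fewer than 513 cache pages are available when a 1GiB
 * level 1 block is visited, the walker installs a single page of level
 * 2 block entries instead. The walk then descends into that new table
 * and splits each 2MiB block into PTEs on its own visit, one cache
 * page at a time, until the range is covered or the cache runs dry
 * (in which case the walk stops with -ENOMEM).
 */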

int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
                             struct kvm_mmu_memory_cache *mc)
{
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_split_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF,
                .arg    = mc,
        };

        return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                              struct kvm_pgtable_mm_ops *mm_ops,
                              enum kvm_pgtable_stage2_flags flags,