Merge branch kvm-arm64/mmu/reduce-vmemmap-overhead into kvmarm-master/next

Host stage-2 optimisations from Quentin Perret

* kvm-arm64/mmu/reduce-vmemmap-overhead:
  KVM: arm64: Use less bits for hyp_page refcount
  KVM: arm64: Use less bits for hyp_page order
  KVM: arm64: Remove hyp_pool pointer from struct hyp_page
  KVM: arm64: Unify MMIO and mem host stage-2 pools
  KVM: arm64: Remove list_head from hyp_page
  KVM: arm64: Use refcount at hyp to check page availability
  KVM: arm64: Move hyp_pool locking out of refcount helpers
This commit is contained in:
Marc Zyngier 2021-06-11 13:26:36 +01:00
commit 46c886220a
8 changed files with 146 additions and 128 deletions

View File

@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
#define HYP_NO_ORDER UINT_MAX
#define HYP_NO_ORDER USHRT_MAX
struct hyp_pool {
/*
@ -19,48 +19,13 @@ struct hyp_pool {
struct list_head free_area[MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned int max_order;
unsigned short max_order;
};
/*
 * Increment @p's refcount while holding the lock of the pool that
 * hyp_page_to_pool() returns for @p.
 */
static inline void hyp_page_ref_inc(struct hyp_page *p)
{
struct hyp_pool *pool = hyp_page_to_pool(p);
hyp_spin_lock(&pool->lock);
p->refcount++;
hyp_spin_unlock(&pool->lock);
}
/*
 * Decrement @p's refcount while holding the lock of the pool that
 * hyp_page_to_pool() returns for @p.
 *
 * Returns non-zero if the refcount is zero after the decrement, 0 otherwise.
 */
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
struct hyp_pool *pool = hyp_page_to_pool(p);
int ret;
hyp_spin_lock(&pool->lock);
p->refcount--;
/* Sample the result before the lock is dropped */
ret = (p->refcount == 0);
hyp_spin_unlock(&pool->lock);
return ret;
}
/*
 * Set @p's refcount to 1 while holding the lock of the pool that
 * hyp_page_to_pool() returns for @p.
 *
 * A non-zero refcount on entry triggers BUG().
 */
static inline void hyp_set_page_refcounted(struct hyp_page *p)
{
struct hyp_pool *pool = hyp_page_to_pool(p);
hyp_spin_lock(&pool->lock);
if (p->refcount) {
/* Release the pool lock before calling BUG() */
hyp_spin_unlock(&pool->lock);
BUG();
}
p->refcount = 1;
hyp_spin_unlock(&pool->lock);
}
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
void hyp_get_page(void *addr);
void hyp_put_page(void *addr);
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
/* Used pages cannot be freed */
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,

View File

@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
int __pkvm_prot_finalize(void);
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
int kvm_host_prepare_stage2(void *pgt_pool_base);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
static __always_inline void __load_host_stage2(void)

View File

@ -7,12 +7,9 @@
#include <linux/types.h>
struct hyp_pool;
struct hyp_page {
unsigned int refcount;
unsigned int order;
struct hyp_pool *pool;
struct list_head node;
unsigned short refcount;
unsigned short order;
};
extern u64 __hyp_vmemmap;

View File

@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
return res;
}
static inline unsigned long host_s2_mem_pgtable_pages(void)
static inline unsigned long host_s2_pgtable_pages(void)
{
unsigned long res;
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
return __hyp_pgtable_total_pages() + 16;
}
res = __hyp_pgtable_total_pages() + 16;
static inline unsigned long host_s2_dev_pgtable_pages(void)
{
/* Allow 1 GiB for MMIO mappings */
return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
#endif /* __KVM_HYP_MM_H */

View File

@ -23,8 +23,7 @@
extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;
static struct hyp_pool host_s2_mem;
static struct hyp_pool host_s2_dev;
static struct hyp_pool host_s2_pool;
/*
* Copies of the host's CPU features registers holding sanitized values.
@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
return hyp_alloc_pages(&host_s2_mem, get_order(size));
return hyp_alloc_pages(&host_s2_pool, get_order(size));
}
static void *host_s2_zalloc_page(void *pool)
@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
return hyp_alloc_pages(pool, 0);
}
static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
/*
 * Single-argument wrapper around hyp_get_page() that always passes
 * host_s2_pool as the pool. Installed as the .get_page op in this file.
 */
static void host_s2_get_page(void *addr)
{
hyp_get_page(&host_s2_pool, addr);
}
/*
 * Single-argument wrapper around hyp_put_page() that always passes
 * host_s2_pool as the pool. Installed as the .put_page op in this file.
 */
static void host_s2_put_page(void *addr)
{
hyp_put_page(&host_s2_pool, addr);
}
static int prepare_s2_pool(void *pgt_pool_base)
{
unsigned long nr_pages, pfn;
int ret;
pfn = hyp_virt_to_pfn(mem_pgt_pool);
nr_pages = host_s2_mem_pgtable_pages();
ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
if (ret)
return ret;
pfn = hyp_virt_to_pfn(dev_pgt_pool);
nr_pages = host_s2_dev_pgtable_pages();
ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
pfn = hyp_virt_to_pfn(pgt_pool_base);
nr_pages = host_s2_pgtable_pages();
ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
if (ret)
return ret;
@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.page_count = hyp_page_count,
.get_page = hyp_get_page,
.put_page = hyp_put_page,
.get_page = host_s2_get_page,
.put_page = host_s2_put_page,
};
return 0;
@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
int ret;
@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
}
static inline int __host_stage2_idmap(u64 start, u64 end,
enum kvm_pgtable_prot prot,
struct hyp_pool *pool)
enum kvm_pgtable_prot prot)
{
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
prot, pool);
prot, &host_s2_pool);
}
static int host_stage2_idmap(u64 addr)
@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
struct kvm_mem_range range;
bool is_memory = find_mem_range(addr, &range);
struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
int ret;
if (is_memory)
@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
ret = __host_stage2_idmap(range.start, range.end, prot, pool);
if (is_memory || ret != -ENOMEM)
ret = __host_stage2_idmap(range.start, range.end, prot);
if (ret != -ENOMEM)
goto unlock;
/*
* host_s2_mem has been provided with enough pages to cover all of
* memory with page granularity, so we should never hit the ENOMEM case.
* However, it is difficult to know how much of the MMIO range we will
* need to cover upfront, so we may need to 'recycle' the pages if we
* run out.
* The pool has been provided with enough pages to cover all of memory
* with page granularity, but it is difficult to know how much of the
* MMIO range we will need to cover upfront, so we may need to 'recycle'
* the pages if we run out.
*/
ret = host_stage2_unmap_dev_all();
if (ret)
goto unlock;
ret = __host_stage2_idmap(range.start, range.end, prot, pool);
ret = __host_stage2_idmap(range.start, range.end, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
hyp_spin_lock(&host_kvm.lock);
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
&host_s2_mem, pkvm_hyp_id);
&host_s2_pool, pkvm_hyp_id);
hyp_spin_unlock(&host_kvm.lock);
return ret != -EAGAIN ? ret : 0;

View File

@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
unsigned int order)
unsigned short order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
unsigned int order)
unsigned short order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
if (!buddy || buddy->order != order || list_empty(&buddy->node))
if (!buddy || buddy->order != order || buddy->refcount)
return NULL;
return buddy;
}
/*
* Pages that are available for allocation are tracked in free-lists, so we use
* the pages themselves to store the list nodes to avoid wasting space. As the
* allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
* path to optimize allocation speed), we also need to clean-up the list node in
* each page when we take it out of the list.
*/
static inline void page_remove_from_list(struct hyp_page *p)
{
/* The list node lives in the memory hyp_page_to_virt() returns for @p */
struct list_head *node = hyp_page_to_virt(p);
/* Unlink the node, then zero the bytes it occupied */
__list_del_entry(node);
memset(node, 0, sizeof(*node));
}
/*
 * Queue @p at the tail of @head, using the memory that hyp_page_to_virt()
 * returns for @p as the list node.
 */
static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
{
struct list_head *node = hyp_page_to_virt(p);
/* Initialise the node in place before linking it */
INIT_LIST_HEAD(node);
list_add_tail(node, head);
}
/* Translate a list node pointer to a struct hyp_page via hyp_virt_to_page(). */
static inline struct hyp_page *node_to_page(struct list_head *node)
{
return hyp_virt_to_page(node);
}
static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
unsigned int order = p->order;
unsigned short order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
break;
/* Take the buddy out of its list, and coalesce with @p */
list_del_init(&buddy->node);
page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
}
/* Mark the new head, and insert it */
p->order = order;
list_add_tail(&p->node, &pool->free_area[order]);
}
static void hyp_attach_page(struct hyp_page *p)
{
struct hyp_pool *pool = hyp_page_to_pool(p);
hyp_spin_lock(&pool->lock);
__hyp_attach_page(pool, p);
hyp_spin_unlock(&pool->lock);
page_add_to_list(p, &pool->free_area[order]);
}
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
unsigned int order)
unsigned short order)
{
struct hyp_page *buddy;
list_del_init(&p->node);
page_remove_from_list(p);
while (p->order > order) {
/*
* The buddy of order n - 1 currently has HYP_NO_ORDER as it
@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
p->order--;
buddy = __find_buddy_nocheck(pool, p, p->order);
buddy->order = p->order;
list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
page_add_to_list(buddy, &pool->free_area[buddy->order]);
}
return p;
}
void hyp_put_page(void *addr)
static inline void hyp_page_ref_inc(struct hyp_page *p)
{
struct hyp_page *p = hyp_virt_to_page(addr);
BUG_ON(p->refcount == USHRT_MAX);
p->refcount++;
}
/*
 * Drop one reference on @p.
 *
 * Returns non-zero if the refcount reached zero, 0 otherwise. No lock is
 * taken here.
 *
 * The refcount is an unsigned short, so decrementing it from 0 would wrap
 * to USHRT_MAX and the page would silently look heavily referenced. Catch
 * that underflow, mirroring the overflow check in hyp_page_ref_inc().
 */
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
	BUG_ON(!p->refcount);
	p->refcount--;
	return (p->refcount == 0);
}
/*
 * Set @p's refcount to 1. BUG_ON() fires if the refcount is already
 * non-zero. No lock is taken here.
 */
static inline void hyp_set_page_refcounted(struct hyp_page *p)
{
BUG_ON(p->refcount);
p->refcount = 1;
}
static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
{
if (hyp_page_ref_dec_and_test(p))
hyp_attach_page(p);
__hyp_attach_page(pool, p);
}
void hyp_get_page(void *addr)
/*
* Changes to the buddy tree and page refcounts must be done with the hyp_pool
* lock held. If a refcount change requires an update to the buddy tree (e.g.
* hyp_put_page()), both operations must be done within the same critical
* section to guarantee transient states (e.g. a page with null refcount but
* not yet attached to a free list) can't be observed by well-behaved readers.
*/
void hyp_put_page(struct hyp_pool *pool, void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
hyp_page_ref_inc(p);
hyp_spin_lock(&pool->lock);
__hyp_put_page(pool, p);
hyp_spin_unlock(&pool->lock);
}
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
void hyp_get_page(struct hyp_pool *pool, void *addr)
{
unsigned int i = order;
struct hyp_page *p = hyp_virt_to_page(addr);
hyp_spin_lock(&pool->lock);
hyp_page_ref_inc(p);
hyp_spin_unlock(&pool->lock);
}
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
unsigned short i = order;
struct hyp_page *p;
hyp_spin_lock(&pool->lock);
@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
}
/* Extract it from the tree at the right order */
p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
p = node_to_page(pool->free_area[i].next);
p = __hyp_extract_page(pool, p, order);
hyp_spin_unlock(&pool->lock);
hyp_set_page_refcounted(p);
hyp_spin_unlock(&pool->lock);
return hyp_page_to_virt(p);
}
@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
memset(p, 0, sizeof(*p) * nr_pages);
for (i = 0; i < nr_pages; i++) {
p[i].pool = pool;
INIT_LIST_HEAD(&p[i].node);
p[i].order = 0;
hyp_set_page_refcounted(&p[i]);
}
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
__hyp_attach_page(pool, &p[i]);
__hyp_put_page(pool, &p[i]);
return 0;
}

View File

@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_mem_pgt_base;
static void *host_s2_dev_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static int divide_memory_pool(void *virt, unsigned long size)
@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!hyp_pgt_base)
return -ENOMEM;
nr_pages = host_s2_mem_pgtable_pages();
host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
if (!host_s2_mem_pgt_base)
return -ENOMEM;
nr_pages = host_s2_dev_pgtable_pages();
host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
if (!host_s2_dev_pgt_base)
nr_pages = host_s2_pgtable_pages();
host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
if (!host_s2_pgt_base)
return -ENOMEM;
return 0;
@ -143,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
return hyp_alloc_pages(&hpool, 0);
}
/*
 * Single-argument wrapper around hyp_get_page() that always passes hpool
 * as the pool. Used as the .get_page op in __pkvm_init_finalise().
 */
static void hpool_get_page(void *addr)
{
hyp_get_page(&hpool, addr);
}
/*
 * Single-argument wrapper around hyp_put_page() that always passes hpool
 * as the pool. Used as the .put_page op in __pkvm_init_finalise().
 */
static void hpool_put_page(void *addr)
{
hyp_put_page(&hpool, addr);
}
void __noreturn __pkvm_init_finalise(void)
{
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@ -158,7 +162,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
ret = kvm_host_prepare_stage2(host_s2_pgt_base);
if (ret)
goto out;
@ -166,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void)
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hyp_get_page,
.put_page = hyp_put_page,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;

View File

@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
}
hyp_mem_pages += hyp_s1_pgtable_pages();
hyp_mem_pages += host_s2_mem_pgtable_pages();
hyp_mem_pages += host_s2_dev_pgtable_pages();
hyp_mem_pages += host_s2_pgtable_pages();
/*
* The hyp_vmemmap needs to be backed by pages, but these pages