From ce2f46f3531a03781181b7f4bd1ff9f8c5086e7e Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Fri, 10 Dec 2021 11:28:17 +0200 Subject: [PATCH 1/8] xen/gntdev: fix unmap notification order While working with Xen's libxenvchan library I have faced an issue with unmap notifications sent in wrong order if both UNMAP_NOTIFY_SEND_EVENT and UNMAP_NOTIFY_CLEAR_BYTE were requested: first we send an event channel notification and then clear the notification byte which renders in the below inconsistency (cli_live is the byte which was requested to be cleared on unmap): [ 444.514243] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6 libxenvchan_is_open cli_live 1 [ 444.515239] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14 Thus it is not possible to reliably implement the checks like - wait for the notification (UNMAP_NOTIFY_SEND_EVENT) - check the variable (UNMAP_NOTIFY_CLEAR_BYTE) because it is possible that the variable gets checked before it is cleared by the kernel. To fix that we need to re-order the notifications, so the variable is first gets cleared and then the event channel notification is sent. With this fix I can see the correct order of execution: [ 54.522611] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14 [ 54.537966] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6 libxenvchan_is_open cli_live 0 Cc: stable@vger.kernel.org Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20211210092817.580718-1-andr2000@gmail.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index fec1b6537166..59ffea800079 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -250,13 +250,13 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; + if (map->pages && !use_ptemod) + unmap_grant_pages(map, 0, map->count); + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); } - - if (map->pages && !use_ptemod) - unmap_grant_pages(map, 0, map->count); gntdev_free_map(map); } From 335e4dd67b480c8fa571ea7e71af0d22047fcfb7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 4 Jan 2022 09:46:06 +0100 Subject: [PATCH 2/8] xen/x86: obtain upper 32 bits of video frame buffer address for Dom0 The hypervisor has been supplying this information for a couple of major releases. Make use of it. The need to set a flag in the capabilities field also points out that the prior setting of that field from the hypervisor interface's gbl_caps one was wrong, so that code gets deleted (there's also no equivalent of this in native boot code). Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/a3df8bf3-d044-b7bb-3383-cd5239d6d4af@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/vga.c | 12 ++++++++---- include/xen/interface/xen.h | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index e336f223f7f4..31b1e3477cb6 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -62,14 +62,18 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) break; } - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps) - + sizeof(info->u.vesa_lfb.gbl_caps)) - screen_info->capabilities = info->u.vesa_lfb.gbl_caps; if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.mode_attrs) + sizeof(info->u.vesa_lfb.mode_attrs)) screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; + + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.ext_lfb_base) + + sizeof(info->u.vesa_lfb.ext_lfb_base) + && info->u.vesa_lfb.ext_lfb_base) { + screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; + screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } break; } } diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 5e9916939268..0ca23eca2a9c 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -722,6 +722,9 @@ struct dom0_vga_console_info { uint32_t gbl_caps; /* Mode attributes (offset 0x0, VESA command 0x4f01). */ uint16_t mode_attrs; + uint16_t pad; + /* high 32 bits of lfb_base */ + uint32_t ext_lfb_base; } vesa_lfb; } u; }; From fbf3a5c301685f7e9224bf5de9c426cf8f11ae15 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:31 +0200 Subject: [PATCH 3/8] xen/unpopulated-alloc: Drop check for virt_addr_valid() in fill_list() If memremap_pages() succeeds the range is guaranteed to have proper page table, there is no need for an additional virt_addr_valid() check. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/1639080336-26573-2-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- drivers/xen/unpopulated-alloc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index 87e6b7db892f..a03dc5b1f37b 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -85,7 +85,6 @@ static int fill_list(unsigned int nr_pages) for (i = 0; i < alloc_pages; i++) { struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i); - BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i)); pg->zone_device_data = page_list; page_list = pg; list_count++; From 5e1cdb8ee5e76bffbc11d300eebbb5193c7de530 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:32 +0200 Subject: [PATCH 4/8] arm/xen: Switch to use gnttab_setup_auto_xlat_frames() for DT Read the start address of the grant table space from DT (region 0). This patch mostly restores behaviour before commit 3cf4095d7446 ("arm/xen: Use xen_xlate_map_ballooned_pages to setup grant table") but trying not to break the ACPI support added after that commit. So the patch touches DT part only and leaves the ACPI part with xen_xlate_map_ballooned_pages(). Also in order to make a code more resilient use a fallback to xen_xlate_map_ballooned_pages() if grant table region wasn't found. This is a preparation for using Xen extended region feature where unused regions of guest physical address space (provided by the hypervisor) will be used to create grant/foreign/whatever mappings instead of wasting real RAM pages from the domain memory for establishing these mappings. The immediate benefit of this change: - Avoid superpage shattering in Xen P2M when establishing stage-2 mapping (GFN <-> MFN) for the grant table space - Avoid wasting real RAM pages (reducing the amount of memory usuable) for mapping grant table space - The grant table space is always mapped at the exact same place (region 0 is reserved for the grant table) Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-3-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7619fbffcea2..49bb6757d031 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -59,6 +59,9 @@ unsigned long xen_released_pages; struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static __read_mostly unsigned int xen_events_irq; +static __read_mostly phys_addr_t xen_grant_frames; + +#define GRANT_TABLE_INDEX 0 uint32_t xen_start_flags; EXPORT_SYMBOL(xen_start_flags); @@ -303,6 +306,7 @@ static void __init xen_acpi_guest_init(void) static void __init xen_dt_guest_init(void) { struct device_node *xen_node; + struct resource res; xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); if (!xen_node) { @@ -311,13 +315,19 @@ static void __init xen_dt_guest_init(void) } xen_events_irq = irq_of_parse_and_map(xen_node, 0); + + if (of_address_to_resource(xen_node, GRANT_TABLE_INDEX, &res)) { + pr_err("Xen grant table region is not found\n"); + return; + } + xen_grant_frames = res.start; } static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; - int cpu; + int rc, cpu; if (!xen_domain()) return 0; @@ -370,12 +380,16 @@ static int __init xen_guest_init(void) for_each_possible_cpu(cpu) per_cpu(xen_vcpu_id, cpu) = cpu; - xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); - if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, - &xen_auto_xlat_grant_frames.vaddr, - xen_auto_xlat_grant_frames.count)) { + if (!xen_grant_frames) { + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + rc = xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count); + } else + rc = gnttab_setup_auto_xlat_frames(xen_grant_frames); + if (rc) { free_percpu(xen_vcpu_info); - return -ENOMEM; + return rc; } gnttab_init(); From 9dd060afe2dfd4e3f67b6732fdc681e52cd7cbd9 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:33 +0200 Subject: [PATCH 5/8] xen/balloon: Bring alloc(free)_xenballooned_pages helpers back This patch rolls back some of the changes introduced by commit 121f2faca2c0a "xen/balloon: rename alloc/free_xenballooned_pages" in order to make possible to still allocate xenballooned pages if CONFIG_XEN_UNPOPULATED_ALLOC is enabled. On Arm the unpopulated pages will be allocated on top of extended regions provided by Xen via device-tree (the subsequent patches will add required bits to support unpopulated-alloc feature on Arm). The problem is that extended regions feature has been introduced into Xen quite recently (during 4.16 release cycle). So this effectively means that Linux must only use unpopulated-alloc on Arm if it is running on "new Xen" which advertises these regions. But, it will only be known after parsing the "hypervisor" node at boot time, so before doing that we cannot assume anything. In order to keep working if CONFIG_XEN_UNPOPULATED_ALLOC is enabled and the extended regions are not advertised (Linux is running on "old Xen", etc) we need the fallback to alloc_xenballooned_pages(). This way we wouldn't reduce the amount of memory usable (wasting RAM pages) for any of the external mappings anymore (and eliminate XSA-300) with "new Xen", but would be still functional ballooning out RAM pages with "old Xen". Also rename alloc(free)_xenballooned_pages to xen_alloc(free)_ballooned_pages and make xen_alloc(free)_unpopulated_pages static inline in xen.h if CONFIG_XEN_UNPOPULATED_ALLOC is disabled. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-4-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 20 +++++++++----------- include/xen/balloon.h | 3 +++ include/xen/xen.h | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ba2ea11e0d3d..a2c4fc49c483 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -581,7 +581,6 @@ void balloon_set_new_target(unsigned long target) } EXPORT_SYMBOL_GPL(balloon_set_new_target); -#ifndef CONFIG_XEN_UNPOPULATED_ALLOC static int add_ballooned_pages(unsigned int nr_pages) { enum bp_state st; @@ -610,12 +609,12 @@ static int add_ballooned_pages(unsigned int nr_pages) } /** - * xen_alloc_unpopulated_pages - get pages that have been ballooned out + * xen_alloc_ballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned * @return 0 on success, error otherwise */ -int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) +int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages) { unsigned int pgno = 0; struct page *page; @@ -652,23 +651,23 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) return 0; out_undo: mutex_unlock(&balloon_mutex); - xen_free_unpopulated_pages(pgno, pages); + xen_free_ballooned_pages(pgno, pages); /* - * NB: free_xenballooned_pages will only subtract pgno pages, but since + * NB: xen_free_ballooned_pages will only subtract pgno pages, but since * target_unpopulated is incremented with nr_pages at the start we need * to remove the remaining ones also, or accounting will be screwed. */ balloon_stats.target_unpopulated -= nr_pages - pgno; return ret; } -EXPORT_SYMBOL(xen_alloc_unpopulated_pages); +EXPORT_SYMBOL(xen_alloc_ballooned_pages); /** - * xen_free_unpopulated_pages - return pages retrieved with get_ballooned_pages + * xen_free_ballooned_pages - return pages retrieved with get_ballooned_pages * @nr_pages: Number of pages * @pages: pages to return */ -void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) +void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages) { unsigned int i; @@ -687,9 +686,9 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) mutex_unlock(&balloon_mutex); } -EXPORT_SYMBOL(xen_free_unpopulated_pages); +EXPORT_SYMBOL(xen_free_ballooned_pages); -#if defined(CONFIG_XEN_PV) +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { @@ -712,7 +711,6 @@ static void __init balloon_add_region(unsigned long start_pfn, balloon_stats.total_pages += extra_pfn_end - start_pfn; } #endif -#endif static int __init balloon_init(void) { diff --git a/include/xen/balloon.h b/include/xen/balloon.h index e93d4f0088c5..f78a6cc94f1a 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -26,6 +26,9 @@ extern struct balloon_stats balloon_stats; void balloon_set_new_target(unsigned long target); +int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages); +void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages); + #ifdef CONFIG_XEN_BALLOON void xen_balloon_init(void); #else diff --git a/include/xen/xen.h b/include/xen/xen.h index 9f031b5faa54..86c5b37684d9 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,7 +52,21 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#else +#include +static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages, + struct page **pages) +{ + return xen_alloc_ballooned_pages(nr_pages, pages); +} +static inline void xen_free_unpopulated_pages(unsigned int nr_pages, + struct page **pages) +{ + xen_free_ballooned_pages(nr_pages, pages); +} +#endif #endif /* _XEN_XEN_H */ From d1a928eac72962b562162c25baf45ce147e27247 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:34 +0200 Subject: [PATCH 6/8] xen/unpopulated-alloc: Add mechanism to use Xen resource The main reason of this change is that unpopulated-alloc code cannot be used in its current form on Arm, but there is a desire to reuse it to avoid wasting real RAM pages for the grant/foreign mappings. The problem is that system "iomem_resource" is used for the address space allocation, but the really unallocated space can't be figured out precisely by the domain on Arm without hypervisor involvement. For example, not all device I/O regions are known by the time domain starts creating grant/foreign mappings. And following the advise from "iomem_resource" we might end up reusing these regions by a mistake. So, the hypervisor which maintains the P2M for the domain is in the best position to provide unused regions of guest physical address space which could be safely used to create grant/foreign mappings. Introduce new helper arch_xen_unpopulated_init() which purpose is to create specific Xen resource based on the memory regions provided by the hypervisor to be used as unused space for Xen scratch pages. If arch doesn't define arch_xen_unpopulated_init() the default "iomem_resource" will be used. Update the arguments list of allocate_resource() in fill_list() to always allocate a region from the hotpluggable range (maximum possible addressable physical memory range for which the linear mapping could be created). If arch doesn't define arch_get_mappable_range() the default range (0,-1) will be used. The behaviour on x86 won't be changed by current patch as both arch_xen_unpopulated_init() and arch_get_mappable_range() are not implemented for it. Also fallback to allocate xenballooned pages (balloon out RAM pages) if we do not have any suitable resource to work with (target_resource is invalid) and as the result we won't be able to provide unpopulated pages on a request. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-5-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- drivers/xen/unpopulated-alloc.c | 86 +++++++++++++++++++++++++++++++-- include/xen/xen.h | 2 + 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index a03dc5b1f37b..a8b41057c382 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -8,6 +8,7 @@ #include +#include #include #include @@ -15,13 +16,29 @@ static DEFINE_MUTEX(list_lock); static struct page *page_list; static unsigned int list_count; +static struct resource *target_resource; + +/* + * If arch is not happy with system "iomem_resource" being used for + * the region allocation it can provide it's own view by creating specific + * Xen resource with unused regions of guest physical address space provided + * by the hypervisor. + */ +int __weak __init arch_xen_unpopulated_init(struct resource **res) +{ + *res = &iomem_resource; + + return 0; +} + static int fill_list(unsigned int nr_pages) { struct dev_pagemap *pgmap; - struct resource *res; + struct resource *res, *tmp_res = NULL; void *vaddr; unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION); - int ret = -ENOMEM; + struct range mhp_range; + int ret; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -30,14 +47,40 @@ static int fill_list(unsigned int nr_pages) res->name = "Xen scratch"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, res, - alloc_pages * PAGE_SIZE, 0, -1, + mhp_range = mhp_get_pluggable_range(true); + + ret = allocate_resource(target_resource, res, + alloc_pages * PAGE_SIZE, mhp_range.start, mhp_range.end, PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); if (ret < 0) { pr_err("Cannot allocate new IOMEM resource\n"); goto err_resource; } + /* + * Reserve the region previously allocated from Xen resource to avoid + * re-using it by someone else. + */ + if (target_resource != &iomem_resource) { + tmp_res = kzalloc(sizeof(*tmp_res), GFP_KERNEL); + if (!tmp_res) { + ret = -ENOMEM; + goto err_insert; + } + + tmp_res->name = res->name; + tmp_res->start = res->start; + tmp_res->end = res->end; + tmp_res->flags = res->flags; + + ret = request_resource(&iomem_resource, tmp_res); + if (ret < 0) { + pr_err("Cannot request resource %pR (%d)\n", tmp_res, ret); + kfree(tmp_res); + goto err_insert; + } + } + pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); if (!pgmap) { ret = -ENOMEM; @@ -95,6 +138,11 @@ static int fill_list(unsigned int nr_pages) err_memremap: kfree(pgmap); err_pgmap: + if (tmp_res) { + release_resource(tmp_res); + kfree(tmp_res); + } +err_insert: release_resource(res); err_resource: kfree(res); @@ -112,6 +160,14 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) unsigned int i; int ret = 0; + /* + * Fallback to default behavior if we do not have any suitable resource + * to allocate required region from and as the result we won't be able to + * construct pages. + */ + if (!target_resource) + return xen_alloc_ballooned_pages(nr_pages, pages); + mutex_lock(&list_lock); if (list_count < nr_pages) { ret = fill_list(nr_pages - list_count); @@ -159,6 +215,11 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) { unsigned int i; + if (!target_resource) { + xen_free_ballooned_pages(nr_pages, pages); + return; + } + mutex_lock(&list_lock); for (i = 0; i < nr_pages; i++) { pages[i]->zone_device_data = page_list; @@ -201,3 +262,20 @@ static int __init init(void) } subsys_initcall(init); #endif + +static int __init unpopulated_init(void) +{ + int ret; + + if (!xen_domain()) + return -ENODEV; + + ret = arch_xen_unpopulated_init(&target_resource); + if (ret) { + pr_err("xen:unpopulated: Cannot initialize target resource\n"); + target_resource = NULL; + } + + return ret; +} +early_initcall(unpopulated_init); diff --git a/include/xen/xen.h b/include/xen/xen.h index 86c5b37684d9..a99bab817523 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -55,6 +55,8 @@ extern u64 xen_saved_max_mem_size; #ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#include +int arch_xen_unpopulated_init(struct resource **res); #else #include static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages, From b2371587fe0c02657db89b67b72efc581bd3f7a0 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:35 +0200 Subject: [PATCH 7/8] arm/xen: Read extended regions from DT and init Xen resource This patch implements arch_xen_unpopulated_init() on Arm where the extended regions (if any) are gathered from DT and inserted into specific Xen resource to be used as unused address space for Xen scratch pages by unpopulated-alloc code. The extended region (safe range) is a region of guest physical address space which is unused and could be safely used to create grant/foreign mappings instead of wasting real RAM pages from the domain memory for establishing these mappings. The extended regions are chosen by the hypervisor at the domain creation time and advertised to it via "reg" property under hypervisor node in the guest device-tree. As region 0 is reserved for grant table space (always present), the indexes for extended regions are 1...N. If arch_xen_unpopulated_init() fails for some reason the default behaviour will be restored (allocate xenballooned pages). This patch also removes XEN_UNPOPULATED_ALLOC dependency on x86. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-6-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 106 +++++++++++++++++++++++++++++++++++++++ drivers/xen/Kconfig | 2 +- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 49bb6757d031..ec5b082f3de6 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -62,6 +62,7 @@ static __read_mostly unsigned int xen_events_irq; static __read_mostly phys_addr_t xen_grant_frames; #define GRANT_TABLE_INDEX 0 +#define EXT_REGION_INDEX 1 uint32_t xen_start_flags; EXPORT_SYMBOL(xen_start_flags); @@ -303,6 +304,111 @@ static void __init xen_acpi_guest_init(void) #endif } +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +/* + * A type-less specific Xen resource which contains extended regions + * (unused regions of guest physical address space provided by the hypervisor). + */ +static struct resource xen_resource = { + .name = "Xen unused space", +}; + +int __init arch_xen_unpopulated_init(struct resource **res) +{ + struct device_node *np; + struct resource *regs, *tmp_res; + uint64_t min_gpaddr = -1, max_gpaddr = 0; + unsigned int i, nr_reg = 0; + int rc; + + if (!xen_domain()) + return -ENODEV; + + if (!acpi_disabled) + return -ENODEV; + + np = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (WARN_ON(!np)) + return -ENODEV; + + /* Skip region 0 which is reserved for grant table space */ + while (of_get_address(np, nr_reg + EXT_REGION_INDEX, NULL, NULL)) + nr_reg++; + + if (!nr_reg) { + pr_err("No extended regions are found\n"); + return -EINVAL; + } + + regs = kcalloc(nr_reg, sizeof(*regs), GFP_KERNEL); + if (!regs) + return -ENOMEM; + + /* + * Create resource from extended regions provided by the hypervisor to be + * used as unused address space for Xen scratch pages. + */ + for (i = 0; i < nr_reg; i++) { + rc = of_address_to_resource(np, i + EXT_REGION_INDEX, ®s[i]); + if (rc) + goto err; + + if (max_gpaddr < regs[i].end) + max_gpaddr = regs[i].end; + if (min_gpaddr > regs[i].start) + min_gpaddr = regs[i].start; + } + + xen_resource.start = min_gpaddr; + xen_resource.end = max_gpaddr; + + /* + * Mark holes between extended regions as unavailable. The rest of that + * address space will be available for the allocation. + */ + for (i = 1; i < nr_reg; i++) { + resource_size_t start, end; + + /* There is an overlap between regions */ + if (regs[i - 1].end + 1 > regs[i].start) { + rc = -EINVAL; + goto err; + } + + /* There is no hole between regions */ + if (regs[i - 1].end + 1 == regs[i].start) + continue; + + start = regs[i - 1].end + 1; + end = regs[i].start - 1; + + tmp_res = kzalloc(sizeof(*tmp_res), GFP_KERNEL); + if (!tmp_res) { + rc = -ENOMEM; + goto err; + } + + tmp_res->name = "Unavailable space"; + tmp_res->start = start; + tmp_res->end = end; + + rc = insert_resource(&xen_resource, tmp_res); + if (rc) { + pr_err("Cannot insert resource %pR (%d)\n", tmp_res, rc); + kfree(tmp_res); + goto err; + } + } + + *res = &xen_resource; + +err: + kfree(regs); + + return rc; +} +#endif + static void __init xen_dt_guest_init(void) { struct device_node *xen_node; diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 33e941e40082..120d32f164ac 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -327,7 +327,7 @@ config XEN_FRONT_PGDIR_SHBUF config XEN_UNPOPULATED_ALLOC bool "Use unpopulated memory ranges for guest mappings" - depends on X86 && ZONE_DEVICE + depends on ZONE_DEVICE default XEN_BACKEND || XEN_GNTDEV || XEN_DOM0 help Use unpopulated memory ranges in order to create mappings for guest From 54bb4a91b281e1b21235a4bc175e1293e787c016 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:36 +0200 Subject: [PATCH 8/8] dt-bindings: xen: Clarify "reg" purpose Xen on Arm has gained new support recently to calculate and report extended regions (unused address space) safe to use for external mappings. These regions are reported via "reg" property under "hypervisor" node in the guest device-tree. As region 0 is reserved for grant table space (always present), the indexes for extended regions are 1...N. No device-tree bindings update is needed (except clarifying the text) as guest infers the presence of extended regions from the number of regions in "reg" property. While at it, remove the following sentence: "This property is unnecessary when booting Dom0 using ACPI." for "reg" and "interrupts" properties as the initialization is not done via device-tree "hypervisor" node in that case anyway. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Acked-by: Rob Herring Link: https://lore.kernel.org/r/1639080336-26573-7-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- Documentation/devicetree/bindings/arm/xen.txt | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt index db5c56db30ec..61d77acbeb5e 100644 --- a/Documentation/devicetree/bindings/arm/xen.txt +++ b/Documentation/devicetree/bindings/arm/xen.txt @@ -7,15 +7,17 @@ the following properties: compatible = "xen,xen-", "xen,xen"; where is the version of the Xen ABI of the platform. -- reg: specifies the base physical address and size of a region in - memory where the grant table should be mapped to, using an - HYPERVISOR_memory_op hypercall. The memory region is large enough to map - the whole grant table (it is larger or equal to gnttab_max_grant_frames()). - This property is unnecessary when booting Dom0 using ACPI. +- reg: specifies the base physical address and size of the regions in memory + where the special resources should be mapped to, using an HYPERVISOR_memory_op + hypercall. + Region 0 is reserved for mapping grant table, it must be always present. + The memory region is large enough to map the whole grant table (it is larger + or equal to gnttab_max_grant_frames()). + Regions 1...N are extended regions (unused address space) for mapping foreign + GFNs and grants, they might be absent if there is nothing to expose. - interrupts: the interrupt used by Xen to inject event notifications. A GIC node is also required. - This property is unnecessary when booting Dom0 using ACPI. To support UEFI on Xen ARM virtual platforms, Xen populates the FDT "uefi" node under /hypervisor with following parameters: