From 37510dd566bdbff31a769cde2fa6654bccdb8b24 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 24 Aug 2023 17:34:21 +0200 Subject: [PATCH 1/5] xen: simplify evtchn_do_upcall() call maze There are several functions involved for performing the functionality of evtchn_do_upcall(): - __xen_evtchn_do_upcall() doing the real work - xen_hvm_evtchn_do_upcall() just being a wrapper for __xen_evtchn_do_upcall(), exposed for external callers - xen_evtchn_do_upcall() calling __xen_evtchn_do_upcall(), too, but without any user Simplify this maze by: - removing the unused xen_evtchn_do_upcall() - removing xen_hvm_evtchn_do_upcall() as the only left caller of __xen_evtchn_do_upcall(), while renaming __xen_evtchn_do_upcall() to xen_evtchn_do_upcall() Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Thomas Gleixner Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_hvm.c | 2 +- drivers/xen/events/events_base.c | 21 ++------------------- drivers/xen/platform-pci.c | 2 +- include/xen/events.h | 3 +-- 7 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d59765aef22..c392e18f1e43 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -207,7 +207,7 @@ static void xen_power_off(void) static irqreturn_t xen_arm_callback(int irq, void *arg) { - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); return IRQ_HANDLED; } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b33..93c60c0c9d4a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -294,7 +294,7 @@ static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b8db2148c07d..0337392a3121 100644 
--- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(hypercall_page); * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. - * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events. + * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9a192f51f1b0..3f8c34707c50 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -136,7 +136,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 3bdd5b59661d..0bb86e6c4d0a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1704,7 +1704,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) generic_handle_irq(irq); } -static int __xen_evtchn_do_upcall(void) +int xen_evtchn_do_upcall(void) { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; @@ -1735,24 +1735,7 @@ static int __xen_evtchn_do_upcall(void) return ret; } - -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - - __xen_evtchn_do_upcall(); - - irq_exit(); - set_irq_regs(old_regs); -} - -int xen_hvm_evtchn_do_upcall(void) -{ - return __xen_evtchn_do_upcall(); -} -EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); +EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); /* Rebind a new event channel to an existing irq. 
*/ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index fcc819131572..544d3f9010b9 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -64,7 +64,7 @@ static uint64_t get_callback_via(struct pci_dev *pdev) static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) { - return xen_hvm_evtchn_do_upcall(); + return xen_evtchn_do_upcall(); } static int xen_allocate_irq(struct pci_dev *pdev) diff --git a/include/xen/events.h b/include/xen/events.h index 95d5e28de324..23932b0673dc 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -105,8 +105,7 @@ int irq_from_virq(unsigned int cpu, unsigned int virq); evtchn_port_t evtchn_from_irq(unsigned irq); int xen_set_callback_via(uint64_t via); -void xen_evtchn_do_upcall(struct pt_regs *regs); -int xen_hvm_evtchn_do_upcall(void); +int xen_evtchn_do_upcall(void); /* Bind a pirq for a physical interrupt to an irq. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, From 361239fd1448d64faa4adba5bbf100401c0a606e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:26 +0200 Subject: [PATCH 2/5] arm/xen: remove lazy mode related definitions include/xen/arm/hypervisor.h contains definitions related to paravirt lazy mode, which are used nowhere in the code. All paravirt lazy mode related users are in x86 code, so remove the definitions on Arm side. 
Signed-off-by: Juergen Gross Acked-by: Stefano Stabellini Link: https://lore.kernel.org/r/20230913113828.18421-2-jgross@suse.com Signed-off-by: Juergen Gross --- include/xen/arm/hypervisor.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 43ef24dd030e..9995695204f5 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -7,18 +7,6 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - return PARAVIRT_LAZY_NONE; -} - #ifdef CONFIG_XEN void __init xen_early_init(void); #else From a4a7644c15096f57f92252dd6e1046bf269c87d8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:27 +0200 Subject: [PATCH 3/5] x86/xen: move paravirt lazy code Only Xen is using the paravirt lazy mode code, so it can be moved to Xen-specific sources. This allows making some of the functions static or merging them into their only call sites. While at it, do a rename from "paravirt" to "xen" for all moved specifiers. No functional change. 
Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/paravirt_types.h | 15 ------ arch/x86/include/asm/xen/hypervisor.h | 26 +++++++++++ arch/x86/kernel/paravirt.c | 67 --------------------------- arch/x86/xen/enlighten_pv.c | 39 +++++++++++++--- arch/x86/xen/mmu_pv.c | 55 ++++++++++++++-------- arch/x86/xen/multicalls.h | 4 +- include/trace/events/xen.h | 12 ++--- 7 files changed, 102 insertions(+), 116 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4acbcddddc29..772d03487520 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -9,13 +9,6 @@ struct paravirt_patch_site { u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; #endif #ifdef CONFIG_PARAVIRT @@ -549,14 +542,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - void _paravirt_nop(void); void paravirt_BUG(void); unsigned long paravirt_ret0(void); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 5fc35f889cd1..ed05ce3df5c7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,6 +36,7 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include #include #define 
XEN_SIGNATURE "XenVMMXenVMM" @@ -63,4 +64,29 @@ void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); #endif +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 975f98d5eee5..97f1436c1a20 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -143,66 +143,7 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } -static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; - -static inline void enter_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - - this_cpu_write(paravirt_lazy_mode, mode); -} - -static void leave_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); - - this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); -} - -void paravirt_enter_lazy_mmu(void) -{ - enter_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_leave_lazy_mmu(void) -{ - leave_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_flush_lazy_mmu(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - #ifdef CONFIG_PARAVIRT_XXL -void paravirt_start_context_switch(struct task_struct *prev) -{ - BUG_ON(preemptible()); 
- - if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); - } - enter_lazy(PARAVIRT_LAZY_CPU); -} - -void paravirt_end_context_switch(struct task_struct *next) -{ - BUG_ON(preemptible()); - - leave_lazy(PARAVIRT_LAZY_CPU); - - if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) - arch_enter_lazy_mmu_mode(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -229,14 +170,6 @@ static noinstr void pv_native_safe_halt(void) } #endif -enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - if (in_interrupt()) - return PARAVIRT_LAZY_NONE; - - return this_cpu_read(paravirt_lazy_mode); -} - struct pv_info pv_info = { .name = "bare hardware", #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 49352fad7d1d..54b83825c4b6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -101,6 +101,16 @@ struct tls_descs { struct desc_struct desc[3]; }; +DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; + +enum xen_lazy_mode xen_get_lazy_mode(void) +{ + if (in_interrupt()) + return XEN_LAZY_NONE; + + return this_cpu_read(xen_lazy_mode); +} + /* * Updating the 3 TLS descriptors in the GDT on every task switch is * surprisingly expensive so we avoid updating them if they haven't @@ -362,10 +372,25 @@ static noinstr unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } +static void xen_start_context_switch(struct task_struct *prev) +{ + BUG_ON(preemptible()); + + if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); + } + enter_lazy(XEN_LAZY_CPU); +} + static void xen_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + xen_mc_flush(); - paravirt_end_context_switch(next); + 
leave_lazy(XEN_LAZY_CPU); + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } static unsigned long xen_store_tr(void) @@ -472,7 +497,7 @@ static void xen_set_ldt(const void *addr, unsigned entries) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gdt(const struct desc_ptr *dtr) @@ -568,7 +593,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (xen_get_lazy_mode() == XEN_LAZY_CPU) loadsegment(fs, 0); xen_mc_batch(); @@ -577,7 +602,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) load_TLS_descriptor(t, cpu, 1); load_TLS_descriptor(t, cpu, 2); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gs_index(unsigned int idx) @@ -909,7 +934,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } @@ -973,7 +998,7 @@ static void xen_write_cr0(unsigned long cr0) MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_write_cr4(unsigned long cr4) @@ -1156,7 +1181,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - .start_context_switch = paravirt_start_context_switch, + .start_context_switch = xen_start_context_switch, .end_context_switch = xen_end_context_switch, }, }; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1652c39e3dfb..b6830554ff69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -236,7 +236,7 @@ static void xen_set_pmd_hyper(pmd_t *ptr, 
pmd_t val) u.val = pmd_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -270,7 +270,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) + if (xen_get_lazy_mode() != XEN_LAZY_MMU) return false; xen_mc_batch(); @@ -279,7 +279,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) u.val = pte_val_ma(pteval); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); return true; } @@ -325,7 +325,7 @@ void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } /* Assume pteval_t is equivalent to all the other *val_t types. */ @@ -419,7 +419,7 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) u.val = pud_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -499,7 +499,7 @@ static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) __xen_set_p4d_hyper(ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -531,7 +531,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) if (user_ptr) __xen_set_p4d_hyper((p4d_t *)user_ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } #if CONFIG_PGTABLE_LEVELS >= 5 @@ -1245,7 +1245,7 @@ static noinline void xen_flush_tlb(void) op->cmd = MMUEXT_TLB_FLUSH_LOCAL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1265,7 +1265,7 @@ static void xen_flush_tlb_one_user(unsigned long addr) op->arg1.linear_addr = addr & PAGE_MASK; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ 
-1302,7 +1302,7 @@ static void xen_flush_tlb_multi(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } static unsigned long xen_read_cr3(void) @@ -1361,7 +1361,7 @@ static void xen_write_cr3(unsigned long cr3) else __xen_write_cr3(false, 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } /* @@ -1396,7 +1396,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } static int xen_pgd_alloc(struct mm_struct *mm) @@ -1557,7 +1557,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } } @@ -1587,7 +1587,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) __set_pfn_prot(pfn, PAGE_KERNEL); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); ClearPagePinned(page); } @@ -1804,7 +1804,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) */ xen_mc_batch(); __xen_write_cr3(true, __pa(init_top_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... 
for @@ -2083,6 +2083,23 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +static void xen_enter_lazy_mmu(void) +{ + enter_lazy(XEN_LAZY_MMU); +} + +static void xen_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (xen_get_lazy_mode() == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + static void __init xen_post_allocator_init(void) { pv_ops.mmu.set_pte = xen_set_pte; @@ -2107,7 +2124,7 @@ static void xen_leave_lazy_mmu(void) { preempt_disable(); xen_mc_flush(); - paravirt_leave_lazy_mmu(); + leave_lazy(XEN_LAZY_MMU); preempt_enable(); } @@ -2166,9 +2183,9 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .exit_mmap = xen_exit_mmap, .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, + .enter = xen_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, - .flush = paravirt_flush_lazy_mmu, + .flush = xen_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, @@ -2385,7 +2402,7 @@ static noinline void xen_flush_tlb_all(void) op->cmd = MMUEXT_TLB_FLUSH_ALL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 1c51b2c87f30..c3867b585e0d 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -26,7 +26,7 @@ static inline void xen_mc_batch(void) /* need to disable interrupts until this entry is complete */ local_irq_save(flags); - trace_xen_mc_batch(paravirt_get_lazy_mode()); + trace_xen_mc_batch(xen_get_lazy_mode()); __this_cpu_write(xen_mc_irq_flags, flags); } @@ -44,7 +44,7 @@ static inline void xen_mc_issue(unsigned mode) { trace_xen_mc_issue(mode); - if ((paravirt_get_lazy_mode() & mode) == 0) + if ((xen_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 44a3f565264d..0577f0cdd231 100644 --- 
a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -6,26 +6,26 @@ #define _TRACE_XEN_H #include -#include +#include #include struct multicall_entry; /* Multicalls */ DECLARE_EVENT_CLASS(xen_mc__batch, - TP_PROTO(enum paravirt_lazy_mode mode), + TP_PROTO(enum xen_lazy_mode mode), TP_ARGS(mode), TP_STRUCT__entry( - __field(enum paravirt_lazy_mode, mode) + __field(enum xen_lazy_mode, mode) ), TP_fast_assign(__entry->mode = mode), TP_printk("start batch LAZY_%s", - (__entry->mode == PARAVIRT_LAZY_MMU) ? "MMU" : - (__entry->mode == PARAVIRT_LAZY_CPU) ? "CPU" : "NONE") + (__entry->mode == XEN_LAZY_MMU) ? "MMU" : + (__entry->mode == XEN_LAZY_CPU) ? "CPU" : "NONE") ); #define DEFINE_XEN_MC_BATCH(name) \ DEFINE_EVENT(xen_mc__batch, name, \ - TP_PROTO(enum paravirt_lazy_mode mode), \ + TP_PROTO(enum xen_lazy_mode mode), \ TP_ARGS(mode)) DEFINE_XEN_MC_BATCH(xen_mc_batch); From 49147beb0ccbf4c5bb81a44be93ec3bc5e4a79f1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:28 +0200 Subject: [PATCH 4/5] x86/xen: allow nesting of same lazy mode When running as a paravirtualized guest under Xen, Linux is using "lazy mode" for issuing hypercalls which don't need to take immediate effect in order to improve performance (examples are e.g. multiple PTE changes). There are two different lazy modes defined: MMU and CPU lazy mode. Today it is not possible to nest multiple lazy mode sections, even if they are of the same kind. A recent change in memory management added nesting of MMU lazy mode sections, resulting in a regression when running as Xen PV guest. Technically there is no reason why nesting of multiple sections of the same kind of lazy mode shouldn't be allowed. So add support for that for fixing the regression. 
Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-4-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypervisor.h | 15 +++++++++++++-- arch/x86/xen/enlighten_pv.c | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index ed05ce3df5c7..7048dfacc04b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -72,10 +72,18 @@ enum xen_lazy_mode { }; DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); +DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); static inline void enter_lazy(enum xen_lazy_mode mode) { - BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); + + if (mode == old_mode) { + this_cpu_inc(xen_lazy_nesting); + return; + } + + BUG_ON(old_mode != XEN_LAZY_NONE); this_cpu_write(xen_lazy_mode, mode); } @@ -84,7 +92,10 @@ static inline void leave_lazy(enum xen_lazy_mode mode) { BUG_ON(this_cpu_read(xen_lazy_mode) != mode); - this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + if (this_cpu_read(xen_lazy_nesting) == 0) + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + else + this_cpu_dec(xen_lazy_nesting); } enum xen_lazy_mode xen_get_lazy_mode(void); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 54b83825c4b6..bbbfdd495ebd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -102,6 +102,7 @@ struct tls_descs { }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; +DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { From 0fc6ff5a0f0488e09b496773c440ed5bb36d1f0d Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 11 Sep 2023 18:59:31 +0000 Subject: [PATCH 5/5] xen/efi: refactor deprecated strncpy `strncpy` is 
deprecated for use on NUL-terminated destination strings [1]. `efi_loader_signature` has space for 4 bytes. We are copying "Xen" (3 bytes) plus a NUL-byte which makes 4 total bytes. With that being said, there is currently no bug in the existing `strncpy()` usage in terms of buffer overreads but we should favor a more robust string interface either way. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer while being functionally the same in this case. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Cc: Kees Cook Signed-off-by: Justin Stitt Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230911-strncpy-arch-x86-xen-efi-c-v1-1-96ab2bba2feb@google.com Signed-off-by: Juergen Gross --- arch/x86/xen/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 863d0d6b3edc..7250d0e0e1a9 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -138,7 +138,7 @@ void __init xen_efi_init(struct boot_params *boot_params) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + strscpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", sizeof(boot_params->efi_info.efi_loader_signature)); boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);