From 031229b862f13ee5af8e1b06af258614f94dcbcd Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 4 Jun 2017 20:50:55 +0200 Subject: [PATCH 01/20] arm/xen: Improve a size determination in __set_phys_to_machine_multi() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Reviewed-by: Stefano Stabellini --- arch/arm/xen/p2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0ed01f2d5ee4..11e78432b663 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -144,7 +144,7 @@ bool __set_phys_to_machine_multi(unsigned long pfn, return true; } - p2m_entry = kzalloc(sizeof(struct xen_p2m_entry), GFP_NOWAIT); + p2m_entry = kzalloc(sizeof(*p2m_entry), GFP_NOWAIT); if (!p2m_entry) { pr_warn("cannot allocate xen_p2m_entry\n"); return false; From d6bb4ec32c832fb0fcc43636327e17a0724e2324 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 4 Jun 2017 21:12:36 +0200 Subject: [PATCH 02/20] arm/xen: Delete an error message for a failed memory allocation in __set_phys_to_machine_multi() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf Signed-off-by: Markus Elfring Reviewed-by: Stefano Stabellini --- arch/arm/xen/p2m.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 11e78432b663..f5f74ac637b9 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -145,10 +145,9 @@ bool __set_phys_to_machine_multi(unsigned long pfn, } p2m_entry = kzalloc(sizeof(*p2m_entry), GFP_NOWAIT); - if (!p2m_entry) { - pr_warn("cannot allocate xen_p2m_entry\n"); + if (!p2m_entry) return false; - } + p2m_entry->pfn = pfn; p2m_entry->nr_pages = nr_pages; p2m_entry->mfn = mfn; From 84a0a967a44b9da6c5effff628aafa7698050574 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 4 Jun 2017 21:21:20 +0200 Subject: [PATCH 03/20] arm/xen: Adjust one function call together with a variable assignment The script "checkpatch.pl" pointed information out like the following. ERROR: do not use assignment in if condition Thus fix the affected source code place. Signed-off-by: Markus Elfring Reviewed-by: Stefano Stabellini --- arch/arm/xen/p2m.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index f5f74ac637b9..e71eefa2e427 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -153,7 +153,8 @@ bool __set_phys_to_machine_multi(unsigned long pfn, p2m_entry->mfn = mfn; write_lock_irqsave(&p2m_lock, irqflags); - if ((rc = xen_add_phys_to_mach_entry(p2m_entry)) < 0) { + rc = xen_add_phys_to_mach_entry(p2m_entry); + if (rc < 0) { write_unlock_irqrestore(&p2m_lock, irqflags); return false; } From 4e93b6481c87ea5afde944a32b4908357ec58992 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 30 May 2017 20:52:26 +0200 Subject: [PATCH 04/20] xen: don't print error message in case of missing Xenstore entry When registering for the Xenstore watch of the node control/sysrq the handler will be called at once. Don't issue an error message if the Xenstore node isn't there, as it will be created only when an event is being triggered. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/manage.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c1ec8ee80924..60cf71f1b256 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -277,8 +277,16 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path, err = xenbus_transaction_start(&xbt); if (err) return; - if (xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key) < 0) { - pr_err("Unable to read sysrq code in control/sysrq\n"); + err = xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key); + if (err < 0) { + /* + * The Xenstore watch fires directly after registering it and + * after a suspend/resume cycle. So ENOENT is no error but + * might happen in those cases. + */ + if (err != -ENOENT) + pr_err("Error %d reading sysrq code in control/sysrq\n", + err); xenbus_transaction_end(xbt, 1); return; } From a2237ae761d9baa0e814e61140ca4524e31eb92b Mon Sep 17 00:00:00 2001 From: Sergey Dyasli Date: Wed, 7 Jun 2017 08:20:12 +0100 Subject: [PATCH 05/20] xen: fix HYPERVISOR_dm_op() prototype Change the third parameter to be the required struct xen_dm_op_buf * instead of a generic void * (which blindly accepts any pointer). Signed-off-by: Sergey Dyasli Reviewed-by: Juergen Gross Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 4 +++- include/xen/arm/hypercall.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index f6d20f6cca12..7a4db5fefd15 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -50,6 +50,8 @@ #include #include +struct xen_dm_op_buf; + /* * The hypercall asms have to meet several constraints: * - Work on 32- and 64-bit. @@ -474,7 +476,7 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) static inline int HYPERVISOR_dm_op( - domid_t dom, unsigned int nr_bufs, void *bufs) + domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { return _hypercall3(int, dm_op, dom, nr_bufs, bufs); } diff --git a/include/xen/arm/hypercall.h b/include/xen/arm/hypercall.h index 73db4b2eeb89..b40485e54d80 100644 --- a/include/xen/arm/hypercall.h +++ b/include/xen/arm/hypercall.h @@ -39,6 +39,8 @@ #include #include +struct xen_dm_op_buf; + long privcmd_call(unsigned call, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5); @@ -53,7 +55,8 @@ int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); -int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, void *bufs); +int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, + struct xen_dm_op_buf *bufs); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { From 9cc91f212111cdcbefa02dcdb7dd443f224bf52c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 8 Jun 2017 10:53:10 +0200 Subject: [PATCH 06/20] xen: avoid type warning in xchg_xen_ulong The improved type-checking version of container_of() triggers a warning for xchg_xen_ulong, pointing out that 'xen_ulong_t' is unsigned, but atomic64_t contains a signed value: drivers/xen/events/events_2l.c: In function 'evtchn_2l_handle_events': drivers/xen/events/events_2l.c:187:1020: error: call to '__compiletime_assert_187' declared with attribute error: pointer type mismatch in container_of() This adds a cast to work around the warning. Cc: Ian Abbott Fixes: 85323a991d40 ("xen: arm: mandate EABI and use generic atomic operations.") Fixes: daa2ac80834d ("kernel.h: handle pointers to arrays better in container_of()") Signed-off-by: Arnd Bergmann Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini Acked-by: Ian Abbott --- arch/arm/include/asm/xen/events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h index 71e473d05fcc..620dc75362e5 100644 --- a/arch/arm/include/asm/xen/events.h +++ b/arch/arm/include/asm/xen/events.h @@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->ARM_cpsr); } -#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \ +#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\ atomic64_t, \ counter), (val)) From c48f64ab472389df6f48171899c9d337adfadc5b Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 7 Jun 2017 12:46:56 +0100 Subject: [PATCH 07/20] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU A HVM domian booting generates around 200K (evtchn:qemu-dm xen-dyn) interrupts,in a short period of time. All these evtchn:qemu-dm are bound to VCPU 0, until irqbalance sees these IRQ and moves it to a different VCPU. In one configuration, irqbalance runs every 10 seconds, which means irqbalance doesn't get to see these burst of interrupts and doesn't re-balance interrupts most of the time, making all evtchn:qemu-dm to be processed by VCPU0. This cause VCPU0 to spend most of time processing hardirq and very little time on softirq. Moreover, if dom0 kernel PREEMPTION is disabled, VCPU0 never runs watchdog (process context), triggering a softlockup detection code to panic. Binding evtchn:qemu-dm to next online VCPU, will spread hardirq processing evenly across different CPU. Later, irqbalance will try to balance evtchn:qemu-dm, if required. Signed-off-by: Anoob Soman Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 6 +++--- drivers/xen/evtchn.c | 34 +++++++++++++++++++++++++++++++- include/xen/events.h | 1 + 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b52852f38cff..813f1e86a599 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1303,10 +1303,9 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu) { struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); int masked; if (!VALID_EVTCHN(evtchn)) @@ -1338,13 +1337,14 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) return 0; } +EXPORT_SYMBOL_GPL(xen_rebind_evtchn_to_cpu); static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); - return rebind_irq_to_cpu(data->irq, tcpu); + return xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); } static void enable_dynirq(struct irq_data *data) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 10f1ef582659..9729a64ea1a9 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -421,6 +421,36 @@ static void evtchn_unbind_from_user(struct per_user_data *u, del_evtchn(u, evtchn); } +static DEFINE_PER_CPU(int, bind_last_selected_cpu); + +static void evtchn_bind_interdom_next_vcpu(int evtchn) +{ + unsigned int selected_cpu, irq; + struct irq_desc *desc; + unsigned long flags; + + irq = irq_from_evtchn(evtchn); + desc = irq_to_desc(irq); + + if (!desc) + return; + + raw_spin_lock_irqsave(&desc->lock, flags); + selected_cpu = this_cpu_read(bind_last_selected_cpu); + selected_cpu = cpumask_next_and(selected_cpu, + desc->irq_common_data.affinity, cpu_online_mask); + + if (unlikely(selected_cpu >= nr_cpu_ids)) + selected_cpu = cpumask_first_and(desc->irq_common_data.affinity, + cpu_online_mask); + + this_cpu_write(bind_last_selected_cpu, selected_cpu); + + /* unmask expects irqs to be disabled */ + xen_rebind_evtchn_to_cpu(evtchn, selected_cpu); + raw_spin_unlock_irqrestore(&desc->lock, flags); +} + static long evtchn_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -478,8 +508,10 @@ static long evtchn_ioctl(struct file *file, break; rc = evtchn_bind_to_user(u, bind_interdomain.local_port); - if (rc == 0) + if (rc == 0) { rc = bind_interdomain.local_port; + evtchn_bind_interdom_next_vcpu(rc); + } break; } diff --git a/include/xen/events.h b/include/xen/events.h index 88da2abaf535..f442ca5fcd82 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -58,6 +58,7 @@ void evtchn_put(unsigned int evtchn); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); void rebind_evtchn_irq(int evtchn, int irq); +int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu); static inline void notify_remote_via_evtchn(int port) { From ad73fd595c2ab168fdd01a266cbe6e4df95f8db0 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:05:58 -0700 Subject: [PATCH 08/20] xen/vcpu: Simplify xen_vcpu related code Largely mechanical changes to aid unification of xen_vcpu_restore() logic for PV, PVH and PVHVM. xen_vcpu_setup(): the only change in logic is that clamp_max_cpus() is now handled inside the "if (!xen_have_vcpu_info_placement)" block. xen_vcpu_restore(): code movement from enlighten_pv.c to enlighten.c. xen_vcpu_info_reset(): pulls together all the code where xen_vcpu is set to default. Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 101 +++++++++++++++++++++++++---------- arch/x86/xen/enlighten_hvm.c | 6 +-- arch/x86/xen/enlighten_pv.c | 47 +++++----------- arch/x86/xen/smp_hvm.c | 3 +- arch/x86/xen/xen-ops.h | 1 + 5 files changed, 89 insertions(+), 69 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a5ffcbb20cc0..96b745e3f56c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,6 +106,35 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); + + if (other_cpu && is_up && + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) + BUG(); + + xen_setup_runstate_info(cpu); + + if (xen_have_vcpu_info_placement) + xen_vcpu_setup(cpu); + + if (other_cpu && is_up && + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) + BUG(); + } +} + static void clamp_max_cpus(void) { #ifdef CONFIG_SMP @@ -114,6 +143,17 @@ static void clamp_max_cpus(void) #endif } +void xen_vcpu_info_reset(int cpu) +{ + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + } else { + /* Set to NULL so that if somebody accesses it we get an OOPS */ + per_cpu(xen_vcpu, cpu) = NULL; + } +} + void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; @@ -137,40 +177,45 @@ void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) return; } - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + + xen_vcpu_info_reset(cpu); + + if (xen_have_vcpu_info_placement) { + vcpup = &per_cpu(xen_vcpu_info, cpu); + info.mfn = arbitrary_virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + /* + * Check to see if the hypervisor will put the vcpu_info + * structure where we want it, which allows direct access via + * a percpu-variable. + * N.B. This hypercall can _only_ be called once per CPU. + * Subsequent calls will error out with -EINVAL. This is due to + * the fact that hypervisor has no unregister variant and this + * hypercall does not allow to over-write info.mfn and + * info.offset. + */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, + xen_vcpu_nr(cpu), &info); + + if (err) { + pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n", + cpu, err); + xen_have_vcpu_info_placement = 0; + } else { + /* + * This cpu is using the registered vcpu info, even if + * later ones fail to. + */ + per_cpu(xen_vcpu, cpu) = vcpup; + } + } if (!xen_have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) clamp_max_cpus(); return; } - - vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. - N.B. This hypercall can _only_ be called once per CPU. Subsequent - calls will error out with -EINVAL. This is due to the fact that - hypervisor has no unregister variant and this hypercall does not - allow to over-write info.mfn and info.offset. - */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), - &info); - - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - xen_have_vcpu_info_placement = 0; - clamp_max_cpus(); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - } } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index a6d014f47e52..eb53da6547ee 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -45,11 +45,7 @@ void __ref xen_hvm_init_shared_info(void) * online but xen_hvm_init_shared_info is run at resume time too and * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { - /* Leave it to be NULL. */ - if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) - continue; - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + xen_vcpu_info_reset(cpu); } } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f33eef4ebd12..3ec1c4632e12 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -107,35 +107,6 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); -/* - * On restore, set the vcpu placement up again. - * If it fails, then we're in a bad state, since - * we can't back out from using it... - */ -void xen_vcpu_restore(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), - NULL); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) - BUG(); - - xen_setup_runstate_info(cpu); - - if (xen_have_vcpu_info_placement) - xen_vcpu_setup(cpu); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) - BUG(); - } -} - static void __init xen_banner(void) { unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); @@ -1332,9 +1303,17 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because start_kernel needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); @@ -1431,9 +1410,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - + /* We need this for printk timestamps */ xen_setup_runstate_info(0); xen_efi_init(); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f18561bbf5c9..9e0fb9a015d4 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -12,7 +12,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) native_smp_prepare_boot_cpu(); /* - * Setup vcpu_info for boot CPU. + * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info + * in xen_cpu_up_prepare_hvm(). */ xen_vcpu_setup(0); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a440a42c618..90828256248b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -79,6 +79,7 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; void xen_vcpu_setup(int cpu); +void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP From 0b64ffb8db4e310f77a01079ca752d946a8526b5 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:05:59 -0700 Subject: [PATCH 09/20] xen/pvh*: Support > 32 VCPUs at domain restore When Xen restores a PVHVM or PVH guest, its shared_info only holds up to 32 CPUs. The hypercall VCPUOP_register_vcpu_info allows us to setup per-page areas for VCPUs. This means we can boot PVH* guests with more than 32 VCPUs. During restore the per-cpu structure is allocated freshly by the hypervisor (vcpu_info_mfn is set to INVALID_MFN) so that the newly restored guest can make a VCPUOP_register_vcpu_info hypercall. However, we end up triggering this condition in Xen: /* Run this command on yourself or on other offline VCPUS. */ if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) ) which means we are unable to setup the per-cpu VCPU structures for running VCPUS. The Linux PV code paths makes this work by iterating over cpu_possible in xen_vcpu_restore() with: 1) is target CPU up (VCPUOP_is_up hypercall?) 2) if yes, then VCPUOP_down to pause it 3) VCPUOP_register_vcpu_info 4) if it was down, then VCPUOP_up to bring it back up With Xen commit 192df6f9122d ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs") this is available for non-PV guests. As such first check if VCPUOP_is_up is actually possible before trying this dance. As most of this dance code is done already in xen_vcpu_restore() let's make it callable on PV, PVH and PVHVM. Based-on-patch-by: Konrad Wilk Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 45 +++++++++++++++++++++++++----------- arch/x86/xen/enlighten_hvm.c | 20 ++++++---------- arch/x86/xen/smp_hvm.c | 10 ++++++++ arch/x86/xen/suspend_hvm.c | 11 +++------ include/xen/xen-ops.h | 2 ++ 5 files changed, 54 insertions(+), 34 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 96b745e3f56c..276cc21619ec 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,6 +106,21 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } +static void xen_vcpu_setup_restore(int cpu) +{ + /* Any per_cpu(xen_vcpu) is stale, so reset it */ + xen_vcpu_info_reset(cpu); + + /* + * For PVH and PVHVM, setup online VCPUs only. The rest will + * be handled by hotplug. + */ + if (xen_pv_domain() || + (xen_hvm_domain() && cpu_online(cpu))) { + xen_vcpu_setup(cpu); + } +} + /* * On restore, set the vcpu placement up again. * If it fails, then we're in a bad state, since @@ -117,17 +132,23 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), - NULL); + bool is_up; + + if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID) + continue; + + /* Only Xen 4.5 and higher support this. */ + is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL) > 0; if (other_cpu && is_up && HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); - xen_setup_runstate_info(cpu); + if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) + xen_setup_runstate_info(cpu); - if (xen_have_vcpu_info_placement) - xen_vcpu_setup(cpu); + xen_vcpu_setup_restore(cpu); if (other_cpu && is_up && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) @@ -163,11 +184,11 @@ void xen_vcpu_setup(int cpu) BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* - * This path is called twice on PVHVM - first during bootup via - * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being - * hotplugged: cpu_up -> xen_hvm_cpu_notify. - * As we can only do the VCPUOP_register_vcpu_info once lets - * not over-write its result. + * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu) + * and at restore (xen_vcpu_restore). Also called for hotplugged + * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm). + * However, the hypercall can only be done once (see below) so if a VCPU + * is offlined and comes back online then let's not redo the hypercall. * * For PV it is called during restore (xen_vcpu_restore) and bootup * (xen_setup_vcpu_info_placement). The hotplug mechanism does not @@ -178,8 +199,6 @@ void xen_vcpu_setup(int cpu) return; } - xen_vcpu_info_reset(cpu); - if (xen_have_vcpu_info_placement) { vcpup = &per_cpu(xen_vcpu_info, cpu); info.mfn = arbitrary_virt_to_mfn(vcpup); @@ -214,7 +233,7 @@ void xen_vcpu_setup(int cpu) if (!xen_have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) clamp_max_cpus(); - return; + xen_vcpu_info_reset(cpu); } } diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index eb53da6547ee..ba1afadb2512 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -20,7 +20,6 @@ void __ref xen_hvm_init_shared_info(void) { - int cpu; struct xen_add_to_physmap xatp; static struct shared_info *shared_info_page; @@ -35,18 +34,6 @@ void __ref xen_hvm_init_shared_info(void) BUG(); HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; - - /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info - * page, we use it in the event channel upcall and in some pvclock - * related functions. We don't need the vcpu_info placement - * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { - xen_vcpu_info_reset(cpu); - } } static void __init init_hvm_pv_info(void) @@ -150,6 +137,13 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_shared_info(); + /* + * xen_vcpu is a pointer to the vcpu_info struct in the shared_info + * page, we use it in the event channel upcall and in some pvclock + * related functions. + */ + xen_vcpu_info_reset(0); + xen_panic_handler_init(); if (xen_feature(XENFEAT_hvm_callback_vector)) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 9e0fb9a015d4..6c8a805819ff 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -28,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) { + int cpu; + native_smp_prepare_cpus(max_cpus); WARN_ON(xen_smp_intr_init(0)); xen_init_lock_cpu(0); + + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; + + /* Set default vcpu_id to make sure that we don't use cpu-0's */ + per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID; + } } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 01afcadde50a..484999416d8b 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -8,15 +8,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) { - int cpu; - - if (!suspend_cancelled) + if (!suspend_cancelled) { xen_hvm_init_shared_info(); + xen_vcpu_restore(); + } xen_callback_vector(); xen_unplug_emulated_devices(); - if (xen_feature(XENFEAT_hvm_safe_pvclock)) { - for_each_online_cpu(cpu) { - xen_setup_runstate_info(cpu); - } - } } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c44a2ee8c8f8..218e6aae5433 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -15,6 +15,8 @@ static inline uint32_t xen_vcpu_nr(int cpu) return per_cpu(xen_vcpu_id, cpu); } +#define XEN_VCPU_ID_INVALID U32_MAX + void xen_arch_pre_suspend(void); void xen_arch_post_suspend(int suspend_cancelled); From 0e4d583723487a975f66f0a8b346fdcb024fafa6 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:00 -0700 Subject: [PATCH 10/20] xen/pv: Fix OOPS on restore for a PV, !SMP domain If CONFIG_SMP is disabled, xen_setup_vcpu_info_placement() is called from xen_setup_shared_info(). This is fine as far as boot goes, but it means that we also call it in the restore path. This results in an OOPS because we assign to pv_mmu_ops.read_cr2 which is __ro_after_init. Also, though less problematically, this means we call xen_vcpu_setup() twice at restore -- once from the vcpu info placement call and the second time from xen_vcpu_restore(). Fix by calling xen_setup_vcpu_info_placement() at boot only. Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3ec1c4632e12..9a0714dbddfa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -931,23 +931,27 @@ void xen_setup_shared_info(void) HYPERVISOR_shared_info = (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); -#ifndef CONFIG_SMP - /* In UP this is as good a place as any to set up shared info */ - xen_setup_vcpu_info_placement(); -#endif - xen_setup_mfn_list_list(); - /* - * Now that shared info is set up we can start using routines that - * point to pvclock area. - */ - if (system_state == SYSTEM_BOOTING) + if (system_state == SYSTEM_BOOTING) { +#ifndef CONFIG_SMP + /* + * In UP this is as good a place as any to set up shared info. + * Limit this to boot only, at restore vcpu setup is done via + * xen_vcpu_restore(). + */ + xen_setup_vcpu_info_placement(); +#endif + /* + * Now that shared info is set up we can start using routines + * that point to pvclock area. + */ xen_init_time_ops(); + } } /* This is called once we have the cpu_possible_mask */ -void xen_setup_vcpu_info_placement(void) +void __ref xen_setup_vcpu_info_placement(void) { int cpu; From c9b5d98b25161a7ebee6ea59d6424dd9f33c1b99 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:01 -0700 Subject: [PATCH 11/20] xen/vcpu: Handle xen_vcpu_setup() failure in hotplug The hypercall VCPUOP_register_vcpu_info can fail. This failure is handled by making per_cpu(xen_vcpu, cpu) point to its shared_info slot and those without one (cpu >= MAX_VIRT_CPUS) be NULL. For PVH/PVHVM, this is not enough, because we also need to pull these VCPUs out of circulation. Fix for PVH/PVHVM: on registration failure in the cpuhp prepare callback (xen_cpu_up_prepare_hvm()), return an error to the cpuhp state-machine so it can fail the CPU init. Fix for PV: the registration happens before smp_init(), so, in the failure case we clamp setup_max_cpus and limit the number of VCPUs that smp_init() will bring-up to MAX_VIRT_CPUS. This is functionally correct but it makes the code a bit simpler if we get rid of this explicit clamping: for VCPUs that don't have valid xen_vcpu, fail the CPU init in the cpuhp prepare callback (xen_cpu_up_prepare_pv()). Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 46 ++++++++++++++++++++---------------- arch/x86/xen/enlighten_hvm.c | 9 +++---- arch/x86/xen/enlighten_pv.c | 14 ++++++++++- arch/x86/xen/xen-ops.h | 2 +- 4 files changed, 45 insertions(+), 26 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 276cc21619ec..0e7ef69e8531 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,8 +106,10 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static void xen_vcpu_setup_restore(int cpu) +static int xen_vcpu_setup_restore(int cpu) { + int rc = 0; + /* Any per_cpu(xen_vcpu) is stale, so reset it */ xen_vcpu_info_reset(cpu); @@ -117,8 +119,10 @@ static void xen_vcpu_setup_restore(int cpu) */ if (xen_pv_domain() || (xen_hvm_domain() && cpu_online(cpu))) { - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); } + + return rc; } /* @@ -128,7 +132,7 @@ static void xen_vcpu_setup_restore(int cpu) */ void xen_vcpu_restore(void) { - int cpu; + int cpu, rc; for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); @@ -148,22 +152,25 @@ void xen_vcpu_restore(void) if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_runstate_info(cpu); - xen_vcpu_setup_restore(cpu); - - if (other_cpu && is_up && + rc = xen_vcpu_setup_restore(cpu); + if (rc) + pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " + "System will hang.\n", cpu, rc); + /* + * In case xen_vcpu_setup_restore() fails, do not bring up the + * VCPU. This helps us avoid the resulting OOPS when the VCPU + * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.) + * Note that this does not improve the situation much -- now the + * VM hangs instead of OOPSing -- with the VCPUs that did not + * fail, spinning in stop_machine(), waiting for the failed + * VCPUs to come up. + */ + if (other_cpu && is_up && (rc == 0) && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } -static void clamp_max_cpus(void) -{ -#ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; -#endif -} - void xen_vcpu_info_reset(int cpu) { if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { @@ -175,7 +182,7 @@ void xen_vcpu_info_reset(int cpu) } } -void xen_vcpu_setup(int cpu) +int xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -196,7 +203,7 @@ void xen_vcpu_setup(int cpu) */ if (xen_hvm_domain()) { if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) - return; + return 0; } if (xen_have_vcpu_info_placement) { @@ -230,11 +237,10 @@ void xen_vcpu_setup(int cpu) } } - if (!xen_have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) - clamp_max_cpus(); + if (!xen_have_vcpu_info_placement) xen_vcpu_info_reset(cpu); - } + + return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ba1afadb2512..13b5fa1a211c 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -89,7 +89,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs) static int xen_cpu_up_prepare_hvm(unsigned int cpu) { - int rc; + int rc = 0; /* * This can happen if CPU was offlined earlier and @@ -104,7 +104,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); + if (rc) + return rc; if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -113,9 +115,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) if (rc) { WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n", cpu, rc); - return rc; } - return 0; + return rc; } static int xen_cpu_dead_hvm(unsigned int cpu) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9a0714dbddfa..bc44d2175459 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -958,7 +958,16 @@ void __ref xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) { /* Set up direct vCPU id mapping for PV guests. */ per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + + /* + * xen_vcpu_setup(cpu) can fail -- in which case it + * falls back to the shared_info version for cpus + * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. + * + * xen_cpu_up_prepare_pv() handles the rest by failing + * them in hotplug. + */ + (void) xen_vcpu_setup(cpu); } /* @@ -1432,6 +1441,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu) { int rc; + if (per_cpu(xen_vcpu, cpu) == NULL) + return -ENODEV; + xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 90828256248b..0d5004477db6 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -78,7 +78,7 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; -void xen_vcpu_setup(int cpu); +int xen_vcpu_setup(int cpu); void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); From ae039001054b34c4a624539b32a8b6ff3403aaf9 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:02 -0700 Subject: [PATCH 12/20] xen/vcpu: Handle xen_vcpu_setup() failure at boot On PVH, PVHVM, at failure in the VCPUOP_register_vcpu_info hypercall we limit the number of cpus to to MAX_VIRT_CPUS. However, if this failure had occurred for a cpu beyond MAX_VIRT_CPUS, we continue to function with > MAX_VIRT_CPUS. This leads to problems at the next save/restore cycle when there are > MAX_VIRT_CPUS threads going into stop_machine() but coming back up there's valid state for only the first MAX_VIRT_CPUS. This patch pulls the excess CPUs down via cpu_down(). Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/smp.c | 31 +++++++++++++++++++++++++++++++ arch/x86/xen/smp.h | 2 ++ arch/x86/xen/smp_hvm.c | 1 + arch/x86/xen/smp_pv.c | 6 +----- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 82ac611f2fc1..e7f02eb73727 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -114,6 +115,36 @@ int xen_smp_intr_init(unsigned int cpu) return rc; } +void __init xen_smp_cpus_done(unsigned int max_cpus) +{ + int cpu, rc, count = 0; + + if (xen_hvm_domain()) + native_smp_cpus_done(max_cpus); + + if (xen_have_vcpu_info_placement) + return; + + for_each_online_cpu(cpu) { + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + continue; + + rc = cpu_down(cpu); + + if (rc == 0) { + /* + * Reset vcpu_info so this cpu cannot be onlined again. + */ + xen_vcpu_info_reset(cpu); + count++; + } else { + pr_warn("%s: failed to bring CPU %d down, error %d\n", + __func__, cpu, rc); + } + } + WARN(count, "%s: brought %d CPUs offline\n", __func__, count); +} + void xen_smp_send_reschedule(int cpu) { xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 8ebb6acca64a..87d3c76cba37 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -14,6 +14,8 @@ extern void xen_smp_intr_free(unsigned int cpu); int xen_smp_intr_init_pv(unsigned int cpu); void xen_smp_intr_free_pv(unsigned int cpu); +void xen_smp_cpus_done(unsigned int max_cpus); + void xen_smp_send_reschedule(int cpu); void xen_smp_send_call_function_ipi(const struct cpumask *mask); void xen_smp_send_call_function_single_ipi(int cpu); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6c8a805819ff..fd60abedf658 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -71,4 +71,5 @@ void __init xen_hvm_smp_init(void) smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_cpus_done = xen_smp_cpus_done; } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index aae32535f4ec..1ea598e5f030 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -371,10 +371,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) return 0; } -static void xen_pv_smp_cpus_done(unsigned int max_cpus) -{ -} - #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -469,7 +465,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_pv_smp_prepare_cpus, - .smp_cpus_done = xen_pv_smp_cpus_done, + .smp_cpus_done = xen_smp_cpus_done, .cpu_up = xen_pv_cpu_up, .cpu_die = xen_pv_cpu_die, From db985220778e200a935c6d8afddb2b1e32554375 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 14 Jun 2017 19:23:49 +0200 Subject: [PATCH 13/20] doc,xen: document hypervisor sysfs nodes for xen Today only a few sysfs nodes under /sys/hypervisor/ are documented for Xen in Documentation/ABI/testing/sysfs-hypervisor-pmu. Add the remaining Xen sysfs nodes under /sys/hypervisor/ in a new file Documentation/ABI/stable/sysfs-hypervisor-xen and add the Xen specific sysfs docs to the MAINTAINERS file. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- Documentation/ABI/stable/sysfs-hypervisor-xen | 119 ++++++++++++++++++ MAINTAINERS | 2 + 2 files changed, 121 insertions(+) create mode 100644 Documentation/ABI/stable/sysfs-hypervisor-xen diff --git a/Documentation/ABI/stable/sysfs-hypervisor-xen b/Documentation/ABI/stable/sysfs-hypervisor-xen new file mode 100644 index 000000000000..3cf5cdfcd9a8 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-hypervisor-xen @@ -0,0 +1,119 @@ +What: /sys/hypervisor/compilation/compile_date +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Contains the build time stamp of the Xen hypervisor + Might return "" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/compilation/compiled_by +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Contains information who built the Xen hypervisor + Might return "" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/compilation/compiler +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Compiler which was used to build the Xen hypervisor + Might return "" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/properties/capabilities +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Space separated list of supported guest system types. Each type + is in the format: -.- + With: + : "xen" -- x86: paravirtualized, arm: standard + "hvm" -- x86 only: fully virtualized + : major guest interface version + : minor guest interface version + : architecture, e.g.: + "x86_32": 32 bit x86 guest without PAE + "x86_32p": 32 bit x86 guest with PAE + "x86_64": 64 bit x86 guest + "armv7l": 32 bit arm guest + "aarch64": 64 bit arm guest + +What: /sys/hypervisor/properties/changeset +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Changeset of the hypervisor (git commit) + Might return "" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/properties/features +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Features the Xen hypervisor supports for the guest as defined + in include/xen/interface/features.h printed as a hex value. + +What: /sys/hypervisor/properties/pagesize +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Default page size of the hypervisor printed as a hex value. + Might return "0" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/properties/virtual_start +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Virtual address of the hypervisor as a hex value. + +What: /sys/hypervisor/type +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Type of hypervisor: + "xen": Xen hypervisor + +What: /sys/hypervisor/uuid +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + UUID of the guest as known to the Xen hypervisor. + +What: /sys/hypervisor/version/extra +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + The Xen version is in the format . + This is the part of it. + Might return "" in case of special security settings + in the hypervisor. + +What: /sys/hypervisor/version/major +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + The Xen version is in the format . + This is the part of it. + +What: /sys/hypervisor/version/minor +Date: March 2009 +KernelVersion: 2.6.30 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + The Xen version is in the format . + This is the part of it. diff --git a/MAINTAINERS b/MAINTAINERS index 7a28acd7f525..730245ac2c18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13983,6 +13983,8 @@ F: drivers/xen/ F: arch/x86/include/asm/xen/ F: include/xen/ F: include/uapi/xen/ +F: Documentation/ABI/stable/sysfs-hypervisor-xen +F: Documentation/ABI/testing/sysfs-hypervisor-pmu XEN HYPERVISOR ARM M: Stefano Stabellini From 4a4c29c96dde0eefd69054fd9e6b4255d4717799 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 14 Jun 2017 17:12:45 +0200 Subject: [PATCH 14/20] xen: add sysfs node for guest type Currently there is no reliable user interface inside a Xen guest to determine its type (e.g. HVM, PV or PVH). Instead of letting user mode try to determine this by various rather hacky mechanisms (parsing of boot messages before they are gone, trying to make use of known subtle differences in behavior of some instructions), add a sysfs node /sys/hypervisor/guest_type to explicitly deliver this information as it is known to the kernel. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- ...fs-hypervisor-pmu => sysfs-hypervisor-xen} | 15 ++++++-- MAINTAINERS | 2 +- drivers/xen/sys-hypervisor.c | 34 +++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) rename Documentation/ABI/testing/{sysfs-hypervisor-pmu => sysfs-hypervisor-xen} (67%) diff --git a/Documentation/ABI/testing/sysfs-hypervisor-pmu b/Documentation/ABI/testing/sysfs-hypervisor-xen similarity index 67% rename from Documentation/ABI/testing/sysfs-hypervisor-pmu rename to Documentation/ABI/testing/sysfs-hypervisor-xen index 224faa105e18..c0edb3fdd6eb 100644 --- a/Documentation/ABI/testing/sysfs-hypervisor-pmu +++ b/Documentation/ABI/testing/sysfs-hypervisor-xen @@ -1,8 +1,19 @@ +What: /sys/hypervisor/guest_type +Date: May 2017 +KernelVersion: 4.13 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Type of guest: + "Xen": standard guest type on arm + "HVM": fully virtualized guest (x86) + "PV": paravirtualized guest (x86) + "PVH": fully virtualized guest without legacy emulation (x86) + What: /sys/hypervisor/pmu/pmu_mode Date: August 2015 KernelVersion: 4.3 Contact: Boris Ostrovsky -Description: +Description: If running under Xen: Describes mode that Xen's performance-monitoring unit (PMU) uses. Accepted values are "off" -- PMU is disabled @@ -17,7 +28,7 @@ What: /sys/hypervisor/pmu/pmu_features Date: August 2015 KernelVersion: 4.3 Contact: Boris Ostrovsky -Description: +Description: If running under Xen: Describes Xen PMU features (as an integer). A set bit indicates that the corresponding feature is enabled. See include/xen/interface/xenpmu.h for available features diff --git a/MAINTAINERS b/MAINTAINERS index 730245ac2c18..c3ce334414e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13984,7 +13984,7 @@ F: arch/x86/include/asm/xen/ F: include/xen/ F: include/uapi/xen/ F: Documentation/ABI/stable/sysfs-hypervisor-xen -F: Documentation/ABI/testing/sysfs-hypervisor-pmu +F: Documentation/ABI/testing/sysfs-hypervisor-xen XEN HYPERVISOR ARM M: Stefano Stabellini diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 84106f9c456c..2f78f84a31e9 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -50,6 +50,35 @@ static int __init xen_sysfs_type_init(void) return sysfs_create_file(hypervisor_kobj, &type_attr.attr); } +static ssize_t guest_type_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + const char *type; + + switch (xen_domain_type) { + case XEN_NATIVE: + /* ARM only. */ + type = "Xen"; + break; + case XEN_PV_DOMAIN: + type = "PV"; + break; + case XEN_HVM_DOMAIN: + type = xen_pvh_domain() ? "PVH" : "HVM"; + break; + default: + return -EINVAL; + } + + return sprintf(buffer, "%s\n", type); +} + +HYPERVISOR_ATTR_RO(guest_type); + +static int __init xen_sysfs_guest_type_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &guest_type_attr.attr); +} + /* xen version attributes */ static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) { @@ -471,6 +500,9 @@ static int __init hyper_sysfs_init(void) ret = xen_sysfs_type_init(); if (ret) goto out; + ret = xen_sysfs_guest_type_init(); + if (ret) + goto guest_type_out; ret = xen_sysfs_version_init(); if (ret) goto version_out; @@ -502,6 +534,8 @@ uuid_out: comp_out: sysfs_remove_group(hypervisor_kobj, &version_group); version_out: + sysfs_remove_file(hypervisor_kobj, &guest_type_attr.attr); +guest_type_out: sysfs_remove_file(hypervisor_kobj, &type_attr.attr); out: return ret; From a714c2865f154a353f5cc3c81aeb8d8065934157 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 14 Jun 2017 19:23:51 +0200 Subject: [PATCH 15/20] xen: sync include/xen/interface/version.h Sync include/xen/interface/version.h with the Xen source. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- include/xen/interface/version.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 7ff6498679a3..145f12f9ecec 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -63,4 +63,19 @@ struct xen_feature_info { /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 +#define XENVER_commandline 9 +struct xen_commandline { + char buf[1024]; +}; + +/* + * Return value is the number of bytes written, or XEN_Exx on error. + * Calling with empty parameter returns the size of build_id. + */ +#define XENVER_build_id 10 +struct xen_build_id { + uint32_t len; /* IN: size of buf[]. */ + unsigned char buf[]; +}; + #endif /* __XEN_PUBLIC_VERSION_H__ */ From 84b7625728ea311ea35bdaa0eded53c1c56baeaa Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 14 Jun 2017 19:23:52 +0200 Subject: [PATCH 16/20] xen: add sysfs node for hypervisor build id For support of Xen hypervisor live patching the hypervisor build id is needed. Add a node /sys/hypervisor/properties/buildid containing the information. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- .../ABI/testing/sysfs-hypervisor-xen | 11 +++++++- drivers/xen/sys-hypervisor.c | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-hypervisor-xen b/Documentation/ABI/testing/sysfs-hypervisor-xen index c0edb3fdd6eb..53b7b2ea7515 100644 --- a/Documentation/ABI/testing/sysfs-hypervisor-xen +++ b/Documentation/ABI/testing/sysfs-hypervisor-xen @@ -1,5 +1,5 @@ What: /sys/hypervisor/guest_type -Date: May 2017 +Date: June 2017 KernelVersion: 4.13 Contact: xen-devel@lists.xenproject.org Description: If running under Xen: @@ -32,3 +32,12 @@ Description: If running under Xen: Describes Xen PMU features (as an integer). A set bit indicates that the corresponding feature is enabled. See include/xen/interface/xenpmu.h for available features + +What: /sys/hypervisor/properties/buildid +Date: June 2017 +KernelVersion: 4.13 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + Build id of the hypervisor, needed for hypervisor live patching. + Might return "" in case of special security settings + in the hypervisor. diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 2f78f84a31e9..9d314bba7c4e 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -356,12 +356,40 @@ static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer) HYPERVISOR_ATTR_RO(features); +static ssize_t buildid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + ssize_t ret; + struct xen_build_id *buildid; + + ret = HYPERVISOR_xen_version(XENVER_build_id, NULL); + if (ret < 0) { + if (ret == -EPERM) + ret = sprintf(buffer, ""); + return ret; + } + + buildid = kmalloc(sizeof(*buildid) + ret, GFP_KERNEL); + if (!buildid) + return -ENOMEM; + + buildid->len = ret; + ret = HYPERVISOR_xen_version(XENVER_build_id, buildid); + if (ret > 0) + ret = sprintf(buffer, "%s", buildid->buf); + kfree(buildid); + + return ret; +} + +HYPERVISOR_ATTR_RO(buildid); + static struct attribute *xen_properties_attrs[] = { &capabilities_attr.attr, &changeset_attr.attr, &virtual_start_attr.attr, &pagesize_attr.attr, &features_attr.attr, + &buildid_attr.attr, NULL }; From 1a3fc2c402810bf336882e695abd1678dbc8d279 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 8 Jun 2017 16:03:42 +0200 Subject: [PATCH 17/20] xen: avoid deadlock in xenbus driver There has been a report about a deadlock in the xenbus driver: [ 247.979498] ====================================================== [ 247.985688] WARNING: possible circular locking dependency detected [ 247.991882] 4.12.0-rc4-00022-gc4b25c0 #575 Not tainted [ 247.997040] ------------------------------------------------------ [ 248.003232] xenbus/91 is trying to acquire lock: [ 248.007875] (&u->msgbuffer_mutex){+.+.+.}, at: [] xenbus_dev_queue_reply+0x3c/0x230 [ 248.017163] [ 248.017163] but task is already holding lock: [ 248.023096] (xb_write_mutex){+.+...}, at: [] xenbus_thread+0x5f0/0x798 [ 248.031267] [ 248.031267] which lock already depends on the new lock. [ 248.031267] [ 248.039615] [ 248.039615] the existing dependency chain (in reverse order) is: [ 248.047176] [ 248.047176] -> #1 (xb_write_mutex){+.+...}: [ 248.052943] __lock_acquire+0x1728/0x1778 [ 248.057498] lock_acquire+0xc4/0x288 [ 248.061630] __mutex_lock+0x84/0x868 [ 248.065755] mutex_lock_nested+0x3c/0x50 [ 248.070227] xs_send+0x164/0x1f8 [ 248.074015] xenbus_dev_request_and_reply+0x6c/0x88 [ 248.079427] xenbus_file_write+0x260/0x420 [ 248.084073] __vfs_write+0x48/0x138 [ 248.088113] vfs_write+0xa8/0x1b8 [ 248.091983] SyS_write+0x54/0xb0 [ 248.095768] el0_svc_naked+0x24/0x28 [ 248.099897] [ 248.099897] -> #0 (&u->msgbuffer_mutex){+.+.+.}: [ 248.106088] print_circular_bug+0x80/0x2e0 [ 248.110730] __lock_acquire+0x1768/0x1778 [ 248.115288] lock_acquire+0xc4/0x288 [ 248.119417] __mutex_lock+0x84/0x868 [ 248.123545] mutex_lock_nested+0x3c/0x50 [ 248.128016] xenbus_dev_queue_reply+0x3c/0x230 [ 248.133005] xenbus_thread+0x788/0x798 [ 248.137306] kthread+0x110/0x140 [ 248.141087] ret_from_fork+0x10/0x40 It is rather easy to avoid by dropping xb_write_mutex before calling xenbus_dev_queue_reply(). Fixes: fd8aa9095a95c02dcc35540a263267c29b8fda9d ("xen: optimize xenbus driver for multiple concurrent xenstore accesses"). Cc: # 4.11 Reported-by: Andre Przywara Signed-off-by: Juergen Gross Tested-by: Andre Przywara Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_comms.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 856ada5d39c9..5b081a01779d 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -299,17 +299,7 @@ static int process_msg(void) mutex_lock(&xb_write_mutex); list_for_each_entry(req, &xs_reply_list, list) { if (req->msg.req_id == state.msg.req_id) { - if (req->state == xb_req_state_wait_reply) { - req->msg.type = state.msg.type; - req->msg.len = state.msg.len; - req->body = state.body; - req->state = xb_req_state_got_reply; - list_del(&req->list); - req->cb(req); - } else { - list_del(&req->list); - kfree(req); - } + list_del(&req->list); err = 0; break; } @@ -317,6 +307,15 @@ static int process_msg(void) mutex_unlock(&xb_write_mutex); if (err) goto out; + + if (req->state == xb_req_state_wait_reply) { + req->msg.type = state.msg.type; + req->msg.len = state.msg.len; + req->body = state.body; + req->state = xb_req_state_got_reply; + req->cb(req); + } else + kfree(req); } mutex_unlock(&xs_response_mutex); From a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jun 2017 13:53:56 +0200 Subject: [PATCH 18/20] xen: allocate page for shared info page from low memory In a HVM guest the kernel allocates the page for mapping the shared info structure via extend_brk() today. This will lead to a drop of performance as the underlying EPT entry will have to be split up into 4kB entries as the single shared info page is located in hypervisor memory. The issue has been detected by using the libmicro munmap test: unmapping 8kB of memory was faster by nearly a factor of two when no pv interfaces were active in the HVM guest. So instead of taking a page from memory which might be mapped via large EPT entries use a page which is already mapped via a 4kB EPT entry: we can take a page from the first 1MB of memory as the video memory at 640kB disallows using larger EPT entries. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++------- arch/x86/xen/enlighten_pv.c | 2 -- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 13b5fa1a211c..87d791356ea9 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -10,9 +11,11 @@ #include #include #include +#include #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -21,19 +24,33 @@ void __ref xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page; + u64 pa; + + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) { + /* + * Search for a free page starting at 4kB physical address. + * Low memory is preferred to avoid an EPT large page split up + * by the mapping. + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as + * the BIOS used for HVM guests is well behaved and won't + * clobber memory other than the first 4kB. + */ + for (pa = PAGE_SIZE; + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || + memblock_is_reserved(pa); + pa += PAGE_SIZE) + ; + + memblock_reserve(pa, PAGE_SIZE); + HYPERVISOR_shared_info = __va(pa); + } - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; } static void __init init_hvm_pv_info(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bc44d2175459..811e4ddb3f37 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -89,8 +89,6 @@ void *xen_initial_gdt; -RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); - static int xen_cpu_up_prepare_pv(unsigned int cpu); static int xen_cpu_dead_pv(unsigned int cpu); From bf1b9ddf181d29ec91f87d9c52bcb551ccf04157 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 23 Jun 2017 17:01:20 -0500 Subject: [PATCH 19/20] x86: xen: remove unnecessary variable in xen_foreach_remap_area() Remove unnecessary variable mfn in function xen_foreach_remap_area() and, refactor the code. Variable mfn at line 518:mfn = xen_remap_buf.mfns[i]; is only being used to store a value to be passed as an argument to the xen_update_mem_tables() function. This value can be passed directly, which makes variable mfn unnecessary. Also, value assigned to variable mfn at line 534:mfn = xen_remap_mfn; is never used. Addresses-Coverity-ID: 1260110 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a5bf7c451435..c81046323ebc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -499,7 +499,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, void __init xen_remap_memory(void) { unsigned long buf = (unsigned long)&xen_remap_buf; - unsigned long mfn_save, mfn, pfn; + unsigned long mfn_save, pfn; unsigned long remapped = 0; unsigned int i; unsigned long pfn_s = ~0UL; @@ -515,8 +515,7 @@ void __init xen_remap_memory(void) pfn = xen_remap_buf.target_pfn; for (i = 0; i < xen_remap_buf.size; i++) { - mfn = xen_remap_buf.mfns[i]; - xen_update_mem_tables(pfn, mfn); + xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]); remapped++; pfn++; } @@ -530,8 +529,6 @@ void __init xen_remap_memory(void) pfn_s = xen_remap_buf.target_pfn; len = xen_remap_buf.size; } - - mfn = xen_remap_mfn; xen_remap_mfn = xen_remap_buf.next_area_mfn; } From c54590cac51db8ab5fd30156bdaba34af915e629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 26 Jun 2017 14:49:46 +0200 Subject: [PATCH 20/20] x86/xen: allow userspace access during hypercalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace application can do a hypercall through /dev/xen/privcmd, and some for some hypercalls argument is a pointers to user-provided structure. When SMAP is supported and enabled, hypervisor can't access. So, lets allow it. The same applies to HYPERVISOR_dm_op, where additionally privcmd driver carefully verify buffer addresses. Cc: stable@vger.kernel.org Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 7a4db5fefd15..11071fcd630e 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -216,10 +217,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } @@ -478,7 +481,11 @@ static inline int HYPERVISOR_dm_op( domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { - return _hypercall3(int, dm_op, dom, nr_bufs, bufs); + int ret; + stac(); + ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); + clac(); + return ret; } static inline void