diff --git a/Documentation/ABI/stable/sysfs-hypervisor-xen b/Documentation/ABI/stable/sysfs-hypervisor-xen index 748593c64568..be9ca9981bb1 100644 --- a/Documentation/ABI/stable/sysfs-hypervisor-xen +++ b/Documentation/ABI/stable/sysfs-hypervisor-xen @@ -120,3 +120,16 @@ Contact: xen-devel@lists.xenproject.org Description: If running under Xen: The Xen version is in the format . This is the part of it. + +What: /sys/hypervisor/start_flags/* +Date: March 2023 +KernelVersion: 6.3.0 +Contact: xen-devel@lists.xenproject.org +Description: If running under Xen: + All bits in Xen's start-flags are represented as + boolean files, returning '1' if set, '0' otherwise. + This takes the place of the defunct /proc/xen/capabilities, + which would contain "control_d" on dom0, and be empty + otherwise. This flag is now exposed as "initdomain" in + addition to the "privileged" flag; all other possible flags + are accessible as "unknownXX". diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8db26f10fb1d..c2be3efb2ba0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -934,12 +934,8 @@ void xen_enable_syscall(void) static void __init xen_pvmmu_arch_setup(void) { - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - HYPERVISOR_vm_assist(VMASST_CMD_enable, - VMASST_TYPE_pae_extended_cr3); - if (register_callback(CALLBACKTYPE_event, xen_asm_exc_xen_hypervisor_callback) || register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index bd02f9d50107..22fb982ff971 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -21,6 +21,8 @@ void xen_smp_send_reschedule(int cpu); void xen_smp_send_call_function_ipi(const struct cpumask *mask); void xen_smp_send_call_function_single_ipi(int cpu); +void __noreturn xen_cpu_bringup_again(unsigned long stack); + struct xen_common_irq { int irq; char *name; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 6175f2c5c822..a9cf8c8fa074 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -381,21 +381,12 @@ static void xen_pv_cpu_die(unsigned int cpu) } } -static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ +static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); - cpu_bringup(); - /* - * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) - * clears certain data that the cpu_idle loop (which called us - * and that we return from) expects. The only way to get that - * data back is to call: - */ - tick_nohz_idle_enter(); - tick_nohz_idle_stop_tick_protected(); - - cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE); + xen_cpu_bringup_again((unsigned long)task_pt_regs(current)); + BUG(); } #else /* !CONFIG_HOTPLUG_CPU */ @@ -409,7 +400,7 @@ static void xen_pv_cpu_die(unsigned int cpu) BUG(); } -static void xen_pv_play_dead(void) +static void __noreturn xen_pv_play_dead(void) { BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6b8836deb738..1d597364b49d 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -482,15 +482,51 @@ static void xen_setup_vsyscall_time_info(void) xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; } +/* + * Check if it is possible to safely use the tsc as a clocksource. This is + * only true if the hypervisor notifies the guest that its tsc is invariant, + * the tsc is stable, and the tsc instruction will never be emulated. + */ +static int __init xen_tsc_safe_clocksource(void) +{ + u32 eax, ebx, ecx, edx; + + if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC))) + return 0; + + if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC))) + return 0; + + if (check_tsc_unstable()) + return 0; + + /* Leaf 4, sub-leaf 0 (0x40000x03) */ + cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx); + + /* tsc_mode = no_emulate (2) */ + if (ebx != 2) + return 0; + + return 1; +} + static void __init xen_time_init(void) { struct pvclock_vcpu_time_info *pvti; int cpu = smp_processor_id(); struct timespec64 tp; - /* As Dom0 is never moved, no penalty on using TSC there */ + /* + * As Dom0 is never moved, no penalty on using TSC there. + * + * If it is possible for the guest to determine that the tsc is a safe + * clocksource, then set xen_clocksource rating below that of the tsc + * so that the system prefers tsc instead. + */ if (xen_initial_domain()) xen_clocksource.rating = 275; + else if (xen_tsc_safe_clocksource()) + xen_clocksource.rating = 299; clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index ffaa62167f6e..e36ea4268bd2 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -76,6 +76,13 @@ SYM_CODE_START(asm_cpu_bringup_and_idle) call cpu_bringup_and_idle SYM_CODE_END(asm_cpu_bringup_and_idle) + +SYM_CODE_START(xen_cpu_bringup_again) + UNWIND_HINT_FUNC + mov %rdi, %rsp + UNWIND_HINT_REGS + call cpu_bringup_and_idle +SYM_CODE_END(xen_cpu_bringup_again) .popsection #endif #endif diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index c443f04aaad7..c7715f8bd452 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1710,9 +1710,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) generic_handle_irq(irq); } -static void __xen_evtchn_do_upcall(void) +static int __xen_evtchn_do_upcall(void) { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 }; @@ -1737,6 +1738,8 @@ static void __xen_evtchn_do_upcall(void) * above. */ __this_cpu_inc(irq_epoch); + + return ret; } void xen_evtchn_do_upcall(struct pt_regs *regs) @@ -1751,9 +1754,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) set_irq_regs(old_regs); } -void xen_hvm_evtchn_do_upcall(void) +int xen_hvm_evtchn_do_upcall(void) { - __xen_evtchn_do_upcall(); + return __xen_evtchn_do_upcall(); } EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); diff --git a/drivers/xen/grant-dma-iommu.c b/drivers/xen/grant-dma-iommu.c index 16b8bc0c0b33..6a9fe02c6bfc 100644 --- a/drivers/xen/grant-dma-iommu.c +++ b/drivers/xen/grant-dma-iommu.c @@ -16,8 +16,15 @@ struct grant_dma_iommu_device { struct iommu_device iommu; }; -/* Nothing is really needed here */ -static const struct iommu_ops grant_dma_iommu_ops; +static struct iommu_device *grant_dma_iommu_probe_device(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + +/* Nothing is really needed here except a dummy probe_device callback */ +static const struct iommu_ops grant_dma_iommu_ops = { + .probe_device = grant_dma_iommu_probe_device, +}; static const struct of_device_id grant_dma_iommu_of_match[] = { { .compatible = "xen,grant-dma" }, diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index cd07e3fed0fa..fcc819131572 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -64,14 +64,13 @@ static uint64_t get_callback_via(struct pci_dev *pdev) static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) { - xen_hvm_evtchn_do_upcall(); - return IRQ_HANDLED; + return xen_hvm_evtchn_do_upcall(); } static int xen_allocate_irq(struct pci_dev *pdev) { return request_irq(pdev->irq, do_hvm_evtchn_intr, - IRQF_NOBALANCING | IRQF_TRIGGER_RISING, + IRQF_NOBALANCING | IRQF_SHARED, "xen-platform-pci", pdev); } diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 0d4f8f4f4948..7a464a541d91 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -173,6 +173,8 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map) RING_IDX cons, prod, size, array_size; int ret; + atomic_set(&map->write, 0); + cons = intf->out_cons; prod = intf->out_prod; /* read the indexes before dealing with the data */ @@ -197,7 +199,6 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map) iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 2, size); } - atomic_set(&map->write, 0); ret = inet_sendmsg(map->sock, &msg, size); if (ret == -EAGAIN) { atomic_inc(&map->write); diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index fcb0792f090e..2f880374b463 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -31,7 +31,10 @@ struct hyp_sysfs_attr { struct attribute attr; ssize_t (*show)(struct hyp_sysfs_attr *, char *); ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); - void *hyp_attr_data; + union { + void *hyp_attr_data; + unsigned long hyp_attr_value; + }; }; static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) @@ -399,6 +402,60 @@ static int __init xen_sysfs_properties_init(void) return sysfs_create_group(hypervisor_kobj, &xen_properties_group); } +#define FLAG_UNAME "unknown" +#define FLAG_UNAME_FMT FLAG_UNAME "%02u" +#define FLAG_UNAME_MAX sizeof(FLAG_UNAME "XX") +#define FLAG_COUNT (sizeof(xen_start_flags) * BITS_PER_BYTE) +static_assert(sizeof(xen_start_flags) <= + sizeof_field(struct hyp_sysfs_attr, hyp_attr_value)); + +static ssize_t flag_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + char *p = buffer; + + *p++ = '0' + ((xen_start_flags & attr->hyp_attr_value) != 0); + *p++ = '\n'; + return p - buffer; +} + +#define FLAG_NODE(flag, node) \ + [ilog2(flag)] = { \ + .attr = { .name = #node, .mode = 0444 },\ + .show = flag_show, \ + .hyp_attr_value = flag \ + } + +/* + * Add new, known flags here. No other changes are required, but + * note that each known flag wastes one entry in flag_unames[]. + * The code/complexity machinations to avoid this isn't worth it + * for a few entries, but keep it in mind. + */ +static struct hyp_sysfs_attr flag_attrs[FLAG_COUNT] = { + FLAG_NODE(SIF_PRIVILEGED, privileged), + FLAG_NODE(SIF_INITDOMAIN, initdomain) +}; +static struct attribute_group xen_flags_group = { + .name = "start_flags", + .attrs = (struct attribute *[FLAG_COUNT + 1]){} +}; +static char flag_unames[FLAG_COUNT][FLAG_UNAME_MAX]; + +static int __init xen_sysfs_flags_init(void) +{ + for (unsigned fnum = 0; fnum != FLAG_COUNT; fnum++) { + if (likely(flag_attrs[fnum].attr.name == NULL)) { + sprintf(flag_unames[fnum], FLAG_UNAME_FMT, fnum); + flag_attrs[fnum].attr.name = flag_unames[fnum]; + flag_attrs[fnum].attr.mode = 0444; + flag_attrs[fnum].show = flag_show; + flag_attrs[fnum].hyp_attr_value = 1 << fnum; + } + xen_flags_group.attrs[fnum] = &flag_attrs[fnum].attr; + } + return sysfs_create_group(hypervisor_kobj, &xen_flags_group); +} + #ifdef CONFIG_XEN_HAVE_VPMU struct pmu_mode { const char *name; @@ -539,18 +596,22 @@ static int __init hyper_sysfs_init(void) ret = xen_sysfs_properties_init(); if (ret) goto prop_out; + ret = xen_sysfs_flags_init(); + if (ret) + goto flags_out; #ifdef CONFIG_XEN_HAVE_VPMU if (xen_initial_domain()) { ret = xen_sysfs_pmu_init(); if (ret) { - sysfs_remove_group(hypervisor_kobj, - &xen_properties_group); - goto prop_out; + sysfs_remove_group(hypervisor_kobj, &xen_flags_group); + goto flags_out; } } #endif goto out; +flags_out: + sysfs_remove_group(hypervisor_kobj, &xen_properties_group); prop_out: sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); uuid_out: @@ -594,7 +655,7 @@ static const struct sysfs_ops hyp_sysfs_ops = { .store = hyp_sysfs_store, }; -static struct kobj_type hyp_sysfs_kobj_type = { +static const struct kobj_type hyp_sysfs_kobj_type = { .sysfs_ops = &hyp_sysfs_ops, }; diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c index 5c0b5cb5b419..b52e0fa595a9 100644 --- a/drivers/xen/xen-front-pgdir-shbuf.c +++ b/drivers/xen/xen-front-pgdir-shbuf.c @@ -30,7 +30,7 @@ struct xen_page_directory { grant_ref_t gref_dir_next_page; #define XEN_GREF_LIST_END 0 - grant_ref_t gref[1]; /* Variable length */ + grant_ref_t gref[]; /* Variable length */ }; /** diff --git a/include/xen/events.h b/include/xen/events.h index 344081e71584..44c2855c76d1 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -107,7 +107,7 @@ evtchn_port_t evtchn_from_irq(unsigned irq); int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); -void xen_hvm_evtchn_do_upcall(void); +int xen_hvm_evtchn_do_upcall(void); /* Bind a pirq for a physical interrupt to an irq. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b118f588cd2b..94b518c12f9a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -186,6 +186,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "snp_abort", "stop_this_cpu", "usercopy_abort", + "xen_cpu_bringup_again", "xen_start_kernel", };