eed0574432
The sched_clock() can be used very early since commit 857baa87b642 ("sched/clock: Enable sched clock early"). In addition, with commit 38669ba205d1 ("x86/xen/time: Output xen sched_clock time from 0"), a kdump kernel in a Xen HVM guest may panic at a very early stage when accessing &__this_cpu_read(xen_vcpu)->time, as shown below: setup_arch() -> init_hypervisor_platform() -> x86_init.hyper.init_platform = xen_hvm_guest_init() -> xen_hvm_init_time_ops() -> xen_clocksource_read() -> src = &__this_cpu_read(xen_vcpu)->time; This is because Xen HVM supports at most MAX_VIRT_CPUS=32 'vcpu_info' embedded inside 'shared_info' during the early stage, until xen_vcpu_setup() is used to allocate/relocate 'vcpu_info' for the boot cpu at an arbitrary address. However, when a Xen HVM guest panics on vcpu >= 32, since xen_vcpu_info_reset(0) would set per_cpu(xen_vcpu, cpu) = NULL when vcpu >= 32, xen_clocksource_read() on vcpu >= 32 would panic. This patch calls xen_hvm_init_time_ops() again later in xen_hvm_smp_prepare_boot_cpu(), after the 'vcpu_info' for the boot vcpu is registered, when the boot vcpu is >= 32. This issue can be reproduced on purpose via the command below on the guest side when kdump/kexec is enabled: "taskset -c 33 echo c > /proc/sysrq-trigger" The bugfix for PVM is not implemented due to the lack of a testing environment. [boris: xen_hvm_init_time_ops() returns on errors instead of jumping to end] Cc: Joe Jin <joe.jin@oracle.com> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Link: https://lore.kernel.org/r/20220302164032.14569-3-dongli.zhang@oracle.com Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
93 lines
2.0 KiB
C
93 lines
2.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/thread_info.h>
|
|
#include <asm/smp.h>
|
|
|
|
#include <xen/events.h>
|
|
|
|
#include "xen-ops.h"
|
|
#include "smp.h"
|
|
|
|
|
|
/*
 * Boot-CPU preparation hook; installed as smp_ops.smp_prepare_boot_cpu by
 * xen_hvm_smp_init().
 *
 * Must run on CPU 0. The sequence of calls matters: the time ops are
 * re-initialised only after the boot CPU's vcpu_info has been set up.
 */
static void __init xen_hvm_smp_prepare_boot_cpu(void)
{
	/* Running this on any CPU other than 0 is a fatal bug. */
	BUG_ON(smp_processor_id());

	native_smp_prepare_boot_cpu();

	/*
	 * The boot CPU gets its vcpu_info here; secondary CPUs get theirs
	 * in xen_cpu_up_prepare_hvm().
	 */
	xen_vcpu_setup(0);

	/*
	 * Run the time-ops initialisation a second time to cover a kernel
	 * that boots on vcpu >= MAX_VIRT_CPUS. See the comments in
	 * xen_hvm_init_time_ops().
	 */
	xen_hvm_init_time_ops();

	/*
	 * The alternatives logic (which patches the lock/unlock paths) runs
	 * before the SMP bring-up code is activated. The spinlocks therefore
	 * have to be set up before the core kernel is patched; otherwise
	 * only modules would be patched, not core code.
	 */
	xen_init_spinlocks();
}
|
|
|
|
/*
 * CPU preparation hook; installed as smp_ops.smp_prepare_cpus by
 * xen_hvm_smp_init().
 *
 * @max_cpus: passed through unchanged to native_smp_prepare_cpus().
 */
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
	int i;

	native_smp_prepare_cpus(max_cpus);

	/*
	 * CPU 0's interrupt and lock initialisation is done only when the
	 * vector callback is available; a failing interrupt init is
	 * reported via WARN_ON() and does not stop the boot.
	 */
	if (xen_have_vector_callback) {
		WARN_ON(xen_smp_intr_init(0));
		xen_init_lock_cpu(0);
	}

	/*
	 * Every possible CPU except CPU 0 starts out with an invalid
	 * vcpu_id, so that cpu-0's id is never used by mistake.
	 */
	for_each_possible_cpu(i) {
		if (i != 0)
			per_cpu(xen_vcpu_id, i) = XEN_VCPU_ID_INVALID;
	}
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU teardown hook; installed as smp_ops.cpu_die by xen_hvm_smp_init().
 *
 * The per-CPU Xen resources (SMP interrupts, lock state, timer) are
 * released only if common_cpu_die() returned 0 for @cpu and the vector
 * callback is in use.
 */
static void xen_hvm_cpu_die(unsigned int cpu)
{
	/* Nothing to release unless common_cpu_die() returned 0. */
	if (common_cpu_die(cpu) != 0)
		return;

	/* The resources below are only torn down with the vector callback. */
	if (!xen_have_vector_callback)
		return;

	xen_smp_intr_free(cpu);
	xen_uninit_lock_cpu(cpu);
	xen_teardown_timer(cpu);
}
#else
/*
 * Without CONFIG_HOTPLUG_CPU this hook must never be called; reaching it
 * is treated as a fatal bug.
 */
static void xen_hvm_cpu_die(unsigned int cpu)
{
	BUG();
}
#endif
|
|
|
|
/*
 * Install the Xen HVM callbacks into smp_ops.
 *
 * The preparation, cpus-done and cpu-die hooks are always installed. The
 * Xen IPI senders are installed only when the vector callback is
 * available; without it, nopvspin is set instead (when
 * CONFIG_PARAVIRT_SPINLOCKS is enabled) and the IPI hooks are left as
 * they were.
 */
void __init xen_hvm_smp_init(void)
{
	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
	smp_ops.smp_cpus_done = xen_smp_cpus_done;
	smp_ops.cpu_die = xen_hvm_cpu_die;

	if (xen_have_vector_callback) {
		/* Vector callback present: route IPIs through Xen. */
		smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
		smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
		smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
	} else {
		/* No vector callback: set nopvspin and keep the IPI hooks. */
#ifdef CONFIG_PARAVIRT_SPINLOCKS
		nopvspin = true;
#endif
	}
}
|