x86/smpboot: Unbreak CPU0 hotplug
A hang on CPU0 onlining after a preceding offlining is observed. The trace
shows that CPU0 is stuck in check_tsc_sync_target(), waiting for the source
CPU to run check_tsc_sync_source(), but this never happens. The source CPU,
in turn, is stuck on synchronize_sched(), which is called from
native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler().

So it's a classic ABBA deadlock, caused by the use of synchronize_sched() in
unregister_nmi_handler().

Fix the bug by moving unregister_nmi_handler() from do_boot_cpu() to
native_cpu_up(), after CPU onlining is done.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170803105818.9934-1-vkuznets@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 10e66760fa (parent e93c17301a)
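For reference, the two call chains from the hang can be laid out side by side.
The function names below are taken from the commit message; the note on why
synchronize_sched() never completes is an interpretation added here, not part
of the original trace:

    CPU0 (being onlined)                  source CPU (doing the onlining)
    --------------------                  -------------------------------
    check_tsc_sync_target()               native_cpu_up()
      spins, waiting for the source         do_boot_cpu()
      CPU to reach                            unregister_nmi_handler()
      check_tsc_sync_source()                   synchronize_sched()
                                                  waits for a grace period,
                                                  which cannot end while CPU0
                                                  keeps spinning on the left

Each side waits on the other, which is the ABBA deadlock the commit message
describes.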
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+                       int *cpu0_nmi_registered)
 {
         volatile u32 *trampoline_status =
                 (volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
         unsigned long start_ip = real_mode_header->trampoline_start;
 
         unsigned long boot_error = 0;
-        int cpu0_nmi_registered = 0;
         unsigned long timeout;
 
         idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
                 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
         else
                 boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
-                                                     &cpu0_nmi_registered);
+                                                     cpu0_nmi_registered);
 
         if (!boot_error) {
                 /*
@@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
                  */
                 smpboot_restore_warm_reset_vector();
         }
-        /*
-         * Clean up the nmi handler. Do this after the callin and callout sync
-         * to avoid impact of possible long unregister time.
-         */
-        if (cpu0_nmi_registered)
-                unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
 
         return boot_error;
 }
@@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
         int apicid = apic->cpu_present_to_apicid(cpu);
+        int cpu0_nmi_registered = 0;
         unsigned long flags;
-        int err;
+        int err, ret = 0;
 
         WARN_ON(irqs_disabled());
 
@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
         common_cpu_up(cpu, tidle);
 
-        err = do_boot_cpu(apicid, cpu, tidle);
+        err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
         if (err) {
                 pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-                return -EIO;
+                ret = -EIO;
+                goto unreg_nmi;
         }
 
         /*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
                 touch_nmi_watchdog();
         }
 
-        return 0;
+unreg_nmi:
+        /*
+         * Clean up the nmi handler. Do this after the callin and callout sync
+         * to avoid impact of possible long unregister time.
+         */
+        if (cpu0_nmi_registered)
+                unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
+        return ret;
 }
 
 /**
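Condensed from the hunks above, the resulting native_cpu_up() funnels both the
failure path and the normal return through the NMI-handler cleanup, which now
runs only after the TSC sync and the online wait are finished. This is a
sketch of the flow with elided code marked as comments, not a verbatim copy of
the file:

    int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
    {
            int apicid = apic->cpu_present_to_apicid(cpu);
            int cpu0_nmi_registered = 0;
            int err, ret = 0;

            /* ... pre-flight checks and common_cpu_up(cpu, tidle) ... */

            err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
            if (err) {
                    pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
                    ret = -EIO;
                    goto unreg_nmi;
            }

            /* ... TSC sync with the new CPU, then wait for it to come online ... */

    unreg_nmi:
            /*
             * Clean up the nmi handler only here, after the callin and
             * callout sync, so synchronize_sched() inside
             * unregister_nmi_handler() runs once the new CPU is fully
             * online and the ABBA deadlock can no longer form.
             */
            if (cpu0_nmi_registered)
                    unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

            return ret;
    }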