cpu/hotplug: Remove the 'cpu' member of cpuhp_cpu_state

Currently the 'cpu' member of struct cpuhp_cpu_state is set in
cpuhp_create(), which is too late because it is already used earlier in
_cpu_up().

If kzalloc_node() in __smpboot_create_thread() fails, the rollback is
done with st->cpu == 0, so CPU0 is erroneously marked as dying. This causes
the scheduler to get mightily confused and throw its toys out of the pram.

However, the CPU number is already directly available to the functions that
used st->cpu, so simply remove the 'cpu' member and pass the CPU number
explicitly, avoiding the problem in the first place.

Fixes: 2ea46c6fc945 ("cpumask/hotplug: Fix cpu_dying() state tracking")
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220411152233.474129-2-steven.price@arm.com
This commit is contained in:
Steven Price 2022-04-11 16:22:32 +01:00 committed by Thomas Gleixner
parent 9e949a3886
commit b7ba6d8dc3

View File

@ -71,7 +71,6 @@ struct cpuhp_cpu_state {
bool rollback;
bool single;
bool bringup;
int cpu;
struct hlist_node *node;
struct hlist_node *last;
enum cpuhp_state cb_state;
@ -475,7 +474,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif
static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
bool bringup = st->state < target;
@ -486,14 +485,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
st->target = target;
st->single = false;
st->bringup = bringup;
if (cpu_dying(st->cpu) != !bringup)
set_cpu_dying(st->cpu, !bringup);
if (cpu_dying(cpu) != !bringup)
set_cpu_dying(cpu, !bringup);
return prev_state;
}
static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state prev_state)
{
bool bringup = !st->bringup;
@ -520,8 +520,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
}
st->bringup = bringup;
if (cpu_dying(st->cpu) != !bringup)
set_cpu_dying(st->cpu, !bringup);
if (cpu_dying(cpu) != !bringup)
set_cpu_dying(cpu, !bringup);
}
/* Regular hotplug invocation of the AP hotplug thread */
@ -541,15 +541,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
wait_for_ap_thread(st, st->bringup);
}
static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state;
int ret;
prev_state = cpuhp_set_state(st, target);
prev_state = cpuhp_set_state(cpu, st, target);
__cpuhp_kick_ap(st);
if ((ret = st->result)) {
cpuhp_reset_state(st, prev_state);
cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
}
@ -581,7 +582,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
return cpuhp_kick_ap(st, st->target);
return cpuhp_kick_ap(cpu, st, st->target);
}
static int bringup_cpu(unsigned int cpu)
@ -704,7 +705,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret, cpu, cpuhp_get_step(st->state)->name,
st->state);
cpuhp_reset_state(st, prev_state);
cpuhp_reset_state(cpu, st, prev_state);
if (can_rollback_cpu(st))
WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
prev_state));
@ -721,7 +722,6 @@ static void cpuhp_create(unsigned int cpu)
init_completion(&st->done_up);
init_completion(&st->done_down);
st->cpu = cpu;
}
static int cpuhp_should_run(unsigned int cpu)
@ -875,7 +875,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
cpuhp_lock_release(true);
trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
ret = cpuhp_kick_ap(st, st->target);
ret = cpuhp_kick_ap(cpu, st, st->target);
trace_cpuhp_exit(cpu, st->state, prev_state, ret);
return ret;
@ -1107,7 +1107,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret, cpu, cpuhp_get_step(st->state)->name,
st->state);
cpuhp_reset_state(st, prev_state);
cpuhp_reset_state(cpu, st, prev_state);
if (st->state < prev_state)
WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
@ -1134,7 +1134,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
cpuhp_tasks_frozen = tasks_frozen;
prev_state = cpuhp_set_state(st, target);
prev_state = cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread.
@ -1165,7 +1165,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
ret = cpuhp_down_callbacks(cpu, st, target);
if (ret && st->state < prev_state) {
if (st->state == CPUHP_TEARDOWN_CPU) {
cpuhp_reset_state(st, prev_state);
cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
} else {
WARN(1, "DEAD callback error for CPU%d", cpu);
@ -1352,7 +1352,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
cpuhp_tasks_frozen = tasks_frozen;
cpuhp_set_state(st, target);
cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread once more.