sched: Fix yet more sched_fork() races
Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") fixed a fork race vs cgroup, it opened up a race vs syscalls by not placing the task on the runqueue before it gets exposed through the pidhash.

Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is trying to fix a single instance of this; instead, fix the whole class of issues, effectively reverting that commit.

Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Tadeusz Struk <tadeusz.struk@linaro.org>
Tested-by: Zhang Qiao <zhangqiao22@huawei.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lkml.kernel.org/r/YgoeCbwj5mbCR0qA@hirez.programming.kicks-ass.net
This commit is contained in:
parent
754e0b0e35
commit
b1e8206582
@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
|
|||||||
extern void init_idle(struct task_struct *idle, int cpu);
|
extern void init_idle(struct task_struct *idle, int cpu);
|
||||||
|
|
||||||
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
|
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
|
||||||
extern void sched_post_fork(struct task_struct *p,
|
extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
|
||||||
struct kernel_clone_args *kargs);
|
extern void sched_post_fork(struct task_struct *p);
|
||||||
extern void sched_dead(struct task_struct *p);
|
extern void sched_dead(struct task_struct *p);
|
||||||
|
|
||||||
void __noreturn do_task_dead(void);
|
void __noreturn do_task_dead(void);
|
||||||
|
@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process(
|
|||||||
if (retval)
|
if (retval)
|
||||||
goto bad_fork_put_pidfd;
|
goto bad_fork_put_pidfd;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now that the cgroups are pinned, re-clone the parent cgroup and put
|
||||||
|
* the new task on the correct runqueue. All this *before* the task
|
||||||
|
* becomes visible.
|
||||||
|
*
|
||||||
|
* This isn't part of ->can_fork() because while the re-cloning is
|
||||||
|
* cgroup specific, it unconditionally needs to place the task on a
|
||||||
|
* runqueue.
|
||||||
|
*/
|
||||||
|
sched_cgroup_fork(p, args);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* From this point on we must avoid any synchronous user-space
|
* From this point on we must avoid any synchronous user-space
|
||||||
* communication until we take the tasklist-lock. In particular, we do
|
* communication until we take the tasklist-lock. In particular, we do
|
||||||
@ -2376,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process(
|
|||||||
write_unlock_irq(&tasklist_lock);
|
write_unlock_irq(&tasklist_lock);
|
||||||
|
|
||||||
proc_fork_connector(p);
|
proc_fork_connector(p);
|
||||||
sched_post_fork(p, args);
|
sched_post_fork(p);
|
||||||
cgroup_post_fork(p, args);
|
cgroup_post_fork(p, args);
|
||||||
perf_event_fork(p);
|
perf_event_fork(p);
|
||||||
|
|
||||||
|
@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void set_load_weight(struct task_struct *p)
|
static void set_load_weight(struct task_struct *p, bool update_load)
|
||||||
{
|
{
|
||||||
bool update_load = !(READ_ONCE(p->__state) & TASK_NEW);
|
|
||||||
int prio = p->static_prio - MAX_RT_PRIO;
|
int prio = p->static_prio - MAX_RT_PRIO;
|
||||||
struct load_weight *load = &p->se.load;
|
struct load_weight *load = &p->se.load;
|
||||||
|
|
||||||
@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||||||
p->static_prio = NICE_TO_PRIO(0);
|
p->static_prio = NICE_TO_PRIO(0);
|
||||||
|
|
||||||
p->prio = p->normal_prio = p->static_prio;
|
p->prio = p->normal_prio = p->static_prio;
|
||||||
set_load_weight(p);
|
set_load_weight(p, false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't need the reset flag anymore after the fork. It has
|
* We don't need the reset flag anymore after the fork. It has
|
||||||
@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||||||
|
|
||||||
init_entity_runnable_average(&p->se);
|
init_entity_runnable_average(&p->se);
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_INFO
|
#ifdef CONFIG_SCHED_INFO
|
||||||
if (likely(sched_info_on()))
|
if (likely(sched_info_on()))
|
||||||
memset(&p->sched_info, 0, sizeof(p->sched_info));
|
memset(&p->sched_info, 0, sizeof(p->sched_info));
|
||||||
@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
#ifdef CONFIG_CGROUP_SCHED
|
|
||||||
struct task_group *tg;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Because we're not yet on the pid-hash, p->pi_lock isn't strictly
|
||||||
|
* required yet, but lockdep gets upset if rules are violated.
|
||||||
|
*/
|
||||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||||
#ifdef CONFIG_CGROUP_SCHED
|
#ifdef CONFIG_CGROUP_SCHED
|
||||||
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
|
if (1) {
|
||||||
struct task_group, css);
|
struct task_group *tg;
|
||||||
p->sched_task_group = autogroup_task_group(p, tg);
|
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
|
||||||
|
struct task_group, css);
|
||||||
|
tg = autogroup_task_group(p, tg);
|
||||||
|
p->sched_task_group = tg;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
rseq_migrate(p);
|
rseq_migrate(p);
|
||||||
/*
|
/*
|
||||||
@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
|||||||
if (p->sched_class->task_fork)
|
if (p->sched_class->task_fork)
|
||||||
p->sched_class->task_fork(p);
|
p->sched_class->task_fork(p);
|
||||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sched_post_fork(struct task_struct *p)
|
||||||
|
{
|
||||||
uclamp_post_fork(p);
|
uclamp_post_fork(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
|||||||
put_prev_task(rq, p);
|
put_prev_task(rq, p);
|
||||||
|
|
||||||
p->static_prio = NICE_TO_PRIO(nice);
|
p->static_prio = NICE_TO_PRIO(nice);
|
||||||
set_load_weight(p);
|
set_load_weight(p, true);
|
||||||
old_prio = p->prio;
|
old_prio = p->prio;
|
||||||
p->prio = effective_prio(p);
|
p->prio = effective_prio(p);
|
||||||
|
|
||||||
@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p,
|
|||||||
*/
|
*/
|
||||||
p->rt_priority = attr->sched_priority;
|
p->rt_priority = attr->sched_priority;
|
||||||
p->normal_prio = normal_prio(p);
|
p->normal_prio = normal_prio(p);
|
||||||
set_load_weight(p);
|
set_load_weight(p, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -9446,7 +9454,7 @@ void __init sched_init(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
set_load_weight(&init_task);
|
set_load_weight(&init_task, false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The boot idle thread does lazy MMU switching as well:
|
* The boot idle thread does lazy MMU switching as well:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user