perf/core: Fix race between close() and fork()
Syzcaller reported the following Use-after-Free bug: close() clone() copy_process() perf_event_init_task() perf_event_init_context() mutex_lock(parent_ctx->mutex) inherit_task_group() inherit_group() inherit_event() mutex_lock(event->child_mutex) // expose event on child list list_add_tail() mutex_unlock(event->child_mutex) mutex_unlock(parent_ctx->mutex) ... goto bad_fork_* bad_fork_cleanup_perf: perf_event_free_task() perf_release() perf_event_release_kernel() list_for_each_entry() mutex_lock(ctx->mutex) mutex_lock(event->child_mutex) // event is from the failing inherit // on the other CPU perf_remove_from_context() list_move() mutex_unlock(event->child_mutex) mutex_unlock(ctx->mutex) mutex_lock(ctx->mutex) list_for_each_entry_safe() // event already stolen mutex_unlock(ctx->mutex) delayed_free_task() free_task() list_for_each_entry_safe() list_del() free_event() _free_event() // and so event->hw.target // is the already freed failed clone() if (event->hw.target) put_task_struct(event->hw.target) // WHOOPSIE, already quite dead Which puts the lie to the comment on perf_event_free_task(): 'unexposed, unused context' not so much. Which is a 'fun' confluence of fail; copy_process() doing an unconditional free_task() and not respecting refcounts, and perf having creative locking. In particular: 82d94856fa
("perf/core: Fix lock inversion between perf,trace,cpuhp") seems to have overlooked this 'fun' parade. Solve it by using the fact that detached events still have a reference count on their (previous) context. With this, perf_event_free_task() can detect when events have escaped and wait for their destruction. Debugged-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Reported-by: syzbot+a24c397a29ad22d86c98@syzkaller.appspotmail.com Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Mark Rutland <mark.rutland@arm.com> Cc: <stable@vger.kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Fixes: 82d94856fa
("perf/core: Fix lock inversion between perf,trace,cpuhp") Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
committed by
Ingo Molnar
parent
e5eb08ac81
commit
1cf8dfe8a6
@ -4465,12 +4465,20 @@ static void _free_event(struct perf_event *event)
|
|||||||
if (event->destroy)
|
if (event->destroy)
|
||||||
event->destroy(event);
|
event->destroy(event);
|
||||||
|
|
||||||
if (event->ctx)
|
/*
|
||||||
put_ctx(event->ctx);
|
* Must be after ->destroy(), due to uprobe_perf_close() using
|
||||||
|
* hw.target.
|
||||||
|
*/
|
||||||
if (event->hw.target)
|
if (event->hw.target)
|
||||||
put_task_struct(event->hw.target);
|
put_task_struct(event->hw.target);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* perf_event_free_task() relies on put_ctx() being 'last', in particular
|
||||||
|
* all task references must be cleaned up.
|
||||||
|
*/
|
||||||
|
if (event->ctx)
|
||||||
|
put_ctx(event->ctx);
|
||||||
|
|
||||||
exclusive_event_destroy(event);
|
exclusive_event_destroy(event);
|
||||||
module_put(event->pmu->module);
|
module_put(event->pmu->module);
|
||||||
|
|
||||||
@ -4650,8 +4658,17 @@ again:
|
|||||||
mutex_unlock(&event->child_mutex);
|
mutex_unlock(&event->child_mutex);
|
||||||
|
|
||||||
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
|
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
|
||||||
|
void *var = &child->ctx->refcount;
|
||||||
|
|
||||||
list_del(&child->child_list);
|
list_del(&child->child_list);
|
||||||
free_event(child);
|
free_event(child);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wake any perf_event_free_task() waiting for this event to be
|
||||||
|
* freed.
|
||||||
|
*/
|
||||||
|
smp_mb(); /* pairs with wait_var_event() */
|
||||||
|
wake_up_var(var);
|
||||||
}
|
}
|
||||||
|
|
||||||
no_ctx:
|
no_ctx:
|
||||||
@ -11527,11 +11544,11 @@ static void perf_free_event(struct perf_event *event,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free an unexposed, unused context as created by inheritance by
|
* Free a context as created by inheritance by perf_event_init_task() below,
|
||||||
* perf_event_init_task below, used by fork() in case of fail.
|
* used by fork() in case of fail.
|
||||||
*
|
*
|
||||||
* Not all locks are strictly required, but take them anyway to be nice and
|
* Even though the task has never lived, the context and events have been
|
||||||
* help out with the lockdep assertions.
|
* exposed through the child_list, so we must take care tearing it all down.
|
||||||
*/
|
*/
|
||||||
void perf_event_free_task(struct task_struct *task)
|
void perf_event_free_task(struct task_struct *task)
|
||||||
{
|
{
|
||||||
@ -11561,7 +11578,23 @@ void perf_event_free_task(struct task_struct *task)
|
|||||||
perf_free_event(event, ctx);
|
perf_free_event(event, ctx);
|
||||||
|
|
||||||
mutex_unlock(&ctx->mutex);
|
mutex_unlock(&ctx->mutex);
|
||||||
put_ctx(ctx);
|
|
||||||
|
/*
|
||||||
|
* perf_event_release_kernel() could've stolen some of our
|
||||||
|
* child events and still have them on its free_list. In that
|
||||||
|
* case we must wait for these events to have been freed (in
|
||||||
|
* particular all their references to this task must've been
|
||||||
|
* dropped).
|
||||||
|
*
|
||||||
|
* Without this copy_process() will unconditionally free this
|
||||||
|
* task (irrespective of its reference count) and
|
||||||
|
* _free_event()'s put_task_struct(event->hw.target) will be a
|
||||||
|
* use-after-free.
|
||||||
|
*
|
||||||
|
* Wait for all events to drop their context reference.
|
||||||
|
*/
|
||||||
|
wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
|
||||||
|
put_ctx(ctx); /* must be last */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user