diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a5304ae8c654..393fb13733b0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -786,6 +786,7 @@ struct perf_event {
 	struct irq_work			pending_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
+	struct rcuwait			pending_work_wait;
 
 	atomic_t			event_limit;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 554d8ba93b8a..037a2d266674 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2288,7 +2288,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
 	if (state != PERF_EVENT_STATE_OFF &&
 	    !event->pending_work &&
 	    !task_work_add(current, &event->pending_task, TWA_RESUME)) {
-		WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 		event->pending_work = 1;
 	} else {
 		local_dec(&event->ctx->nr_pending);
@@ -5203,9 +5202,35 @@ static bool exclusive_event_installable(struct perf_event *event,
 static void perf_addr_filters_splice(struct perf_event *event,
 				       struct list_head *head);
 
+static void perf_pending_task_sync(struct perf_event *event)
+{
+	struct callback_head *head = &event->pending_task;
+
+	if (!event->pending_work)
+		return;
+	/*
+	 * If the task is queued to the current task's queue, we
+	 * obviously can't wait for it to complete. Simply cancel it.
+	 */
+	if (task_work_cancel(current, head)) {
+		event->pending_work = 0;
+		local_dec(&event->ctx->nr_pending);
+		return;
+	}
+
+	/*
+	 * All accesses related to the event are within the same
+	 * non-preemptible section in perf_pending_task(). The RCU
+	 * grace period before the event is freed will make sure all
+	 * those accesses are complete by then.
+	 */
+	rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
+}
+
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending_irq);
+	perf_pending_task_sync(event);
 
 	unaccount_event(event);
 
@@ -6830,24 +6855,28 @@ static void perf_pending_task(struct callback_head *head)
 	struct perf_event *event = container_of(head, struct perf_event, pending_task);
 	int rctx;
 
+	/*
+	 * All accesses to the event must belong to the same implicit RCU read-side
+	 * critical section as the ->pending_work reset. See comment in
+	 * perf_pending_task_sync().
+	 */
+	preempt_disable_notrace();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
 	 */
-	preempt_disable_notrace();
 	rctx = perf_swevent_get_recursion_context();
 
 	if (event->pending_work) {
 		event->pending_work = 0;
 		perf_sigtrap(event);
 		local_dec(&event->ctx->nr_pending);
+		rcuwait_wake_up(&event->pending_work_wait);
 	}
 
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
 	preempt_enable_notrace();
-
-	put_event(event);
 }
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
@@ -11961,6 +11990,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending_irq, perf_pending_irq);
 	init_task_work(&event->pending_task, perf_pending_task);
+	rcuwait_init(&event->pending_work_wait);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
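
The core of this patch is the flag-plus-rcuwait handshake: instead of pinning the event with a reference count from the task_work, the callback clears ->pending_work and wakes any waiter from inside a preempt-disabled region, and the freeing path sleeps until the flag is observed clear. Below is a minimal standalone sketch of that pattern, not part of the patch; the names (pending_obj, pending_obj_run, pending_obj_sync) are hypothetical, and the real code additionally handles cancellation via task_work_cancel() when the work is queued on the current task.

```c
#include <linux/rcuwait.h>
#include <linux/preempt.h>
#include <linux/sched.h>

struct pending_obj {
	unsigned int	pending;	/* mirrors event->pending_work */
	struct rcuwait	wait;		/* mirrors event->pending_work_wait */
};

/* Callback side, analogous to perf_pending_task(). */
static void pending_obj_run(struct pending_obj *obj)
{
	/*
	 * The preempt-disabled region is an implicit RCU read-side
	 * critical section, so all accesses to obj here are complete
	 * before the RCU grace period that precedes freeing it.
	 */
	preempt_disable_notrace();
	if (obj->pending) {
		obj->pending = 0;
		/* ... perform the deferred work on obj ... */
		rcuwait_wake_up(&obj->wait);
	}
	preempt_enable_notrace();
}

/* Teardown side, analogous to perf_pending_task_sync(). */
static void pending_obj_sync(struct pending_obj *obj)
{
	if (!obj->pending)
		return;
	/* Sleep until the callback has cleared the flag. */
	rcuwait_wait_event(&obj->wait, !obj->pending, TASK_UNINTERRUPTIBLE);
}
```

rcuwait fits here because its wake side only requires an RCU read-side critical section rather than a reference on the waiter's object, which is exactly what the non-preemptible section in perf_pending_task() provides; the event itself is only freed after an RCU grace period, so the waker can never touch freed memory. That is what makes the old put_event()/refcount dance in the task_work path unnecessary.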