From 1fe7ec237a7036d76764ef1981de6b3000b2cfd3 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 15 Jun 2017 11:48:24 +0200 Subject: [PATCH] tevent_threads: Fix a rundown race introduced with 1828011317b The race is easily reproduced by adding a poll(NULL,0,10) in between the two pthread_mutex_unlock calls in _tevent_threaded_schedule_immediate. Before 1828011317b, the main thread was signalled only after the helper had already unlocked event_ctx_mutex. Full explaination follows: ----------------------------------------------------------------- Inside _tevent_threaded_schedule_immediate() we have: 476 ret = pthread_mutex_unlock(&ev->scheduled_mutex); 477 if (ret != 0) { 478 abort(); 479 } HERE!!!! 481 ret = pthread_mutex_unlock(&tctx->event_ctx_mutex); 482 if (ret != 0) { 483 abort(); 484 } At the HERE!!! point, what happens is tevent_common_threaded_activate_immediate(), which is blocked on ev->scheduled_mutex, get released and does: 514 while (ev->scheduled_immediates != NULL) { 515 struct tevent_immediate *im = ev->scheduled_immediates; 516 DLIST_REMOVE(ev->scheduled_immediates, im); 517 DLIST_ADD_END(ev->immediate_events, im); 518 } - making an immediate event ready to be scheduled. This then returns into epoll_event_loop_once(), which then calls: 910 if (ev->immediate_events && 911 tevent_common_loop_immediate(ev)) { 912 return 0; 913 } which causes the immediate event to fire. This immediate event is the pthread job terminate event, which was previously set up in pthreadpool_tevent_job_signal() by: 198 if (state->tctx != NULL) { 199 /* with HAVE_PTHREAD */ 200 tevent_threaded_schedule_immediate(state->tctx, state->im, 201 pthreadpool_tevent_job_done, 202 state); So we now call pthreadpool_tevent_job_done() - which does: 225 TALLOC_FREE(state->tctx); calling tevent_threaded_context_destructor(): 384 ret = pthread_mutex_destroy(&tctx->event_ctx_mutex); <---------------- BOOM returns an error ! 385 if (ret != 0) { 386 abort(); 387 } as we haven't gotten to line 481 above (the line after HERE!!!!) so the tctx->event_ctx_mutex is still locked when we try to destroy it. So doing an additional: ret = pthread_mutex_lock(&tctx->event_ctx_mutex); ret = pthread_mutex_unlock(&tctx->event_ctx_mutex); (error checking elided) forces tevent_threaded_context_destructor() to wait until tctx->event_ctx_mutex is unlocked before it locks/unlocks and then is guaranteed safe to destroy. Signed-off-by: Volker Lendecke Reviewed-by: Jeremy Allison Reviewed-by: Stefan Metzmacher --- lib/tevent/tevent_threads.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lib/tevent/tevent_threads.c b/lib/tevent/tevent_threads.c index 8ecda027c33..4d1a8805181 100644 --- a/lib/tevent/tevent_threads.c +++ b/lib/tevent/tevent_threads.c @@ -381,6 +381,23 @@ static int tevent_threaded_context_destructor( DLIST_REMOVE(tctx->event_ctx->threaded_contexts, tctx); } + /* + * We have to coordinate with _tevent_threaded_schedule_immediate's + * unlock of the event_ctx_mutex. We're in the main thread here, + * and we can be scheduled before the helper thread finalizes its + * call _tevent_threaded_schedule_immediate. This means we would + * pthreadpool_destroy a locked mutex, which is illegal. + */ + ret = pthread_mutex_lock(&tctx->event_ctx_mutex); + if (ret != 0) { + abort(); + } + + ret = pthread_mutex_unlock(&tctx->event_ctx_mutex); + if (ret != 0) { + abort(); + } + ret = pthread_mutex_destroy(&tctx->event_ctx_mutex); if (ret != 0) { abort();