BUG/MEDIUM: threads/mworker: fix a race on startup
Marc Fournier reported an interesting case when using threads with the master-worker mode : sometimes, a listener would have its FD closed during startup. Sometimes it could even be health checks seeing this. What happens is that after the threads are created, and the pollers enabled on each threads, the master-worker pipe is registered, and at the same time a close() is performed on the write side of this pipe since the children must not use it. But since this is replicated in every thread, what happens is that the first thread closes the pipe, thus releases the FD, and the next thread starting a listener in parallel gets this FD reassigned. Then another thread closes the FD again, which this time corresponds to the listener. It can also happen with the health check sockets if they're started early enough. This patch splits the mworker_pipe_register() function in two, so that the close() of the write side of the FD is performed very early after the fork() and long before threads are created (we don't need to delay it anyway). Only the pipe registration is done in the threaded code since it is important that the pollers are properly allocated for this. The mworker_pipe_register() function now takes care of registering the pipe only once, and this is guaranteed by a new surrounding lock. The call to protocol_enable_all() looks fragile in theory since it scans the list of proxies and their listeners, though in practice all threads scan the same list and take the same locks for each listener so it's not possible that any of them escapes the process and finishes before all listeners are started. And the operation is idempotent. This fix must be backported to 1.8. Thanks to Marc for providing very detailed traces clearly showing the problem.
This commit is contained in:
parent
7a2364d474
commit
1605c7ae61
@ -237,6 +237,7 @@ enum lock_label {
|
||||
PID_LIST_LOCK,
|
||||
EMAIL_ALERTS_LOCK,
|
||||
PIPES_LOCK,
|
||||
START_LOCK,
|
||||
LOCK_LABELS
|
||||
};
|
||||
struct lock_stat {
|
||||
|
@ -2339,9 +2339,12 @@ void mworker_pipe_handler(int fd)
|
||||
return;
|
||||
}
|
||||
|
||||
void mworker_pipe_register(int pipefd[2])
|
||||
/* should only be called once per process */
|
||||
void mworker_pipe_register()
|
||||
{
|
||||
close(mworker_pipe[1]); /* close the write end of the master pipe in the children */
|
||||
if (fdtab[mworker_pipe[0]].owner)
|
||||
/* already initialized */
|
||||
return;
|
||||
|
||||
fcntl(mworker_pipe[0], F_SETFL, O_NONBLOCK);
|
||||
fdtab[mworker_pipe[0]].owner = mworker_pipe;
|
||||
@ -2419,6 +2422,7 @@ static void *run_thread_poll_loop(void *data)
|
||||
{
|
||||
struct per_thread_init_fct *ptif;
|
||||
struct per_thread_deinit_fct *ptdf;
|
||||
static __maybe_unused HA_SPINLOCK_T start_lock;
|
||||
|
||||
tid = *((unsigned int *)data);
|
||||
tid_bit = (1UL << tid);
|
||||
@ -2431,8 +2435,11 @@ static void *run_thread_poll_loop(void *data)
|
||||
}
|
||||
}
|
||||
|
||||
if (global.mode & MODE_MWORKER)
|
||||
mworker_pipe_register(mworker_pipe);
|
||||
if (global.mode & MODE_MWORKER) {
|
||||
HA_SPIN_LOCK(START_LOCK, &start_lock);
|
||||
mworker_pipe_register();
|
||||
HA_SPIN_UNLOCK(START_LOCK, &start_lock);
|
||||
}
|
||||
|
||||
protocol_enable_all();
|
||||
THREAD_SYNC_ENABLE();
|
||||
@ -2861,6 +2868,10 @@ int main(int argc, char **argv)
|
||||
/* child must never use the atexit function */
|
||||
atexit_flag = 0;
|
||||
|
||||
/* close the write end of the master pipe in the children */
|
||||
if (global.mode & MODE_MWORKER)
|
||||
close(mworker_pipe[1]);
|
||||
|
||||
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
|
||||
devnullfd = open("/dev/null", O_RDWR, 0);
|
||||
if (devnullfd < 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user