mirror of
https://github.com/samba-team/samba.git
synced 2025-03-08 04:58:40 +03:00
proper waitpid() fix.
Remove all waitpid() calls and use the event system to trap SIGCHLD. (This used to be ctdb commit 77458b2b6b51b2970c12b0e5b097088d3fb9d358)
This commit is contained in:
parent
0fba2e36b1
commit
334db8ccba
@ -628,6 +628,27 @@ static int unlink_destructor(const char *name)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sig_child_handler(struct event_context *ev,
|
||||
struct signal_event *se, int signum, int count,
|
||||
void *dont_care,
|
||||
void *private_data)
|
||||
{
|
||||
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
||||
int status;
|
||||
pid_t pid = -1;
|
||||
|
||||
while (pid != 0) {
|
||||
pid = waitpid(-1, &status, WNOHANG);
|
||||
if (pid == -1) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
|
||||
return;
|
||||
}
|
||||
if (pid > 0) {
|
||||
DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d\n", (int)pid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
start the protocol going as a daemon
|
||||
*/
|
||||
@ -636,6 +657,7 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
|
||||
int res, ret = -1;
|
||||
struct fd_event *fde;
|
||||
const char *domain_socket_name;
|
||||
struct signal_event *se;
|
||||
|
||||
/* get rid of any old sockets */
|
||||
unlink(ctdb->daemon.name);
|
||||
@ -731,6 +753,16 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
|
||||
/* start the transport going */
|
||||
ctdb_start_transport(ctdb);
|
||||
|
||||
/* set up a handler to pick up sigchld */
|
||||
se = event_add_signal(ctdb->ev, ctdb,
|
||||
SIGCHLD, 0,
|
||||
sig_child_handler,
|
||||
ctdb);
|
||||
if (se == NULL) {
|
||||
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* go into a wait loop to allow other nodes to complete */
|
||||
event_loop_wait(ctdb->ev);
|
||||
|
||||
|
@ -87,7 +87,6 @@ static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
|
||||
ctdb->freeze_handle = NULL;
|
||||
|
||||
kill(h->child, SIGKILL);
|
||||
waitpid(h->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,6 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
|
||||
tdb_chainlock_unmark(tdb, key);
|
||||
|
||||
kill(child, SIGKILL);
|
||||
waitpid(child, NULL, 0);
|
||||
talloc_free(tmp_ctx);
|
||||
}
|
||||
|
||||
@ -80,7 +79,6 @@ static int lockwait_destructor(struct lockwait_handle *h)
|
||||
{
|
||||
h->ctdb->statistics.pending_lockwait_calls--;
|
||||
kill(h->child, SIGKILL);
|
||||
waitpid(h->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -222,7 +222,6 @@ static int childwrite_destructor(struct childwrite_handle *h)
|
||||
{
|
||||
h->ctdb->statistics.pending_childwrite_calls--;
|
||||
kill(h->child, SIGKILL);
|
||||
waitpid(h->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -260,7 +259,6 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
|
||||
callback(c, p);
|
||||
|
||||
kill(child, SIGKILL);
|
||||
waitpid(child, NULL, 0);
|
||||
talloc_free(tmp_ctx);
|
||||
}
|
||||
|
||||
|
@ -459,7 +459,6 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
|
||||
static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
|
||||
{
|
||||
kill(state->child, SIGKILL);
|
||||
waitpid(state->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2933,11 +2933,6 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
||||
{
|
||||
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
|
||||
|
||||
/* make sure we harvest the child if signals are blocked for some
|
||||
reason
|
||||
*/
|
||||
waitpid(ctdb->recoverd_pid, 0, WNOHANG);
|
||||
|
||||
if (kill(ctdb->recoverd_pid, 0) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
|
||||
|
||||
@ -2958,6 +2953,27 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
||||
ctdb_check_recd, ctdb);
|
||||
}
|
||||
|
||||
static void recd_sig_child_handler(struct event_context *ev,
|
||||
struct signal_event *se, int signum, int count,
|
||||
void *dont_care,
|
||||
void *private_data)
|
||||
{
|
||||
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
||||
int status;
|
||||
pid_t pid = -1;
|
||||
|
||||
while (pid != 0) {
|
||||
pid = waitpid(-1, &status, WNOHANG);
|
||||
if (pid == -1) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
|
||||
return;
|
||||
}
|
||||
if (pid > 0) {
|
||||
DEBUG(DEBUG_DEBUG, ("RECD SIGCHLD from %d\n", (int)pid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
startup the recovery daemon as a child of the main ctdb daemon
|
||||
*/
|
||||
@ -2965,6 +2981,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
|
||||
{
|
||||
int ret;
|
||||
int fd[2];
|
||||
struct signal_event *se;
|
||||
|
||||
if (pipe(fd) != 0) {
|
||||
return -1;
|
||||
@ -3016,6 +3033,16 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* set up a handler to pick up sigchld */
|
||||
se = event_add_signal(ctdb->ev, ctdb,
|
||||
SIGCHLD, 0,
|
||||
recd_sig_child_handler,
|
||||
ctdb);
|
||||
if (se == NULL) {
|
||||
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD in recovery daemon\n"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
monitor_cluster(ctdb);
|
||||
|
||||
DEBUG(DEBUG_ALERT,("ERROR: ctdb_recoverd finished!?\n"));
|
||||
|
@ -74,7 +74,6 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
|
||||
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
|
||||
{
|
||||
kill(h->child, SIGKILL);
|
||||
waitpid(h->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -210,18 +210,16 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
{
|
||||
struct ctdb_event_script_state *state =
|
||||
talloc_get_type(p, struct ctdb_event_script_state);
|
||||
int status = -1;
|
||||
void (*callback)(struct ctdb_context *, int, void *) = state->callback;
|
||||
void *private_data = state->private_data;
|
||||
struct ctdb_context *ctdb = state->ctdb;
|
||||
signed char rt = -1;
|
||||
|
||||
read(state->fd[0], &rt, sizeof(rt));
|
||||
|
||||
waitpid(state->child, &status, 0);
|
||||
if (status != -1) {
|
||||
status = WEXITSTATUS(status);
|
||||
}
|
||||
talloc_set_destructor(state, NULL);
|
||||
talloc_free(state);
|
||||
callback(ctdb, status, private_data);
|
||||
callback(ctdb, rt, private_data);
|
||||
|
||||
ctdb->event_script_timeouts = 0;
|
||||
}
|
||||
@ -293,7 +291,6 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
{
|
||||
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
||||
kill(state->child, SIGTERM);
|
||||
waitpid(state->child, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -336,13 +333,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
}
|
||||
|
||||
if (state->child == 0) {
|
||||
signed char rt;
|
||||
|
||||
close(state->fd[0]);
|
||||
if (ctdb->do_setsched) {
|
||||
ctdb_restore_scheduler(ctdb);
|
||||
}
|
||||
set_close_on_exec(state->fd[1]);
|
||||
ret = ctdb_event_script_v(ctdb, state->options);
|
||||
_exit(ret);
|
||||
rt = ctdb_event_script_v(ctdb, state->options);
|
||||
while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
|
||||
sleep(1);
|
||||
}
|
||||
_exit(rt);
|
||||
}
|
||||
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
|
Loading…
x
Reference in New Issue
Block a user