mirror of
https://github.com/samba-team/samba.git
synced 2025-03-09 08:58:35 +03:00
Proper waitpid() fix.
Remove the waitpid() calls scattered through the destructors and handlers, and use the event system to trap SIGCHLD and reap child processes instead. (This used to be ctdb commit 77458b2b6b51b2970c12b0e5b097088d3fb9d358)
This commit is contained in:
parent
0fba2e36b1
commit
334db8ccba
@ -628,6 +628,27 @@ static int unlink_destructor(const char *name)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void sig_child_handler(struct event_context *ev,
|
||||||
|
struct signal_event *se, int signum, int count,
|
||||||
|
void *dont_care,
|
||||||
|
void *private_data)
|
||||||
|
{
|
||||||
|
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
||||||
|
int status;
|
||||||
|
pid_t pid = -1;
|
||||||
|
|
||||||
|
while (pid != 0) {
|
||||||
|
pid = waitpid(-1, &status, WNOHANG);
|
||||||
|
if (pid == -1) {
|
||||||
|
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (pid > 0) {
|
||||||
|
DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d\n", (int)pid));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
start the protocol going as a daemon
|
start the protocol going as a daemon
|
||||||
*/
|
*/
|
||||||
@ -636,6 +657,7 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
|
|||||||
int res, ret = -1;
|
int res, ret = -1;
|
||||||
struct fd_event *fde;
|
struct fd_event *fde;
|
||||||
const char *domain_socket_name;
|
const char *domain_socket_name;
|
||||||
|
struct signal_event *se;
|
||||||
|
|
||||||
/* get rid of any old sockets */
|
/* get rid of any old sockets */
|
||||||
unlink(ctdb->daemon.name);
|
unlink(ctdb->daemon.name);
|
||||||
@ -731,6 +753,16 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
|
|||||||
/* start the transport going */
|
/* start the transport going */
|
||||||
ctdb_start_transport(ctdb);
|
ctdb_start_transport(ctdb);
|
||||||
|
|
||||||
|
/* set up a handler to pick up sigchld */
|
||||||
|
se = event_add_signal(ctdb->ev, ctdb,
|
||||||
|
SIGCHLD, 0,
|
||||||
|
sig_child_handler,
|
||||||
|
ctdb);
|
||||||
|
if (se == NULL) {
|
||||||
|
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
/* go into a wait loop to allow other nodes to complete */
|
/* go into a wait loop to allow other nodes to complete */
|
||||||
event_loop_wait(ctdb->ev);
|
event_loop_wait(ctdb->ev);
|
||||||
|
|
||||||
|
@ -87,7 +87,6 @@ static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
|
|||||||
ctdb->freeze_handle = NULL;
|
ctdb->freeze_handle = NULL;
|
||||||
|
|
||||||
kill(h->child, SIGKILL);
|
kill(h->child, SIGKILL);
|
||||||
waitpid(h->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,7 +72,6 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
|
|||||||
tdb_chainlock_unmark(tdb, key);
|
tdb_chainlock_unmark(tdb, key);
|
||||||
|
|
||||||
kill(child, SIGKILL);
|
kill(child, SIGKILL);
|
||||||
waitpid(child, NULL, 0);
|
|
||||||
talloc_free(tmp_ctx);
|
talloc_free(tmp_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +79,6 @@ static int lockwait_destructor(struct lockwait_handle *h)
|
|||||||
{
|
{
|
||||||
h->ctdb->statistics.pending_lockwait_calls--;
|
h->ctdb->statistics.pending_lockwait_calls--;
|
||||||
kill(h->child, SIGKILL);
|
kill(h->child, SIGKILL);
|
||||||
waitpid(h->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,7 +222,6 @@ static int childwrite_destructor(struct childwrite_handle *h)
|
|||||||
{
|
{
|
||||||
h->ctdb->statistics.pending_childwrite_calls--;
|
h->ctdb->statistics.pending_childwrite_calls--;
|
||||||
kill(h->child, SIGKILL);
|
kill(h->child, SIGKILL);
|
||||||
waitpid(h->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,7 +259,6 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
|
|||||||
callback(c, p);
|
callback(c, p);
|
||||||
|
|
||||||
kill(child, SIGKILL);
|
kill(child, SIGKILL);
|
||||||
waitpid(child, NULL, 0);
|
|
||||||
talloc_free(tmp_ctx);
|
talloc_free(tmp_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -459,7 +459,6 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
|
|||||||
static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
|
static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
|
||||||
{
|
{
|
||||||
kill(state->child, SIGKILL);
|
kill(state->child, SIGKILL);
|
||||||
waitpid(state->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2933,11 +2933,6 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
|||||||
{
|
{
|
||||||
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
|
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
|
||||||
|
|
||||||
/* make sure we harvest the child if signals are blocked for some
|
|
||||||
reason
|
|
||||||
*/
|
|
||||||
waitpid(ctdb->recoverd_pid, 0, WNOHANG);
|
|
||||||
|
|
||||||
if (kill(ctdb->recoverd_pid, 0) != 0) {
|
if (kill(ctdb->recoverd_pid, 0) != 0) {
|
||||||
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
|
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
|
||||||
|
|
||||||
@ -2958,6 +2953,27 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
|||||||
ctdb_check_recd, ctdb);
|
ctdb_check_recd, ctdb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void recd_sig_child_handler(struct event_context *ev,
|
||||||
|
struct signal_event *se, int signum, int count,
|
||||||
|
void *dont_care,
|
||||||
|
void *private_data)
|
||||||
|
{
|
||||||
|
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
||||||
|
int status;
|
||||||
|
pid_t pid = -1;
|
||||||
|
|
||||||
|
while (pid != 0) {
|
||||||
|
pid = waitpid(-1, &status, WNOHANG);
|
||||||
|
if (pid == -1) {
|
||||||
|
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (pid > 0) {
|
||||||
|
DEBUG(DEBUG_DEBUG, ("RECD SIGCHLD from %d\n", (int)pid));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
startup the recovery daemon as a child of the main ctdb daemon
|
startup the recovery daemon as a child of the main ctdb daemon
|
||||||
*/
|
*/
|
||||||
@ -2965,6 +2981,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
int fd[2];
|
int fd[2];
|
||||||
|
struct signal_event *se;
|
||||||
|
|
||||||
if (pipe(fd) != 0) {
|
if (pipe(fd) != 0) {
|
||||||
return -1;
|
return -1;
|
||||||
@ -3016,6 +3033,16 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set up a handler to pick up sigchld */
|
||||||
|
se = event_add_signal(ctdb->ev, ctdb,
|
||||||
|
SIGCHLD, 0,
|
||||||
|
recd_sig_child_handler,
|
||||||
|
ctdb);
|
||||||
|
if (se == NULL) {
|
||||||
|
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD in recovery daemon\n"));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
monitor_cluster(ctdb);
|
monitor_cluster(ctdb);
|
||||||
|
|
||||||
DEBUG(DEBUG_ALERT,("ERROR: ctdb_recoverd finished!?\n"));
|
DEBUG(DEBUG_ALERT,("ERROR: ctdb_recoverd finished!?\n"));
|
||||||
|
@ -74,7 +74,6 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
|
|||||||
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
|
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
|
||||||
{
|
{
|
||||||
kill(h->child, SIGKILL);
|
kill(h->child, SIGKILL);
|
||||||
waitpid(h->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,18 +210,16 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
|||||||
{
|
{
|
||||||
struct ctdb_event_script_state *state =
|
struct ctdb_event_script_state *state =
|
||||||
talloc_get_type(p, struct ctdb_event_script_state);
|
talloc_get_type(p, struct ctdb_event_script_state);
|
||||||
int status = -1;
|
|
||||||
void (*callback)(struct ctdb_context *, int, void *) = state->callback;
|
void (*callback)(struct ctdb_context *, int, void *) = state->callback;
|
||||||
void *private_data = state->private_data;
|
void *private_data = state->private_data;
|
||||||
struct ctdb_context *ctdb = state->ctdb;
|
struct ctdb_context *ctdb = state->ctdb;
|
||||||
|
signed char rt = -1;
|
||||||
|
|
||||||
|
read(state->fd[0], &rt, sizeof(rt));
|
||||||
|
|
||||||
waitpid(state->child, &status, 0);
|
|
||||||
if (status != -1) {
|
|
||||||
status = WEXITSTATUS(status);
|
|
||||||
}
|
|
||||||
talloc_set_destructor(state, NULL);
|
talloc_set_destructor(state, NULL);
|
||||||
talloc_free(state);
|
talloc_free(state);
|
||||||
callback(ctdb, status, private_data);
|
callback(ctdb, rt, private_data);
|
||||||
|
|
||||||
ctdb->event_script_timeouts = 0;
|
ctdb->event_script_timeouts = 0;
|
||||||
}
|
}
|
||||||
@ -293,7 +291,6 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
|
|||||||
{
|
{
|
||||||
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
||||||
kill(state->child, SIGTERM);
|
kill(state->child, SIGTERM);
|
||||||
waitpid(state->child, NULL, 0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -336,13 +333,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (state->child == 0) {
|
if (state->child == 0) {
|
||||||
|
signed char rt;
|
||||||
|
|
||||||
close(state->fd[0]);
|
close(state->fd[0]);
|
||||||
if (ctdb->do_setsched) {
|
if (ctdb->do_setsched) {
|
||||||
ctdb_restore_scheduler(ctdb);
|
ctdb_restore_scheduler(ctdb);
|
||||||
}
|
}
|
||||||
set_close_on_exec(state->fd[1]);
|
set_close_on_exec(state->fd[1]);
|
||||||
ret = ctdb_event_script_v(ctdb, state->options);
|
rt = ctdb_event_script_v(ctdb, state->options);
|
||||||
_exit(ret);
|
while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
_exit(rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
talloc_set_destructor(state, event_script_destructor);
|
talloc_set_destructor(state, event_script_destructor);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user