1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

Track all child process so we never send a signal to an unrelated process (our child died and kernel wrapped the pid-space and reused the pid for a different process

Wrap all creation of child processes inside ctdb_fork() which is used to track all processes we have spawned.
Capture SIGCHLD to track also which child processes have terminated.

Wrap kill() inside ctdb_kill() and make sure that we never send a !0 signal to a child process pid that has already terminated (and might have been replaced with a

(This used to be ctdb commit f73a4b1495830bcdd094a93732a89dd53b3c2f78)
This commit is contained in:
Ronnie Sahlberg 2012-05-03 11:42:41 +10:00
parent 85432ecc53
commit a57eba2bb4
17 changed files with 165 additions and 79 deletions

View File

@ -68,7 +68,7 @@ UTIL_OBJ = lib/util/idtree.o lib/util/db_wrap.o lib/util/strlist.o lib/util/util
CTDB_COMMON_OBJ = common/ctdb_io.o common/ctdb_util.o \
common/ctdb_ltdb.o common/ctdb_message.o common/cmdline.o \
lib/util/debug.o common/rb_tree.o @CTDB_SYSTEM_OBJ@ common/system_common.o \
common/ctdb_logging.c
common/ctdb_logging.c common/ctdb_fork.o
CTDB_LIB_OBJ = libctdb/ctdb.o libctdb/io_elem.o libctdb/local_tdb.o \
libctdb/messages.o libctdb/sync.o libctdb/control.o \

124
ctdb/common/ctdb_fork.c Normal file
View File

@ -0,0 +1,124 @@
/*
functions to track and manage processes
Copyright (C) Ronnie Sahlberg 2012
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/wait.h"
#include "../include/ctdb_client.h"
#include "../include/ctdb_private.h"
#include "../common/rb_tree.h"
/*
* This function forks a child process and drops the realtime
* scheduler for the child process.
*/
pid_t ctdb_fork(struct ctdb_context *ctdb)
{
pid_t pid;
char *process;
pid = fork();
if (pid == -1) {
return -1;
}
if (pid == 0) {
if (ctdb->do_setsched) {
ctdb_restore_scheduler(ctdb);
}
ctdb->can_send_controls = false;
return 0;
}
if (getpid() != ctdb->ctdbd_pid) {
return pid;
}
process = talloc_asprintf(ctdb->child_processes, "process:%d", (int)pid);
trbt_insert32(ctdb->child_processes, pid, process);
return pid;
}
static void ctdb_sigchld_handler(struct tevent_context *ev,
struct tevent_signal *te, int signum, int count,
void *dont_care,
void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int status;
pid_t pid = -1;
while (pid != 0) {
pid = waitpid(-1, &status, WNOHANG);
if (pid == -1) {
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
return;
}
if (pid > 0) {
char *process;
if (getpid() != ctdb->ctdbd_pid) {
continue;
}
process = trbt_lookup32(ctdb->child_processes, pid);
if (process == NULL) {
DEBUG(DEBUG_ERR,("Got SIGCHLD from pid:%d we didn not spawn with ctdb_fork\n", pid));
}
DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d %s\n", (int)pid, process));
talloc_free(process);
}
}
}
struct tevent_signal *
ctdb_init_sigchld(struct ctdb_context *ctdb)
{
struct tevent_signal *se;
ctdb->child_processes = trbt_create(ctdb, 0);
se = tevent_add_signal(ctdb->ev, ctdb, SIGCHLD, 0, ctdb_sigchld_handler, ctdb);
return se;
}
int
ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum)
{
char *process;
if (signum == 0) {
return kill(pid, signum);
}
if (getpid() != ctdb->ctdbd_pid) {
return kill(pid, signum);
}
process = trbt_lookup32(ctdb->child_processes, pid);
if (process == NULL) {
DEBUG(DEBUG_ERR,("ctdb_kill: trying to kill(%d, %d) a process that does not exist\n", pid, signum));
return 0;
}
return kill(pid, signum);
}

View File

@ -332,24 +332,6 @@ void ctdb_restore_scheduler(struct ctdb_context *ctdb)
#endif
}
/*
* This function forks a child process and drops the realtime
* scheduler for the child process.
*/
pid_t ctdb_fork(struct ctdb_context *ctdb)
{
pid_t pid;
pid = fork();
if (pid == 0) {
if (ctdb->do_setsched) {
ctdb_restore_scheduler(ctdb);
}
ctdb->can_send_controls = false;
}
return pid;
}
void set_nonblocking(int fd)
{
unsigned v;

View File

@ -501,6 +501,7 @@ struct ctdb_context {
struct ctdb_reloadips_handle *reload_ips;
const char *public_addresses_file;
struct trbt_tree *child_processes;
};
struct ctdb_db_context {
@ -1031,7 +1032,11 @@ void ctdb_node_connected(struct ctdb_node *node);
bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
void ctdb_set_scheduler(struct ctdb_context *ctdb);
void ctdb_restore_scheduler(struct ctdb_context *ctdb);
struct tevent_signal *ctdb_init_sigchld(struct ctdb_context *ctdb);
pid_t ctdb_fork(struct ctdb_context *ctdb);
int ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum);
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,

View File

@ -1388,7 +1388,7 @@ static int revokechild_destructor(struct revokechild_handle *rc)
if (rc->fd[1] != -1) {
close(rc->fd[1]);
}
kill(rc->child, SIGKILL);
ctdb_kill(rc->ctdb, rc->child, SIGKILL);
DLIST_REMOVE(rc->ctdb_db->revokechild_active, rc);
return 0;
@ -1617,7 +1617,7 @@ int ctdb_start_revoke_ro_record(struct ctdb_context *ctdb, struct ctdb_db_contex
child_finished:
write(rc->fd[1], &c, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);

View File

@ -1029,27 +1029,6 @@ failed:
return -1;
}
static void sig_child_handler(struct event_context *ev,
struct signal_event *se, int signum, int count,
void *dont_care,
void *private_data)
{
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int status;
pid_t pid = -1;
while (pid != 0) {
pid = waitpid(-1, &status, WNOHANG);
if (pid == -1) {
DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
return;
}
if (pid > 0) {
DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d\n", (int)pid));
}
}
}
static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
void *private_data)
{
@ -1074,7 +1053,6 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
int res, ret = -1;
struct fd_event *fde;
const char *domain_socket_name;
struct signal_event *se;
/* get rid of any old sockets */
unlink(ctdb->daemon.name);
@ -1105,7 +1083,6 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
ctdbd_pid = getpid();
ctdb->ctdbd_pid = ctdbd_pid;
DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
if (ctdb->do_setsched) {
@ -1128,6 +1105,12 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
exit(1);
}
/* set up a handler to pick up sigchld */
if (ctdb_init_sigchld(ctdb) == NULL) {
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
exit(1);
}
ctdb_set_child_logging(ctdb);
/* initialize statistics collection */
@ -1203,16 +1186,6 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
/* start the transport going */
ctdb_start_transport(ctdb);
/* set up a handler to pick up sigchld */
se = event_add_signal(ctdb->ev, ctdb,
SIGCHLD, 0,
sig_child_handler,
ctdb);
if (se == NULL) {
DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
exit(1);
}
ret = ctdb_event_script_callback(ctdb,
ctdb,
ctdb_setup_event_callback,

View File

@ -122,7 +122,7 @@ static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
ctdb->freeze_mode[h->priority] = CTDB_FREEZE_NONE;
ctdb->freeze_handles[h->priority] = NULL;
kill(h->child, SIGKILL);
ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
@ -189,7 +189,7 @@ static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, ui
return NULL;
}
h->child = fork();
h->child = ctdb_fork(ctdb);
if (h->child == -1) {
DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
talloc_free(h);
@ -216,7 +216,7 @@ static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, ui
while (1) {
sleep(1);
if (kill(ctdb->ctdbd_pid, 0) != 0) {
if (ctdb_kill(ctdb, ctdb->ctdbd_pid, 0) != 0) {
DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
_exit(0);

View File

@ -72,7 +72,7 @@ static void do_overflow(struct ctdb_db_context *ctdb_db,
static int lockwait_destructor(struct lockwait_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
kill(h->child, SIGKILL);
ctdb_kill(h->ctdb, h->child, SIGKILL);
h->ctdb_db->pending_requests--;
DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
return 0;
@ -202,7 +202,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
tdb_chainlock(ctdb_db->ltdb->tdb, key);
write(result->fd[1], &c, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);

View File

@ -626,7 +626,7 @@ static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
if (state->fd[1] != -1) {
state->fd[1] = -1;
}
kill(state->child, SIGKILL);
ctdb_kill(state->ctdb, state->child, SIGKILL);
return 0;
}
@ -778,7 +778,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
return -1;
}
state->child = fork();
state->child = ctdb_fork(ctdb);
if (state->child == (pid_t)-1) {
close(state->fd[0]);
close(state->fd[1]);
@ -801,7 +801,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
write(state->fd[1], &cc, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
write(state->fd[1], &cc, 1);
}

View File

@ -2949,7 +2949,7 @@ static int check_reclock_destructor(struct ctdb_check_reclock_state *state)
close(state->fd[1]);
state->fd[1] = -1;
}
kill(state->child, SIGKILL);
ctdb_kill(ctdb, state->child, SIGKILL);
return 0;
}
@ -3047,7 +3047,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
write(state->fd[1], &cc, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
write(state->fd[1], &cc, 1);
}
@ -3166,7 +3166,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
/* verify that the main daemon is still running */
if (kill(ctdb->ctdbd_pid, 0) != 0) {
if (ctdb_kill(ctdb, ctdb->ctdbd_pid, 0) != 0) {
DEBUG(DEBUG_CRIT,("CTDB daemon is no longer available. Shutting down recovery daemon\n"));
exit(-1);
}
@ -3809,7 +3809,7 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
{
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
if (kill(ctdb->recoverd_pid, 0) != 0) {
if (ctdb_kill(ctdb, ctdb->recoverd_pid, 0) != 0) {
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid));
event_add_timed(ctdb->ev, ctdb, timeval_zero(),
@ -3861,7 +3861,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
ctdb->ctdbd_pid = getpid();
ctdb->recoverd_pid = fork();
ctdb->recoverd_pid = ctdb_fork(ctdb);
if (ctdb->recoverd_pid == -1) {
return -1;
}
@ -3915,7 +3915,7 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb)
}
DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
kill(ctdb->recoverd_pid, SIGTERM);
ctdb_kill(ctdb, ctdb->recoverd_pid, SIGTERM);
}
static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te,

View File

@ -741,7 +741,7 @@ static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr
(unsigned)client->pid,
ctdb_addr_to_str(addr),
ip->client_id));
kill(client->pid, SIGKILL);
ctdb_kill(ctdb, client->pid, SIGKILL);
}
}
}
@ -3675,7 +3675,7 @@ static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
h->c = NULL;
}
kill(h->child, SIGKILL);
ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
@ -3850,7 +3850,7 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re
write(h->fd[1], &res, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);

View File

@ -78,7 +78,7 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
{
DLIST_REMOVE(h->ctdb_db->traverse, h);
kill(h->child, SIGKILL);
ctdb_kill(h->ctdb_db->ctdb, h->child, SIGKILL);
return 0;
}

View File

@ -162,7 +162,7 @@ struct childwrite_handle {
static int childwrite_destructor(struct childwrite_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
kill(h->child, SIGKILL);
ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
@ -199,7 +199,7 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
callback(c, p);
kill(child, SIGKILL);
ctdb_kill(h->ctdb, child, SIGKILL);
talloc_free(tmp_ctx);
}
@ -260,7 +260,7 @@ static struct childwrite_handle *ctdb_childwrite(
write(result->fd[1], &c, 1);
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);

View File

@ -1199,7 +1199,7 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx)
DEBUG(DEBUG_INFO,("Vacuuming took %.3f seconds for database %s\n", l, ctdb_db->db_name));
if (child_ctx->child_pid != -1) {
kill(child_ctx->child_pid, SIGKILL);
ctdb_kill(ctdb, child_ctx->child_pid, SIGKILL);
} else {
/* Bump the number of successful fast-path runs. */
child_ctx->vacuum_handle->fast_path_count++;

View File

@ -473,7 +473,7 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
/* valgrind gets overloaded if we run next script as it's still doing
* post-execution analysis, so kill finished child here. */
if (ctdb->valgrinding) {
kill(state->child, SIGKILL);
ctdb_kill(ctdb, state->child, SIGKILL);
}
state->child = 0;
@ -529,7 +529,7 @@ static void debug_timeout(struct ctdb_event_script_state *state)
if (pid == 0) {
system(buf);
/* Now we can kill the child */
kill(state->child, SIGTERM);
ctdb_kill(state->ctdb, state->child, SIGTERM);
exit(0);
}
if (pid == -1) {
@ -584,7 +584,7 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
if (kill(state->child, SIGTERM) != 0) {
if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) {
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
}
}

View File

@ -73,6 +73,7 @@
#include "common/rb_tree.c"
#include "common/system_common.c"
#include "common/ctdb_logging.c"
#include "common/ctdb_fork.c"
/* CTDB_CLIENT_OBJ */
#include "client/ctdb_client.c"

View File

@ -54,6 +54,7 @@ void ctdb_load_nodes_file(struct ctdb_context *ctdb) {}
#include "common/rb_tree.c"
#include "common/system_common.c"
#include "common/ctdb_logging.c"
#include "common/ctdb_fork.c"
/* CTDB_SERVER_OBJ */
#include "server/ctdb_daemon.c"