diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index fd110345c2c..9716c7cdde3 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -312,12 +312,6 @@ struct ctdb_context { TALLOC_CTX *recd_ctx; /* a context used to track recoverd monitoring events */ TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */ - TALLOC_CTX *event_script_ctx; - int active_events; - - struct ctdb_event_script_state *current_monitor; - struct ctdb_script_list_old *last_status[CTDB_EVENT_MAX]; - struct eventd_context *ectx; TALLOC_CTX *banning_ctx; @@ -333,9 +327,6 @@ struct ctdb_context { /* if we are a child process, do we have a domain socket to send controls on */ bool can_send_controls; - /* list of event script callback functions that are active */ - struct event_script_callback *script_callbacks; - struct ctdb_reloadips_handle *reload_ips; const char *nodes_file; diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index 2e8cbb3b632..d0d86a0c703 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -1307,6 +1307,11 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork) /* force initial recovery for election */ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; + if (ctdb_start_eventd(ctdb) != 0) { + DEBUG(DEBUG_ERR, ("Failed to start event daemon\n")); + exit(1); + } + ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT); ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT); if (ret != 0) { @@ -1839,6 +1844,7 @@ void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code) ctdb_stop_monitoring(ctdb); ctdb_release_all_ips(ctdb); ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN); + ctdb_stop_eventd(ctdb); if (ctdb->methods != NULL && ctdb->methods->shutdown != NULL) { ctdb->methods->shutdown(ctdb); } diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index d1147ed6fbc..decbaa8125b 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -622,610 +622,20 @@ static void ctdb_event_script_run_done(int result, void *private_data) } } + state = talloc_steal(state->ctdb, state); state->callback(state->ctdb, result, state->private_data); talloc_free(state); } -static void ctdb_event_script_timeout(struct tevent_context *ev, - struct tevent_timer *te, - struct timeval t, void *p); - -/* This is attached to the event script state. */ -struct event_script_callback { - struct event_script_callback *next, *prev; - struct ctdb_context *ctdb; - - /* Warning: this can free us! */ - void (*fn)(struct ctdb_context *, int, void *); - void *private_data; -}; - -struct ctdb_event_script_state { - struct ctdb_context *ctdb; - struct event_script_callback *callback; - pid_t child; - int fd[2]; - enum ctdb_event call; - const char *options; - struct timeval timeout; - - unsigned int current; - struct ctdb_script_list_old *scripts; -}; - -static struct ctdb_script *get_current_script(struct ctdb_event_script_state *state) -{ - return &state->scripts->scripts[state->current]; -} - -/* called from ctdb_logging when we have received output on STDERR from - * one of the eventscripts - */ -static void log_event_script_output(const char *str, uint16_t len, void *p) -{ - struct ctdb_event_script_state *state - = talloc_get_type(p, struct ctdb_event_script_state); - struct ctdb_script *current; - unsigned int slen, min; - - /* We may have been aborted to run something else. Discard */ - if (state->scripts == NULL) { - return; - } - - current = get_current_script(state); - - /* Append, but don't overfill buffer. It starts zero-filled. */ - slen = strlen(current->output); - min = MIN(len, sizeof(current->output) - slen - 1); - - memcpy(current->output + slen, str, min); -} - -/* To ignore directory entry return 0, else return non-zero */ -static int script_filter(const struct dirent *de) -{ - int namelen = strlen(de->d_name); - - /* Ignore . and .. */ - if (namelen < 3) { - return 0; - } - - /* Skip temporary files left behind by emacs */ - if (de->d_name[namelen-1] == '~') { - return 0; - } - - /* Filename should start with [0-9][0-9]. */ - if (!isdigit(de->d_name[0]) || !isdigit(de->d_name[1]) || - de->d_name[2] != '.') { - return 0; - } - - if (namelen > MAX_SCRIPT_NAME) { - return 0; - } - - return 1; -} - -/* Return true if OK, otherwise set errno. */ -static bool check_executable(const char *dir, const char *name) -{ - char *full; - struct stat st; - - full = talloc_asprintf(NULL, "%s/%s", dir, name); - if (!full) - return false; - - if (stat(full, &st) != 0) { - DEBUG(DEBUG_ERR,("Could not stat event script %s: %s\n", - full, strerror(errno))); - talloc_free(full); - return false; - } - - if (!(st.st_mode & S_IXUSR)) { - DEBUG(DEBUG_DEBUG,("Event script %s is not executable. Ignoring this event script\n", full)); - errno = ENOEXEC; - talloc_free(full); - return false; - } - - talloc_free(full); - return true; -} - -static struct ctdb_script_list_old *ctdb_get_script_list( - struct ctdb_context *ctdb, - TALLOC_CTX *mem_ctx) -{ - struct dirent **namelist; - struct ctdb_script_list_old *scripts; - int i, count; - - /* scan all directory entries and insert all valid scripts into the - tree - */ - count = scandir(ctdb->event_script_dir, &namelist, script_filter, alphasort); - if (count == -1) { - DEBUG(DEBUG_CRIT, ("Failed to read event script directory '%s' - %s\n", - ctdb->event_script_dir, strerror(errno))); - return NULL; - } - - /* Overallocates by one, but that's OK */ - scripts = talloc_zero_size(mem_ctx, - sizeof(*scripts) - + sizeof(scripts->scripts[0]) * count); - if (scripts == NULL) { - DEBUG(DEBUG_ERR, (__location__ " Failed to allocate scripts\n")); - goto done; - } - scripts->num_scripts = count; - - for (i = 0; i < count; i++) { - struct ctdb_script *s = &scripts->scripts[i]; - - if (strlcpy(s->name, namelist[i]->d_name, sizeof(s->name)) >= - sizeof(s->name)) { - s->status = -ENAMETOOLONG; - continue; - } - - s->status = 0; - if (!check_executable(ctdb->event_script_dir, - namelist[i]->d_name)) { - s->status = -errno; - } - } - -done: - for (i=0; ievent_script_dir, current->name); - tmp[2] = talloc_asprintf(tmp, "%s", ctdb_eventscript_call_names[call]); - n = 3; - - /* Split options into individual arguments */ - opt = talloc_strdup(mem_ctx, options); - if (opt == NULL) { - goto failed; - } - - t = strtok_r(opt, " ", &saveptr); - while (t != NULL) { - tmp[n++] = talloc_strdup(tmp, t); - if (n > MAX_HELPER_ARGS) { - goto args_failed; - } - t = strtok_r(NULL, " ", &saveptr); - } - - for (i=0; istart = timeval_current(); - - r = pipe(state->fd); - if (r != 0) { - DEBUG(DEBUG_ERR, (__location__ " pipe failed for child eventscript process\n")); - return -errno; - } - - /* Arguments for helper */ - if (!child_helper_args(state, ctdb, state->call, state->options, current, - state->fd[1], &argc, &argv)) { - DEBUG(DEBUG_ERR, (__location__ " failed to create arguments for eventscript helper\n")); - r = -ENOMEM; - close(state->fd[0]); - close(state->fd[1]); - return r; - } - - if (!ctdb_vfork_with_logging(state, ctdb, current->name, - helper_prog, argc, argv, - log_event_script_output, - state, &state->child)) { - talloc_free(argv); - r = -errno; - close(state->fd[0]); - close(state->fd[1]); - return r; - } - - talloc_free(argv); - - close(state->fd[1]); - set_close_on_exec(state->fd[0]); - - /* Set ourselves up to be called when that's done. */ - fde = tevent_add_fd(ctdb->ev, state, state->fd[0], TEVENT_FD_READ, - ctdb_event_script_handler, state); - tevent_fd_set_auto_close(fde); - - return 0; -} - -/* - Summarize status of this run of scripts. - */ -static int script_status(struct ctdb_script_list_old *scripts) -{ - unsigned int i; - - for (i = 0; i < scripts->num_scripts; i++) { - switch (scripts->scripts[i].status) { - case -ENAMETOOLONG: - case -ENOENT: - case -ENOEXEC: - /* Disabled or missing; that's OK. */ - break; - case 0: - /* No problem. */ - break; - default: - return scripts->scripts[i].status; - } - } - - /* All OK! */ - return 0; -} - -/* called when child is finished */ -static void ctdb_event_script_handler(struct tevent_context *ev, - struct tevent_fd *fde, - uint16_t flags, void *p) -{ - struct ctdb_event_script_state *state = - talloc_get_type(p, struct ctdb_event_script_state); - struct ctdb_script *current = get_current_script(state); - struct ctdb_context *ctdb = state->ctdb; - int r, status; - - if (ctdb == NULL) { - DEBUG(DEBUG_ERR,("Eventscript finished but ctdb is NULL\n")); - return; - } - - r = sys_read(state->fd[0], ¤t->status, sizeof(current->status)); - if (r < 0) { - current->status = -errno; - } else if (r == 0) { - current->status = -EINTR; - } else if (r != sizeof(current->status)) { - current->status = -EIO; - } - - current->finished = timeval_current(); - /* valgrind gets overloaded if we run next script as it's still doing - * post-execution analysis, so kill finished child here. */ - if (ctdb->valgrinding) { - ctdb_kill(ctdb, state->child, SIGKILL); - } - - state->child = 0; - - status = script_status(state->scripts); - - /* Aborted or finished all scripts? We're done. */ - if (status != 0 || state->current+1 == state->scripts->num_scripts) { - if (status != 0) { - DEBUG(DEBUG_INFO, - ("Eventscript %s %s finished with state %d\n", - ctdb_eventscript_call_names[state->call], - state->options, status)); - } - - talloc_free(state); - return; - } - - /* Forget about that old fd. */ - talloc_free(fde); - - /* Next script! */ - state->current++; - current++; - current->status = fork_child_for_script(ctdb, state); - if (current->status != 0) { - /* This calls the callback. */ - talloc_free(state); - } -} - -struct debug_hung_script_state { - struct ctdb_context *ctdb; - pid_t child; - enum ctdb_event call; -}; - -static int debug_hung_script_state_destructor(struct debug_hung_script_state *state) -{ - if (state->child) { - ctdb_kill(state->ctdb, state->child, SIGKILL); - } - return 0; -} - -static void debug_hung_script_timeout(struct tevent_context *ev, struct tevent_timer *te, - struct timeval t, void *p) -{ - struct debug_hung_script_state *state = - talloc_get_type(p, struct debug_hung_script_state); - - talloc_free(state); -} - -static void debug_hung_script_done(struct tevent_context *ev, struct tevent_fd *fde, - uint16_t flags, void *p) -{ - struct debug_hung_script_state *state = - talloc_get_type(p, struct debug_hung_script_state); - - talloc_free(state); -} - -static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct debug_hung_script_state *state) -{ - pid_t pid; - const char * debug_hung_script = CTDB_ETCDIR "/debug-hung-script.sh"; - int fd[2]; - struct tevent_timer *ttimer; - struct tevent_fd *tfd; - const char **argv; - int i; - - if (pipe(fd) < 0) { - DEBUG(DEBUG_ERR,("Failed to create pipe fd for debug hung script\n")); - return; - } - - if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) { - debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT"); - } - - argv = talloc_array(state, const char *, 5); - - argv[0] = talloc_asprintf(argv, "%d", fd[1]); - argv[1] = talloc_strdup(argv, debug_hung_script); - argv[2] = talloc_asprintf(argv, "%d", state->child); - argv[3] = talloc_strdup(argv, ctdb_eventscript_call_names[state->call]); - argv[4] = NULL; - - for (i=0; i<4; i++) { - if (argv[i] == NULL) { - close(fd[0]); - close(fd[1]); - talloc_free(argv); - return; - } - } - - - if (!ctdb_vfork_with_logging(state, ctdb, "Hung-script", - helper_prog, 5, argv, NULL, NULL, &pid)) { - DEBUG(DEBUG_ERR,("Failed to fork a child to track hung event script\n")); - talloc_free(argv); - close(fd[0]); - close(fd[1]); - return; - } - - talloc_free(argv); - close(fd[1]); - - ttimer = tevent_add_timer(ctdb->ev, state, - timeval_current_ofs(ctdb->tunable.script_timeout, 0), - debug_hung_script_timeout, state); - if (ttimer == NULL) { - close(fd[0]); - return; - } - - tfd = tevent_add_fd(ctdb->ev, state, fd[0], TEVENT_FD_READ, - debug_hung_script_done, state); - if (tfd == NULL) { - talloc_free(ttimer); - close(fd[0]); - return; - } - tevent_fd_set_auto_close(tfd); -} - -/* called when child times out */ -static void ctdb_event_script_timeout(struct tevent_context *ev, - struct tevent_timer *te, - struct timeval t, void *p) -{ - struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state); - struct ctdb_context *ctdb = state->ctdb; - struct ctdb_script *current = get_current_script(state); - struct debug_hung_script_state *debug_state; - - DEBUG(DEBUG_ERR,("Event script '%s %s %s' timed out after %.1fs, pid: %d\n", - current->name, ctdb_eventscript_call_names[state->call], state->options, - timeval_elapsed(¤t->start), - state->child)); - - /* ignore timeouts for these events */ - switch (state->call) { - case CTDB_EVENT_START_RECOVERY: - case CTDB_EVENT_RECOVERED: - case CTDB_EVENT_TAKE_IP: - case CTDB_EVENT_RELEASE_IP: - state->scripts->scripts[state->current].status = 0; - DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call)); - break; - default: - state->scripts->scripts[state->current].status = -ETIME; - } - - debug_state = talloc_zero(ctdb, struct debug_hung_script_state); - if (debug_state == NULL) { - talloc_free(state); - return; - } - - /* Save information useful for running debug hung script, so - * eventscript state can be freed. - */ - debug_state->ctdb = ctdb; - debug_state->child = state->child; - debug_state->call = state->call; - - /* This destructor will actually kill the hung event script */ - talloc_set_destructor(debug_state, debug_hung_script_state_destructor); - - state->child = 0; - talloc_free(state); - - ctdb_run_debug_hung_script(ctdb, debug_state); -} - -/* - destroy an event script: kill it if ->child != 0. - */ -static int event_script_destructor(struct ctdb_event_script_state *state) -{ - int status; - struct event_script_callback *callback; - - if (state->child) { - DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child)); - - if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) { - DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno)); - } - } - - /* If we were the current monitor, we no longer are. */ - if (state->ctdb->current_monitor == state) { - state->ctdb->current_monitor = NULL; - } - - /* Save our scripts as the last executed status, if we have them. - * See ctdb_event_script_callback_v where we abort monitor event. */ - if (state->scripts) { - talloc_free(state->ctdb->last_status[state->call]); - state->ctdb->last_status[state->call] = state->scripts; - if (state->current < state->ctdb->last_status[state->call]->num_scripts) { - state->ctdb->last_status[state->call]->num_scripts = state->current+1; - } - } - - /* Use last status as result, or "OK" if none. */ - if (state->ctdb->last_status[state->call]) { - status = script_status(state->ctdb->last_status[state->call]); - } else { - status = 0; - } - - state->ctdb->active_events--; - if (state->ctdb->active_events < 0) { - ctdb_fatal(state->ctdb, "Active events < 0"); - } - - /* This is allowed to free us; talloc will prevent double free anyway, - * but beware if you call this outside the destructor! - * the callback hangs off a different context so we walk the list - * of "active" callbacks until we find the one state points to. - * if we cant find it it means the callback has been removed. - */ - for (callback = state->ctdb->script_callbacks; callback != NULL; callback = callback->next) { - if (callback == state->callback) { - break; - } - } - - state->callback = NULL; - - if (callback) { - /* Make sure destructor doesn't free itself! */ - talloc_steal(NULL, callback); - callback->fn(state->ctdb, status, callback->private_data); - talloc_free(callback); - } - - return 0; -} - static unsigned int count_words(const char *options) { unsigned int words = 0; + if (options == NULL) { + return 0; + } + options += strspn(options, " \t"); while (*options) { words++; @@ -1262,68 +672,6 @@ static bool check_options(enum ctdb_event call, const char *options) } } -static int remove_callback(struct event_script_callback *callback) -{ - DLIST_REMOVE(callback->ctdb->script_callbacks, callback); - return 0; -} - -struct schedule_callback_state { - struct ctdb_context *ctdb; - void (*callback)(struct ctdb_context *, int, void *); - void *private_data; - int status; - struct tevent_immediate *im; -}; - -static void schedule_callback_handler(struct tevent_context *ctx, - struct tevent_immediate *im, - void *private_data) -{ - struct schedule_callback_state *state = - talloc_get_type_abort(private_data, - struct schedule_callback_state); - - if (state->callback != NULL) { - state->callback(state->ctdb, state->status, - state->private_data); - } - talloc_free(state); -} - -static int -schedule_callback_immediate(struct ctdb_context *ctdb, - void (*callback)(struct ctdb_context *, - int, void *), - void *private_data, - int status) -{ - struct schedule_callback_state *state; - struct tevent_immediate *im; - - state = talloc_zero(ctdb, struct schedule_callback_state); - if (state == NULL) { - DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); - return -1; - } - im = tevent_create_immediate(state); - if (im == NULL) { - DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); - talloc_free(state); - return -1; - } - - state->ctdb = ctdb; - state->callback = callback; - state->private_data = private_data; - state->status = status; - state->im = im; - - tevent_schedule_immediate(im, ctdb->ev, - schedule_callback_handler, state); - return 0; -} - /* only specific events are allowed while in recovery */ static bool event_allowed_during_recovery(enum ctdb_event event) { @@ -1346,149 +694,6 @@ static bool event_allowed_during_recovery(enum ctdb_event event) return false; } -/* - run the event script in the background, calling the callback when - finished - */ -static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, - const void *mem_ctx, - void (*callback)(struct ctdb_context *, int, void *), - void *private_data, - enum ctdb_event call, - const char *fmt, va_list ap) - PRINTF_ATTRIBUTE(6,0); - -static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, - const void *mem_ctx, - void (*callback)(struct ctdb_context *, int, void *), - void *private_data, - enum ctdb_event call, - const char *fmt, va_list ap) -{ - struct ctdb_event_script_state *state; - - if ((ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) && - (! event_allowed_during_recovery(call))) { - DEBUG(DEBUG_ERR, - ("Refusing to run event '%s' while in recovery\n", - ctdb_eventscript_call_names[call])); - } - - /* Do not run new monitor events if some event is already - * running, unless the running event is a monitor event, in - * which case running a new one should cancel the old one. */ - if (call == CTDB_EVENT_MONITOR && - ctdb->active_events > 0 && - ctdb->current_monitor == NULL) { - if (callback != NULL) { - callback(ctdb, -ECANCELED, private_data); - } - return 0; - } - - /* Kill off any running monitor events to run this event. */ - if (ctdb->current_monitor) { - struct ctdb_event_script_state *ms = talloc_get_type(ctdb->current_monitor, struct ctdb_event_script_state); - - /* Cancel current monitor callback state only if monitoring - * context ctdb->monitor->monitor_context has not been freed */ - if (ms->callback != NULL && !ctdb_stopped_monitoring(ctdb)) { - ms->callback->fn(ctdb, -ECANCELED, ms->callback->private_data); - talloc_free(ms->callback); - } - - /* Discard script status so we don't save to last_status */ - talloc_free(ctdb->current_monitor->scripts); - ctdb->current_monitor->scripts = NULL; - talloc_free(ctdb->current_monitor); - ctdb->current_monitor = NULL; - } - - state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state); - CTDB_NO_MEMORY(ctdb, state); - - /* The callback isn't done if the context is freed. */ - state->callback = talloc(mem_ctx, struct event_script_callback); - CTDB_NO_MEMORY(ctdb, state->callback); - DLIST_ADD(ctdb->script_callbacks, state->callback); - talloc_set_destructor(state->callback, remove_callback); - state->callback->ctdb = ctdb; - state->callback->fn = callback; - state->callback->private_data = private_data; - - state->ctdb = ctdb; - state->call = call; - state->options = talloc_vasprintf(state, fmt, ap); - state->timeout = timeval_set(ctdb->tunable.script_timeout, 0); - state->scripts = NULL; - if (state->options == NULL) { - DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n")); - talloc_free(state); - return -1; - } - if (!check_options(state->call, state->options)) { - DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for '%s'\n", - state->options, - ctdb_eventscript_call_names[state->call])); - talloc_free(state); - return -1; - } - - DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n", - ctdb_eventscript_call_names[state->call], - state->options)); - - /* This is not a child of state, since we save it in destructor. */ - state->scripts = ctdb_get_script_list(ctdb, ctdb); - if (state->scripts == NULL) { - talloc_free(state); - return -1; - } - state->current = 0; - state->child = 0; - - /* Nothing to do? */ - if (state->scripts->num_scripts == 0) { - int ret = schedule_callback_immediate(ctdb, callback, - private_data, 0); - talloc_free(state); - if (ret != 0) { - DEBUG(DEBUG_ERR, - ("Unable to schedule callback for 0 scripts\n")); - return 1; - } - return 0; - } - - state->scripts->scripts[0].status = fork_child_for_script(ctdb, state); - if (state->scripts->scripts[0].status != 0) { - talloc_free(state); - return -1; - } - - if (call == CTDB_EVENT_MONITOR) { - ctdb->current_monitor = state; - } - - ctdb->active_events++; - - talloc_set_destructor(state, event_script_destructor); - - if (!timeval_is_zero(&state->timeout)) { - tevent_add_timer(ctdb->ev, state, - timeval_current_ofs(state->timeout.tv_sec, - state->timeout.tv_usec), - ctdb_event_script_timeout, state); - } else { - DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n", - ctdb_eventscript_call_names[state->call], - state->options)); - } - - return 0; -} - - /* run the event script in the background, calling the callback when finished. If mem_ctx is freed, callback will never be called. @@ -1504,24 +709,25 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb, int ret; va_start(ap, fmt); - ret = ctdb_event_script_callback_v(ctdb, mem_ctx, callback, private_data, call, fmt, ap); + ret = ctdb_event_script_run(ctdb, mem_ctx, callback, private_data, + call, fmt, ap); va_end(ap); return ret; } -struct callback_status { +struct ctdb_event_script_args_state { bool done; int status; }; -/* - called when ctdb_event_script() finishes - */ -static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data) +static void ctdb_event_script_args_done(struct ctdb_context *ctdb, + int status, void *private_data) { - struct callback_status *s = (struct callback_status *)private_data; + struct ctdb_event_script_args_state *s = + (struct ctdb_event_script_args_state *)private_data; + s->done = true; s->status = status; } @@ -1535,38 +741,41 @@ int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_event call, { va_list ap; int ret; - struct callback_status status = { + struct ctdb_event_script_args_state state = { .status = -1, .done = false, }; va_start(ap, fmt); - ret = ctdb_event_script_callback_v(ctdb, ctdb, - event_script_callback, &status, call, fmt, ap); + ret = ctdb_event_script_run(ctdb, ctdb, + ctdb_event_script_args_done, &state, + call, fmt, ap); va_end(ap); if (ret != 0) { return ret; } - while (status.done == false && tevent_loop_once(ctdb->ev) == 0) /* noop */; - - if (status.status == -ETIME) { - DEBUG(DEBUG_ERR, (__location__ " eventscript for '%s' timed out." - " Immediately banning ourself for %d seconds\n", - ctdb_eventscript_call_names[call], - ctdb->tunable.recovery_ban_period)); + while (! state.done) { + tevent_loop_once(ctdb->ev); + } + if (state.status == -ETIME) { /* Don't ban self if CTDB is starting up or shutting down */ if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) { + DEBUG(DEBUG_ERR, + (__location__ " eventscript for '%s' timed out." + " Immediately banning ourself for %d seconds\n", + ctdb_eventscript_call_names[call], + ctdb->tunable.recovery_ban_period)); ctdb_ban_self(ctdb); } } - return status.status; + return state.status; } int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_event call) { /* GCC complains about empty format string, so use %s and "". */ - return ctdb_event_script_args(ctdb, call, "%s", ""); + return ctdb_event_script_args(ctdb, call, NULL); } diff --git a/ctdb/tests/complex/90_debug_hung_script.sh b/ctdb/tests/complex/90_debug_hung_script.sh index 0cabb54e5b7..b2518df6fdb 100755 --- a/ctdb/tests/complex/90_debug_hung_script.sh +++ b/ctdb/tests/complex/90_debug_hung_script.sh @@ -84,11 +84,11 @@ done <<'EOF' ===== Start of hung script debug for PID=".*", event="monitor" ===== ===== End of hung script debug for PID=".*", event="monitor" ===== pstree -p -a .*: - *\`-99\\.timeout,.* /etc/ctdb/events.d/99.timeout monitor +99\\.timeout,.* /etc/ctdb/events.d/99.timeout monitor *\`-sleep,.* ---- Stack trace of interesting process [0-9]*\\[sleep\\] ---- [<[0-9a-f]*>] .*sleep+.* ---- ctdb scriptstatus monitor: ---- -99\\.timeout *Status:TIMEDOUT.* - *OUTPUT:sleeping for [0-9]* seconds\\.\\.\\. +99\\.timeout *TIMEDOUT.* + *OUTPUT: sleeping for [0-9]* seconds\\.\\.\\. EOF diff --git a/ctdb/tests/simple/scripts/local_daemons.bash b/ctdb/tests/simple/scripts/local_daemons.bash index 21dd3e4727b..1888941140a 100644 --- a/ctdb/tests/simple/scripts/local_daemons.bash +++ b/ctdb/tests/simple/scripts/local_daemons.bash @@ -7,6 +7,8 @@ if [ -n "$ctdb_dir" -a -d "${ctdb_dir}/bin" ] ; then # ctdbd_wrapper is in config/ directory PATH="${ctdb_dir}/bin:${ctdb_dir}/config:${PATH}" hdir="${ctdb_dir}/bin" + export CTDB_EVENTD="${hdir}/ctdb_eventd" + export CTDB_EVENT_HELPER="${hdir}/ctdb_event" export CTDB_LOCK_HELPER="${hdir}/ctdb_lock_helper" export CTDB_EVENT_HELPER="${hdir}/ctdb_event_helper" export CTDB_RECOVERY_HELPER="${hdir}/ctdb_recovery_helper"