mirror of
https://github.com/samba-team/samba.git
synced 2024-12-23 17:34:34 +03:00
eventscript: remove cb_status, fix uninitialized bug when monitoring aborted
Previously we updated cb_status a each script finished. Since we're storing the status anyway, we can calculate it by iterating the scripts array itself, providing clear and uniform behavior on all code paths. In particular, this fixes a longstanding bug when we abort monitor scripts to run some other script: the cb_status was uninitialized. In this case, we need to hand *something* to the callback; 0 might make us go healthy when we shouldn't. So we use the last status (normally, this will be the just-saved current status). In addition, we make the case of failing the first fork for the script and failing other script forks the same: the error is returned via the callback and saved for viewing through 'ctdb scriptstatus'. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (This used to be ctdb commit 5d50f0e16948d18009f6623f132113f7273efc7f)
This commit is contained in:
parent
4c722fe34c
commit
8aec7e5656
@ -64,7 +64,6 @@ struct ctdb_event_script_state {
|
||||
pid_t child;
|
||||
/* Warning: this can free us! */
|
||||
void (*callback)(struct ctdb_context *, int, void *);
|
||||
int cb_status;
|
||||
int fd[2];
|
||||
void *private_data;
|
||||
bool from_user;
|
||||
@ -423,6 +422,31 @@ static int fork_child_for_script(struct ctdb_context *ctdb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Summarize status of this run of scripts.
|
||||
*/
|
||||
static int script_status(struct ctdb_scripts_wire *scripts)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < scripts->num_scripts; i++) {
|
||||
switch (scripts->scripts[i].status) {
|
||||
case -ENOENT:
|
||||
case -ENOEXEC:
|
||||
/* Disabled or missing; that's OK. */
|
||||
break;
|
||||
case 0:
|
||||
/* No problem. */
|
||||
break;
|
||||
default:
|
||||
return scripts->scripts[i].status;
|
||||
}
|
||||
}
|
||||
|
||||
/* All OK! */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* called when child is finished */
|
||||
static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
|
||||
uint16_t flags, void *p)
|
||||
@ -431,7 +455,7 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
talloc_get_type(p, struct ctdb_event_script_state);
|
||||
struct ctdb_script_wire *current = get_current_script(state);
|
||||
struct ctdb_context *ctdb = state->ctdb;
|
||||
int r;
|
||||
int r, status;
|
||||
|
||||
r = read(state->fd[0], ¤t->status, sizeof(current->status));
|
||||
if (r < 0) {
|
||||
@ -441,15 +465,6 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
}
|
||||
|
||||
current->finished = timeval_current();
|
||||
|
||||
/* update overall status based on this script. */
|
||||
state->cb_status = current->status;
|
||||
|
||||
/* don't stop just because it vanished or was disabled. */
|
||||
if (current->status == -ENOENT || current->status == -ENOEXEC) {
|
||||
state->cb_status = 0;
|
||||
}
|
||||
|
||||
/* valgrind gets overloaded if we run next script as it's still doing
|
||||
* post-execution analysis, so kill finished child here. */
|
||||
if (ctdb->valgrinding) {
|
||||
@ -458,10 +473,12 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
|
||||
state->child = 0;
|
||||
|
||||
status = script_status(state->scripts);
|
||||
|
||||
/* Aborted or finished all scripts? We're done. */
|
||||
if (state->cb_status != 0 || state->current+1 == state->scripts->num_scripts) {
|
||||
if (status != 0 || state->current+1 == state->scripts->num_scripts) {
|
||||
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
|
||||
ctdb_eventscript_call_names[state->call], state->options, state->cb_status));
|
||||
ctdb_eventscript_call_names[state->call], state->options, status));
|
||||
|
||||
ctdb->event_script_timeouts = 0;
|
||||
talloc_free(state);
|
||||
@ -473,8 +490,9 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
|
||||
/* Next script! */
|
||||
state->current++;
|
||||
state->cb_status = fork_child_for_script(ctdb, state);
|
||||
if (state->cb_status != 0) {
|
||||
current++;
|
||||
current->status = fork_child_for_script(ctdb, state);
|
||||
if (current->status != 0) {
|
||||
/* This calls the callback. */
|
||||
talloc_free(state);
|
||||
}
|
||||
@ -486,19 +504,18 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
||||
{
|
||||
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
|
||||
struct ctdb_context *ctdb = state->ctdb;
|
||||
struct ctdb_script_wire *current = get_current_script(state);
|
||||
|
||||
DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
|
||||
ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
|
||||
DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u pid : %d\n",
|
||||
current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
|
||||
|
||||
state->cb_status = -ETIME;
|
||||
state->scripts->scripts[state->current].status = -ETIME;
|
||||
|
||||
if (kill(state->child, 0) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
|
||||
state->child = 0;
|
||||
}
|
||||
|
||||
state->scripts->scripts[state->current].status = state->cb_status;
|
||||
|
||||
talloc_free(state);
|
||||
}
|
||||
|
||||
@ -507,6 +524,8 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
||||
*/
|
||||
static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (state->child) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
||||
|
||||
@ -520,7 +539,8 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
state->ctdb->current_monitor = NULL;
|
||||
}
|
||||
|
||||
/* Save our scripts as the last executed status, if we have them. */
|
||||
/* Save our scripts as the last executed status, if we have them.
|
||||
* See ctdb_event_script_callback_v where we abort monitor event. */
|
||||
if (state->scripts) {
|
||||
talloc_free(state->ctdb->last_status[state->call]);
|
||||
state->ctdb->last_status[state->call] = state->scripts;
|
||||
@ -529,10 +549,17 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
}
|
||||
}
|
||||
|
||||
/* Use last status as result, or "OK" if none. */
|
||||
if (state->ctdb->last_status[state->call]) {
|
||||
status = script_status(state->ctdb->last_status[state->call]);
|
||||
} else {
|
||||
status = 0;
|
||||
}
|
||||
|
||||
/* This is allowed to free us; talloc will prevent double free anyway,
|
||||
* but beware if you call this outside the destructor! */
|
||||
if (state->callback) {
|
||||
state->callback(state->ctdb, state->cb_status, state->private_data);
|
||||
state->callback(state->ctdb, status, state->private_data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -587,7 +614,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
const char *fmt, va_list ap)
|
||||
{
|
||||
struct ctdb_event_script_state *state;
|
||||
int ret;
|
||||
|
||||
state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
@ -653,22 +679,21 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
return -1;
|
||||
}
|
||||
state->current = 0;
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
|
||||
/* Nothing to do? */
|
||||
if (state->scripts->num_scripts == 0) {
|
||||
ctdb->event_script_timeouts = 0;
|
||||
talloc_free(state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = fork_child_for_script(ctdb, state);
|
||||
if (ret != 0) {
|
||||
talloc_free(state->scripts);
|
||||
state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
|
||||
if (state->scripts->scripts[0].status != 0) {
|
||||
/* Callback is called from destructor, with fail result. */
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
if (!timeval_is_zero(&state->timeout)) {
|
||||
event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user