1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-02 09:47:23 +03:00

daemon: On shutdown, destroy timed events that check if recoverd is active

When CTDB is shutting down, the recovery daemon is stopped, but the
timed events that check whether the recovery daemon is still alive are
not destroyed. As a result, the recovery daemon is restarted during
shutdown if the CTDB daemon takes a long time to shut down.

There are two timed events that check whether the recovery daemon is working.

1. ctdb_check_recd() - which checks every 30 seconds if the recovery
   daemon process exists.

2. ctdb_recd_ping_timeout() - which is triggered when the recovery daemon
   fails to ping the CTDB daemon.

Both events are periodic and need to be destroyed when shutting down.

Signed-off-by: Amitay Isaacs <amitay@gmail.com>

(This used to be ctdb commit 746168df2e691058e601016110fae818c6a265c3)
This commit is contained in:
Amitay Isaacs 2012-12-04 15:05:44 +11:00
parent cad815164c
commit 30299c387f
3 changed files with 15 additions and 7 deletions

View File

@ -508,6 +508,7 @@ struct ctdb_context {
bool valgrinding;
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
uint32_t *recd_ping_count;
TALLOC_CTX *recd_ctx; /* a context used to track recoverd monitoring events */
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
TALLOC_CTX *event_script_ctx;

View File

@ -1162,6 +1162,10 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
return 0;
}
/* The recovery daemon will ping us at regular intervals.
If we haven't been pinged for a while we assume the recovery
daemon is inoperable and we restart it.
*/
static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
@ -1183,10 +1187,6 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
ctdb_start_recoverd(ctdb);
}
/* The recovery daemon will ping us at regular intervals.
If we havent been pinged for a while we assume the recovery
daemon is inoperable and we shut down.
*/
int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
{
talloc_free(ctdb->recd_ping_count);

View File

@ -4020,8 +4020,12 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
}
if (ctdb->recoverd_pid != 0) {
talloc_free(ctdb->recd_ctx);
ctdb->recd_ctx = talloc_new(ctdb);
CTDB_NO_MEMORY(ctdb, ctdb->recd_ctx);
close(fd[0]);
event_add_timed(ctdb->ev, ctdb,
event_add_timed(ctdb->ev, ctdb->recd_ctx,
timeval_current_ofs(30, 0),
ctdb_check_recd, ctdb);
return 0;
@ -4069,6 +4073,9 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb)
DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
ctdb_kill(ctdb, ctdb->recoverd_pid, SIGTERM);
TALLOC_FREE(ctdb->recd_ctx);
TALLOC_FREE(ctdb->recd_ping_count);
}
static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te,