mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
Use more liberal handling of event scripts timing out.
If the event script that timed out was for the "monitor" event, we still return SUCCESS to the caller that invoked the event script. The "monitor" event script is only considered to have failed if it actually terminated with an error, or if it hung and timed out 5 times in a row. (This used to be ctdb commit 60f3c04bd8b20ecbe937ffed08875cdc6898b422)
This commit is contained in:
parent
6eff9289d7
commit
6bfbec28a4
@ -38,7 +38,7 @@ static const struct {
|
||||
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
|
||||
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
|
||||
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
|
||||
{ "EventScriptBanCount", 3, offsetof(struct ctdb_tunable, script_ban_count) },
|
||||
{ "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
|
||||
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
|
||||
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
|
||||
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
|
||||
|
@ -257,15 +257,33 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
||||
|
||||
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
|
||||
|
||||
talloc_free(state);
|
||||
callback(ctdb, -1, private_data);
|
||||
if (!strcmp(state->options, "monitor")) {
|
||||
/* if it is a monitor event, we allow it to "hang" a few times
|
||||
before we declare it a failure and ban ourself (and make
|
||||
ourself unhealthy)
|
||||
*/
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
|
||||
|
||||
ctdb->event_script_timeouts++;
|
||||
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
|
||||
ctdb->event_script_timeouts = 0;
|
||||
DEBUG(DEBUG_ERR, ("Maximum timeout count reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.recovery_ban_period));
|
||||
ctdb->event_script_timeouts++;
|
||||
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
|
||||
ctdb->event_script_timeouts = 0;
|
||||
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
|
||||
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
|
||||
callback(ctdb, -1, private_data);
|
||||
} else {
|
||||
callback(ctdb, 0, private_data);
|
||||
}
|
||||
} else if (!strcmp(state->options, "startup")) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
|
||||
callback(ctdb, -1, private_data);
|
||||
} else {
|
||||
/* if it is not a monitor event we ban ourself immediately */
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
|
||||
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
|
||||
callback(ctdb, -1, private_data);
|
||||
}
|
||||
|
||||
talloc_free(state);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user