mirror of
https://github.com/samba-team/samba.git
synced 2025-01-08 21:18:16 +03:00
If the event scripts hang EventScriptBanCount consecutive times,
the node will ban itself for the default recovery ban period. (This used to be ctdb commit 7239d7ecd54037b11eddf47328a3129d281e7d4a)
This commit is contained in:
parent
30535c815d
commit
779468ab3f
@ -102,6 +102,7 @@ struct ctdb_tunable {
|
|||||||
uint32_t monitor_interval;
|
uint32_t monitor_interval;
|
||||||
uint32_t tickle_update_interval;
|
uint32_t tickle_update_interval;
|
||||||
uint32_t script_timeout;
|
uint32_t script_timeout;
|
||||||
|
uint32_t script_ban_count; /* ban after this many consec timeouts*/
|
||||||
uint32_t recovery_grace_period;
|
uint32_t recovery_grace_period;
|
||||||
uint32_t recovery_ban_period;
|
uint32_t recovery_ban_period;
|
||||||
uint32_t database_hash_size;
|
uint32_t database_hash_size;
|
||||||
@ -410,6 +411,7 @@ struct ctdb_context {
|
|||||||
struct ctdb_monitor_state *monitor;
|
struct ctdb_monitor_state *monitor;
|
||||||
struct ctdb_log_state *log;
|
struct ctdb_log_state *log;
|
||||||
int start_as_disabled;
|
int start_as_disabled;
|
||||||
|
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
|
||||||
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
|
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -38,6 +38,7 @@ static const struct {
|
|||||||
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
|
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
|
||||||
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
|
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
|
||||||
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
|
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
|
||||||
|
{ "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
|
||||||
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
|
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
|
||||||
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
|
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
|
||||||
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
|
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
|
||||||
|
@ -222,6 +222,27 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
|||||||
talloc_set_destructor(state, NULL);
|
talloc_set_destructor(state, NULL);
|
||||||
talloc_free(state);
|
talloc_free(state);
|
||||||
callback(ctdb, status, private_data);
|
callback(ctdb, status, private_data);
|
||||||
|
|
||||||
|
ctdb->event_script_timeouts = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct ctdb_ban_info b;
|
||||||
|
TDB_DATA data;
|
||||||
|
|
||||||
|
b.pnn = ctdb->pnn;
|
||||||
|
b.ban_time = ban_period;
|
||||||
|
|
||||||
|
data.dptr = (uint8_t *)&b;
|
||||||
|
data.dsize = sizeof(b);
|
||||||
|
|
||||||
|
ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
|
||||||
|
CTDB_SRVID_BAN_NODE, data);
|
||||||
|
if (ret != 0) {
|
||||||
|
DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -234,9 +255,17 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
|||||||
void *private_data = state->private_data;
|
void *private_data = state->private_data;
|
||||||
struct ctdb_context *ctdb = state->ctdb;
|
struct ctdb_context *ctdb = state->ctdb;
|
||||||
|
|
||||||
DEBUG(DEBUG_ERR,("event script timed out : %s\n", state->options));
|
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
|
||||||
|
|
||||||
talloc_free(state);
|
talloc_free(state);
|
||||||
callback(ctdb, -1, private_data);
|
callback(ctdb, -1, private_data);
|
||||||
|
|
||||||
|
ctdb->event_script_timeouts++;
|
||||||
|
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
|
||||||
|
ctdb->event_script_timeouts = 0;
|
||||||
|
DEBUG(DEBUG_ERR, ("Maximum timeout count reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.recovery_ban_period));
|
||||||
|
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -308,7 +337,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
|||||||
if (!timeval_is_zero(&timeout)) {
|
if (!timeval_is_zero(&timeout)) {
|
||||||
event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
|
event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
|
||||||
} else {
|
} else {
|
||||||
DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", fmt));
|
DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user