1
0
mirror of https://github.com/samba-team/samba.git synced 2025-03-09 08:58:35 +03:00

Merge root@10.1.1.27:/shared/ctdb/ctdb-git

(This used to be ctdb commit b869bb0e32d32422a5ba6b235864acba07f2b412)
This commit is contained in:
Ronnie Sahlberg 2009-09-04 02:00:14 +10:00
commit a1084c687f
10 changed files with 378 additions and 348 deletions

View File

@ -54,7 +54,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
server/ctdb_serverids.o server/ctdb_persistent.o \
server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.o \
server/ctdb_vacuum.o \
server/ctdb_vacuum.o server/ctdb_banning.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_store \

View File

@ -3842,3 +3842,48 @@ int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, u
return 0;
}
/*
  send a SET_BAN_STATE control to destnode

  The ctdb_ban_time structure is passed as the raw control payload.
  Returns 0 when both the transport call and the remote status are 0,
  otherwise logs an error and returns -1.
 */
int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
{
	TDB_DATA payload;
	int32_t status;
	int rc;

	payload.dptr  = (uint8_t *)bantime;
	payload.dsize = sizeof(*bantime);

	rc = ctdb_control(ctdb, destnode, 0,
			  CTDB_CONTROL_SET_BAN_STATE, 0, payload,
			  NULL, NULL, &status, &timeout, NULL);

	/* status is only meaningful when the control itself went through */
	if (rc == 0 && status == 0) {
		return 0;
	}

	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
	return -1;
}
/*
  fetch the ban state of destnode with a GET_BAN_STATE control

  On success *bantime points at the reply, which has been moved onto
  mem_ctx with talloc_steal(), and 0 is returned.
  On any failure (control failed, remote status non-zero, or a reply
  too small to hold a struct ctdb_ban_time) an error is logged, *bantime
  is left untouched and -1 is returned.
 */
int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
{
	int ret;
	TDB_DATA outdata;
	int32_t res;
	/* the reply is first received on a scratch context so that every
	   error path can drop it with a single talloc_free() */
	TALLOC_CTX *tmp_ctx = talloc_new(NULL);

	ret = ctdb_control(ctdb, destnode, 0,
			   CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
			   tmp_ctx, &outdata, &res, &timeout, NULL);
	if (ret != 0 || res != 0) {
		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ban state failed\n"));
		talloc_free(tmp_ctx);
		return -1;
	}

	/* the caller will dereference the reply as a struct ctdb_ban_time,
	   so refuse anything that is too short to be one */
	if (outdata.dptr == NULL || outdata.dsize < sizeof(struct ctdb_ban_time)) {
		DEBUG(DEBUG_ERR,(__location__ " Invalid reply size for get ban state\n"));
		talloc_free(tmp_ctx);
		return -1;
	}

	*bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
	talloc_free(tmp_ctx);

	return 0;
}

View File

@ -75,16 +75,6 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL
/*
a message ID meaning that a node should be banned
*/
#define CTDB_SRVID_BAN_NODE 0xF500000000000000LL
/*
a message ID meaning that a node should be unbanned
*/
#define CTDB_SRVID_UNBAN_NODE 0xF600000000000000LL
/*
a message to tell the recovery daemon to fetch a set of records
*/
@ -669,4 +659,13 @@ int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout
int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script);
int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script);
/*
  payload exchanged by ctdb_ctrl_set_ban() and ctdb_ctrl_get_ban()
 */
struct ctdb_ban_time {
/* number of the node the ban applies to */
uint32_t pnn;
/* ban duration in seconds; 0 means "unban" in a set request and
   "not banned" in a get reply */
uint32_t time;
};
int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime);
int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime);
#endif

View File

@ -226,6 +226,8 @@ struct ctdb_node {
by each node.
*/
struct ctdb_all_public_ips *public_ips;
/* used by the recovery daemon to track when a node should be banned */
struct ctdb_banning_state *ban_state;
};
/*
@ -429,6 +431,7 @@ struct ctdb_context {
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
TALLOC_CTX *script_monitoring_ctx; /* a context where we store results while running the monitor event */
TALLOC_CTX *last_monitoring_ctx;
TALLOC_CTX *banning_ctx;
};
struct ctdb_db_context {
@ -587,6 +590,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_EVENT_SCRIPT_DISABLED = 106,
CTDB_CONTROL_ENABLE_SCRIPT = 107,
CTDB_CONTROL_DISABLE_SCRIPT = 108,
CTDB_CONTROL_SET_BAN_STATE = 109,
CTDB_CONTROL_GET_BAN_STATE = 110,
};
/*
@ -1469,4 +1474,7 @@ int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
#endif

121
ctdb/server/ctdb_banning.c Normal file
View File

@ -0,0 +1,121 @@
/*
ctdb banning code
Copyright (C) Ronnie Sahlberg 2009
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "lib/tdb/include/tdb.h"
#include "system/time.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
static void
ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
DEBUG(DEBUG_ERR,("Banning timedout\n"));
ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_BANNED;
if (ctdb->banning_ctx != NULL) {
talloc_free(ctdb->banning_ctx);
ctdb->banning_ctx = NULL;
}
}
/*
  handle a SET_BAN_STATE control

  indata carries a struct ctdb_ban_time. A time of 0 is an unban request,
  anything else a ban for that many seconds.

  For a remote node only the BANNED flag in our copy of the node table is
  updated. For the local node the ban is additionally recorded in
  ctdb->banning_ctx and a timed event is armed that lifts the ban again
  when it expires.

  Returns 0 on success (including requests ignored because bans are
  disabled) and -1 for an invalid pnn or an allocation failure.
 */
int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr;

	DEBUG(DEBUG_INFO,("SET BAN STATE\n"));

	if (bantime->pnn != ctdb->pnn) {
		/* pnn is unsigned, so only the upper bound can be violated */
		if (bantime->pnn >= ctdb->num_nodes) {
			DEBUG(DEBUG_ERR,(__location__ " ERROR: Invalid ban request. PNN:%u is invalid. Max nodes %d\n", bantime->pnn, ctdb->num_nodes));
			return -1;
		}
		if (bantime->time == 0) {
			DEBUG(DEBUG_INFO,("unbanning node %u\n", bantime->pnn));
			ctdb->nodes[bantime->pnn]->flags &= ~NODE_FLAGS_BANNED;
		} else {
			DEBUG(DEBUG_INFO,("banning node %u\n", bantime->pnn));
			if (ctdb->tunable.enable_bans == 0) {
				DEBUG(DEBUG_INFO,("Bans are disabled - ignoring ban of node %u\n", bantime->pnn));
				return 0;
			}

			ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
		}
		return 0;
	}

	if (bantime->time == 0) {
		DEBUG(DEBUG_ERR,("Unbanning this node\n"));
		/* the expiry timer is parented to banning_ctx, so freeing
		   the context also drops the pending timeout */
		if (ctdb->banning_ctx != NULL) {
			talloc_free(ctdb->banning_ctx);
			ctdb->banning_ctx = NULL;
		}
		ctdb->nodes[bantime->pnn]->flags &= ~NODE_FLAGS_BANNED;
		return 0;
	}

	if (ctdb->tunable.enable_bans == 0) {
		/* An ignored request must not touch a ban that is already
		   running: freeing banning_ctx here would cancel its expiry
		   timer while leaving NODE_FLAGS_BANNED set. */
		DEBUG(DEBUG_ERR,("Bans are disabled - ignoring ban of node %u\n", bantime->pnn));
		return 0;
	}

	/* a new ban replaces any previous one, including its timer */
	if (ctdb->banning_ctx != NULL) {
		talloc_free(ctdb->banning_ctx);
		ctdb->banning_ctx = NULL;
	}

	ctdb->banning_ctx = talloc(ctdb, struct ctdb_ban_time);
	if (ctdb->banning_ctx == NULL) {
		DEBUG(DEBUG_CRIT,(__location__ " ERROR Failed to allocate new banning state\n"));
		return -1;
	}
	/* keep a copy of the request so GET_BAN_STATE can report it */
	*((struct ctdb_ban_time *)(ctdb->banning_ctx)) = *bantime;

	DEBUG(DEBUG_ERR,("Banning this node for %u seconds\n", bantime->time));
	ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;

	event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);

	return 0;
}
/*
  handle a GET_BAN_STATE control

  Replies with a struct ctdb_ban_time allocated as a talloc child of
  outdata: a copy of the recorded ban when ctdb->banning_ctx is set,
  otherwise our own pnn with a time of 0.
 */
int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	const struct ctdb_ban_time *recorded = (struct ctdb_ban_time *)(ctdb->banning_ctx);
	struct ctdb_ban_time *reply = talloc(outdata, struct ctdb_ban_time);

	CTDB_NO_MEMORY(ctdb, reply);

	if (recorded == NULL) {
		/* no ban recorded for this node */
		reply->pnn  = ctdb->pnn;
		reply->time = 0;
	} else {
		*reply = *recorded;
	}

	outdata->dsize = sizeof(struct ctdb_ban_time);
	outdata->dptr  = (uint8_t *)reply;

	return 0;
}

View File

@ -518,6 +518,14 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_DISABLE_SCRIPT:
return ctdb_control_disable_script(ctdb, indata);
case CTDB_CONTROL_SET_BAN_STATE:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_ban_time));
return ctdb_control_set_ban_state(ctdb, indata);
case CTDB_CONTROL_GET_BAN_STATE:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_get_ban_state(ctdb, outdata);
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;

View File

@ -306,6 +306,14 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
}
}
/* we dont let other nodes modify our BANNED status */
if (c->pnn == ctdb->pnn) {
node->flags &= ~NODE_FLAGS_BANNED;
if (old_flags & NODE_FLAGS_BANNED) {
node->flags |= NODE_FLAGS_BANNED;
}
}
if (node->flags == c->old_flags) {
DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
return 0;

View File

@ -31,11 +31,6 @@
#include "dlinklist.h"
struct ban_state {
struct ctdb_recoverd *rec;
uint32_t banned_node;
};
/* list of "ctdb ipreallocate" processes to call back when we have
finished the takeover run.
*/
@ -44,6 +39,11 @@ struct ip_reallocate_list {
struct rd_memdump_reply *rd;
};
/*
  per-node misbehaviour bookkeeping kept by the recovery daemon
  (hung off struct ctdb_node via ban_state)
 */
struct ctdb_banning_state {
/* accumulated culprit count; reset to 0 once the node has been banned
   or when the previous report is older than the recovery grace period */
uint32_t count;
/* time of the most recent culprit report for this node */
struct timeval last_reported_time;
};
/*
private state of recovery daemon
*/
@ -52,11 +52,8 @@ struct ctdb_recoverd {
uint32_t recmaster;
uint32_t num_active;
uint32_t num_connected;
uint32_t last_culprit_node;
struct ctdb_node_map *nodemap;
uint32_t last_culprit;
uint32_t culprit_counter;
struct timeval first_recover_time;
struct ban_state **banned_nodes;
struct timeval priority_time;
bool need_takeover_run;
bool need_recovery;
@ -72,75 +69,14 @@ struct ctdb_recoverd {
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
/*
unban a node
*/
static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t pnn)
{
struct ctdb_context *ctdb = rec->ctdb;
DEBUG(DEBUG_NOTICE,("Unbanning node %u\n", pnn));
if (!ctdb_validate_pnn(ctdb, pnn)) {
DEBUG(DEBUG_ERR,("Bad pnn %u in ctdb_unban_node\n", pnn));
return;
}
/* If we are unbanning a different node then just pass the ban info on */
if (pnn != ctdb->pnn) {
TDB_DATA data;
int ret;
DEBUG(DEBUG_NOTICE,("Unanning remote node %u. Passing the ban request on to the remote node.\n", pnn));
data.dptr = (uint8_t *)&pnn;
data.dsize = sizeof(uint32_t);
ret = ctdb_send_message(ctdb, pnn, CTDB_SRVID_UNBAN_NODE, data);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to unban node %u\n", pnn));
return;
}
return;
}
/* make sure we remember we are no longer banned in case
there is an election */
rec->node_flags &= ~NODE_FLAGS_BANNED;
DEBUG(DEBUG_INFO,("Clearing ban flag on node %u\n", pnn));
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, 0, NODE_FLAGS_BANNED);
if (rec->banned_nodes[pnn] == NULL) {
DEBUG(DEBUG_INFO,("No ban recorded for this node. ctdb_unban_node() request ignored\n"));
return;
}
talloc_free(rec->banned_nodes[pnn]);
rec->banned_nodes[pnn] = NULL;
}
/*
called when a ban has timed out
*/
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
struct ban_state *state = talloc_get_type(p, struct ban_state);
struct ctdb_recoverd *rec = state->rec;
uint32_t pnn = state->banned_node;
DEBUG(DEBUG_NOTICE,("Ban timeout. Node %u is now unbanned\n", pnn));
ctdb_unban_node(rec, pnn);
}
/*
ban a node for a period of time
*/
static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_time)
{
int ret;
struct ctdb_context *ctdb = rec->ctdb;
struct ctdb_ban_time bantime;
DEBUG(DEBUG_NOTICE,("Banning node %u for %u seconds\n", pnn, ban_time));
@ -149,61 +85,15 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_
return;
}
if (0 == ctdb->tunable.enable_bans) {
DEBUG(DEBUG_INFO,("Bans are disabled - ignoring ban of node %u\n", pnn));
return;
}
bantime.pnn = pnn;
bantime.time = ban_time;
/* If we are banning a different node then just pass the ban info on */
if (pnn != ctdb->pnn) {
struct ctdb_ban_info b;
TDB_DATA data;
int ret;
DEBUG(DEBUG_NOTICE,("Banning remote node %u for %u seconds. Passing the ban request on to the remote node.\n", pnn, ban_time));
b.pnn = pnn;
b.ban_time = ban_time;
data.dptr = (uint8_t *)&b;
data.dsize = sizeof(b);
ret = ctdb_send_message(ctdb, pnn, CTDB_SRVID_BAN_NODE, data);
ret = ctdb_ctrl_set_ban(ctdb, CONTROL_TIMEOUT(), pnn, &bantime);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to ban node %u\n", pnn));
DEBUG(DEBUG_ERR,(__location__ " Failed to ban node %d\n", pnn));
return;
}
return;
}
DEBUG(DEBUG_NOTICE,("self ban - lowering our election priority\n"));
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, NODE_FLAGS_BANNED, 0);
/* banning ourselves - lower our election priority */
rec->priority_time = timeval_current();
/* make sure we remember we are banned in case there is an
election */
rec->node_flags |= NODE_FLAGS_BANNED;
if (rec->banned_nodes[pnn] != NULL) {
DEBUG(DEBUG_NOTICE,("Re-banning an already banned node. Remove previous ban and set a new ban.\n"));
talloc_free(rec->banned_nodes[pnn]);
rec->banned_nodes[pnn] = NULL;
}
rec->banned_nodes[pnn] = talloc(rec->banned_nodes, struct ban_state);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[pnn]);
rec->banned_nodes[pnn]->rec = rec;
rec->banned_nodes[pnn]->banned_node = pnn;
if (ban_time != 0) {
event_add_timed(ctdb->ev, rec->banned_nodes[pnn],
timeval_current_ofs(ban_time, 0),
ctdb_ban_timeout, rec->banned_nodes[pnn]);
}
}
enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
@ -239,39 +129,44 @@ static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node
/*
remember the trouble maker
*/
static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
static void ctdb_set_culprit_count(struct ctdb_recoverd *rec, uint32_t culprit, uint32_t count)
{
struct ctdb_context *ctdb = rec->ctdb;
struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
struct ctdb_banning_state *ban_state;
if (rec->last_culprit != culprit ||
timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
DEBUG(DEBUG_NOTICE,("New recovery culprit %u\n", culprit));
/* either a new node is the culprit, or we've decided to forgive them */
rec->last_culprit = culprit;
rec->first_recover_time = timeval_current();
rec->culprit_counter = 0;
/* culprit is used as an index into ctdb->nodes[], whose valid entries are
   0 .. num_nodes-1, so num_nodes itself must be rejected as well */
if (culprit >= ctdb->num_nodes) {
	DEBUG(DEBUG_ERR,("Trying to set culprit %d but num_nodes is %d\n", culprit, ctdb->num_nodes));
	return;
}
rec->culprit_counter++;
if (ctdb->nodes[culprit]->ban_state == NULL) {
ctdb->nodes[culprit]->ban_state = talloc_zero(ctdb->nodes[culprit], struct ctdb_banning_state);
CTDB_NO_MEMORY_VOID(ctdb, ctdb->nodes[culprit]->ban_state);
}
ban_state = ctdb->nodes[culprit]->ban_state;
if (timeval_elapsed(&ban_state->last_reported_time) > ctdb->tunable.recovery_grace_period) {
/* this was the first time in a long while this node
misbehaved so we will forgive any old transgressions.
*/
ban_state->count = 0;
}
ban_state->count += count;
ban_state->last_reported_time = timeval_current();
rec->last_culprit_node = culprit;
}
/*
remember the trouble maker
*/
static void ctdb_set_culprit_count(struct ctdb_recoverd *rec, uint32_t culprit, uint32_t count)
static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
{
struct ctdb_context *ctdb = rec->ctdb;
if (rec->last_culprit != culprit ||
timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
DEBUG(DEBUG_NOTICE,("New recovery culprit %u\n", culprit));
/* either a new node is the culprit, or we've decided to forgive them */
rec->last_culprit = culprit;
rec->first_recover_time = timeval_current();
rec->culprit_counter = 0;
}
rec->culprit_counter += count;
ctdb_set_culprit_count(rec, culprit, 1);
}
/* this callback is called for every node that failed to execute the
start recovery event
*/
@ -708,62 +603,6 @@ static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_nod
}
/*
handler for when the admin bans a node
*/
static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid,
TDB_DATA data, void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
if (data.dsize != sizeof(*b)) {
DEBUG(DEBUG_ERR,("Bad data in ban_handler\n"));
talloc_free(mem_ctx);
return;
}
if (b->pnn != ctdb->pnn) {
DEBUG(DEBUG_ERR,("Got a ban request for pnn:%u but our pnn is %u. Ignoring ban request\n", b->pnn, ctdb->pnn));
return;
}
DEBUG(DEBUG_NOTICE,("Node %u has been banned for %u seconds\n",
b->pnn, b->ban_time));
ctdb_ban_node(rec, b->pnn, b->ban_time);
talloc_free(mem_ctx);
}
/*
handler for when the admin unbans a node
*/
static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid,
TDB_DATA data, void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
uint32_t pnn;
if (data.dsize != sizeof(uint32_t)) {
DEBUG(DEBUG_ERR,("Bad data in unban_handler\n"));
talloc_free(mem_ctx);
return;
}
pnn = *(uint32_t *)data.dptr;
if (pnn != ctdb->pnn) {
DEBUG(DEBUG_ERR,("Got an unban request for pnn:%u but our pnn is %u. Ignoring unban request\n", pnn, ctdb->pnn));
return;
}
DEBUG(DEBUG_NOTICE,("Node %u has been unbanned.\n", pnn));
ctdb_unban_node(rec, pnn);
talloc_free(mem_ctx);
}
struct vacuum_info {
struct vacuum_info *next, *prev;
struct ctdb_recoverd *rec;
@ -1331,8 +1170,7 @@ static void reload_nodes_file(struct ctdb_context *ctdb)
*/
static int do_recovery(struct ctdb_recoverd *rec,
TALLOC_CTX *mem_ctx, uint32_t pnn,
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap,
int32_t culprit)
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
{
struct ctdb_context *ctdb = rec->ctdb;
int i, j, ret;
@ -1347,15 +1185,21 @@ static int do_recovery(struct ctdb_recoverd *rec,
/* if recovery fails, force it again */
rec->need_recovery = true;
if (culprit != -1) {
ctdb_set_culprit(rec, culprit);
}
for (i=0; i<ctdb->num_nodes; i++) {
struct ctdb_banning_state *ban_state;
if (rec->culprit_counter > 2*nodemap->num) {
DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
rec->last_culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
if (ctdb->nodes[i]->ban_state == NULL) {
continue;
}
ban_state = (struct ctdb_banning_state *)ctdb->nodes[i]->ban_state;
if (ban_state->count < 2*ctdb->num_nodes) {
continue;
}
DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries recently - banning it for %u seconds\n",
ctdb->nodes[i]->pnn, ban_state->count,
ctdb->tunable.recovery_ban_period));
ctdb_ban_node(rec, rec->last_culprit, ctdb->tunable.recovery_ban_period);
ctdb_ban_node(rec, ctdb->nodes[i]->pnn, ctdb->tunable.recovery_ban_period);
ban_state->count = 0;
}
@ -1371,7 +1215,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
DEBUG(DEBUG_ERR,("Recovery lock taken successfully by recovery daemon\n"));
}
DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node));
/* get a list of all databases */
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
@ -1953,12 +1797,6 @@ static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
return;
}
/* release any bans */
rec->last_culprit = (uint32_t)-1;
talloc_free(rec->banned_nodes);
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
talloc_free(mem_ctx);
return;
}
@ -2666,8 +2504,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
CTDB_NO_MEMORY_FATAL(ctdb, rec);
rec->ctdb = ctdb;
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
rec->priority_time = timeval_current();
@ -2683,12 +2519,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
/* when we are asked to push out a flag change */
ctdb_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec);
/* when nodes are banned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);
/* and one for when nodes are unbanned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);
/* register a message port for vacuum fetch */
ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
@ -2739,11 +2569,21 @@ again:
as early as possible so we dont wait until we have pulled the node
map from the local node. thats why we have the hardcoded value 20
*/
if (rec->culprit_counter > 20) {
DEBUG(DEBUG_NOTICE,("Node %u has caused %u failures in %.0f seconds - banning it for %u seconds\n",
rec->last_culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
for (i=0; i<ctdb->num_nodes; i++) {
struct ctdb_banning_state *ban_state;
if (ctdb->nodes[i]->ban_state == NULL) {
continue;
}
ban_state = (struct ctdb_banning_state *)ctdb->nodes[i]->ban_state;
if (ban_state->count < 20) {
continue;
}
DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries recently - banning it for %u seconds\n",
ctdb->nodes[i]->pnn, ban_state->count,
ctdb->tunable.recovery_ban_period));
ctdb_ban_node(rec, rec->last_culprit, ctdb->tunable.recovery_ban_period);
ctdb_ban_node(rec, ctdb->nodes[i]->pnn, ctdb->tunable.recovery_ban_period);
ban_state->count = 0;
}
/* get relevant tunables */
@ -2860,34 +2700,7 @@ again:
/* check that we (recovery daemon) and the local ctdb daemon
agrees on whether we are banned or not
*/
if (nodemap->nodes[pnn].flags & NODE_FLAGS_BANNED) {
if (rec->banned_nodes[pnn] == NULL) {
if (rec->recmaster == pnn) {
DEBUG(DEBUG_NOTICE,("Local ctdb daemon on recmaster thinks this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
ctdb_unban_node(rec, pnn);
} else {
DEBUG(DEBUG_NOTICE,("Local ctdb daemon on non-recmaster thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
ctdb_set_culprit(rec, pnn);
}
goto again;
}
} else {
if (rec->banned_nodes[pnn] != NULL) {
if (rec->recmaster == pnn) {
DEBUG(DEBUG_NOTICE,("Local ctdb daemon on recmaster does not think this node is BANNED but the recovery master disagrees. Unbanning the node\n"));
ctdb_unban_node(rec, pnn);
} else {
DEBUG(DEBUG_NOTICE,("Local ctdb daemon on non-recmaster does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n"));
ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
ctdb_set_culprit(rec, pnn);
}
goto again;
}
}
//qqq
/* remember our own node flags */
rec->node_flags = nodemap->nodes[pnn].flags;
@ -3021,7 +2834,7 @@ again:
if (rec->need_recovery) {
/* a previous recovery didn't finish */
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, -1);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
@ -3030,7 +2843,8 @@ again:
*/
switch (verify_recmode(ctdb, nodemap)) {
case MONITOR_RECOVERY_NEEDED:
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
case MONITOR_FAILED:
goto again;
@ -3046,7 +2860,8 @@ again:
ret = check_recovery_lock(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed check_recovery_lock. Force a recovery\n"));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
}
@ -3086,7 +2901,8 @@ again:
if (remote_nodemaps[j]->num != nodemap->num) {
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
nodemap->nodes[j].pnn, remote_nodemaps[j]->num, nodemap->num));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, nodemap->nodes[j].pnn);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
@ -3098,8 +2914,9 @@ again:
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n",
nodemap->nodes[j].pnn, i,
remote_nodemaps[j]->nodes[i].pnn, nodemap->nodes[i].pnn));
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, nodemap->nodes[j].pnn);
vnnmap);
goto again;
}
}
@ -3120,14 +2937,16 @@ again:
if (i == j) {
DEBUG(DEBUG_ERR,("Use flags 0x%02x from remote node %d for cluster update of its own flags\n", remote_nodemaps[j]->nodes[i].flags, j));
update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, remote_nodemaps[j]->nodes[i].flags);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, nodemap->nodes[j].pnn);
vnnmap);
goto again;
} else {
DEBUG(DEBUG_ERR,("Use flags 0x%02x from local recmaster node for cluster update of node %d flags\n", nodemap->nodes[i].flags, i));
update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, nodemap->nodes[i].flags);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, nodemap->nodes[j].pnn);
vnnmap);
goto again;
}
}
@ -3141,7 +2960,8 @@ again:
if (vnnmap->size != rec->num_active) {
DEBUG(DEBUG_ERR, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n",
vnnmap->size, rec->num_active));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
@ -3164,7 +2984,8 @@ again:
if (i == vnnmap->size) {
DEBUG(DEBUG_ERR, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n",
nodemap->nodes[j].pnn));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, nodemap->nodes[j].pnn);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
}
@ -3193,7 +3014,8 @@ again:
if (vnnmap->generation != remote_vnnmap->generation) {
DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
nodemap->nodes[j].pnn, remote_vnnmap->generation, vnnmap->generation));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, nodemap->nodes[j].pnn);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
@ -3201,7 +3023,8 @@ again:
if (vnnmap->size != remote_vnnmap->size) {
DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
nodemap->nodes[j].pnn, remote_vnnmap->size, vnnmap->size));
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, nodemap->nodes[j].pnn);
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
goto again;
}
@ -3210,8 +3033,9 @@ again:
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
DEBUG(DEBUG_ERR, (__location__ " Remote node %u has different vnnmap.\n",
nodemap->nodes[j].pnn));
ctdb_set_culprit(rec, nodemap->nodes[j].pnn);
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, nodemap->nodes[j].pnn);
vnnmap);
goto again;
}
}
@ -3225,15 +3049,15 @@ again:
ret = run_startrecovery_eventscript(rec, nodemap);
if (ret!=0) {
DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
}
ret = ctdb_takeover_run(ctdb, nodemap);
if (ret != 0) {
DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
}
/* execute the "recovered" event script on all nodes */
@ -3245,8 +3069,8 @@ again:
// cascading recovery.
if (ret!=0) {
DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event on cluster. Update of public ips failed.\n"));
do_recovery(rec, mem_ctx, pnn, nodemap,
vnnmap, ctdb->pnn);
ctdb_set_culprit(rec, ctdb->pnn);
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
}
#endif
}

View File

@ -625,21 +625,16 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
{
int ret;
struct ctdb_ban_info b;
TDB_DATA data;
struct ctdb_ban_time bantime;
b.pnn = ctdb->pnn;
b.ban_time = ban_period;
bantime.pnn = ctdb->pnn;
bantime.time = ban_period;
data.dptr = (uint8_t *)&b;
data.dsize = sizeof(b);
data.dsize = sizeof(bantime);
data.dptr = (uint8_t *)&bantime;
ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
CTDB_SRVID_BAN_NODE, data);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
}
ctdb_control_set_ban_state(ctdb, data);
}

View File

@ -1678,6 +1678,17 @@ again:
exit(10);
}
/* check that there are nodes available that can act as a recmaster */
for (i=0; i<nodemap->num; i++) {
	if (nodemap->nodes[i].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
		continue;
	}
	/* found an eligible node: stop here so that i stays below
	   nodemap->num and the check below can tell the cases apart */
	break;
}
if (i == nodemap->num) {
	/* every node is deleted, banned or stopped - nothing to do */
	return 0;
}
/* verify the recovery master is not STOPPED, nor BANNED */
if (nodemap->nodes[recmaster].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
DEBUG(DEBUG_ERR,("No suitable recmaster found. Try again\n"));
@ -1898,20 +1909,13 @@ static uint32_t get_generation(struct ctdb_context *ctdb)
static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
struct ctdb_ban_info b;
TDB_DATA data;
uint32_t ban_time;
struct ctdb_node_map *nodemap=NULL;
uint32_t generation, next_generation;
struct ctdb_ban_time bantime;
if (argc < 1) {
usage();
}
/* record the current generation number */
generation = get_generation(ctdb);
/* verify the node exists */
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
if (ret != 0) {
@ -1924,27 +1928,19 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
ban_time = strtoul(argv[0], NULL, 0);
bantime.pnn = options.pnn;
bantime.time = strtoul(argv[0], NULL, 0);
b.pnn = options.pnn;
b.ban_time = ban_time;
data.dptr = (uint8_t *)&b;
data.dsize = sizeof(b);
ret = ctdb_send_message(ctdb, options.pnn, CTDB_SRVID_BAN_NODE, data);
ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to ban node %u\n", options.pnn));
DEBUG(DEBUG_ERR,("Banning node %d for %d seconds failed.\n", bantime.pnn, bantime.time));
return -1;
}
/* wait until we are in a new generation */
while (1) {
next_generation = get_generation(ctdb);
if (next_generation != generation) {
return 0;
}
sleep(1);
ret = control_ipreallocate(ctdb, argc, argv);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
}
return 0;
@ -1957,16 +1953,10 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
TDB_DATA data;
uint32_t generation, next_generation;
struct ctdb_node_map *nodemap=NULL;
struct ctdb_ban_time bantime;
/* record the current generation number */
generation = get_generation(ctdb);
data.dptr = (uint8_t *)&options.pnn;
data.dsize = sizeof(uint32_t);
/* verify the node exists */
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
@ -1974,29 +1964,60 @@ static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
}
if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED)) {
DEBUG(DEBUG_ERR, ("Node %d is not banned. Can not unban\n", options.pnn));
DEBUG(DEBUG_ERR,("Node %u is not banned.\n", options.pnn));
return -1;
}
ret = ctdb_send_message(ctdb, options.pnn, CTDB_SRVID_UNBAN_NODE, data);
bantime.pnn = options.pnn;
bantime.time = 0;
ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to to unban node %u\n", options.pnn));
DEBUG(DEBUG_ERR,("Unbanning node %d failed.\n", bantime.pnn));
return -1;
}
/* wait until we are in a new generation */
while (1) {
next_generation = get_generation(ctdb);
if (next_generation != generation) {
return 0;
}
sleep(1);
ret = control_ipreallocate(ctdb, argc, argv);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
}
return 0;
}
/*
  show ban information for a node

  Queries options.pnn with ctdb_ctrl_get_ban() and prints either that the
  node is not banned (reported time of 0) or the ban time in seconds.
  argc/argv are not used.
  Returns 0 on success, the error from ctdb_ctrl_getnodemap() if the local
  nodemap cannot be fetched, or -1 if the ban state cannot be fetched.
 */
static int control_showban(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int ret;
	struct ctdb_node_map *nodemap=NULL;
	struct ctdb_ban_time *bantime;

	/* make sure the local daemon answers before querying the target.
	   NOTE(review): the nodemap is fetched but options.pnn is not
	   actually looked up in it */
	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
		return ret;
	}

	ret = ctdb_ctrl_get_ban(ctdb, TIMELIMIT(), options.pnn, ctdb, &bantime);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,("Showing ban info for node %d failed.\n", options.pnn));
		return -1;
	}

	/* both fields are uint32_t, hence %u */
	if (bantime->time == 0) {
		printf("Node %u is not banned\n", bantime->pnn);
	} else {
		printf("Node %u is banned for %u seconds\n", bantime->pnn, bantime->time);
	}

	return 0;
}
/*
shutdown a daemon
*/
@ -3360,7 +3381,8 @@ static const struct {
{ "stop", control_stop, true, false, "stop a node" },
{ "continue", control_continue, true, false, "re-start a stopped node" },
{ "ban", control_ban, true, false, "ban a node from the cluster", "<bantime|0>"},
{ "unban", control_unban, true, false, "unban a node from the cluster" },
{ "unban", control_unban, true, false, "unban a node" },
{ "showban", control_showban, true, false, "show ban information"},
{ "shutdown", control_shutdown, true, false, "shutdown ctdbd" },
{ "recover", control_recover, true, false, "force recovery" },
{ "ipreallocate", control_ipreallocate, true, false, "force the recovery daemon to perform a ip reallocation procedure" },