1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00

added admin commands to ban/unban nodes

(This used to be ctdb commit 4dad04172e7e4955b5bf6444a85b19901c9683ad)
This commit is contained in:
Andrew Tridgell 2007-06-07 16:34:33 +10:00
parent 59e74ac6eb
commit 23bf62fe30
5 changed files with 294 additions and 50 deletions

View File

@ -203,6 +203,9 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
return 0;
}
DEBUG(0, ("Control modflags on node %u - flags now 0x%x\n", ctdb->vnn, node->flags));
/* if we have been banned, go into recovery mode */
c.vnn = ctdb->vnn;
c.flags = node->flags;
@ -212,6 +215,15 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
/* tell the other nodes that something has changed */
ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_VNNMAP,
CTDB_SRVID_NODE_FLAGS_CHANGED, data);
if ((node->flags & NODE_FLAGS_BANNED) && !(old_flags & NODE_FLAGS_BANNED)) {
/* make sure we are frozen */
DEBUG(0,("This node has been banned - forcing freeze and recovery\n"));
if (!ctdb_blocking_freeze(ctdb)) {
ctdb_fatal(ctdb, "Unable to freeze when banned");
}
ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
}
return 0;
}

View File

@ -27,21 +27,80 @@
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
/*
  record of one banned node. One of these is allocated per ban by
  ctdb_ban_node() and stored in rec->banned_nodes[banned_node]; it is
  also the private data handed to the ban timeout callback
*/
struct ban_state {
/* the recovery daemon state this ban record belongs to */
struct ctdb_recoverd *rec;
/* vnn of the banned node, which is also its index in rec->banned_nodes */
uint32_t banned_node;
};
/*
private state of recovery daemon
*/
struct ctdb_recoverd {
/* the ctdb context this recovery daemon operates on */
struct ctdb_context *ctdb;
/* child talloc context of this structure, created with talloc_new(rec) */
TALLOC_CTX *mem_ctx;
/* reset to (uint32_t)-1 by the election handler;
   presumably the node blamed for the most recent recovery - TODO confirm */
uint32_t last_culprit;
/* incremented in do_recovery(); once it exceeds 2*nodemap->num the
   culprit node is banned */
uint32_t culprit_counter;
/* start of the period reported (via timeval_elapsed) when a culprit is banned */
struct timeval first_recover_time;
/* NOTE(review): banned_nodes is declared twice below because this view
   interleaves the removed and the added line of the change. The code in
   this change uses the 'struct ban_state **' form: an array with
   ctdb->num_nodes entries, indexed by vnn, where a non-NULL entry means
   that node is currently banned */
bool *banned_nodes;
struct ban_state **banned_nodes;
};
/*
  timeouts built from the tunables by passing them to timeval_current_ofs().
  Both macros expand to an expression that reads a variable called 'ctdb',
  so a 'struct ctdb_context *ctdb' must be in scope wherever they are used.
  CONTROL_TIMEOUT uses tunable.recover_timeout, MONITOR_TIMEOUT uses
  tunable.recover_interval
*/
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
/*
unban a node
*/
static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t vnn)
{
struct ctdb_context *ctdb = rec->ctdb;
if (rec->banned_nodes[vnn] == NULL) {
return;
}
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, 0, NODE_FLAGS_BANNED);
talloc_free(rec->banned_nodes[vnn]);
rec->banned_nodes[vnn] = NULL;
}
/*
  called when a ban has timed out

  Timed event callback installed by ctdb_ban_node(). 'p' is the
  struct ban_state of the ban that has expired; 'ev', 'te' and 't' are
  not used.
*/
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
	struct ban_state *state = talloc_get_type(p, struct ban_state);
	/* copy both fields out first: ctdb_unban_node() frees 'state' */
	struct ctdb_recoverd *rec = state->rec;
	uint32_t vnn = state->banned_node;

	DEBUG(0,("Node %u is now unbanned\n", vnn));

	ctdb_unban_node(rec, vnn);
}
/*
ban a node for a period of time
*/
static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t vnn, uint32_t ban_time)
{
struct ctdb_context *ctdb = rec->ctdb;
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, NODE_FLAGS_BANNED, 0);
rec->banned_nodes[vnn] = talloc(rec, struct ban_state);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[vnn]);
rec->banned_nodes[vnn]->rec = rec;
rec->banned_nodes[vnn]->banned_node = vnn;
if (ban_time != 0) {
event_add_timed(ctdb->ev, rec->banned_nodes[vnn],
timeval_current_ofs(ban_time, 0),
ctdb_ban_timeout, rec->banned_nodes[vnn]);
}
}
/*
change recovery mode on all nodes
*/
@ -439,24 +498,72 @@ static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_nod
}
struct ban_state {
struct ctdb_recoverd *rec;
uint32_t banned_node;
};
/*
  message handler for CTDB_SRVID_BAN_NODE - the administrator has asked
  for a node to be banned

  The payload must be exactly one struct ctdb_ban_info. The request is
  only acted upon when this node is the recmaster; on any other node, or
  when the recmaster cannot be determined, it is logged and dropped.
*/
static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid,
			TDB_DATA data, void *private_data)
{
	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
	struct ctdb_ban_info *request;
	uint32_t recmaster;

	if (data.dsize != sizeof(struct ctdb_ban_info)) {
		DEBUG(0,("Bad data in ban_handler\n"));
		return;
	}
	request = (struct ctdb_ban_info *)data.dptr;

	if (ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster) != 0) {
		DEBUG(0,(__location__ " Failed to find the recmaster\n"));
		return;
	}

	/* only the recmaster keeps ban records */
	if (ctdb->vnn != recmaster) {
		DEBUG(0,("We are not the recmaster - ignoring ban request\n"));
		return;
	}

	DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n",
		 request->vnn, request->ban_time));

	ctdb_ban_node(rec, request->vnn, request->ban_time);
}
/*
called when a ban has timed out
*/
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
handler for when the admin unbans a node
*/
static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid,
TDB_DATA data, void *private_data)
{
struct ban_state *state = talloc_get_type(p, struct ban_state);
DEBUG(0,("Node %u in now unbanned\n", state->banned_node));
state->rec->banned_nodes[state->banned_node] = false;
talloc_free(state);
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
uint32_t vnn;
int ret;
uint32_t recmaster;
if (data.dsize != sizeof(uint32_t)) {
DEBUG(0,("Bad data in unban_handler\n"));
return;
}
vnn = *(uint32_t *)data.dptr;
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
return;
}
if (recmaster != ctdb->vnn) {
DEBUG(0,("We are not the recmaster - ignoring unban request\n"));
return;
}
DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));
ctdb_unban_node(rec, vnn);
}
/*
we are the recmaster, and recovery is needed - start a recovery run
*/
@ -480,22 +587,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
rec->culprit_counter++;
if (rec->culprit_counter > 2*nodemap->num) {
struct ban_state *state;
DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
ctdb->tunable.recovery_ban_period));
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), culprit, NODE_FLAGS_BANNED, 0);
rec->banned_nodes[culprit] = true;
state = talloc(rec->mem_ctx, struct ban_state);
CTDB_NO_MEMORY_FATAL(ctdb, state);
state->rec = rec;
state->banned_node = culprit;
event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.recovery_ban_period, 0),
ctdb_ban_timeout, state);
ctdb_ban_node(rec, culprit, ctdb->tunable.recovery_ban_period);
}
if (!ctdb_recovery_lock(ctdb, true)) {
@ -592,7 +687,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
CTDB_NO_MEMORY(ctdb, vnnmap);
vnnmap->generation = generation;
vnnmap->size = num_active;
vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size);
vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size);
for (i=j=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
vnnmap->map[j++] = nodemap->nodes[i].vnn;
@ -755,14 +850,10 @@ static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
return;
}
/* release any ban information */
talloc_free(rec->mem_ctx);
rec->mem_ctx = talloc_new(rec);
CTDB_NO_MEMORY_FATAL(rec->mem_ctx, rec->banned_nodes);
/* release any bans */
rec->last_culprit = (uint32_t)-1;
talloc_free(rec->banned_nodes);
rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
talloc_free(mem_ctx);
@ -898,17 +989,20 @@ static void monitor_cluster(struct ctdb_context *ctdb)
CTDB_NO_MEMORY_FATAL(ctdb, rec);
rec->ctdb = ctdb;
rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
rec->mem_ctx = talloc_new(rec);
CTDB_NO_MEMORY_FATAL(ctdb, rec->mem_ctx);
/* register a message port for recovery elections */
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
/* and one for when nodes are disabled/enabled */
ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);
/* and one for when nodes are banned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);
/* and one for when nodes are unbanned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);
again:
need_takeover_run = false;
@ -965,7 +1059,7 @@ again:
/* count how many active nodes there are */
num_active = 0;
for (i=0; i<nodemap->num; i++) {
if (rec->banned_nodes[nodemap->nodes[i].vnn]) {
if (rec->banned_nodes[nodemap->nodes[i].vnn] != NULL) {
nodemap->nodes[i].flags |= NODE_FLAGS_BANNED;
} else {
nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED;

View File

@ -76,6 +76,16 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
/*
a message ID meaning that a node should be banned.
The message data is a struct ctdb_ban_info naming the node and the
ban time
*/
#define CTDB_SRVID_BAN_NODE 0xF500000000000000LL
/*
a message ID meaning that a node should be unbanned.
The message data is a single uint32_t holding the vnn of the node
*/
#define CTDB_SRVID_UNBAN_NODE 0xF600000000000000LL
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001

View File

@ -479,6 +479,14 @@ struct ctdb_node_modflags {
uint32_t clear;
};
/*
struct for admin setting a ban.
This is the data carried by a CTDB_SRVID_BAN_NODE message
*/
struct ctdb_ban_info {
/* vnn of the node to ban */
uint32_t vnn;
/* length of the ban in seconds; with 0 no expiry timer is set for the ban */
uint32_t ban_time;
};
enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
#define CTDB_LMASTER_ANY 0xffffffff

View File

@ -298,20 +298,37 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
printf("Number of nodes:%d\n", nodemap->num);
for(i=0;i<nodemap->num;i++){
const char *flags_str;
if (nodemap->nodes[i].flags & NODE_FLAGS_PERMANENTLY_DISABLED) {
flags_str = "DISABLED";
} else if (nodemap->nodes[i].flags & NODE_FLAGS_UNHEALTHY) {
flags_str = "UNHEALTHY";
} else if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
flags_str = "DISCONNECTED";
} else {
flags_str = "OK";
static const struct {
uint32_t flag;
const char *name;
} flag_names[] = {
{ NODE_FLAGS_DISCONNECTED, "DISCONNECTED" },
{ NODE_FLAGS_PERMANENTLY_DISABLED, "DISABLED" },
{ NODE_FLAGS_BANNED, "BANNED" },
{ NODE_FLAGS_UNHEALTHY, "UNHEALTHY" },
};
char *flags_str = NULL;
int j;
for (j=0;j<ARRAY_SIZE(flag_names);j++) {
if (nodemap->nodes[i].flags & flag_names[j].flag) {
if (flags_str == NULL) {
flags_str = talloc_strdup(ctdb, flag_names[j].name);
} else {
flags_str = talloc_asprintf_append(flags_str, "|%s",
flag_names[j].name);
}
CTDB_NO_MEMORY_FATAL(ctdb, flags_str);
}
}
if (flags_str == NULL) {
flags_str = talloc_strdup(ctdb, "OK");
CTDB_NO_MEMORY_FATAL(ctdb, flags_str);
}
printf("vnn:%d %-16s %s%s\n", nodemap->nodes[i].vnn,
inet_ntoa(nodemap->nodes[i].sin.sin_addr),
flags_str,
nodemap->nodes[i].vnn == myvnn?" (THIS NODE)":"");
talloc_free(flags_str);
}
ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), options.vnn, ctdb, &vnnmap);
@ -430,6 +447,107 @@ static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv
return 0;
}
/*
  ban a node from the cluster

  argv[0] is the ban time in seconds; 0 requests a ban without a timeout.
  The node to ban is options.vnn. When that is CTDB_BROADCAST_ALL the ban
  is requested for every connected node in turn and the individual
  results are or-ed together.

  The ban itself is carried out by the recmaster: we ask the target node
  who the recmaster is and send it a CTDB_SRVID_BAN_NODE message.

  Returns 0 on success and -1 if the ban time is not a valid number, the
  recmaster cannot be found or the message cannot be sent.
*/
static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int ret;
	uint32_t recmaster;
	struct ctdb_ban_info b;
	TDB_DATA data;
	uint32_t ban_time;
	unsigned long parsed;
	char *end = NULL;

	if (argc < 1) {
		usage();
	}

	/* validate the ban time before doing anything else. A bare
	   strtoul() turns a typo such as "abc" into 0, which would request
	   a ban without a timeout, and silently truncates values that do
	   not fit into 32 bits */
	parsed = strtoul(argv[0], &end, 0);
	ban_time = (uint32_t)parsed;
	if (argv[0][0] == '-' || end == argv[0] || *end != '\0' ||
	    (unsigned long)ban_time != parsed) {
		DEBUG(0,("Invalid ban time '%s'\n", argv[0]));
		return -1;
	}

	if (options.vnn == CTDB_BROADCAST_ALL) {
		uint32_t *nodes;
		uint32_t num_nodes;
		uint32_t i;

		ret = 0;

		nodes = ctdb_get_connected_nodes(ctdb, TIMELIMIT(), ctdb, &num_nodes);
		CTDB_NO_MEMORY(ctdb, nodes);
		for (i=0;i<num_nodes;i++) {
			/* the recursive call picks its target up from options.vnn */
			options.vnn = nodes[i];
			ret |= control_ban(ctdb, argc, argv);
		}
		talloc_free(nodes);
		return ret;
	}

	ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
	if (ret != 0) {
		DEBUG(0,("Failed to find the recmaster\n"));
		return -1;
	}

	b.vnn = options.vnn;
	b.ban_time = ban_time;

	data.dptr = (uint8_t *)&b;
	data.dsize = sizeof(b);

	ret = ctdb_send_message(ctdb, recmaster, CTDB_SRVID_BAN_NODE, data);
	if (ret != 0) {
		DEBUG(0,("Failed to tell the recmaster to ban node %u\n", options.vnn));
		return -1;
	}

	return 0;
}
/*
  unban a node from the cluster

  The node to unban is options.vnn. When that is CTDB_BROADCAST_ALL the
  unban is requested for every connected node in turn and the individual
  results are or-ed together.

  The target node is asked who the recmaster is, and the recmaster is
  then sent a CTDB_SRVID_UNBAN_NODE message carrying the vnn.

  Returns 0 on success and -1 if the recmaster cannot be found or the
  message cannot be sent.
*/
static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t recmaster;
	TDB_DATA msg;

	if (options.vnn == CTDB_BROADCAST_ALL) {
		uint32_t node_count;
		uint32_t *node_list;
		int status = 0;
		int idx;

		node_list = ctdb_get_connected_nodes(ctdb, TIMELIMIT(), ctdb, &node_count);
		CTDB_NO_MEMORY(ctdb, node_list);

		for (idx = 0; idx < node_count; idx++) {
			/* the recursive call picks its target up from options.vnn */
			options.vnn = node_list[idx];
			status |= control_unban(ctdb, argc, argv);
		}

		talloc_free(node_list);
		return status;
	}

	if (ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster) != 0) {
		DEBUG(0,("Failed to find the recmaster\n"));
		return -1;
	}

	/* the message data is just the vnn of the node to unban */
	msg.dptr = (uint8_t *)&options.vnn;
	msg.dsize = sizeof(uint32_t);

	if (ctdb_send_message(ctdb, recmaster, CTDB_SRVID_UNBAN_NODE, msg) != 0) {
		DEBUG(0,("Failed to tell the recmaster to unban node %u\n", options.vnn));
		return -1;
	}

	return 0;
}
/*
shutdown a daemon
*/
@ -871,8 +989,10 @@ static const struct {
{ "attach", control_attach, "attach to a database", "<dbname>" },
{ "dumpmemory", control_dumpmemory, "dump memory map to logs" },
{ "getpid", control_getpid, "get ctdbd process ID" },
{ "disable", control_disable, "disable a node" },
{ "enable", control_enable, "enable a node" },
{ "disable", control_disable, "disable a nodes public IP" },
{ "enable", control_enable, "enable a nodes public IP" },
{ "ban", control_ban, "ban a node from the cluster", "<bantime|0>"},
{ "unban", control_unban, "unban a node from the cluster" },
{ "shutdown", control_shutdown, "shutdown ctdbd" },
{ "recover", control_recover, "force recovery" },
{ "freeze", control_freeze, "freeze all databases" },