mirror of
https://github.com/samba-team/samba.git
synced 2024-12-23 17:34:34 +03:00
added admin commands to ban/unban nodes
(This used to be ctdb commit 4dad04172e7e4955b5bf6444a85b19901c9683ad)
This commit is contained in:
parent
59e74ac6eb
commit
23bf62fe30
@ -203,6 +203,9 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG(0, ("Control modflags on node %u - flags now 0x%x\n", ctdb->vnn, node->flags));
|
||||
|
||||
/* if we have been banned, go into recovery mode */
|
||||
c.vnn = ctdb->vnn;
|
||||
c.flags = node->flags;
|
||||
|
||||
@ -212,6 +215,15 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
/* tell the other nodes that something has changed */
|
||||
ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_VNNMAP,
|
||||
CTDB_SRVID_NODE_FLAGS_CHANGED, data);
|
||||
|
||||
if ((node->flags & NODE_FLAGS_BANNED) && !(old_flags & NODE_FLAGS_BANNED)) {
|
||||
/* make sure we are frozen */
|
||||
DEBUG(0,("This node has been banned - forcing freeze and recovery\n"));
|
||||
if (!ctdb_blocking_freeze(ctdb)) {
|
||||
ctdb_fatal(ctdb, "Unable to freeze when banned");
|
||||
}
|
||||
ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -27,21 +27,80 @@
|
||||
#include "../include/ctdb.h"
|
||||
#include "../include/ctdb_private.h"
|
||||
|
||||
|
||||
struct ban_state {
|
||||
struct ctdb_recoverd *rec;
|
||||
uint32_t banned_node;
|
||||
};
|
||||
|
||||
/*
|
||||
private state of recovery daemon
|
||||
*/
|
||||
struct ctdb_recoverd {
|
||||
struct ctdb_context *ctdb;
|
||||
TALLOC_CTX *mem_ctx;
|
||||
uint32_t last_culprit;
|
||||
uint32_t culprit_counter;
|
||||
struct timeval first_recover_time;
|
||||
bool *banned_nodes;
|
||||
struct ban_state **banned_nodes;
|
||||
};
|
||||
|
||||
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
|
||||
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
|
||||
|
||||
/*
|
||||
unban a node
|
||||
*/
|
||||
static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t vnn)
|
||||
{
|
||||
struct ctdb_context *ctdb = rec->ctdb;
|
||||
|
||||
if (rec->banned_nodes[vnn] == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, 0, NODE_FLAGS_BANNED);
|
||||
|
||||
talloc_free(rec->banned_nodes[vnn]);
|
||||
rec->banned_nodes[vnn] = NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
called when a ban has timed out
|
||||
*/
|
||||
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
|
||||
{
|
||||
struct ban_state *state = talloc_get_type(p, struct ban_state);
|
||||
struct ctdb_recoverd *rec = state->rec;
|
||||
uint32_t vnn = state->banned_node;
|
||||
|
||||
DEBUG(0,("Node %u in now unbanned\n", vnn));
|
||||
ctdb_unban_node(rec, vnn);
|
||||
}
|
||||
|
||||
/*
|
||||
ban a node for a period of time
|
||||
*/
|
||||
static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t vnn, uint32_t ban_time)
|
||||
{
|
||||
struct ctdb_context *ctdb = rec->ctdb;
|
||||
|
||||
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, NODE_FLAGS_BANNED, 0);
|
||||
|
||||
rec->banned_nodes[vnn] = talloc(rec, struct ban_state);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[vnn]);
|
||||
|
||||
rec->banned_nodes[vnn]->rec = rec;
|
||||
rec->banned_nodes[vnn]->banned_node = vnn;
|
||||
|
||||
if (ban_time != 0) {
|
||||
event_add_timed(ctdb->ev, rec->banned_nodes[vnn],
|
||||
timeval_current_ofs(ban_time, 0),
|
||||
ctdb_ban_timeout, rec->banned_nodes[vnn]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
change recovery mode on all nodes
|
||||
*/
|
||||
@ -439,24 +498,72 @@ static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_nod
|
||||
}
|
||||
|
||||
|
||||
struct ban_state {
|
||||
struct ctdb_recoverd *rec;
|
||||
uint32_t banned_node;
|
||||
};
|
||||
/*
|
||||
handler for when the admin bans a node
|
||||
*/
|
||||
static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
TDB_DATA data, void *private_data)
|
||||
{
|
||||
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
|
||||
struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;
|
||||
uint32_t recmaster;
|
||||
int ret;
|
||||
|
||||
if (data.dsize != sizeof(*b)) {
|
||||
DEBUG(0,("Bad data in ban_handler\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (recmaster != ctdb->vnn) {
|
||||
DEBUG(0,("We are not the recmaster - ignoring ban request\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n",
|
||||
b->vnn, b->ban_time));
|
||||
ctdb_ban_node(rec, b->vnn, b->ban_time);
|
||||
}
|
||||
|
||||
/*
|
||||
called when a ban has timed out
|
||||
*/
|
||||
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
|
||||
handler for when the admin unbans a node
|
||||
*/
|
||||
static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
TDB_DATA data, void *private_data)
|
||||
{
|
||||
struct ban_state *state = talloc_get_type(p, struct ban_state);
|
||||
DEBUG(0,("Node %u in now unbanned\n", state->banned_node));
|
||||
|
||||
state->rec->banned_nodes[state->banned_node] = false;
|
||||
talloc_free(state);
|
||||
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
|
||||
uint32_t vnn;
|
||||
int ret;
|
||||
uint32_t recmaster;
|
||||
|
||||
if (data.dsize != sizeof(uint32_t)) {
|
||||
DEBUG(0,("Bad data in unban_handler\n"));
|
||||
return;
|
||||
}
|
||||
vnn = *(uint32_t *)data.dptr;
|
||||
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (recmaster != ctdb->vnn) {
|
||||
DEBUG(0,("We are not the recmaster - ignoring unban request\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));
|
||||
ctdb_unban_node(rec, vnn);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
we are the recmaster, and recovery is needed - start a recovery run
|
||||
*/
|
||||
@ -480,22 +587,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
rec->culprit_counter++;
|
||||
|
||||
if (rec->culprit_counter > 2*nodemap->num) {
|
||||
struct ban_state *state;
|
||||
|
||||
DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
|
||||
culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
|
||||
ctdb->tunable.recovery_ban_period));
|
||||
ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), culprit, NODE_FLAGS_BANNED, 0);
|
||||
rec->banned_nodes[culprit] = true;
|
||||
|
||||
state = talloc(rec->mem_ctx, struct ban_state);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, state);
|
||||
|
||||
state->rec = rec;
|
||||
state->banned_node = culprit;
|
||||
|
||||
event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.recovery_ban_period, 0),
|
||||
ctdb_ban_timeout, state);
|
||||
ctdb_ban_node(rec, culprit, ctdb->tunable.recovery_ban_period);
|
||||
}
|
||||
|
||||
if (!ctdb_recovery_lock(ctdb, true)) {
|
||||
@ -592,7 +687,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
CTDB_NO_MEMORY(ctdb, vnnmap);
|
||||
vnnmap->generation = generation;
|
||||
vnnmap->size = num_active;
|
||||
vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size);
|
||||
vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size);
|
||||
for (i=j=0;i<nodemap->num;i++) {
|
||||
if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
|
||||
vnnmap->map[j++] = nodemap->nodes[i].vnn;
|
||||
@ -755,14 +850,10 @@ static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
return;
|
||||
}
|
||||
|
||||
/* release any ban information */
|
||||
talloc_free(rec->mem_ctx);
|
||||
rec->mem_ctx = talloc_new(rec);
|
||||
CTDB_NO_MEMORY_FATAL(rec->mem_ctx, rec->banned_nodes);
|
||||
|
||||
/* release any bans */
|
||||
rec->last_culprit = (uint32_t)-1;
|
||||
talloc_free(rec->banned_nodes);
|
||||
rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
|
||||
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
|
||||
|
||||
talloc_free(mem_ctx);
|
||||
@ -898,17 +989,20 @@ static void monitor_cluster(struct ctdb_context *ctdb)
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, rec);
|
||||
|
||||
rec->ctdb = ctdb;
|
||||
rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
|
||||
rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
|
||||
|
||||
rec->mem_ctx = talloc_new(rec);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, rec->mem_ctx);
|
||||
|
||||
/* register a message port for recovery elections */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
|
||||
|
||||
/* and one for when nodes are disabled/enabled */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);
|
||||
|
||||
/* and one for when nodes are banned */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);
|
||||
|
||||
/* and one for when nodes are unbanned */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);
|
||||
|
||||
again:
|
||||
need_takeover_run = false;
|
||||
@ -965,7 +1059,7 @@ again:
|
||||
/* count how many active nodes there are */
|
||||
num_active = 0;
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
if (rec->banned_nodes[nodemap->nodes[i].vnn]) {
|
||||
if (rec->banned_nodes[nodemap->nodes[i].vnn] != NULL) {
|
||||
nodemap->nodes[i].flags |= NODE_FLAGS_BANNED;
|
||||
} else {
|
||||
nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED;
|
||||
|
@ -76,6 +76,16 @@ struct ctdb_call_info {
|
||||
*/
|
||||
#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
|
||||
|
||||
/*
|
||||
a message ID meaning that a node should be banned
|
||||
*/
|
||||
#define CTDB_SRVID_BAN_NODE 0xF500000000000000LL
|
||||
|
||||
/*
|
||||
a message ID meaning that a node should be unbanned
|
||||
*/
|
||||
#define CTDB_SRVID_UNBAN_NODE 0xF600000000000000LL
|
||||
|
||||
|
||||
/* used on the domain socket, send a pdu to the local daemon */
|
||||
#define CTDB_CURRENT_NODE 0xF0000001
|
||||
|
@ -479,6 +479,14 @@ struct ctdb_node_modflags {
|
||||
uint32_t clear;
|
||||
};
|
||||
|
||||
/*
|
||||
struct for admin setting a ban
|
||||
*/
|
||||
struct ctdb_ban_info {
|
||||
uint32_t vnn;
|
||||
uint32_t ban_time;
|
||||
};
|
||||
|
||||
enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
|
||||
|
||||
#define CTDB_LMASTER_ANY 0xffffffff
|
||||
|
@ -298,20 +298,37 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
|
||||
|
||||
printf("Number of nodes:%d\n", nodemap->num);
|
||||
for(i=0;i<nodemap->num;i++){
|
||||
const char *flags_str;
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_PERMANENTLY_DISABLED) {
|
||||
flags_str = "DISABLED";
|
||||
} else if (nodemap->nodes[i].flags & NODE_FLAGS_UNHEALTHY) {
|
||||
flags_str = "UNHEALTHY";
|
||||
} else if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
flags_str = "DISCONNECTED";
|
||||
} else {
|
||||
flags_str = "OK";
|
||||
static const struct {
|
||||
uint32_t flag;
|
||||
const char *name;
|
||||
} flag_names[] = {
|
||||
{ NODE_FLAGS_DISCONNECTED, "DISCONNECTED" },
|
||||
{ NODE_FLAGS_PERMANENTLY_DISABLED, "DISABLED" },
|
||||
{ NODE_FLAGS_BANNED, "BANNED" },
|
||||
{ NODE_FLAGS_UNHEALTHY, "UNHEALTHY" },
|
||||
};
|
||||
char *flags_str = NULL;
|
||||
int j;
|
||||
for (j=0;j<ARRAY_SIZE(flag_names);j++) {
|
||||
if (nodemap->nodes[i].flags & flag_names[j].flag) {
|
||||
if (flags_str == NULL) {
|
||||
flags_str = talloc_strdup(ctdb, flag_names[j].name);
|
||||
} else {
|
||||
flags_str = talloc_asprintf_append(flags_str, "|%s",
|
||||
flag_names[j].name);
|
||||
}
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, flags_str);
|
||||
}
|
||||
}
|
||||
if (flags_str == NULL) {
|
||||
flags_str = talloc_strdup(ctdb, "OK");
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, flags_str);
|
||||
}
|
||||
printf("vnn:%d %-16s %s%s\n", nodemap->nodes[i].vnn,
|
||||
inet_ntoa(nodemap->nodes[i].sin.sin_addr),
|
||||
flags_str,
|
||||
nodemap->nodes[i].vnn == myvnn?" (THIS NODE)":"");
|
||||
talloc_free(flags_str);
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), options.vnn, ctdb, &vnnmap);
|
||||
@ -430,6 +447,107 @@ static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
ban a node from the cluster
|
||||
*/
|
||||
static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
uint32_t recmaster;
|
||||
struct ctdb_ban_info b;
|
||||
TDB_DATA data;
|
||||
uint32_t ban_time;
|
||||
|
||||
if (argc < 1) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if (options.vnn == CTDB_BROADCAST_ALL) {
|
||||
uint32_t *nodes;
|
||||
uint32_t num_nodes;
|
||||
int i;
|
||||
|
||||
ret = 0;
|
||||
|
||||
nodes = ctdb_get_connected_nodes(ctdb, TIMELIMIT(), ctdb, &num_nodes);
|
||||
CTDB_NO_MEMORY(ctdb, nodes);
|
||||
for (i=0;i<num_nodes;i++) {
|
||||
options.vnn = nodes[i];
|
||||
ret |= control_ban(ctdb, argc, argv);
|
||||
}
|
||||
talloc_free(nodes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ban_time = strtoul(argv[0], NULL, 0);
|
||||
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to find the recmaster\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
b.vnn = options.vnn;
|
||||
b.ban_time = ban_time;
|
||||
|
||||
data.dptr = (uint8_t *)&b;
|
||||
data.dsize = sizeof(b);
|
||||
|
||||
ret = ctdb_send_message(ctdb, recmaster, CTDB_SRVID_BAN_NODE, data);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to tell the recmaster to ban node %u\n", options.vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
unban a node from the cluster
|
||||
*/
|
||||
static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
uint32_t recmaster;
|
||||
TDB_DATA data;
|
||||
|
||||
if (options.vnn == CTDB_BROADCAST_ALL) {
|
||||
uint32_t *nodes;
|
||||
uint32_t num_nodes;
|
||||
int i;
|
||||
|
||||
ret = 0;
|
||||
|
||||
nodes = ctdb_get_connected_nodes(ctdb, TIMELIMIT(), ctdb, &num_nodes);
|
||||
CTDB_NO_MEMORY(ctdb, nodes);
|
||||
for (i=0;i<num_nodes;i++) {
|
||||
options.vnn = nodes[i];
|
||||
ret |= control_unban(ctdb, argc, argv);
|
||||
}
|
||||
talloc_free(nodes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to find the recmaster\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
data.dptr = (uint8_t *)&options.vnn;
|
||||
data.dsize = sizeof(uint32_t);
|
||||
|
||||
ret = ctdb_send_message(ctdb, recmaster, CTDB_SRVID_UNBAN_NODE, data);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to tell the recmaster to unban node %u\n", options.vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
shutdown a daemon
|
||||
*/
|
||||
@ -871,8 +989,10 @@ static const struct {
|
||||
{ "attach", control_attach, "attach to a database", "<dbname>" },
|
||||
{ "dumpmemory", control_dumpmemory, "dump memory map to logs" },
|
||||
{ "getpid", control_getpid, "get ctdbd process ID" },
|
||||
{ "disable", control_disable, "disable a node" },
|
||||
{ "enable", control_enable, "enable a node" },
|
||||
{ "disable", control_disable, "disable a nodes public IP" },
|
||||
{ "enable", control_enable, "enable a nodes public IP" },
|
||||
{ "ban", control_ban, "ban a node from the cluster", "<bantime|0>"},
|
||||
{ "unban", control_unban, "unban a node from the cluster" },
|
||||
{ "shutdown", control_shutdown, "shutdown ctdbd" },
|
||||
{ "recover", control_recover, "force recovery" },
|
||||
{ "freeze", control_freeze, "freeze all databases" },
|
||||
|
Loading…
Reference in New Issue
Block a user