1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-08 21:18:16 +03:00

add a new node state : DELETED.

This is used to mark nodes as being DELETED internally in ctdb
so that nodes are not renumbered if / when they are removed from the nodes file.

This makes it possible to run "ctdb reloadnodes" at runtime without
causing nodes to be renumbered.
To do this, instead of deleting a node from the nodes file, just comment it out, like this:

   1.0.0.1
   #1.0.0.2
   1.0.0.3

After removing 1.0.0.2 from the cluster, the remaining nodes retain the
pnns they had prior to the deletion, namely 0 and 2.

Any line in the nodes file that is commented out represents a DELETED pnn

(This used to be ctdb commit 6a5e4fd7fa391206b463bb4e976502f3ac5bd343)
This commit is contained in:
Ronnie Sahlberg 2009-06-01 14:18:34 +10:00
parent 4259156050
commit e6170b5389
10 changed files with 121 additions and 17 deletions

View File

@ -101,6 +101,11 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_PUSH_NODE_FLAGS 0xF900000000000000LL
/*
a message ID to get the recovery daemon to reload the nodes file
*/
#define CTDB_SRVID_RELOAD_NODES 0xFA00000000000000LL
/* used on the domain socket, send a pdu to the local daemon */

View File

@ -198,7 +198,8 @@ struct ctdb_node {
#define NODE_FLAGS_PERMANENTLY_DISABLED 0x00000004 /* administrator has disabled node */
#define NODE_FLAGS_BANNED 0x00000008 /* recovery daemon has banned the node */
#define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED)
#define NODE_FLAGS_DELETED 0x00000010 /* this node has been deleted */
#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED)
uint32_t flags;
/* used by the dead node monitoring */

View File

@ -37,6 +37,11 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
/* send a keepalive to all other nodes, unless */
for (i=0;i<ctdb->num_nodes;i++) {
struct ctdb_node *node = ctdb->nodes[i];
if (node->flags & NODE_FLAGS_DELETED) {
continue;
}
if (node->pnn == ctdb->pnn) {
continue;
}

View File

@ -242,6 +242,10 @@ ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te,
continue;
}
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
/* any new or different nodes must be added */
if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) {
DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i));
@ -253,6 +257,9 @@ ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te,
}
}
/* tell the recovery daemon to reload the nodes file too */
ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELOAD_NODES, tdb_null);
talloc_free(tmp_ctx);
return;
}

View File

@ -1332,12 +1332,6 @@ static int do_recovery(struct ctdb_recoverd *rec,
DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
if (ctdb->num_nodes != nodemap->num) {
DEBUG(DEBUG_ERR, (__location__ " ctdb->num_nodes (%d) != nodemap->num (%d) reloading nodes file\n", ctdb->num_nodes, nodemap->num));
reload_nodes_file(ctdb);
return -1;
}
/* if recovery fails, force it again */
rec->need_recovery = true;
@ -1803,6 +1797,21 @@ DEBUG(DEBUG_ERR, ("recovery master memory dump\n"));
talloc_free(tmp_ctx);
}
/*
  message handler for reload_nodes requests.

  private_data is interpreted as the struct ctdb_recoverd; the nodes
  file is reloaded through the ctdb context stored in that structure.
  the ctdb, srvid and data arguments are not used here.
*/
static void reload_nodes_handler(struct ctdb_context *ctdb, uint64_t srvid,
				 TDB_DATA data, void *private_data)
{
	struct ctdb_recoverd *recoverd;

	recoverd = talloc_get_type(private_data, struct ctdb_recoverd);

	DEBUG(DEBUG_ERR, (__location__ " Reload nodes file from recovery daemon\n"));

	reload_nodes_file(recoverd->ctdb);
}
/*
handler for recovery master elections
*/
@ -2371,6 +2380,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
/* register a message port for vacuum fetch */
ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
/* register a message port for reloadnodes */
ctdb_set_message_handler(ctdb, CTDB_SRVID_RELOAD_NODES, reload_nodes_handler, rec);
again:
if (mem_ctx) {
talloc_free(mem_ctx);
@ -2591,14 +2603,16 @@ again:
goto again;
}
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
/* release any existing data */
if (ctdb->nodes[j]->public_ips) {
talloc_free(ctdb->nodes[j]->public_ips);
ctdb->nodes[j]->public_ips = NULL;
}
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
/* grab a new shiny list of public ips from the node */
if (ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(),
ctdb->nodes[j]->pnn,

View File

@ -45,6 +45,9 @@ int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip)
int nodeid;
for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
continue;
}
if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) {
return nodeid;
}
@ -89,7 +92,7 @@ int ctdb_set_tdb_dir_persistent(struct ctdb_context *ctdb, const char *dir)
}
/*
add a node to the list of active nodes
add a node to the list of nodes
*/
static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
{
@ -136,6 +139,46 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
return 0;
}
/*
  add an entry for a "deleted" node to the list of nodes.
  a "deleted" node is a node that is commented out in the nodes file.
  the placeholder entry keeps the subsequent nodes in the nodes list from
  changing their pnn value when a node is "deleted" by commenting it out
  and then using "ctdb reloadnodes" at runtime.

  the placeholder gets the address 0.0.0.0, the name "0.0.0.0:0" and the
  flags NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED.

  returns 0 on success and -1 if the placeholder address cannot be parsed.
  allocation failures are handed to CTDB_NO_MEMORY (defined elsewhere;
  presumably it logs and returns -1 from this function).
*/
static int ctdb_add_deleted_node(struct ctdb_context *ctdb)
{
	struct ctdb_node *node, **nodep;

	/* grow the node array by one slot */
	nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
	CTDB_NO_MEMORY(ctdb, nodep);

	ctdb->nodes = nodep;
	nodep = &ctdb->nodes[ctdb->num_nodes];
	(*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
	CTDB_NO_MEMORY(ctdb, *nodep);
	node = *nodep;

	if (ctdb_parse_address(ctdb, node, "0.0.0.0", &node->address) != 0) {
		DEBUG(DEBUG_ERR,("Failed to setup deleted node %d\n", ctdb->num_nodes));
		return -1;
	}
	node->ctdb = ctdb;

	/* check this allocation like the ones above, so that a node with
	   a NULL name is never added to the list */
	node->name = talloc_strdup(node, "0.0.0.0:0");
	CTDB_NO_MEMORY(ctdb, node->name);

	/* this assumes that the nodes are kept in sorted order, and no gaps */
	node->pnn = ctdb->num_nodes;

	/* this node is permanently deleted/disconnected */
	node->flags = NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED;

	node->dead_count = 0;

	/* only count the node once it is fully set up */
	ctdb->num_nodes++;

	return 0;
}
/*
setup the node list from a file
*/
@ -167,6 +210,10 @@ int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
node++;
}
if (*node == '#') {
if (ctdb_add_deleted_node(ctdb) != 0) {
talloc_free(lines);
return -1;
}
continue;
}
if (strcmp(node, "") == 0) {
@ -188,7 +235,11 @@ int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);
for(i=0;i<ctdb->vnn_map->size;i++) {
ctdb->vnn_map->map[i] = i;
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
ctdb->vnn_map->map[i] = -1;
} else {
ctdb->vnn_map->map[i] = i;
}
}
talloc_free(lines);
@ -437,7 +488,10 @@ static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
struct ctdb_req_header *hdr)
{
int i;
for (i=0;i<ctdb->num_nodes;i++) {
for (i=0; i < ctdb->num_nodes; i++) {
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
hdr->destnode = ctdb->nodes[i]->pnn;
ctdb_queue_packet(ctdb, hdr);
}
@ -463,7 +517,10 @@ static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
struct ctdb_req_header *hdr)
{
int i;
for (i=0;i<ctdb->num_nodes;i++) {
for (i=0; i < ctdb->num_nodes; i++) {
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
hdr->destnode = ctdb->nodes[i]->pnn;
ctdb_queue_packet(ctdb, hdr);

View File

@ -673,6 +673,10 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
for (i=0;i<ctdb->num_nodes;i++) {
public_ips = ctdb->nodes[i]->public_ips;
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
/* there were no public ips for this node */
if (public_ips == NULL) {
continue;

View File

@ -296,7 +296,11 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb)
return -1;
}
for (i=0;i<ctdb->num_nodes;i++) {
for (i=0; i < ctdb->num_nodes; i++) {
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
/* if node_ip is specified we will only try to bind to that
ip.
*/

View File

@ -69,7 +69,10 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
exit(1);
}
for (i=0; i<ctdb->num_nodes; i++) {
for (i=0; i < ctdb->num_nodes; i++) {
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
if (ctdb_tcp_add_node(ctdb->nodes[i]) != 0) {
DEBUG(DEBUG_CRIT, ("methods->add_node failed at %d\n", i));
return -1;
@ -135,7 +138,10 @@ static int ctdb_tcp_start(struct ctdb_context *ctdb)
{
int i;
for (i=0; i<ctdb->num_nodes; i++) {
for (i=0; i < ctdb->num_nodes; i++) {
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
}
ctdb_tcp_connect_node(ctdb->nodes[i]);
}

View File

@ -492,6 +492,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
{ NODE_FLAGS_PERMANENTLY_DISABLED, "DISABLED" },
{ NODE_FLAGS_BANNED, "BANNED" },
{ NODE_FLAGS_UNHEALTHY, "UNHEALTHY" },
{ NODE_FLAGS_DELETED, "DELETED" },
};
char *flags_str = NULL;
int j;