1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00

when we change state between healthy/unhealthy, make sure we ask the recovery

master to perform an explicit ip reallocation.

This is more reliable and faster than having the recovery daemon track these
changes, and since we now have an explicit method to ask the recovery daemon
to perform an explicit ip reallocation, we should use this.

(This used to be ctdb commit 3807681e74f4bfe92befdae6ed616ff5f1a99880)
This commit is contained in:
Ronnie Sahlberg 2009-10-14 11:59:16 +11:00
parent 4b7a208b16
commit 80be59d35e
4 changed files with 39 additions and 1 deletions

View File

@ -106,6 +106,11 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_DISABLE_IP_CHECK 0xFC00000000000000LL
/* A dummy port used for sending back ipreallocate responses to the main
daemon
*/
#define CTDB_SRVID_TAKEOVER_RUN_RESPONSE 0xFD00000000000000LL
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001
/* send a broadcast to all nodes in the cluster, active or not */

View File

@ -45,6 +45,14 @@ struct rd_memdump_reply {
uint64_t srvid;
};
/*
description for a TAKEOVER_RUN message reply address

Senders fill this in and pass it as the raw payload (pointer to the struct,
sizeof the struct) of a CTDB_SRVID_TAKEOVER_RUN message, so the field order
and field sizes define the layout of that payload.
NOTE(review): a uint32_t followed by a uint64_t will normally leave padding
between the two fields; callers that send the struct as raw bytes should
presumably zero it first - confirm.
*/
struct takeover_run_reply {
/* pnn (node number) of the node the reply is addressed to */
uint32_t pnn;
/* srvid on that node which the reply should be sent to */
uint64_t srvid;
};
/*
a tcp connection description
*/

View File

@ -110,10 +110,19 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
TDB_DATA data;
struct ctdb_node_flag_change c;
uint32_t next_interval;
int ret;
TDB_DATA rddata;
struct takeover_run_reply rd;
c.pnn = ctdb->pnn;
c.old_flags = node->flags;
rd.pnn = ctdb->pnn;
rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
rddata.dptr = (uint8_t *)&rd;
rddata.dsize = sizeof(rd);
if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
node->flags |= NODE_FLAGS_UNHEALTHY;
@ -124,12 +133,28 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
}
ctdb_run_notification_script(ctdb, "unhealthy");
/* ask the recmaster to reallocate all addresses */
DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
}
} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
node->flags &= ~NODE_FLAGS_UNHEALTHY;
ctdb->monitor->next_interval = 1;
ctdb_run_notification_script(ctdb, "healthy");
/* ask the recmaster to reallocate all addresses */
DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
}
}
next_interval = ctdb->monitor->next_interval;

View File

@ -1619,7 +1619,7 @@ static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char
{
int i, ret;
TDB_DATA data;
struct rd_memdump_reply rd;
struct takeover_run_reply rd;
uint32_t recmaster;
struct ctdb_node_map *nodemap=NULL;
int retries=0;