mirror of
https://github.com/samba-team/samba.git
synced 2025-01-11 05:18:09 +03:00
when we change state between healthy/unhealthy, make sure we ask the recovery
master to perform an explicit ip reallocation. This is more reliable and faster than having the recovery daemon track these changes, and since we now have an explicit method to ask the recovery daemon to perform an explicit ip reallocation, we should use this. (This used to be ctdb commit 3807681e74f4bfe92befdae6ed616ff5f1a99880)
This commit is contained in:
parent
4b7a208b16
commit
80be59d35e
@ -106,6 +106,11 @@ struct ctdb_call_info {
|
||||
*/
|
||||
#define CTDB_SRVID_DISABLE_IP_CHECK 0xFC00000000000000LL
|
||||
|
||||
/* A dummy port used for sending back ipreallocate responses to the main
|
||||
daemon
|
||||
*/
|
||||
#define CTDB_SRVID_TAKEOVER_RUN_RESPONSE 0xFD00000000000000LL
|
||||
|
||||
/* used on the domain socket, send a pdu to the local daemon */
|
||||
#define CTDB_CURRENT_NODE 0xF0000001
|
||||
/* send a broadcast to all nodes in the cluster, active or not */
|
||||
|
@ -45,6 +45,14 @@ struct rd_memdump_reply {
|
||||
uint64_t srvid;
|
||||
};
|
||||
|
||||
/*
|
||||
description for a TAKEOVER_RUN message reply address
|
||||
*/
|
||||
struct takeover_run_reply {
|
||||
uint32_t pnn;
|
||||
uint64_t srvid;
|
||||
};
|
||||
|
||||
/*
|
||||
a tcp connection description
|
||||
*/
|
||||
|
@ -110,10 +110,19 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
|
||||
TDB_DATA data;
|
||||
struct ctdb_node_flag_change c;
|
||||
uint32_t next_interval;
|
||||
int ret;
|
||||
TDB_DATA rddata;
|
||||
struct takeover_run_reply rd;
|
||||
|
||||
c.pnn = ctdb->pnn;
|
||||
c.old_flags = node->flags;
|
||||
|
||||
rd.pnn = ctdb->pnn;
|
||||
rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
|
||||
|
||||
rddata.dptr = (uint8_t *)&rd;
|
||||
rddata.dsize = sizeof(rd);
|
||||
|
||||
if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
|
||||
DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
|
||||
node->flags |= NODE_FLAGS_UNHEALTHY;
|
||||
@ -124,12 +133,28 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
|
||||
}
|
||||
|
||||
ctdb_run_notification_script(ctdb, "unhealthy");
|
||||
|
||||
/* ask the recmaster to reallocate all addresses */
|
||||
DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
|
||||
ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
|
||||
}
|
||||
|
||||
} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
|
||||
DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
|
||||
node->flags &= ~NODE_FLAGS_UNHEALTHY;
|
||||
ctdb->monitor->next_interval = 1;
|
||||
|
||||
ctdb_run_notification_script(ctdb, "healthy");
|
||||
|
||||
/* ask the recmaster to reallocate all addresses */
|
||||
DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
|
||||
ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
next_interval = ctdb->monitor->next_interval;
|
||||
|
@ -1619,7 +1619,7 @@ static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char
|
||||
{
|
||||
int i, ret;
|
||||
TDB_DATA data;
|
||||
struct rd_memdump_reply rd;
|
||||
struct takeover_run_reply rd;
|
||||
uint32_t recmaster;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
int retries=0;
|
||||
|
Loading…
Reference in New Issue
Block a user