2024-12-23 17:34:34 +03:00 · 2009-10-14 11:59:16 +11:00 · 2009-10-14 11:59:16 +11:00 · 80be59d35e
commit 80be59d35e
parent 4b7a208b16
4 changed files with 39 additions and 1 deletions
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@ -106,6 +106,11 @@ struct ctdb_call_info {
 */
 #define CTDB_SRVID_DISABLE_IP_CHECK  0xFC00000000000000LL

+/* A dummy port used for sending back ipreallocate responses to the main
+   daemon
+*/
+#define CTDB_SRVID_TAKEOVER_RUN_RESPONSE  0xFD00000000000000LL
+
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
 /* send a broadcast to all nodes in the cluster, active or not */
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@ -45,6 +45,14 @@ struct rd_memdump_reply {
 	uint64_t srvid;
 };

+/*
+  reply address (pnn + srvid) sent as the data of a TAKEOVER_RUN message
+ */
+struct takeover_run_reply {
+	uint32_t pnn;	/* node part of the reply address; the health monitor fills in its own pnn */
+	uint64_t srvid;	/* service id part of the reply address, e.g. CTDB_SRVID_TAKEOVER_RUN_RESPONSE */
+};
+
 /*
  a tcp connection description
 */
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@ -110,10 +110,19 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 	TDB_DATA data;
 	struct ctdb_node_flag_change c;
 	uint32_t next_interval;
+	int ret;
+	TDB_DATA rddata;
+	struct takeover_run_reply rd;

 	c.pnn = ctdb->pnn;
 	c.old_flags = node->flags;

+	rd.pnn   = ctdb->pnn;
+	rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
+
+	rddata.dptr = (uint8_t *)&rd;
+	rddata.dsize = sizeof(rd);
+
 	if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
 		DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
 		node->flags |= NODE_FLAGS_UNHEALTHY;
@ -124,12 +133,28 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 		}

 		ctdb_run_notification_script(ctdb, "unhealthy");
+
+		/* ask the recmaster to reallocate all addresses */
+		DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
+		ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+		}
+
 	} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 		DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
 		node->flags &= ~NODE_FLAGS_UNHEALTHY;
 		ctdb->monitor->next_interval = 1;

 		ctdb_run_notification_script(ctdb, "healthy");
+
+		/* ask the recmaster to reallocate all addresses */
+		DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
+		ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+		}
+
 	}

 	next_interval = ctdb->monitor->next_interval;
--- a/ctdb/tools/ctdb.c
+++ b/ctdb/tools/ctdb.c
@ -1619,7 +1619,7 @@ static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char
 {
 	int i, ret;
 	TDB_DATA data;
-	struct rd_memdump_reply rd;
+	struct takeover_run_reply rd;
 	uint32_t recmaster;
 	struct ctdb_node_map *nodemap=NULL;
 	int retries=0;