2024-12-23 17:34:34 +03:00 · 2007-08-21 17:25:15 +10:00 · 2007-08-21 17:25:15 +10:00 · 8b06fc7284
commit 8b06fc7284
parent 4e4dd6b886
4 changed files with 35 additions and 15 deletions
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@ -506,7 +506,8 @@ struct ctdb_control_tcp_vnn {
 */
 struct ctdb_node_flag_change {
 	uint32_t vnn;
-	uint32_t flags;
+	uint32_t new_flags;
+	uint32_t old_flags;
 };

 /*
--- a/ctdb/server/ctdb_daemon.c
+++ b/ctdb/server/ctdb_daemon.c
@ -51,7 +51,7 @@ static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	/* don't get the disconnected flag from the other node */
 	ctdb->nodes[c->vnn]->flags = 
 		(ctdb->nodes[c->vnn]->flags&NODE_FLAGS_DISCONNECTED) 
-		| (c->flags & ~NODE_FLAGS_DISCONNECTED);	
+		| (c->new_flags & ~NODE_FLAGS_DISCONNECTED);	
 	DEBUG(2,("Node flags for node %u are now 0x%x\n", c->vnn, ctdb->nodes[c->vnn]->flags));

 	/* make sure we don't hold any IPs when we shouldn't */
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@ -103,6 +103,9 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 			timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
 			ctdb_check_health, ctdb);

+	c.vnn = ctdb->vnn;
+	c.old_flags = node->flags;
+
 	if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
 		DEBUG(0,("monitor event failed - disabling node\n"));
 		node->flags |= NODE_FLAGS_UNHEALTHY;
@ -114,8 +117,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 		return;
 	}

-	c.vnn = ctdb->vnn;
-	c.flags = node->flags;
+	c.new_flags = node->flags;

 	data.dptr = (uint8_t *)&c;
 	data.dsize = sizeof(c);
@ -206,7 +208,8 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)

 	/* if we have been banned, go into recovery mode */
 	c.vnn = ctdb->vnn;
-	c.flags = node->flags;
+	c.old_flags = old_flags;
+	c.new_flags = node->flags;

 	data.dptr = (uint8_t *)&c;
 	data.dsize = sizeof(c);
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@ -386,7 +386,8 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
 		TDB_DATA data;

 		c.vnn = nodemap->nodes[i].vnn;
-		c.flags = nodemap->nodes[i].flags;
+		c.old_flags = nodemap->nodes[i].flags;
+		c.new_flags = nodemap->nodes[i].flags;

 		data.dptr = (uint8_t *)&c;
 		data.dsize = sizeof(c);
@ -815,7 +816,7 @@ static int do_recovery(struct ctdb_recoverd *rec,

 	/* send a message to all clients telling them that the cluster 
 	   has been reconfigured */
-	ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, CTDB_SRVID_RECONFIGURE, tdb_null);
+	ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);

 	DEBUG(0, (__location__ " Recovery complete\n"));

@ -1045,6 +1046,7 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
 	struct ctdb_node_map *nodemap=NULL;
 	TALLOC_CTX *tmp_ctx;
+	uint32_t changed_flags;
 	int i;

 	if (data.dsize != sizeof(*c)) {
@ -1067,20 +1069,22 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 		return;
 	}

+	changed_flags = c->old_flags ^ c->new_flags;
+
 	/* Dont let messages from remote nodes change the DISCONNECTED flag. 
 	   This flag is handled locally based on whether the local node
 	   can communicate with the node or not.
 	*/
-	c->flags &= ~NODE_FLAGS_DISCONNECTED;
+	c->new_flags &= ~NODE_FLAGS_DISCONNECTED;
 	if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
-		c->flags |= NODE_FLAGS_DISCONNECTED;
+		c->new_flags |= NODE_FLAGS_DISCONNECTED;
 	}

-	if (nodemap->nodes[i].flags != c->flags) {
-		DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
+	if (nodemap->nodes[i].flags != c->new_flags) {
+		DEBUG(0,("Node %u has changed flags - now 0x%x  was 0x%x\n", c->vnn, c->new_flags, c->old_flags));
 	}

-	nodemap->nodes[i].flags = c->flags;
+	nodemap->nodes[i].flags = c->new_flags;

 	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), 
 				     CTDB_CURRENT_NODE, &ctdb->recovery_master);
@ -1094,9 +1098,21 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	    ctdb->recovery_master == ctdb->vnn &&
 	    ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&
 	    ctdb->takeover.enabled) {
-		ret = ctdb_takeover_run(ctdb, nodemap);
-		if (ret != 0) {
-			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+		/* Only do the takeover run if the permanently-disabled or
+		   unhealthy flags changed, since these cause an IP failover
+		   but not a recovery.
+		   If the node became disconnected or banned, this also
+		   leads to an IP address failover, but that is handled
+		   during recovery.
+		*/
+		if (changed_flags & NODE_FLAGS_DISABLED) {
+			ret = ctdb_takeover_run(ctdb, nodemap);
+			if (ret != 0) {
+				DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+			}
+			/* send a message to all clients telling them that the 
+			   cluster has been reconfigured */
+			ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);
 		}
 	}