2025-03-03 12:58:35 +03:00 · 2007-06-06 10:41:22 +10:00 · 2007-06-06 10:41:22 +10:00 · 56f19eaddc
commit 56f19eaddc
parent 83e1d488eb b130540102
6 changed files with 186 additions and 27 deletions
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@ -34,9 +34,9 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 	int i;

-	if (ctdb->monitoring_mode==CTDB_MONITORING_DISABLED) {
+	if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
 		event_add_timed(ctdb->ev, ctdb, 
-			timeval_current_ofs(ctdb->tunable.monitoring_timeout, 0), 
+			timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
 		return;
 	}
@ -65,7 +65,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve

 		node->rx_cnt = 0;

-		if (node->dead_count >= ctdb->tunable.monitoring_limit) {
+		if (node->dead_count >= ctdb->tunable.keepalive_limit) {
 			DEBUG(0,("dead count reached for node %u\n", node->vnn));
 			ctdb_node_dead(node);
 			ctdb_send_keepalive(ctdb, node->vnn);
@ -84,19 +84,84 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
 	}
 	
 	event_add_timed(ctdb->ev, ctdb, 
-			timeval_current_ofs(ctdb->tunable.monitoring_timeout, 0), 
+			timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
 }

+static void ctdb_check_health(struct event_context *ev, struct timed_event *te, 
+			      struct timeval t, void *private_data);
+
+/*
+  called when a health monitoring event script finishes
+
+  status is handed in by the event script machinery (presumably the
+  script's result - confirm against ctdb_event_script_callback); zero
+  is treated as healthy, anything else as unhealthy. NODE_FLAGS_DISABLED
+  on our own node entry is set or cleared to match, and only when that
+  flag actually changes is the recovery master told about it.
+  p is not used.
+ */
+static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+	struct ctdb_node *node = ctdb->nodes[ctdb->vnn];
+	TDB_DATA data;
+	struct ctdb_node_flag_change c;
+
+	/* re-arm the next health check first, so that every return path
+	   below keeps the periodic check running */
+	event_add_timed(ctdb->ev, ctdb, 
+			timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+			ctdb_check_health, ctdb);
+
+	if (status != 0 && !(node->flags & NODE_FLAGS_DISABLED)) {
+		DEBUG(0,("monitor event failed - disabling node\n"));
+		node->flags |= NODE_FLAGS_DISABLED;
+	} else if (status == 0 && (node->flags & NODE_FLAGS_DISABLED)) {
+		DEBUG(0,("monitor event OK - node re-enabled\n"));
+		/* ctdb->nodes[ctdb->vnn] is the same entry as 'node' */
+		ctdb->nodes[ctdb->vnn]->flags &= ~NODE_FLAGS_DISABLED;
+	} else {
+		/* no change */
+		return;
+	}
+
+	/* package our vnn and the complete, updated flags word */
+	c.vnn = ctdb->vnn;
+	c.flags = node->flags;
+
+	data.dptr = (uint8_t *)&c;
+	data.dsize = sizeof(c);
+
+	/* tell the recmaster that something has changed */
+	ctdb_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+}
+
+
+/*
+  see if the event scripts think we are healthy
+
+  timed event handler: launches the "monitor" event script with
+  ctdb_health_callback as its completion callback. When the launch
+  succeeds this function does not re-arm itself; ctdb_health_callback
+  schedules the next run. When monitoring is disabled, or the launch
+  fails, it re-arms itself after tunable.monitor_interval instead.
+  ev, te and t are not used.
+ */
+static void ctdb_check_health(struct event_context *ev, struct timed_event *te, 
+			      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int ret;
+
+	/* monitoring switched off: skip the script but keep polling so
+	   that checks resume once monitoring is enabled again */
+	if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
+		event_add_timed(ctdb->ev, ctdb, 
+				timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+				ctdb_check_health, ctdb);
+		return;
+	}
+	
+	ret = ctdb_event_script_callback(ctdb, ctdb, ctdb_health_callback, ctdb, "monitor");
+	if (ret != 0) {
+		/* the callback will never fire, so re-arm here instead */
+		DEBUG(0,("Unable to launch monitor event script\n"));
+		event_add_timed(ctdb->ev, ctdb, 
+				timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+				ctdb_check_health, ctdb);
+	}	
+}
+
+
 /*
  start watching for nodes that might be dead
 */
 int ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
 	event_add_timed(ctdb->ev, ctdb, 
-			timeval_current_ofs(ctdb->tunable.monitoring_timeout, 0), 
+			timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
+	/* also start the periodic local health check, which runs on its
+	   own monitor_interval timer independently of the keepalives */
+	event_add_timed(ctdb->ev, ctdb, 
+			timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+			ctdb_check_health, ctdb);
 	return 0;
 }
-
-
--- a/ctdb/common/ctdb_recoverd.c
+++ b/ctdb/common/ctdb_recoverd.c
@ -36,7 +36,7 @@ static void timeout_func(struct event_context *ev, struct timed_event *te,
 }

 #define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
-#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.monitor_frequency, 0)
+#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)

 static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
 {
@ -560,7 +560,7 @@ static int send_election_request(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
 	struct election_message emsg;
 	uint64_t srvid;
 	
-	srvid = CTDB_SRVTYPE_RECOVERY;
+	srvid = CTDB_SRVID_RECOVERY;

 	emsg.vnn = vnn;

@ -671,8 +671,11 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	struct ctdb_vnn_map *vnnmap=NULL;
 	struct ctdb_vnn_map *remote_vnnmap=NULL;
 	int i, j, ret;
+	bool need_takeover_run;
 	
 again:
+	need_takeover_run = false;
+
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
 		mem_ctx = NULL;
@ -694,7 +697,7 @@ again:
 	ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 
 			      "RecoverTimeout", &ctdb->tunable.recover_timeout);
 	ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 
-			      "MonitorFrequency", &ctdb->tunable.monitor_frequency);
+			      "RecoverInterval", &ctdb->tunable.recover_interval);
 	ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 
 			      "ElectionTimeout", &ctdb->tunable.election_timeout);
 	ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 
@ -849,13 +852,23 @@ again:
 		 */
 		for (i=0;i<nodemap->num;i++) {
 			if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn)
-			||  (remote_nodemap->nodes[i].flags != nodemap->nodes[i].flags)) {
+			    || ((remote_nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) != 
+				(nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED))) {
 				DEBUG(0, (__location__ " Remote node:%u has different nodemap.\n", nodemap->nodes[j].vnn));
 				do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
 				goto again;
 			}
 		}

+		/* update our nodemap flags according to the other
+		   server - this gets the NODE_FLAGS_DISABLED
+		   flag. Note that the remote node is authoritative
+		   for its flags (except CONNECTED, which we know
+		   matches in this code) */
+		if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) {
+			nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags;
+			need_takeover_run = true;
+		}
 	}


@ -872,7 +885,7 @@ again:
 	   the vnnmap.
 	 */
 	for (j=0; j<nodemap->num; j++) {
-		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+		if (!(nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED)) {
 			continue;
 		}
 		if (nodemap->nodes[j].vnn == vnn) {
@ -896,7 +909,7 @@ again:
 	   and are from the same generation
 	 */
 	for (j=0; j<nodemap->num; j++) {
-		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+		if (!(nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED)) {
 			continue;
 		}
 		if (nodemap->nodes[j].vnn == vnn) {
@ -933,10 +946,67 @@ again:
 		}
 	}

+	/* we might need to change who has what IP assigned */
+	if (need_takeover_run && ctdb->takeover.enabled) {
+		ret = ctdb_takeover_run(ctdb, nodemap);
+		if (ret != 0) {
+			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+		}
+	}
+
 	goto again;

 }

+
+/*
+  handler for when a node changes its flags
+
+  data must carry exactly one struct ctdb_node_flag_change. The local
+  nodemap is fetched, the entry for the reported vnn gets its flags
+  replaced by the reported value, and (when takeover is enabled) a
+  takeover run is done against that updated nodemap. Malformed
+  messages, a failed nodemap fetch and unknown vnns are logged and
+  otherwise ignored. srvid and private_data are not used.
+*/
+static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+			    TDB_DATA data, void *private_data)
+{
+	int ret;
+	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
+	struct ctdb_node_map *nodemap=NULL;
+	TALLOC_CTX *tmp_ctx;
+	int i;
+
+	/* c is only dereferenced after this size check has passed */
+	if (data.dsize != sizeof(*c)) {
+		DEBUG(0,(__location__ "Invalid data in ctdb_node_flag_change\n"));
+		return;
+	}
+
+	/* temporary context owning the nodemap; freed on every exit path below */
+	tmp_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY_VOID(ctdb, tmp_ctx);
+
+	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
+	if (ret != 0 || nodemap == NULL) {
+		/* without a nodemap there is nothing to update; bail out
+		   instead of dereferencing a NULL nodemap below */
+		DEBUG(0,(__location__ " Unable to get nodemap from local node\n"));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* locate the nodemap entry for the node that reported the change */
+	for (i=0;i<nodemap->num;i++) {
+		if (nodemap->nodes[i].vnn == c->vnn) break;
+	}
+
+	if (i == nodemap->num) {
+		DEBUG(0,(__location__ "Flag change for non-existant node %u\n", c->vnn));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	DEBUG(0,("Node %u has changed flags - was 0x%x now 0x%x\n", 
+		 c->vnn, nodemap->nodes[i].flags, c->flags));
+
+	/* the reported flags word replaces our copy wholesale */
+	nodemap->nodes[i].flags = c->flags;
+	
+	if (ctdb->takeover.enabled) {
+		ret = ctdb_takeover_run(ctdb, nodemap);
+		if (ret != 0) {
+			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+		}
+	}
+
+	talloc_free(tmp_ctx);
+}
+
+
 static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde, 
 				 uint16_t flags, void *private_data)
 {
@ -947,7 +1017,6 @@ static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde,
 int ctdb_start_recoverd(struct ctdb_context *ctdb)
 {
 	int ret;
-	uint64_t srvid;
 	int fd[2];
 	pid_t child;

@ -990,8 +1059,10 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 	}

 	/* register a message port for recovery elections */
-	srvid = CTDB_SRVTYPE_RECOVERY;
-	ctdb_set_message_handler(ctdb, srvid, election_handler, NULL);
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, NULL);
+
+	/* and one for when nodes are disabled/enabled */
+	ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, NULL);

 	monitor_cluster(ctdb);

--- a/ctdb/common/ctdb_tunables.c
+++ b/ctdb/common/ctdb_tunables.c
@ -29,13 +29,14 @@ static const struct {
 	{ "SeqnumFrequency",   1,  offsetof(struct ctdb_tunable, seqnum_frequency) },
 	{ "ControlTimeout",    60, offsetof(struct ctdb_tunable, control_timeout) },
 	{ "TraverseTimeout",   20, offsetof(struct ctdb_tunable, traverse_timeout) },
-	{ "MonitoringTimeout", 2,  offsetof(struct ctdb_tunable, monitoring_timeout) },
-	{ "MonitoringLimit",   3,  offsetof(struct ctdb_tunable, monitoring_limit) },
+	{ "KeepaliveInterval", 2,  offsetof(struct ctdb_tunable, keepalive_interval) },
+	{ "KeepaliveLimit",    3,  offsetof(struct ctdb_tunable, keepalive_limit) },
 	{ "MaxLACount",        7,  offsetof(struct ctdb_tunable, max_lacount) },
 	{ "RecoverTimeout",    5,  offsetof(struct ctdb_tunable, recover_timeout) },
-	{ "MonitorFrequency",  1,  offsetof(struct ctdb_tunable, monitor_frequency) },
+	{ "RecoverInterval",   1,  offsetof(struct ctdb_tunable, recover_interval) },
 	{ "ElectionTimeout",   3,  offsetof(struct ctdb_tunable, election_timeout) },
 	{ "TakeoverTimeout",   5,  offsetof(struct ctdb_tunable, takeover_timeout) },
+	{ "MonitorInterval",  60,  offsetof(struct ctdb_tunable, monitor_interval) },
 };

 /*
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@ -59,7 +59,7 @@ struct ctdb_call_info {
 /*
  srvid type : RECOVERY
 */
-#define CTDB_SRVTYPE_RECOVERY	0xF100000000000000LL
+#define CTDB_SRVID_RECOVERY	0xF100000000000000LL

 /* 
   a message handler ID meaning that the cluster has been reconfigured
@ -71,6 +71,12 @@ struct ctdb_call_info {
 */
 #define CTDB_SRVID_RELEASE_IP 0xF300000000000000LL

+/* 
+   a message ID meaning that a node's flags have changed
+ */
+#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
+
+
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
 /* send a broadcast to all nodes in the cluster, active or not */
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@ -44,13 +44,14 @@ struct ctdb_tunable {
 	uint32_t seqnum_frequency;
 	uint32_t control_timeout;
 	uint32_t traverse_timeout;
-	uint32_t monitoring_timeout;
-	uint32_t monitoring_limit;
+	uint32_t keepalive_interval;
+	uint32_t keepalive_limit;
 	uint32_t max_lacount;
 	uint32_t recover_timeout;
-	uint32_t monitor_frequency;
+	uint32_t recover_interval;
 	uint32_t election_timeout;
 	uint32_t takeover_timeout;
+	uint32_t monitor_interval;
 };

 /*
@ -453,6 +454,14 @@ struct ctdb_control_tcp_vnn {
 	struct sockaddr_in dest;
 };

+/*
+  structure used for CTDB_SRVID_NODE_FLAGS_CHANGED
+
+  sent as the raw message payload; the receiver checks that the
+  payload size equals sizeof(struct ctdb_node_flag_change)
+ */
+struct ctdb_node_flag_change {
+	/* vnn of the node whose flags changed */
+	uint32_t vnn;
+	/* that node's complete new flags word (not a delta) */
+	uint32_t flags;
+};
+
 enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};

 #define CTDB_LMASTER_ANY	0xffffffff
--- a/ctdb/tools/ctdb_control.c
+++ b/ctdb/tools/ctdb_control.c
@ -296,11 +296,18 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv

 	printf("Number of nodes:%d\n", nodemap->num);
 	for(i=0;i<nodemap->num;i++){
+		const char *flags_str;
+		if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
+			flags_str = "DISABLED";
+		} else if (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
+			flags_str = "CONNECTED";
+		} else {
+			flags_str = "UNAVAILABLE";
+		}
 		printf("vnn:%d %-16s %s%s\n", nodemap->nodes[i].vnn,
-			inet_ntoa(nodemap->nodes[i].sin.sin_addr),
-			nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED?
-				"CONNECTED":"UNAVAILABLE",
-			nodemap->nodes[i].vnn == myvnn?" (THIS NODE)":"");
+		       inet_ntoa(nodemap->nodes[i].sin.sin_addr),
+		       flags_str,
+		       nodemap->nodes[i].vnn == myvnn?" (THIS NODE)":"");
 	}

 	ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), options.vnn, ctdb, &vnnmap);