2024-12-25 23:21:54 +03:00 · 2007-05-19 10:12:37 +10:00 · 2007-05-19 10:12:37 +10:00 · 99652bdb94
commit 99652bdb94
parent 334a294481 28f2fc669b
6 changed files with 105 additions and 81 deletions
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@ -116,8 +116,7 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
 	node->name = talloc_asprintf(node, "%s:%u", 
 				     node->address.address, 
 				     node->address.port);
-	/* for now we just set the vnn to the line in the file - this
-	   will change! */
+	/* this assumes that the nodes are kept in sorted order, and no gaps */
 	node->vnn = ctdb->num_nodes;

 	if (ctdb->address.address &&
@ -275,6 +274,11 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
 		 "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
 		 hdr->srcnode, hdr->destnode));

+	/* up the counter for this source node, so we know it's alive */
+	if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
+		ctdb->nodes[hdr->srcnode]->rx_cnt++;
+	}
+
 	switch (hdr->operation) {
 	case CTDB_REQ_CALL:
 	case CTDB_REPLY_CALL:
@ -345,7 +349,6 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)

 	case CTDB_REQ_KEEPALIVE:
 		ctdb->status.keepalive_packets_recv++;
-		ctdb_request_keepalive(ctdb, hdr);
 		break;

 	default:
@ -376,6 +379,7 @@ static void ctdb_node_dead(struct ctdb_node *node)
 	node->flags &= ~NODE_FLAGS_CONNECTED;
 	DEBUG(1,("%s: node %s is dead: %d connected\n", 
 		 node->ctdb->name, node->name, node->ctdb->num_connected));
+	ctdb_daemon_cancel_controls(node->ctdb, node);
 }

 /*
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@ -607,37 +607,20 @@ void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
 */
 static int ctdb_call_destructor(struct ctdb_call_state *state)
 {
+	DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state); /* unlink from the list walked by ctdb_call_resend_all() */
 	ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
 	return 0;
 }


 /*
-  called when a ctdb_call times out
+  called when a ctdb_call needs to be resent after a reconfigure event
 */
-static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te, 
-			      struct timeval t, void *private_data)
+static void ctdb_call_resend(struct ctdb_call_state *state)
 {
-	struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
 	struct ctdb_context *ctdb = state->ctdb_db->ctdb;

-	ctdb->status.timeouts.call++;
-
-	event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-			ctdb_call_timeout, state);
-
-	if (++state->resend_count < 10 &&
-	    (ctdb->vnn_map->generation == state->generation ||
-	     ctdb->recovery_mode != CTDB_RECOVERY_NORMAL)) {
-		/* the call is just being slow, or we are curently
-		   recovering, give it more time */
-		return;
-	}
-
-	/* the generation count changed or we're timing out too much -
-	   the call must be re-issued */
 	state->generation = ctdb->vnn_map->generation;
-	state->resend_count = 0;

 	/* use a new reqid, in case the old reply does eventually come in */
 	ctdb_reqid_remove(ctdb, state->reqid);
@ -651,7 +634,19 @@ static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
 	state->c->hdr.destnode = ctdb->vnn;

 	ctdb_queue_packet(ctdb, &state->c->hdr);
-	DEBUG(0,("requeued ctdb_call after timeout\n"));
+	DEBUG(0,("resent ctdb_call\n"));
+}
+
+/*
+  walk ctdb->pending_calls and resend every call on it
+  (invoked from ctdb_control_thaw)
+ */
+void ctdb_call_resend_all(struct ctdb_context *ctdb)
+{
+	struct ctdb_call_state *cur = ctdb->pending_calls;
+
+	while (cur) {
+		/* pick up the successor before handing cur to the resend */
+		struct ctdb_call_state *following = cur->next;
+		ctdb_call_resend(cur);
+		cur = following;
+	}
 }

 /*
@ -743,10 +738,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
 	state->state  = CTDB_CALL_WAIT;
 	state->generation = ctdb->vnn_map->generation;

+	DLIST_ADD(ctdb->pending_calls, state);
+
 	ctdb_queue_packet(ctdb, &state->c->hdr);

-	event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-			ctdb_call_timeout, state);
 	return state;
 }

@ -785,13 +780,11 @@ int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call)
 /* 
   send a keepalive packet to the other node
 */
-void ctdb_send_keepalive(struct ctdb_context *ctdb,
-				TALLOC_CTX *mem_ctx,
-				uint32_t destnode)
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
 {
 	struct ctdb_req_keepalive *r;
 	
-	r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE,
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
 				    sizeof(struct ctdb_req_keepalive), 
 				    struct ctdb_req_keepalive);
 	CTDB_NO_MEMORY_FATAL(ctdb, r);
--- a/ctdb/common/ctdb_daemon.c
+++ b/ctdb/common/ctdb_daemon.c
@ -836,16 +836,18 @@ void ctdb_request_finished(struct ctdb_context *ctdb, struct ctdb_req_header *hd


 struct daemon_control_state { /* per-request state for a control issued by a local client */
+	struct daemon_control_state *next, *prev; /* links for a node's pending_controls list */
 	struct ctdb_client *client; /* client that sent the control request */
 	struct ctdb_req_control *c; /* the request packet; talloc_steal'd onto this state */
 	uint32_t reqid; /* reqid copied from the client's request header */
+	struct ctdb_node *node; /* destination node, or NULL when destnode is not a valid vnn */
 };

 /*
  callback when a control reply comes in
 */
 static void daemon_control_callback(struct ctdb_context *ctdb,
-				    uint32_t status, TDB_DATA data, 
+				    int32_t status, TDB_DATA data, 
 				    const char *errormsg,
 				    void *private_data)
 {
@ -879,6 +881,30 @@ static void daemon_control_callback(struct ctdb_context *ctdb,
 	talloc_free(state);
 }

+/*
+  fail all pending controls to a disconnected node
+
+  Every entry on node->pending_controls is unlinked and then completed
+  through daemon_control_callback() with status -1, no data, and the
+  error message "node is disconnected".
+ */
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
+{
+	struct daemon_control_state *state;
+	/* always take the current list head: each pass unlinks it */
+	while ((state = node->pending_controls)) {
+		DLIST_REMOVE(node->pending_controls, state);
+		/* the callback's status parameter is int32_t, so pass -1
+		   directly instead of going through an unsigned cast */
+		daemon_control_callback(ctdb, -1, tdb_null,
+					"node is disconnected", state);
+	}
+}
+
+/*
+  talloc destructor for a daemon_control_state: if the state is tied
+  to a node, take it off that node's pending_controls list.
+  Always returns 0.
+ */
+static int daemon_control_destructor(struct daemon_control_state *state)
+{
+	struct ctdb_node *owner = state->node;
+
+	if (!owner) {
+		return 0;
+	}
+	DLIST_REMOVE(owner->pending_controls, state);
+	return 0;
+}
+
 /*
  this is called when the ctdb daemon received a ctdb request control
  from a local client over the unix domain socket
@ -900,6 +926,14 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
 	state->client = client;
 	state->c = talloc_steal(state, c);
 	state->reqid = c->hdr.reqid;
+	if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) {
+		state->node = client->ctdb->nodes[c->hdr.destnode];
+		DLIST_ADD(state->node->pending_controls, state);
+	} else {
+		state->node = NULL;
+	}
+
+	talloc_set_destructor(state, daemon_control_destructor);
 	
 	data.dptr = &c->data[0];
 	data.dsize = c->datalen;
@ -912,6 +946,10 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
 		DEBUG(0,(__location__ " Failed to send control to remote node %u\n",
 			 c->hdr.destnode));
 	}
+
+	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+		talloc_free(state);
+	}
 }

 /*
--- a/ctdb/common/ctdb_freeze.c
+++ b/ctdb/common/ctdb_freeze.c
@ -223,5 +223,6 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
 {
 	talloc_free(ctdb->freeze_handle);
 	ctdb->freeze_handle = NULL;
+	ctdb_call_resend_all(ctdb);
 	return 0;
 }
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@ -26,73 +26,56 @@
 #include "../include/ctdb_private.h"

 /*
-  called when a CTDB_REQ_KEEPALIVE packet comes in
+  see if any nodes are dead
 */
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
-{
-	struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr;
-	struct ctdb_node *node = NULL;
-	int i;
-
-	for (i=0;i<ctdb->num_nodes;i++) {
-		if (ctdb->nodes[i]->vnn == r->hdr.srcnode) {
-			node = ctdb->nodes[i];
-			break;
-		}
-	}
-	if (!node) {
-		DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode));
-		return;
-	}
-
-	node->rx_cnt++;
-}
-
-
 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
 			   struct timeval t, void *private_data)
 {
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 	int i;
-	TALLOC_CTX *mem_ctx = talloc_new(ctdb);

 	/* send a keepalive to all other nodes, unless a node has been declared dead */
 	for (i=0;i<ctdb->num_nodes;i++) {
-		if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) {
-			continue;
-		}
-		if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) {
+		struct ctdb_node *node = ctdb->nodes[i];
+		if (node->vnn == ctdb->vnn) {
 			continue;
 		}
 		
-		if (ctdb->nodes[i]->rx_cnt == 0) {
-			ctdb->nodes[i]->dead_count++;
+		/* it might have come alive again */
+		if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
+			DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn));
+			node->flags |= NODE_FLAGS_CONNECTED;
+		}
+
+		if (node->rx_cnt == 0) {
+			node->dead_count++;
 		} else {
-			ctdb->nodes[i]->dead_count = 0;
+			node->dead_count = 0;
 		}

-		if (ctdb->nodes[i]->dead_count>=3) {
-			ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED;
-			/* should probably tell the transport layer
-			   to kill the sockets as well 
+		node->rx_cnt = 0;
+
+		if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+			DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
+			node->flags &= ~NODE_FLAGS_CONNECTED;
+			ctdb_daemon_cancel_controls(ctdb, node);
+			/* maybe tell the transport layer to kill the
+			   sockets as well?
 			*/
 			continue;
 		}

-		ctdb_send_keepalive(ctdb, mem_ctx, i);
-		ctdb->nodes[i]->rx_cnt = 0;
+		ctdb_send_keepalive(ctdb, node->vnn);
 	}
 	
-
-
-	
-	talloc_free(mem_ctx);
-
 	event_add_timed(ctdb->ev, ctdb, 
 			timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
 }

+/*
+  start watching for nodes that might be dead
+ */
 int ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
 	event_add_timed(ctdb->ev, ctdb, 
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@ -74,7 +74,7 @@ typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length,

 /* used for callbacks in ctdb_control requests */
 typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
-					   uint32_t status, TDB_DATA data, 
+					   int32_t status, TDB_DATA data, 
 					   const char *errormsg,
 					   void *private_data);

@ -93,6 +93,10 @@ struct ctdb_node {
 	/* used by the dead node monitoring */
 	uint32_t dead_count;
 	uint32_t rx_cnt;
+
+	/* a list of controls pending to this node, so we can time them out quickly
+	   if the node becomes disconnected */
+	struct daemon_control_state *pending_controls;
 };

 /*
@ -261,6 +265,7 @@ struct ctdb_context {
 	uint32_t num_clients;
 	uint32_t seqnum_frequency;
 	uint32_t recovery_master;
+	struct ctdb_call_state *pending_calls;
 };

 struct ctdb_db_context {
@ -296,11 +301,6 @@ struct ctdb_db_context {
          ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
 	  }} while (0)

-/* timeout for ctdb call operations. When this timeout expires we
-   check if the generation count has changed, and if it has then
-   re-issue the call */
-#define CTDB_CALL_TIMEOUT 2
-
 /* maximum timeout for ctdb control calls */
 #define CTDB_CONTROL_TIMEOUT 60

@ -311,6 +311,9 @@ struct ctdb_db_context {
 /* timeout between dead-node monitoring events */
 #define CTDB_MONITORING_TIMEOUT 5

+/* number of monitoring timeouts before a node is considered dead */
+#define CTDB_MONITORING_DEAD_COUNT 3
+

 /* number of consecutive calls from the same node before we give them
   the record */
@ -383,6 +386,7 @@ enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
  state of a in-progress ctdb call
 */
 struct ctdb_call_state {
+	struct ctdb_call_state *next, *prev;
 	enum call_state state;
 	uint32_t reqid;
 	struct ctdb_req_call *c;
@ -390,7 +394,6 @@ struct ctdb_call_state {
 	const char *errmsg;
 	struct ctdb_call call;
 	uint32_t generation;
-	uint32_t resend_count;
 	struct {
 		void (*fn)(struct ctdb_call_state *);
 		void *private_data;
@ -710,7 +713,6 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty
 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid);

 void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);

 int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
@ -819,6 +821,9 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb);
 uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);

 int ctdb_start_monitoring(struct ctdb_context *ctdb);
-void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode);
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
+
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+void ctdb_call_resend_all(struct ctdb_context *ctdb);

 #endif