From 346dfc1bef22c6ee41d1ec7cdee19c1a0fbd11d6 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 18 May 2007 23:23:36 +1000
Subject: [PATCH 1/3] - up rx_cnt on all packet types - notice when a node
 becomes available again

(This used to be ctdb commit e05110dd6112e81f224937dfd7370d963ce9531a)
---
 ctdb/common/ctdb.c          |  9 ++++--
 ctdb/common/ctdb_call.c     |  6 ++--
 ctdb/common/ctdb_monitor.c  | 64 +++++++++++++------------------------
 ctdb/include/ctdb_private.h |  6 ++--
 4 files changed, 35 insertions(+), 50 deletions(-)
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c
index b5829e55d70..5471463105e 100644
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@@ -116,8 +116,7 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
 	node->name = talloc_asprintf(node, "%s:%u", 
 				     node->address.address, 
 				     node->address.port);
-	/* for now we just set the vnn to the line in the file - this
-	   will change! */
+	/* this assumes that the nodes are kept in sorted order, and no gaps */
 	node->vnn = ctdb->num_nodes;
 
 	if (ctdb->address.address &&
@@ -275,6 +274,11 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
 		 "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
 		 hdr->srcnode, hdr->destnode));
 
+	/* up the counter for this source node, so we know it's alive */
+	if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
+		ctdb->nodes[hdr->srcnode]->rx_cnt++;
+	}
+
 	switch (hdr->operation) {
 	case CTDB_REQ_CALL:
 	case CTDB_REPLY_CALL:
@@ -345,7 +349,6 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
 
 	case CTDB_REQ_KEEPALIVE:
 		ctdb->status.keepalive_packets_recv++;
-		ctdb_request_keepalive(ctdb, hdr);
 		break;
 
 	default:
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index fadbfac9474..cd7244ff157 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -785,13 +785,11 @@ int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call)
 /* 
    send a keepalive packet to the other node
 */
-void ctdb_send_keepalive(struct ctdb_context *ctdb,
-				TALLOC_CTX *mem_ctx,
-				uint32_t destnode)
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
 {
 	struct ctdb_req_keepalive *r;
 	
-	r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE,
+	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
 				    sizeof(struct ctdb_req_keepalive), 
 				    struct ctdb_req_keepalive);
 	CTDB_NO_MEMORY_FATAL(ctdb, r);
diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c
index 3f8b68128ec..ff2046ed8ad 100644
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@@ -26,73 +26,55 @@
 #include "../include/ctdb_private.h"
 
 /*
-  called when a CTDB_REQ_KEEPALIVE packet comes in
-*/
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
-{
-	struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr;
-	struct ctdb_node *node = NULL;
-	int i;
-
-	for (i=0;i<ctdb->num_nodes;i++) {
-		if (ctdb->nodes[i]->vnn == r->hdr.srcnode) {
-			node = ctdb->nodes[i];
-			break;
-		}
-	}
-	if (!node) {
-		DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode));
-		return;
-	}
-
-	node->rx_cnt++;
-}
-
-
+  see if any nodes are dead
+ */
 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
 			   struct timeval t, void *private_data)
 {
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 	int i;
-	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
 
 	/* send a keepalive to all other nodes, unless */
 	for (i=0;i<ctdb->num_nodes;i++) {
-		if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) {
+		struct ctdb_node *node = ctdb->nodes[i];
+		if (node->vnn == ctdb->vnn) {
 			continue;
 		}
-		if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) {
-			continue;
+		
+		/* it might have come alive again */
+		if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
+			DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn));
+			node->flags |= NODE_FLAGS_CONNECTED;
 		}
 
-		if (ctdb->nodes[i]->rx_cnt == 0) {
-			ctdb->nodes[i]->dead_count++;
+		if (node->rx_cnt == 0) {
+			node->dead_count++;
 		} else {
-			ctdb->nodes[i]->dead_count = 0;
+			node->dead_count = 0;
 		}
 
-		if (ctdb->nodes[i]->dead_count>=3) {
-			ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED;
-			/* should probably tell the transport layer
-			   to kill the sockets as well 
+		node->rx_cnt = 0;
+
+		if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+			DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
+			node->flags &= ~NODE_FLAGS_CONNECTED;
+			/* maybe tell the transport layer to kill the
+			   sockets as well?
 			*/
 			continue;
 		}
 
-		ctdb_send_keepalive(ctdb, mem_ctx, i);
-		ctdb->nodes[i]->rx_cnt = 0;
+		ctdb_send_keepalive(ctdb, node->vnn);
 	}
-
-
-
 	
-	talloc_free(mem_ctx);
-
 	event_add_timed(ctdb->ev, ctdb, 
 			timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), 
 			ctdb_check_for_dead_nodes, ctdb);
 }
 
+/*
+  start watching for nodes that might be dead
+ */
 int ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
 	event_add_timed(ctdb->ev, ctdb, 
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 47d0fbb9910..821a99efd42 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -311,6 +311,9 @@ struct ctdb_db_context {
 /* timeout between dead-node monitoring events */
 #define CTDB_MONITORING_TIMEOUT 5
 
+/* number of monitoring timeouts before a node is considered dead */
+#define CTDB_MONITORING_DEAD_COUNT 3
+
 
 /* number of consecutive calls from the same node before we give them
    the record */
@@ -710,7 +713,6 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty
 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid);
 
 void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 
 int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
@@ -819,6 +821,6 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb);
 uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
 
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
-void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode);
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
 #endif

From 049e1504ee7b62f6abd61dddc59558963780d641 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 18 May 2007 23:48:29 +1000
Subject: [PATCH 2/3] timeout pending controls immediately when a node becomes
 disconnected (This used to be ctdb commit
 93c4b16f4efef383ba8db83953019ef4821613e0)

---
 ctdb/common/ctdb.c          |  1 +
 ctdb/common/ctdb_daemon.c   | 40 ++++++++++++++++++++++++++++++++++++-
 ctdb/common/ctdb_monitor.c  |  1 +
 ctdb/include/ctdb_private.h |  8 +++++++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c
index 5471463105e..230f3285e55 100644
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@@ -379,6 +379,7 @@ static void ctdb_node_dead(struct ctdb_node *node)
 	node->flags &= ~NODE_FLAGS_CONNECTED;
 	DEBUG(1,("%s: node %s is dead: %d connected\n", 
 		 node->ctdb->name, node->name, node->ctdb->num_connected));
+	ctdb_daemon_cancel_controls(node->ctdb, node);
 }
 
 /*
diff --git a/ctdb/common/ctdb_daemon.c b/ctdb/common/ctdb_daemon.c
index d9abe2bce04..c0f8d422e8e 100644
--- a/ctdb/common/ctdb_daemon.c
+++ b/ctdb/common/ctdb_daemon.c
@@ -836,16 +836,18 @@ void ctdb_request_finished(struct ctdb_context *ctdb, struct ctdb_req_header *hd
 
 
 struct daemon_control_state {
+	struct daemon_control_state *next, *prev; /* list links used for ctdb_node.pending_controls */
 	struct ctdb_client *client;
 	struct ctdb_req_control *c;
 	uint32_t reqid;
+	struct ctdb_node *node; /* destination node, or NULL when destnode fails ctdb_validate_vnn() */
 };
 
 /*
   callback when a control reply comes in
  */
 static void daemon_control_callback(struct ctdb_context *ctdb,
-				    uint32_t status, TDB_DATA data, 
+				    int32_t status, TDB_DATA data, 
 				    const char *errormsg,
 				    void *private_data)
 {
@@ -879,6 +881,30 @@ static void daemon_control_callback(struct ctdb_context *ctdb,
 	talloc_free(state);
 }
 
+/*
+  fail all pending controls to a disconnected node: each entry is unlinked from
+  node->pending_controls and completed with status -1 ("node is disconnected")
+ */
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
+{
+	struct daemon_control_state *state;
+	while ((state = node->pending_controls)) {
+		DLIST_REMOVE(node->pending_controls, state);
+		daemon_control_callback(ctdb, -1, tdb_null, "node is disconnected", state);
+	}
+}
+
+/*
+  talloc destructor for a daemon_control_state: unlink it from its node's pending_controls list, if it has a node
+ */
+static int daemon_control_destructor(struct daemon_control_state *state)
+{
+	if (state->node) {
+		DLIST_REMOVE(state->node->pending_controls, state);
+	}
+	return 0;
+}
+
 /*
   this is called when the ctdb daemon received a ctdb request control
   from a local client over the unix domain socket
@@ -900,6 +926,14 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
 	state->client = client;
 	state->c = talloc_steal(state, c);
 	state->reqid = c->hdr.reqid;
+	if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) {
+		state->node = client->ctdb->nodes[c->hdr.destnode];
+		DLIST_ADD(state->node->pending_controls, state);
+	} else {
+		state->node = NULL;
+	}
+
+	talloc_set_destructor(state, daemon_control_destructor);
 	
 	data.dptr = &c->data[0];
 	data.dsize = c->datalen;
@@ -912,6 +946,10 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
 		DEBUG(0,(__location__ " Failed to send control to remote node %u\n",
 			 c->hdr.destnode));
 	}
+
+	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+		talloc_free(state);
+	}
 }
 
 /*
diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c
index ff2046ed8ad..255ea5ee30a 100644
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@@ -58,6 +58,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
 		if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
 			DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
 			node->flags &= ~NODE_FLAGS_CONNECTED;
+			ctdb_daemon_cancel_controls(ctdb, node);
 			/* maybe tell the transport layer to kill the
 			   sockets as well?
 			*/
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 821a99efd42..57901ed6a99 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -74,7 +74,7 @@ typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length,
 
 /* used for callbacks in ctdb_control requests */
 typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
-					   uint32_t status, TDB_DATA data, 
+					   int32_t status, TDB_DATA data, 
 					   const char *errormsg,
 					   void *private_data);
 
@@ -93,6 +93,10 @@ struct ctdb_node {
 	/* used by the dead node monitoring */
 	uint32_t dead_count;
 	uint32_t rx_cnt;
+
+	/* a list of controls pending to this node, so we can time them out quickly
+	   if the node becomes disconnected */
+	struct daemon_control_state *pending_controls;
 };
 
 /*
@@ -823,4 +827,6 @@ uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
 void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+
 #endif

From 28f2fc669b5697eb2e8fb01c8ab2514ecb9f1199 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Sat, 19 May 2007 00:56:49 +1000
Subject: [PATCH 3/3] a better way to resend calls after recovery (This used to
 be ctdb commit 444f52e134fc22aaf254d05c86d8b357ded876f4)

---
 ctdb/common/ctdb_call.c     | 41 ++++++++++++++++---------------------
 ctdb/common/ctdb_freeze.c   |  1 +
 ctdb/include/ctdb_private.h |  9 +++-----
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index cd7244ff157..c19d88f660e 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -607,37 +607,20 @@ void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
 */
 static int ctdb_call_destructor(struct ctdb_call_state *state)
 {
+	DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state); /* drop out of the list walked by ctdb_call_resend_all */
 	ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
 	return 0;
 }
 
 
 /*
-  called when a ctdb_call times out
+  called when a ctdb_call needs to be resent after a reconfigure event
 */
-static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te, 
-			      struct timeval t, void *private_data)
+static void ctdb_call_resend(struct ctdb_call_state *state)
 {
-	struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
 	struct ctdb_context *ctdb = state->ctdb_db->ctdb;
 
-	ctdb->status.timeouts.call++;
-
-	event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-			ctdb_call_timeout, state);
-
-	if (++state->resend_count < 10 &&
-	    (ctdb->vnn_map->generation == state->generation ||
-	     ctdb->recovery_mode != CTDB_RECOVERY_NORMAL)) {
-		/* the call is just being slow, or we are curently
-		   recovering, give it more time */
-		return;
-	}
-
-	/* the generation count changed or we're timing out too much -
-	   the call must be re-issued */
 	state->generation = ctdb->vnn_map->generation;
-	state->resend_count = 0;
 
 	/* use a new reqid, in case the old reply does eventually come in */
 	ctdb_reqid_remove(ctdb, state->reqid);
@@ -651,7 +634,19 @@ static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
 	state->c->hdr.destnode = ctdb->vnn;
 
 	ctdb_queue_packet(ctdb, &state->c->hdr);
-	DEBUG(0,("requeued ctdb_call after timeout\n"));
+	DEBUG(0,("resent ctdb_call\n"));
+}
+
+/*
+  resend all pending calls on recovery: walks ctdb->pending_calls; called from ctdb_control_thaw
+ */
+void ctdb_call_resend_all(struct ctdb_context *ctdb)
+{
+	struct ctdb_call_state *state, *next;
+	for (state=ctdb->pending_calls;state;state=next) {
+		next = state->next; /* fetch the successor before resending this entry */
+		ctdb_call_resend(state);
+	}
+}
 
 /*
@@ -743,10 +738,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
 	state->state  = CTDB_CALL_WAIT;
 	state->generation = ctdb->vnn_map->generation;
 
+	DLIST_ADD(ctdb->pending_calls, state);
+
 	ctdb_queue_packet(ctdb, &state->c->hdr);
 
-	event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-			ctdb_call_timeout, state);
 	return state;
 }
 
diff --git a/ctdb/common/ctdb_freeze.c b/ctdb/common/ctdb_freeze.c
index 96a128332e4..5868ed099c9 100644
--- a/ctdb/common/ctdb_freeze.c
+++ b/ctdb/common/ctdb_freeze.c
@@ -223,5 +223,6 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
 {
 	talloc_free(ctdb->freeze_handle);
 	ctdb->freeze_handle = NULL;
+	ctdb_call_resend_all(ctdb);
 	return 0;
 }
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 57901ed6a99..0149714c856 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -265,6 +265,7 @@ struct ctdb_context {
 	uint32_t num_clients;
 	uint32_t seqnum_frequency;
 	uint32_t recovery_master;
+	struct ctdb_call_state *pending_calls;
 };
 
 struct ctdb_db_context {
@@ -300,11 +301,6 @@ struct ctdb_db_context {
           ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
 	  }} while (0)
 
-/* timeout for ctdb call operations. When this timeout expires we
-   check if the generation count has changed, and if it has then
-   re-issue the call */
-#define CTDB_CALL_TIMEOUT 2
-
 /* maximum timeout for ctdb control calls */
 #define CTDB_CONTROL_TIMEOUT 60
 
@@ -390,6 +386,7 @@ enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
   state of a in-progress ctdb call
 */
 struct ctdb_call_state {
+	struct ctdb_call_state *next, *prev;
 	enum call_state state;
 	uint32_t reqid;
 	struct ctdb_req_call *c;
@@ -397,7 +394,6 @@ struct ctdb_call_state {
 	const char *errmsg;
 	struct ctdb_call call;
 	uint32_t generation;
-	uint32_t resend_count;
 	struct {
 		void (*fn)(struct ctdb_call_state *);
 		void *private_data;
@@ -828,5 +824,6 @@ int ctdb_start_monitoring(struct ctdb_context *ctdb);
 void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
 void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+void ctdb_call_resend_all(struct ctdb_context *ctdb);
 
 #endif