From 346dfc1bef22c6ee41d1ec7cdee19c1a0fbd11d6 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 18 May 2007 23:23:36 +1000 Subject: [PATCH 1/3] - up rx_cnt on all packet types - notice when a node becomes available again (This used to be ctdb commit e05110dd6112e81f224937dfd7370d963ce9531a) --- ctdb/common/ctdb.c | 9 ++++-- ctdb/common/ctdb_call.c | 6 ++-- ctdb/common/ctdb_monitor.c | 64 +++++++++++++------------------------ ctdb/include/ctdb_private.h | 6 ++-- 4 files changed, 35 insertions(+), 50 deletions(-) diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c index b5829e55d70..5471463105e 100644 --- a/ctdb/common/ctdb.c +++ b/ctdb/common/ctdb.c @@ -116,8 +116,7 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) node->name = talloc_asprintf(node, "%s:%u", node->address.address, node->address.port); - /* for now we just set the vnn to the line in the file - this - will change! */ + /* this assumes that the nodes are kept in sorted order, and no gaps */ node->vnn = ctdb->num_nodes; if (ctdb->address.address && @@ -275,6 +274,11 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length) "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length, hdr->srcnode, hdr->destnode)); + /* up the counter for this source node, so we know its alive */ + if (ctdb_validate_vnn(ctdb, hdr->srcnode)) { + ctdb->nodes[hdr->srcnode]->rx_cnt++; + } + switch (hdr->operation) { case CTDB_REQ_CALL: case CTDB_REPLY_CALL: @@ -345,7 +349,6 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length) case CTDB_REQ_KEEPALIVE: ctdb->status.keepalive_packets_recv++; - ctdb_request_keepalive(ctdb, hdr); break; default: diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c index fadbfac9474..cd7244ff157 100644 --- a/ctdb/common/ctdb_call.c +++ b/ctdb/common/ctdb_call.c @@ -785,13 +785,11 @@ int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call) /* send a keepalive packet to the other node */ 
-void ctdb_send_keepalive(struct ctdb_context *ctdb, - TALLOC_CTX *mem_ctx, - uint32_t destnode) +void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode) { struct ctdb_req_keepalive *r; - r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE, + r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE, sizeof(struct ctdb_req_keepalive), struct ctdb_req_keepalive); CTDB_NO_MEMORY_FATAL(ctdb, r); diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c index 3f8b68128ec..ff2046ed8ad 100644 --- a/ctdb/common/ctdb_monitor.c +++ b/ctdb/common/ctdb_monitor.c @@ -26,73 +26,55 @@ #include "../include/ctdb_private.h" /* - called when a CTDB_REQ_KEEPALIVE packet comes in -*/ -void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) -{ - struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr; - struct ctdb_node *node = NULL; - int i; - - for (i=0;i<ctdb->num_nodes;i++) { - if (ctdb->nodes[i]->vnn == r->hdr.srcnode) { - node = ctdb->nodes[i]; - break; - } - } - if (!node) { - DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode)); - return; - } - - node->rx_cnt++; -} - - + see if any nodes are dead + */ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); int i; - TALLOC_CTX *mem_ctx = talloc_new(ctdb); /* send a keepalive to all other nodes, unless */ for (i=0;i<ctdb->num_nodes;i++) { - if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) { + struct ctdb_node *node = ctdb->nodes[i]; + if (node->vnn == ctdb->vnn) { continue; } - if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) { - continue; + + /* it might have come alive again */ + if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) { + DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn)); + node->flags |= NODE_FLAGS_CONNECTED; } - if 
(ctdb->nodes[i]->rx_cnt == 0) { - ctdb->nodes[i]->dead_count++; + if (node->rx_cnt == 0) { + node->dead_count++; } else { - ctdb->nodes[i]->dead_count = 0; + node->dead_count = 0; } - if (ctdb->nodes[i]->dead_count>=3) { - ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED; - /* should probably tell the transport layer - to kill the sockets as well + node->rx_cnt = 0; + + if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) { + DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn)); + node->flags &= ~NODE_FLAGS_CONNECTED; + /* maybe tell the transport layer to kill the + sockets as well? */ continue; } - ctdb_send_keepalive(ctdb, mem_ctx, i); - ctdb->nodes[i]->rx_cnt = 0; + ctdb_send_keepalive(ctdb, node->vnn); } - - - - talloc_free(mem_ctx); - event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), ctdb_check_for_dead_nodes, ctdb); } +/* + start watching for nodes that might be dead + */ int ctdb_start_monitoring(struct ctdb_context *ctdb) { event_add_timed(ctdb->ev, ctdb, diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 47d0fbb9910..821a99efd42 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -311,6 +311,9 @@ struct ctdb_db_context { /* timeout between dead-node monitoring events */ #define CTDB_MONITORING_TIMEOUT 5 +/* number of monitoring timeouts before a node is considered dead */ +#define CTDB_MONITORING_DEAD_COUNT 3 + /* number of consecutive calls from the same node before we give them the record */ @@ -710,7 +713,6 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid); void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); -void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); int ctdb_daemon_send_control(struct ctdb_context *ctdb, 
uint32_t destnode, @@ -819,6 +821,6 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb); uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb); int ctdb_start_monitoring(struct ctdb_context *ctdb); -void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode); +void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode); #endif From 049e1504ee7b62f6abd61dddc59558963780d641 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 18 May 2007 23:48:29 +1000 Subject: [PATCH 2/3] timeout pending controls immediately when a node becomes disconnected (This used to be ctdb commit 93c4b16f4efef383ba8db83953019ef4821613e0) --- ctdb/common/ctdb.c | 1 + ctdb/common/ctdb_daemon.c | 40 ++++++++++++++++++++++++++++++++++++- ctdb/common/ctdb_monitor.c | 1 + ctdb/include/ctdb_private.h | 8 +++++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c index 5471463105e..230f3285e55 100644 --- a/ctdb/common/ctdb.c +++ b/ctdb/common/ctdb.c @@ -379,6 +379,7 @@ static void ctdb_node_dead(struct ctdb_node *node) node->flags &= ~NODE_FLAGS_CONNECTED; DEBUG(1,("%s: node %s is dead: %d connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); + ctdb_daemon_cancel_controls(node->ctdb, node); } /* diff --git a/ctdb/common/ctdb_daemon.c b/ctdb/common/ctdb_daemon.c index d9abe2bce04..c0f8d422e8e 100644 --- a/ctdb/common/ctdb_daemon.c +++ b/ctdb/common/ctdb_daemon.c @@ -836,16 +836,18 @@ void ctdb_request_finished(struct ctdb_context *ctdb, struct ctdb_req_header *hd struct daemon_control_state { + struct daemon_control_state *next, *prev; struct ctdb_client *client; struct ctdb_req_control *c; uint32_t reqid; + struct ctdb_node *node; }; /* callback when a control reply comes in */ static void daemon_control_callback(struct ctdb_context *ctdb, - uint32_t status, TDB_DATA data, + int32_t status, TDB_DATA data, const char *errormsg, void *private_data) { @@ -879,6 +881,30 @@ 
static void daemon_control_callback(struct ctdb_context *ctdb, talloc_free(state); } +/* + fail all pending controls to a disconnected node + */ +void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node) +{ + struct daemon_control_state *state; + while ((state = node->pending_controls)) { + DLIST_REMOVE(node->pending_controls, state); + daemon_control_callback(ctdb, (uint32_t)-1, tdb_null, + "node is disconnected", state); + } +} + +/* + destroy a daemon_control_state + */ +static int daemon_control_destructor(struct daemon_control_state *state) +{ + if (state->node) { + DLIST_REMOVE(state->node->pending_controls, state); + } + return 0; +} + /* this is called when the ctdb daemon received a ctdb request control from a local client over the unix domain socket @@ -900,6 +926,14 @@ static void daemon_request_control_from_client(struct ctdb_client *client, state->client = client; state->c = talloc_steal(state, c); state->reqid = c->hdr.reqid; + if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) { + state->node = client->ctdb->nodes[c->hdr.destnode]; + DLIST_ADD(state->node->pending_controls, state); + } else { + state->node = NULL; + } + + talloc_set_destructor(state, daemon_control_destructor); data.dptr = &c->data[0]; data.dsize = c->datalen; @@ -912,6 +946,10 @@ static void daemon_request_control_from_client(struct ctdb_client *client, DEBUG(0,(__location__ " Failed to send control to remote node %u\n", c->hdr.destnode)); } + + if (c->flags & CTDB_CTRL_FLAG_NOREPLY) { + talloc_free(state); + } } /* diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c index ff2046ed8ad..255ea5ee30a 100644 --- a/ctdb/common/ctdb_monitor.c +++ b/ctdb/common/ctdb_monitor.c @@ -58,6 +58,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) { DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn)); node->flags &= ~NODE_FLAGS_CONNECTED; + 
ctdb_daemon_cancel_controls(ctdb, node); /* maybe tell the transport layer to kill the sockets as well? */ diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 821a99efd42..57901ed6a99 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -74,7 +74,7 @@ typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length, /* used for callbacks in ctdb_control requests */ typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *, - uint32_t status, TDB_DATA data, + int32_t status, TDB_DATA data, const char *errormsg, void *private_data); @@ -93,6 +93,10 @@ struct ctdb_node { /* used by the dead node monitoring */ uint32_t dead_count; uint32_t rx_cnt; + + /* a list of controls pending to this node, so we can time them out quickly + if the node becomes disconnected */ + struct daemon_control_state *pending_controls; }; /* @@ -823,4 +827,6 @@ uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb); int ctdb_start_monitoring(struct ctdb_context *ctdb); void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode); +void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node); + #endif From 28f2fc669b5697eb2e8fb01c8ab2514ecb9f1199 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 19 May 2007 00:56:49 +1000 Subject: [PATCH 3/3] a better way to resend calls after recovery (This used to be ctdb commit 444f52e134fc22aaf254d05c86d8b357ded876f4) --- ctdb/common/ctdb_call.c | 41 ++++++++++++++++--------------------- ctdb/common/ctdb_freeze.c | 1 + ctdb/include/ctdb_private.h | 9 +++----- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c index cd7244ff157..c19d88f660e 100644 --- a/ctdb/common/ctdb_call.c +++ b/ctdb/common/ctdb_call.c @@ -607,37 +607,20 @@ void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) */ static int ctdb_call_destructor(struct ctdb_call_state *state) { + 
DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state); ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid); return 0; } /* - called when a ctdb_call times out + called when a ctdb_call needs to be resent after a reconfigure event */ -static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) +static void ctdb_call_resend(struct ctdb_call_state *state) { - struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state); struct ctdb_context *ctdb = state->ctdb_db->ctdb; - ctdb->status.timeouts.call++; - - event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), - ctdb_call_timeout, state); - - if (++state->resend_count < 10 && - (ctdb->vnn_map->generation == state->generation || - ctdb->recovery_mode != CTDB_RECOVERY_NORMAL)) { - /* the call is just being slow, or we are curently - recovering, give it more time */ - return; - } - - /* the generation count changed or we're timing out too much - - the call must be re-issued */ state->generation = ctdb->vnn_map->generation; - state->resend_count = 0; /* use a new reqid, in case the old reply does eventually come in */ ctdb_reqid_remove(ctdb, state->reqid); @@ -651,7 +634,19 @@ static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te, state->c->hdr.destnode = ctdb->vnn; ctdb_queue_packet(ctdb, &state->c->hdr); - DEBUG(0,("requeued ctdb_call after timeout\n")); + DEBUG(0,("resent ctdb_call\n")); +} + +/* + resend all pending calls on recovery + */ +void ctdb_call_resend_all(struct ctdb_context *ctdb) +{ + struct ctdb_call_state *state, *next; + for (state=ctdb->pending_calls;state;state=next) { + next = state->next; + ctdb_call_resend(state); + } } /* @@ -743,10 +738,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd state->state = CTDB_CALL_WAIT; state->generation = ctdb->vnn_map->generation; + DLIST_ADD(ctdb->pending_calls, state); + ctdb_queue_packet(ctdb, 
&state->c->hdr); - event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), - ctdb_call_timeout, state); return state; } diff --git a/ctdb/common/ctdb_freeze.c b/ctdb/common/ctdb_freeze.c index 96a128332e4..5868ed099c9 100644 --- a/ctdb/common/ctdb_freeze.c +++ b/ctdb/common/ctdb_freeze.c @@ -223,5 +223,6 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb) { talloc_free(ctdb->freeze_handle); ctdb->freeze_handle = NULL; + ctdb_call_resend_all(ctdb); return 0; } diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 57901ed6a99..0149714c856 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -265,6 +265,7 @@ struct ctdb_context { uint32_t num_clients; uint32_t seqnum_frequency; uint32_t recovery_master; + struct ctdb_call_state *pending_calls; }; struct ctdb_db_context { @@ -300,11 +301,6 @@ struct ctdb_db_context { ctdb_fatal(ctdb, "Out of memory in " __location__ ); \ }} while (0) -/* timeout for ctdb call operations. When this timeout expires we - check if the generation count has changed, and if it has then - re-issue the call */ -#define CTDB_CALL_TIMEOUT 2 - /* maximum timeout for ctdb control calls */ #define CTDB_CONTROL_TIMEOUT 60 @@ -390,6 +386,7 @@ enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR}; state of a in-progress ctdb call */ struct ctdb_call_state { + struct ctdb_call_state *next, *prev; enum call_state state; uint32_t reqid; struct ctdb_req_call *c; @@ -397,7 +394,6 @@ struct ctdb_call_state { const char *errmsg; struct ctdb_call call; uint32_t generation; - uint32_t resend_count; struct { void (*fn)(struct ctdb_call_state *); void *private_data; @@ -828,5 +824,6 @@ int ctdb_start_monitoring(struct ctdb_context *ctdb); void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode); void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node); +void ctdb_call_resend_all(struct ctdb_context *ctdb); #endif