2024-12-22 13:34:15 +03:00 · 2008-09-09 13:44:46 +10:00 · 2008-09-09 13:44:46 +10:00 · 6474f3278d
commit 6474f3278d
parent 7a78a78a1c
8 changed files with 73 additions and 0 deletions
--- a/ctdb/client/ctdb_client.c
+++ b/ctdb/client/ctdb_client.c
@@ -3280,3 +3280,21 @@ again:
 	talloc_free(h);
 	return 0;
 }
+
+/*
+  recovery daemon ping to main daemon: 0 if the control succeeded with status 0, else -1
+ */
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
+{
+	int32_t status;
+	int rc = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
+			      CTDB_CONTROL_RECD_PING, 0, tdb_null,
+			      ctdb, NULL, &status, NULL, NULL);
+
+	/* status is only inspected when the control call itself succeeded */
+	if (rc == 0 && status == 0) {
+		return 0;
+	}
+	DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
+	return -1;
+}
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@@ -566,4 +566,6 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h,
 			   TDB_DATA key, TDB_DATA data);
 int ctdb_transaction_commit(struct ctdb_transaction_handle *h);

+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb); /* recovery daemon ping to the main daemon; 0 on success, -1 on failure */
+
 #endif
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -114,6 +114,7 @@ struct ctdb_tunable {
 	uint32_t reclock_ping_period;
 	uint32_t no_ip_failback;
 	uint32_t verbose_memory_names;
+	uint32_t recd_ping_timeout;
 };

 /*
@@ -417,6 +418,7 @@ struct ctdb_context {
 	int start_as_disabled;
 	uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
 	TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
+	TALLOC_CTX *recd_ping_ctx;
 };

 struct ctdb_db_context {
@@ -550,6 +552,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TRANS2_FINISHED         = 84,
 		    CTDB_CONTROL_TRANS2_ERROR            = 85,
 		    CTDB_CONTROL_TRANS2_COMMIT_RETRY     = 86,
+		    CTDB_CONTROL_RECD_PING		 = 87,
 };	

 /*
@@ -1378,5 +1381,6 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
 char *ctdb_addr_to_str(ctdb_sock_addr *addr);
 void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);

+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb); /* handler for CTDB_CONTROL_RECD_PING: (re)arms the recd ping timeout */

 #endif
--- a/ctdb/server/ctdb_control.c
+++ b/ctdb/server/ctdb_control.c
@@ -406,6 +406,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_TRANS2_FINISHED:
 		return ctdb_control_trans2_finished(ctdb, c);

+	case CTDB_CONTROL_RECD_PING:
+		CHECK_CONTROL_DATA_SIZE(0);
+		return ctdb_control_recd_ping(ctdb);
+
 	default:
 		DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
 		return -1;
--- a/ctdb/server/ctdb_daemon.c
+++ b/ctdb/server/ctdb_daemon.c
@@ -103,6 +103,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb)

 	/* start periodic update of tcp tickle lists */
       	ctdb_start_tcp_tickle_update(ctdb);
+
+	/* start listening for recovery daemon pings */
+	ctdb_control_recd_ping(ctdb);
 }

 static void block_signal(int signum)
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -971,3 +971,41 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
 	return 0;	
 }

+static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
+{
+	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+	/* Timed-event callback registered by ctdb_control_recd_ping(); p is the ctdb context passed there. ev, te and t are unused. */
+	DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n"));
+
+	ctdb_stop_recoverd(ctdb);
+	ctdb_stop_keepalive(ctdb);
+	ctdb_stop_monitoring(ctdb);
+	ctdb_release_all_ips(ctdb);
+	if (ctdb->methods != NULL) {	/* skipped when no methods are set */
+		ctdb->methods->shutdown(ctdb);
+	}
+	ctdb_event_script(ctdb, "shutdown");
+	DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n"));
+	exit(0);	/* NOTE(review): exit status is 0 although this is a failure path - confirm this is intended */
+}
+
+/* The recovery daemon will ping us at regular intervals. Each ping drops
+   the previous timeout and, unless RecdPingTimeout is 0, arms a new one.
+   If we haven't been pinged before it expires, ctdb_recd_ping_timeout()
+   shuts us down. Returns 0 on success, -1 if the timeout could not be armed. */
+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
+{
+	/* the timed event is allocated under recd_ping_ctx, so recreating the context restarts the clock */
+	talloc_free(ctdb->recd_ping_ctx);
+	ctdb->recd_ping_ctx = talloc_new(ctdb);
+	CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx);
+	/* a failed arm must not be reported as success: the watchdog would be silently off */
+	if (ctdb->tunable.recd_ping_timeout != 0 &&
+	    event_add_timed(ctdb->ev, ctdb->recd_ping_ctx,
+			    timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
+			    ctdb_recd_ping_timeout, ctdb) == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to arm recd ping timeout\n"));
+		return -1;
+	}
+	return 0;
+}
+
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -2317,6 +2317,9 @@ again:
 		exit(-1);
 	}

+	/* ping the local daemon to tell it we are alive */
+	ctdb_ctrl_recd_ping(ctdb);
+
 	if (rec->election_timeout) {
 		/* an election is in progress */
 		goto again;
--- a/ctdb/server/ctdb_tunables.c
+++ b/ctdb/server/ctdb_tunables.c
@@ -50,6 +50,7 @@ static const struct {
 	{ "ReclockPingPeriod",   60,  offsetof(struct ctdb_tunable,  reclock_ping_period) },
 	{ "NoIPFailback",         0,  offsetof(struct ctdb_tunable, no_ip_failback) },
 	{ "VerboseMemoryNames",   0,  offsetof(struct ctdb_tunable, verbose_memory_names) },
+	{ "RecdPingTimeout",	 60,  offsetof(struct ctdb_tunable, recd_ping_timeout) },
 };

 /*