mirror of https://github.com/samba-team/samba.git
Merge commit 'ronnie/1.0.53'
(This used to be ctdb commit 58e6dc722ad1e2415b71baf1d471885169dde14d)
commit 66d154ef5f

@@ -1231,29 +1231,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32
 	return 0;
 }
 
-/*
-  get the reclock filename
- */
-int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
-			 TALLOC_CTX *mem_ctx, const char **reclock)
-{
-	int ret;
-	TDB_DATA outdata;
-	int32_t res;
-
-	ret = ctdb_control(ctdb, destnode, 0,
-			   CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
-			   mem_ctx, &outdata, &res, &timeout, NULL);
-	if (ret != 0 || res != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
-		return -1;
-	}
-
-	*reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr);
-
-	return 0;
-}
-
 /*
   get a list of nodes (vnn and flags ) from a remote node
  */
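The removed client wrapper relied on talloc ownership transfer: the reply
buffer filled in by ctdb_control() is reparented with talloc_steal() onto
the caller's context so it outlives any temporary allocations. A minimal
standalone sketch of that pattern (illustrative only, not part of this
commit; link with -ltalloc):

#include <stdio.h>
#include <talloc.h>

int main(void)
{
	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
	TALLOC_CTX *mem_ctx = talloc_new(NULL);

	/* stands in for the outdata.dptr buffer filled by ctdb_control() */
	char *reply = talloc_strdup(tmp_ctx, "/shared/reclock");

	/* reparent: the buffer is now owned by mem_ctx, not tmp_ctx */
	const char *reclock = talloc_steal(mem_ctx, reply);

	talloc_free(tmp_ctx);              /* the string survives this free */
	printf("reclock: %s\n", reclock);
	talloc_free(mem_ctx);              /* now the string is released */
	return 0;
}
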
@@ -536,7 +536,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
 		    CTDB_CONTROL_START_RECOVERY = 70,
 		    CTDB_CONTROL_END_RECOVERY = 71,
 		    CTDB_CONTROL_RELOAD_NODES_FILE = 72,
-		    CTDB_CONTROL_GET_RECLOCK_FILE = 73,
+		    /* #73 removed */
 		    CTDB_CONTROL_TRY_DELETE_RECORDS = 74,
 		    CTDB_CONTROL_ENABLE_MONITOR = 75,
 		    CTDB_CONTROL_DISABLE_MONITOR = 76,
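Note that the control is retired by replacing its enum entry with a
placeholder comment rather than deleting the line: control numbers are the
wire protocol between nodes, so opcode 73 must stay reserved and the later
opcodes must keep their values. A contrived illustration (not ctdb code) of
why the hole is kept:

/* Values are pinned explicitly; #73 stays reserved so that a node
 * running the old version and a node running the new version still
 * agree on what opcode 74 means. */
enum example_controls {
	EXAMPLE_CONTROL_RELOAD_NODES_FILE = 72,
	/* #73 removed - never reuse it for a new control */
	EXAMPLE_CONTROL_TRY_DELETE_RECORDS = 74,
};
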
@@ -1240,7 +1240,6 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
 				 TDB_DATA *outdata);
 int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata);
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba@samba.org>
 Name: ctdb
 Version: 1.0
-Release: 51
+Release: 53
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -118,6 +118,15 @@ fi
 %{_includedir}/ctdb_private.h
 
 %changelog
+* Thu Aug 7 2008 : Version 1.0.53
+ - Remove the reclock.pnn file it can cause gpfs to fail to umount
+ - New transaction code
+* Mon Aug 4 2008 : Version 1.0.52
+ - Send an explicit gratious arp when starting sending the tcp tickles.
+ - When doing failover, issue a killtcp to non-NFS/non-CIFS clients
+   so that they fail quickly. NFS and CIFS already fail and recover
+   quickly.
+ - Update the test scripts to handle CTRL-C to kill off the test.
 * Mon Jul 28 2008 : Version 1.0.51
  - Strip off the vlan tag from bond devices before we check in /proc
    if the interface is up or not.
@@ -378,10 +378,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_END_RECOVERY:
 		return ctdb_control_end_recovery(ctdb, c, async_reply);
 
-	case CTDB_CONTROL_GET_RECLOCK_FILE:
-		CHECK_CONTROL_DATA_SIZE(0);
-		return ctdb_control_get_reclock_file(ctdb, outdata);
-
 	case CTDB_CONTROL_TRY_DELETE_RECORDS:
 		return ctdb_control_try_delete_records(ctdb, indata, outdata);
 
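In the dispatch switch, CHECK_CONTROL_DATA_SIZE(0) validated that this
control carries no input payload before invoking the handler. A
self-contained sketch of that kind of guard (hypothetical names and macro
body, not the actual ctdb definition):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct blob {
	uint8_t *dptr;
	size_t   dsize;
};

/* reject a control whose payload is not exactly the expected size */
#define CHECK_DATA_SIZE(indata, want) do { \
	if ((indata).dsize != (size_t)(want)) { \
		fprintf(stderr, "bad data size %zu, expected %zu\n", \
			(indata).dsize, (size_t)(want)); \
		return -1; \
	} \
} while (0)

static int handle_no_payload_control(struct blob indata)
{
	CHECK_DATA_SIZE(indata, 0);   /* this opcode takes no input data */
	/* ... perform the control ... */
	return 0;
}

int main(void)
{
	struct blob empty = { NULL, 0 };
	return handle_no_payload_control(empty) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
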
@@ -865,22 +865,6 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
 	return 0;
 }
 
-/*
-  report the location for the reclock file
- */
-int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata)
-{
-	char *reclock = NULL;
-
-	reclock = talloc_strdup(outdata, ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY(ctdb, reclock);
-
-	outdata->dsize = strlen(reclock)+1;
-	outdata->dptr = (uint8_t *)reclock;
-
-	return 0;
-}
-
 /*
   try to delete all these records as part of the vacuuming process
   and return the records we failed to delete
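The removed handler returned the reclock path by packing a NUL-terminated
string into the pointer+length pair of a TDB_DATA, with dsize counting the
trailing NUL so the receiver can use dptr directly as a C string. A
standalone sketch of the same packing (plain malloc instead of talloc,
illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct blob {
	uint8_t *dptr;
	size_t   dsize;
};

static int pack_string(struct blob *out, const char *s)
{
	char *copy = strdup(s);          /* stand-in for talloc_strdup() */
	if (copy == NULL) {
		return -1;
	}
	out->dsize = strlen(copy) + 1;   /* +1 keeps the terminating NUL */
	out->dptr  = (uint8_t *)copy;
	return 0;
}

int main(void)
{
	struct blob out;

	if (pack_string(&out, "/shared/reclock") != 0) {
		return 1;
	}
	printf("%s (%zu bytes)\n", (const char *)out.dptr, out.dsize);
	free(out.dptr);
	return 0;
}
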
@@ -41,7 +41,6 @@ struct ban_state {
  */
 struct ctdb_recoverd {
 	struct ctdb_context *ctdb;
-	int rec_file_fd;
 	uint32_t recmaster;
 	uint32_t num_active;
 	uint32_t num_connected;
@@ -2143,148 +2142,6 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
 	return status;
 }
 
-/*
-  this function writes the number of connected nodes we have for this pnn
-  to the pnn slot in the reclock file
- */
-static void
-ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec)
-{
-	const char count = rec->num_connected;
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n"));
-		return;
-	}
-
-	if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) {
-		DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n"));
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-}
-
-/*
-  this function opens the reclock file and sets a byterage lock for the single
-  byte at position pnn+1.
-  the existence/non-existence of such a lock provides an alternative mechanism
-  to know whether a remote node(recovery daemon) is running or not.
- */
-static void
-ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
-{
-	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
-	struct flock lock;
-	char *pnnfile = NULL;
-
-	DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));
-
-	if (rec->rec_file_fd != -1) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-	}
-
-	pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
-	CTDB_NO_MEMORY_FATAL(ctdb, pnnfile);
-
-	rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600);
-	if (rec->rec_file_fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n",
-				  pnnfile, strerror(errno)));
-		talloc_free(pnnfile);
-		return;
-	}
-
-	set_close_on_exec(rec->rec_file_fd);
-	lock.l_type = F_WRLCK;
-	lock.l_whence = SEEK_SET;
-	lock.l_start = ctdb->pnn;
-	lock.l_len = 1;
-	lock.l_pid = 0;
-
-	if (fcntl(rec->rec_file_fd, F_SETLK, &lock) != 0) {
-		close(rec->rec_file_fd);
-		rec->rec_file_fd = -1;
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile));
-		talloc_free(pnnfile);
-		return;
-	}
-
-
-	DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
-	talloc_free(pnnfile);
-
-	/* we start out with 0 connected nodes */
-	ctdb_recoverd_write_pnn_connect_count(rec);
-}
-
-/*
-  called when we need to do the periodical reclock pnn count update
- */
-static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *te,
-				  struct timeval t, void *p)
-{
-	int i, count;
-	struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
-	struct ctdb_context *ctdb = rec->ctdb;
-	struct ctdb_node_map *nodemap = rec->nodemap;
-
-	/* close and reopen the pnn lock file */
-	ctdb_recoverd_get_pnn_lock(rec);
-
-	ctdb_recoverd_write_pnn_connect_count(rec);
-
-	event_add_timed(rec->ctdb->ev, rec->ctdb,
-			timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
-			ctdb_update_pnn_count, rec);
-
-	/* check if there is a split cluster and yeld the recmaster role
-	   it the other half of the cluster is larger
-	*/
-	DEBUG(DEBUG_DEBUG, ("CHECK FOR SPLIT CLUSTER\n"));
-	if (rec->nodemap == NULL) {
-		return;
-	}
-	if (rec->rec_file_fd == -1) {
-		return;
-	}
-	/* only test this if we think we are the recmaster */
-	if (ctdb->pnn != rec->recmaster) {
-		DEBUG(DEBUG_DEBUG, ("We are not recmaster, skip test\n"));
-		return;
-	}
-	if (ctdb->recovery_lock_fd == -1) {
-		DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n"));
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
-		force_election(rec, ctdb->pnn, rec->nodemap);
-		return;
-	}
-	for (i=0; i<nodemap->num; i++) {
-		/* we dont need to check ourself */
-		if (nodemap->nodes[i].pnn == ctdb->pnn) {
-			continue;
-		}
-		/* dont check nodes that are connected to us */
-		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
-			continue;
-		}
-		/* check if the node is "connected" and how connected it it */
-		count = ctdb_read_pnn_lock(rec->rec_file_fd, nodemap->nodes[i].pnn);
-		if (count < 0) {
-			continue;
-		}
-		/* check if that node is more connected that us */
-		if (count > rec->num_connected) {
-			DEBUG(DEBUG_ERR, ("DISCONNECTED Node %u is more connected than we are, yielding recmaster role\n", nodemap->nodes[i].pnn));
-			close(ctdb->recovery_lock_fd);
-			ctdb->recovery_lock_fd = -1;
-			force_election(rec, ctdb->pnn, rec->nodemap);
-			return;
-		}
-	}
-}
-
 /* called to check that the allocation of public ip addresses is ok.
 */
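The heart of the removed mechanism is POSIX byte-range locking: each
recovery daemon held an F_WRLCK on the one-byte slot at offset pnn of the
reclock.pnn file, and the lock vanishing meant that node's daemon had died.
(Per this commit's changelog, keeping that extra file on the cluster
filesystem could prevent gpfs from unmounting, hence the removal.) A
self-contained sketch of the slot-locking technique (file name and CLI
handling are illustrative, not ctdb code):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int pnn = (argc > 1) ? atoi(argv[1]) : 0;
	struct flock lock;
	int fd;

	fd = open("example.pnn", O_RDWR|O_CREAT, 0600);
	if (fd == -1) {
		fprintf(stderr, "open: %s\n", strerror(errno));
		return 1;
	}

	memset(&lock, 0, sizeof(lock));
	lock.l_type   = F_WRLCK;     /* exclusive lock ...           */
	lock.l_whence = SEEK_SET;
	lock.l_start  = pnn;         /* ... on our one-byte pnn slot */
	lock.l_len    = 1;

	if (fcntl(fd, F_SETLK, &lock) != 0) {
		/* another process already owns this slot */
		fprintf(stderr, "pnn %d is taken: %s\n", pnn, strerror(errno));
		return 1;
	}

	printf("holding slot %d; the kernel drops the lock when we exit\n", pnn);
	pause();
	return 0;
}
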
@@ -2419,10 +2276,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 
 	rec->priority_time = timeval_current();
 
-	/* open the rec file fd and lock our slot */
-	rec->rec_file_fd = -1;
-	ctdb_recoverd_get_pnn_lock(rec);
-
 	/* register a message port for sending memory dumps */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
 
@@ -2441,11 +2294,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
 	/* register a message port for vacuum fetch */
 	ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
 
-	/* update the reclock pnn file connected count on a regular basis */
-	event_add_timed(ctdb->ev, ctdb,
-			timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
-			ctdb_update_pnn_count, rec);
-
 again:
 	if (mem_ctx) {
 		talloc_free(mem_ctx);
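The removed ctdb_update_pnn_count also illustrates the usual self-rearming
timer idiom: the handler does its periodic work and then calls
event_add_timed() on itself again. A runnable sketch of the same idiom
using modern tevent (the successor of the event_context API seen here; the
three-tick demo is illustrative, not ctdb code; link with -ltevent -ltalloc):

#include <stdio.h>
#include <tevent.h>

static void periodic_tick(struct tevent_context *ev, struct tevent_timer *te,
			  struct timeval now, void *private_data)
{
	int *remaining = private_data;

	printf("tick, %d runs left\n", *remaining);
	if (--(*remaining) > 0) {
		/* re-arm: schedule this handler again, one second out */
		tevent_add_timer(ev, ev, tevent_timeval_current_ofs(1, 0),
				 periodic_tick, remaining);
	}
}

int main(void)
{
	struct tevent_context *ev = tevent_context_init(NULL);
	int remaining = 3;

	tevent_add_timer(ev, ev, tevent_timeval_current_ofs(1, 0),
			 periodic_tick, &remaining);
	while (remaining > 0) {
		tevent_loop_once(ev);
	}
	talloc_free(ev);
	return 0;
}
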
@@ -1490,60 +1490,6 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
 	return 0;
 }
 
-/*
-  get the filename of the reclock file
- */
-static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv)
-{
-	int i, ret, fd;
-	const char *reclock;
-	struct ctdb_node_map *nodemap=NULL;
-	char *pnnfile;
-
-	ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get reclock file from node %u\n", options.pnn));
-		return ret;
-	}
-
-	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
-		return ret;
-	}
-
-
-	pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock);
-	CTDB_NO_MEMORY(ctdb, pnnfile);
-
-	fd = open(pnnfile, O_RDONLY);
-	if (fd == -1) {
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n",
-				  pnnfile, strerror(errno)));
-		exit(10);
-	}
-
-
-	printf("Reclock file : %s\n", reclock);
-	for (i=0; i<nodemap->num; i++) {
-		int count;
-
-		count = ctdb_read_pnn_lock(fd, nodemap->nodes[i].pnn);
-
-		printf("pnn:%d %-16s", nodemap->nodes[i].pnn,
-		       inet_ntoa(nodemap->nodes[i].sin.sin_addr));
-		if (count == -1) {
-			printf(" NOT ACTIVE\n");
-		} else {
-			printf(" ACTIVE with %d connections\n", count);
-		}
-	}
-
-	close(fd);
-	return 0;
-}
-
-
 /*
   check if the local node is recmaster or not
   it will return 1 if this node is the recmaster and 0 if it is not
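The removed tool command read each node's slot with ctdb_read_pnn_lock(),
whose source is not part of this diff. Presumably it probes the byte with
F_GETLK and then reads back the stored connection count; a hypothetical
reader-side sketch under that assumption (names and file are illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* returns -1 when nothing holds the slot (daemon not running),
   otherwise the count byte stored at offset pnn */
static int read_pnn_slot(int fd, int pnn)
{
	struct flock lock;
	char count;

	memset(&lock, 0, sizeof(lock));
	lock.l_type   = F_WRLCK;     /* ask: could we place a write lock? */
	lock.l_whence = SEEK_SET;
	lock.l_start  = pnn;
	lock.l_len    = 1;

	if (fcntl(fd, F_GETLK, &lock) != 0) {
		return -1;
	}
	if (lock.l_type == F_UNLCK) {
		return -1;   /* no conflicting lock: that daemon is gone */
	}
	if (pread(fd, &count, 1, pnn) != 1) {
		return -1;
	}
	return count;
}

int main(void)
{
	int fd = open("example.pnn", O_RDONLY);

	if (fd == -1) {
		return 1;
	}
	printf("slot 0: %d\n", read_pnn_slot(fd, 0));
	close(fd);
	return 0;
}
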
@@ -2030,7 +1976,6 @@ static const struct {
 	{ "repack", ctdb_repack, false, "repack all databases", "[max_freelist]"},
 	{ "listnodes", control_listnodes, false, "list all nodes in the cluster"},
 	{ "reloadnodes", control_reload_nodes_file, false, "reload the nodes file and restart the transport on all nodes"},
-	{ "getreclock", control_getreclock, false, "get the path to the reclock file" },
 	{ "moveip", control_moveip, false, "move/failover an ip address to another node", "<ip> <node>"},
 	{ "addip", control_addip, true, "add a ip address to a node", "<ip/mask> <iface>"},
 	{ "delip", control_delip, false, "delete an ip address from a node", "<ip>"},