2025-03-26 18:50:30 +03:00 · 2007-07-10 14:59:23 +10:00 · 2007-07-10 14:59:23 +10:00 · 9fe446f231
commit 9fe446f231
parent f1db15ffe1 ed1a52b293
4 changed files with 47 additions and 19 deletions
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@ -61,12 +61,32 @@ case $cmd in
 	;;

     recovered)
-        # restart NFS to ensure that all TCP connections to the released ip
-	# are closed
+	[ -f /etc/ctdb/state/nfs/restart ] && [ ! -z "$LOCKD_TCPPORT" ] && {
+	        # RST all tcp connections used for NLM to ensure that they do
+		# not survive in ESTABLISHED state across a failover/failback
+		# and create an ack storm
+		netstat -tn |egrep "^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:${LOCKD_TCPPORT}\s+.*ESTABLISHED" | awk '{print $4" "$5}' | while read dest src; do
+			srcip=`echo $src | cut -d: -f1`
+			srcport=`echo $src | cut -d: -f2`
+			destip=`echo $dest | cut -d: -f1`
+			destport=`echo $dest | cut -d: -f2`
+			ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1 
+#			ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1
+		done
+	} > /dev/null 2>&1
+
 	[ -f /etc/ctdb/state/nfs/restart ] && {
-		( service nfs status > /dev/null 2>&1 && 
-                      service nfs restart > /dev/null 2>&1 &&
-		      service nfslock restart > /dev/null 2>&1 ) &
+	        # RST all tcp connections used for NFS to ensure that they do
+		# not survive in ESTABLISHED state across a failover/failback
+		# and create an ack storm
+		netstat -tn |egrep '^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:2049\s+.*ESTABLISHED' | awk '{print $4" "$5}' | while read dest src; do
+			srcip=`echo $src | cut -d: -f1`
+			srcport=`echo $src | cut -d: -f2`
+			destip=`echo $dest | cut -d: -f1`
+			destport=`echo $dest | cut -d: -f2`
+			ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1 
+			ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1
+		done
 	} > /dev/null 2>&1
 	/bin/rm -f /etc/ctdb/state/nfs/restart

--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@ -385,11 +385,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
 	for (i=0;i<nodemap->num;i++) {
 		struct ctdb_node_flag_change c;
 		TDB_DATA data;
-		uint32_t flags = nodemap->nodes[i].flags;
-
-		if (flags & NODE_FLAGS_DISCONNECTED) {
-			continue;
-		}

 		c.vnn = nodemap->nodes[i].vnn;
 		c.flags = nodemap->nodes[i].flags;
@ -1073,6 +1068,15 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 		return;
 	}

+	/* Don't let messages from remote nodes change the DISCONNECTED flag.
+	   This flag is handled locally based on whether the local node
+	   can communicate with the node or not.
+	*/
+	c->flags &= ~NODE_FLAGS_DISCONNECTED;
+	if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
+		c->flags |= NODE_FLAGS_DISCONNECTED;
+	}
+
 	if (nodemap->nodes[i].flags != c->flags) {
 		DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
 	}
@ -1327,7 +1331,7 @@ again:
 			}
 			if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) != 
 			    (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
-				DEBUG(0, (__location__ " Remote node:%u has different nodemap flags for %d (0x%x vs 0x%x)\n", 
+				DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", 
 					  nodemap->nodes[j].vnn, i,
 					  remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
 				do_recovery(rec, mem_ctx, vnn, num_active, nodemap, 
--- a/ctdb/tools/ctdb.c
+++ b/ctdb/tools/ctdb.c
@ -308,10 +308,10 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
 */
 static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-	int i, ret;
+	int i, ret, numrst;
 	struct sockaddr_in src, dst;

-	if (argc < 2) {
+	if (argc < 3) {
 		usage();
 	}

@ -325,7 +325,9 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
 		return -1;
 	}

-	for (i=0;i<5;i++) {
+	numrst = strtoul(argv[2], NULL, 0);
+
+	for (i=0;i<numrst;i++) {
 		ret = ctdb_sys_kill_tcp(ctdb->ev, &src, &dst);

 		printf("ret:%d\n", ret);
@ -889,7 +891,7 @@ static const struct {
 	{ "recover",         control_recover,           true,  "force recovery" },
 	{ "freeze",          control_freeze,            true,  "freeze all databases" },
 	{ "thaw",            control_thaw,              true,  "thaw all databases" },
-	{ "killtcp",         kill_tcp,                  false, "kill a tcp connection", "<srcip:port> <dstip:port>" },
+	{ "killtcp",         kill_tcp,                  false, "kill a tcp connection. Try <num> times.", "<srcip:port> <dstip:port> <num>" },
 	{ "tickle",          tickle_tcp,                false, "send a tcp tickle ack", "<srcip:port> <dstip:port>" },
 };

--- a/ctdb/web/nfs.html
+++ b/ctdb/web/nfs.html
@ -47,16 +47,18 @@ which causes problems on some clients.<br>
 This file should look something like :
 <pre>
  CTDB_MANAGES_NFS=yes
-  CTDB_MANAGES_NFSLOCK=yes
+  LOCKD_TCPPORT=599
+  LOCKD_UDPPORT=599
  STATD_SHARED_DIRECTORY=/gpfs0/nfs-state
-  STATD_HOSTNAME=\"ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97\"
+  STATD_HOSTNAME="ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97"
 </pre>

 The CTDB_MANAGES_NFS line tells the events scripts that CTDB is to manage startup and shutdown of the NFS and NFSLOCK services.<br>

-The CTDB_MANAGES_NFSLOCK line tells the events scripts that CTDB is also to manage the nfs lock manager.<br>
+With this set to yes, CTDB will start/stop/restart these services as required.<br><br>

-With these set to yes, CTDB will start/stop/restart these services as required.<br><br>
+You need to make sure that the lock manager runs on the same port on all nodes in the cluster, since some clients will have "issues" and take a very long time to recover if the port suddenly changes.<br>
+599 above is only an example. You can run the lock manager on any available port as long as you use the same port on all nodes.<br><br>

 STATD_SHARED_DIRECTORY is the shared directory where statd and the statd-callout script expects that the state variables and lists of clients to notify are found.<br>