diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index 48b50f126b3..7f0710c4006 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -61,12 +61,32 @@ case $cmd in ;; recovered) - # restart NFS to ensure that all TCP connections to the released ip - # are closed + [ -f /etc/ctdb/state/nfs/restart ] && [ ! -z "$LOCKD_TCPPORT" ] && { + # RST all tcp connections used for NLM to ensure that they do + # not survive in ESTABLISHED state across a failover/failback + # and create an ack storm + netstat -tn |egrep "^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:${LOCKD_TCPPORT}\s+.*ESTABLISHED" | awk '{print $4" "$5}' | while read dest src; do + srcip=`echo $src | cut -d: -f1` + srcport=`echo $src | cut -d: -f2` + destip=`echo $dest | cut -d: -f1` + destport=`echo $dest | cut -d: -f2` + ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1 +# ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1 + done + } > /dev/null 2>&1 + [ -f /etc/ctdb/state/nfs/restart ] && { - ( service nfs status > /dev/null 2>&1 && - service nfs restart > /dev/null 2>&1 && - service nfslock restart > /dev/null 2>&1 ) & + # RST all tcp connections used for NFS to ensure that they do + # not survive in ESTABLISHED state across a failover/failback + # and create an ack storm + netstat -tn |egrep '^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:2049\s+.*ESTABLISHED' | awk '{print $4" "$5}' | while read dest src; do + srcip=`echo $src | cut -d: -f1` + srcport=`echo $src | cut -d: -f2` + destip=`echo $dest | cut -d: -f1` + destport=`echo $dest | cut -d: -f2` + ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1 + ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1 + done } > /dev/null 2>&1 /bin/rm -f /etc/ctdb/state/nfs/restart diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index ef867efac12..14808268782 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -385,11 +385,6 @@ 
static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node for (i=0;inum;i++) { struct ctdb_node_flag_change c; TDB_DATA data; - uint32_t flags = nodemap->nodes[i].flags; - - if (flags & NODE_FLAGS_DISCONNECTED) { - continue; - } c.vnn = nodemap->nodes[i].vnn; c.flags = nodemap->nodes[i].flags; @@ -1073,6 +1068,15 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, return; } + /* Dont let messages from remote nodes change the DISCONNECTED flag. + This flag is handled locally based on whether the local node + can communicate with the node or not. + */ + c->flags &= ~NODE_FLAGS_DISCONNECTED; + if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) { + c->flags |= NODE_FLAGS_DISCONNECTED; + } + if (nodemap->nodes[i].flags != c->flags) { DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags)); } @@ -1327,7 +1331,7 @@ again: } if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) != (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) { - DEBUG(0, (__location__ " Remote node:%u has different nodemap flags for %d (0x%x vs 0x%x)\n", + DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", nodemap->nodes[j].vnn, i, remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags)); do_recovery(rec, mem_ctx, vnn, num_active, nodemap, diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 371f6e867a6..4e0378d7b9c 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -308,10 +308,10 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv */ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv) { - int i, ret; + int i, ret, numrst; struct sockaddr_in src, dst; - if (argc < 2) { + if (argc < 3) { usage(); } @@ -325,7 +325,9 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv) return -1; } - for (i=0;i<5;i++) { + numrst = strtoul(argv[2], NULL, 0); + + for (i=0;iev, &src, &dst); printf("ret:%d\n", ret); @@ -889,7 
+891,7 @@ static const struct { { "recover", control_recover, true, "force recovery" }, { "freeze", control_freeze, true, "freeze all databases" }, { "thaw", control_thaw, true, "thaw all databases" }, - { "killtcp", kill_tcp, false, "kill a tcp connection", " " }, + { "killtcp", kill_tcp, false, "kill a tcp connection. Try times.", " " }, { "tickle", tickle_tcp, false, "send a tcp tickle ack", " " }, }; diff --git a/ctdb/web/nfs.html b/ctdb/web/nfs.html index caff4001f8a..6bec33bc648 100644 --- a/ctdb/web/nfs.html +++ b/ctdb/web/nfs.html @@ -47,16 +47,18 @@ which causes problems on some clients.
This file should look something like :
   CTDB_MANAGES_NFS=yes
-  CTDB_MANAGES_NFSLOCK=yes
+  LOCKD_TCPPORT=599
+  LOCKD_UDPPORT=599
   STATD_SHARED_DIRECTORY=/gpfs0/nfs-state
-  STATD_HOSTNAME=\"ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97\"
+  STATD_HOSTNAME="ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97"
 
The CTDB_MANAGES_NFS line tells the events scripts that CTDB is to manage startup and shutdown of the NFS and NFSLOCK services.
-The CTDB_MANAGES_NFSLOCK line tells the events scripts that CTDB is also to manage the nfs lock manager.
+With this set to yes, CTDB will start/stop/restart these services as required.

-With these set to yes, CTDB will start/stop/restart these services as required.

+You need to make sure that the lock manager runs on the same port on all nodes in the cluster, since some clients will have "issues" and take a very long time to recover if the port suddenly changes.
+599 above is only an example. You can run the lock manager on any available port as long as you use the same port on all nodes.

STATD_SHARED_DIRECTORY is the shared directory where statd and the statd-callout script expect to find the state variables and the lists of clients to notify.