1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-08 21:18:16 +03:00

merge from ronnie

(This used to be ctdb commit 6975042fca832aae07f84c0e9ac7fa4773b8cb51)
This commit is contained in:
Andrew Tridgell 2007-07-10 14:59:23 +10:00
commit 9fe446f231
4 changed files with 47 additions and 19 deletions

View File

@ -61,12 +61,32 @@ case $cmd in
;;
recovered)
# restart NFS to ensure that all TCP connections to the released ip
# are closed
[ -f /etc/ctdb/state/nfs/restart ] && [ ! -z "$LOCKD_TCPPORT" ] && {
# RST all tcp connections used for NLM to ensure that they do
# not survive in ESTABLISHED state across a failover/failback
# and create an ack storm
netstat -tn |egrep "^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:${LOCKD_TCPPORT}\s+.*ESTABLISHED" | awk '{print $4" "$5}' | while read dest src; do
srcip=`echo $src | cut -d: -f1`
srcport=`echo $src | cut -d: -f2`
destip=`echo $dest | cut -d: -f1`
destport=`echo $dest | cut -d: -f2`
ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1
# ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1
done
} > /dev/null 2>&1
[ -f /etc/ctdb/state/nfs/restart ] && {
( service nfs status > /dev/null 2>&1 &&
service nfs restart > /dev/null 2>&1 &&
service nfslock restart > /dev/null 2>&1 ) &
# RST all tcp connections used for NFS to ensure that they do
# not survive in ESTABLISHED state across a failover/failback
# and create an ack storm
netstat -tn |egrep '^tcp.*\s+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:2049\s+.*ESTABLISHED' | awk '{print $4" "$5}' | while read dest src; do
srcip=`echo $src | cut -d: -f1`
srcport=`echo $src | cut -d: -f2`
destip=`echo $dest | cut -d: -f1`
destport=`echo $dest | cut -d: -f2`
ctdb killtcp $srcip:$srcport $destip:$destport 1 >/dev/null 2>&1
ctdb killtcp $destip:$destport $srcip:$srcport 1 >/dev/null 2>&1
done
} > /dev/null 2>&1
/bin/rm -f /etc/ctdb/state/nfs/restart

View File

@ -385,11 +385,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
for (i=0;i<nodemap->num;i++) {
struct ctdb_node_flag_change c;
TDB_DATA data;
uint32_t flags = nodemap->nodes[i].flags;
if (flags & NODE_FLAGS_DISCONNECTED) {
continue;
}
c.vnn = nodemap->nodes[i].vnn;
c.flags = nodemap->nodes[i].flags;
@ -1073,6 +1068,15 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
return;
}
/* Dont let messages from remote nodes change the DISCONNECTED flag.
This flag is handled locally based on whether the local node
can communicate with the node or not.
*/
c->flags &= ~NODE_FLAGS_DISCONNECTED;
if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
c->flags |= NODE_FLAGS_DISCONNECTED;
}
if (nodemap->nodes[i].flags != c->flags) {
DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
}
@ -1327,7 +1331,7 @@ again:
}
if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
DEBUG(0, (__location__ " Remote node:%u has different nodemap flags for %d (0x%x vs 0x%x)\n",
DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
nodemap->nodes[j].vnn, i,
remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
do_recovery(rec, mem_ctx, vnn, num_active, nodemap,

View File

@ -308,10 +308,10 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
*/
static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
{
int i, ret;
int i, ret, numrst;
struct sockaddr_in src, dst;
if (argc < 2) {
if (argc < 3) {
usage();
}
@ -325,7 +325,9 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
for (i=0;i<5;i++) {
numrst = strtoul(argv[2], NULL, 0);
for (i=0;i<numrst;i++) {
ret = ctdb_sys_kill_tcp(ctdb->ev, &src, &dst);
printf("ret:%d\n", ret);
@ -889,7 +891,7 @@ static const struct {
{ "recover", control_recover, true, "force recovery" },
{ "freeze", control_freeze, true, "freeze all databases" },
{ "thaw", control_thaw, true, "thaw all databases" },
{ "killtcp", kill_tcp, false, "kill a tcp connection", "<srcip:port> <dstip:port>" },
{ "killtcp", kill_tcp, false, "kill a tcp connection. Try <num> times.", "<srcip:port> <dstip:port> <num>" },
{ "tickle", tickle_tcp, false, "send a tcp tickle ack", "<srcip:port> <dstip:port>" },
};

View File

@ -47,16 +47,18 @@ which causes problems on some clients.<br>
This file should look something like :
<pre>
CTDB_MANAGES_NFS=yes
CTDB_MANAGES_NFSLOCK=yes
LOCKD_TCPPORT=599
LOCKD_UDPPORT=599
STATD_SHARED_DIRECTORY=/gpfs0/nfs-state
STATD_HOSTNAME=\"ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97\"
STATD_HOSTNAME="ctdb -P $STATD_SHARED_DIRECTORY/192.168.1.1 -H /etc/ctdb/statd-callout -p 97"
</pre>
The CTDB_MANAGES_NFS line tells the events scripts that CTDB is to manage startup and shutdown of the NFS and NFSLOCK services.<br>
The CTDB_MANAGES_NFSLOCK line tells the events scripts that CTDB is also to manage the nfs lock manager.<br>
With this set to yes, CTDB will start/stop/restart these services as required.<br><br>
With these set to yes, CTDB will start/stop/restart these services as required.<br><br>
You need to make sure that the lock manager runs on the same port on all nodes in the cluster, since some clients will have "issues" and take a very long time to recover if the port suddenly changes.<br>
599 above is only an example. You can run the lock manager on any available port as long as you use the same port on all nodes.<br><br>
STATD_SHARED_DIRECTORY is the shared directory where statd and the statd-callout script expect to find the state variables and the lists of clients to notify.<br>