#!/bin/sh

# statd-callout: high availability call-out for rpc.statd under CTDB.
#
# Usage: statd-callout add-client <client>
#        statd-callout del-client <client>
#        statd-callout update
#        statd-callout notify
#
# Environment read: CTDB_BASE, CTDB, CTDB_HELPER_BINDIR, CTDB_NFS_CALLOUT,
# NFS_HOSTNAME (the latter may also come from system config or nfsconf).

# This must run as root as CTDB tool commands need to access CTDB socket
[ "$(id -u)" -eq 0 ] || exec sudo "$0" "$@"

# statd must be configured to use this script as its high availability call-out.
#
# In most Linux versions this can be done using something like the following...
#
# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian):
#   NFS_HOSTNAME=myhostname
#   STATD_HOSTNAME="${NFS_HOSTNAME} -H /etc/ctdb/statd-callout"
#
# Newer Red Hat Linux variants instead use /etc/nfs.conf:
#   [statd]
#     name = myhostname
#     ha-callout = /etc/ctdb/statd-callout

# Default CTDB_BASE to the physical directory containing this script.
# Use && so a failed cd yields an empty value instead of silently
# reporting whatever the current directory happens to be.
[ -n "$CTDB_BASE" ] || \
	CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" && echo "$PWD")

. "${CTDB_BASE}/functions"

# Overwrite this so we get some logging
die ()
{
	script_log "statd-callout" "$@"
	exit 1
}

# Try different variables to find config file for NFS_HOSTNAME
load_system_config "nfs" "nfs-common"

# If NFS_HOSTNAME not set then try to pull it out of /etc/nfs.conf
if [ -z "$NFS_HOSTNAME" ] && type nfsconf >/dev/null 2>&1 ; then
	NFS_HOSTNAME=$(nfsconf --get statd name)
fi

[ -n "$NFS_HOSTNAME" ] || \
	die "NFS_HOSTNAME is not configured. statd-callout failed"

############################################################

ctdb_setup_state_dir "service" "nfs"

# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
d="${script_state_dir}/statd-callout"

mkdir -p "$d" || die "Failed to create directory \"${d}\""
cd "$d" || die "Failed to change directory to \"${d}\""

pnn=$(ctdb_get_pnn)

############################################################

# Send statd notifications for every "server-IP client-IP" pair on stdin.
#
# Globals:   CTDB_HELPER_BINDIR (read), NFS_HOSTNAME (read)
# Input:     lines of "<server-IP> <client-IP>" on stdin
# Effect:    runs smnotify four times per pair: shutdown and startup,
#            each once with the server IP and once with NFS_HOSTNAME
#            as the server name
send_notifies ()
{
	_smnotify="${CTDB_HELPER_BINDIR}/smnotify"

	# State must monotonically increase, across the entire
	# cluster.  Use seconds since epoch and hope the time is in
	# sync across nodes.  Even numbers mean service is shut down,
	# odd numbers mean service is started.

	# Intentionally round to an even number
	# shellcheck disable=SC2017
	_state_even=$(( $(date '+%s') / 2 * 2))

	while read -r _sip _cip ; do
		# NOTE: Consider optimising smnotify to read all the
		# data from stdin and then run it in the background.

		# Start every pair from the even (shutdown) value so
		# that each client sees even-then-odd.  The previous
		# "has the server IP changed?" test compared against a
		# variable that was never updated, so it always reset
		# the value; do so unconditionally.
		_stateval="$_state_even"

		# Send notifies for server shutdown
		"$_smnotify" --client="$_cip" --ip="$_sip" \
			     --server="$_sip" --stateval="$_stateval"
		"$_smnotify" --client="$_cip" --ip="$_sip" \
			     --server="$NFS_HOSTNAME" --stateval="$_stateval"

		# Send notifies for server startup
		_stateval=$((_stateval + 1))
		"$_smnotify" --client="$_cip" --ip="$_sip" \
			     --server="$_sip" --stateval="$_stateval"
		"$_smnotify" --client="$_cip" --ip="$_sip" \
			     --server="$NFS_HOSTNAME" --stateval="$_stateval"
	done
}

# Write an empty value for the statd-state record of every
# "server-IP client-IP" pair on stdin, in one ptrans call on ctdb.tdb.
#
# Globals:   CTDB (read; expanded unquoted, so it may carry arguments)
# Input:     lines of "<server-IP> <client-IP>" on stdin
delete_records ()
{
	while read -r _sip _cip ; do
		_key="statd-state@${_sip}@${_cip}"
		# printf rather than echo: echo's treatment of
		# backslashes varies between sh implementations
		printf '"%s" ""\n' "$_key"
	done | $CTDB ptrans "ctdb.tdb"
}

############################################################

case "$1" in
	# Keep a single file to keep track of the last "add-client" or
	# "del-client".  These get pushed to ctdb.tdb during "update",
	# which will generally be run once each "monitor" cycle.  In this
	# way we avoid scalability problems with flood of persistent
	# transactions after a "notify" when all the clients re-take their
	# locks.

	add-client)
		# statd does not tell us to which IP the client connected so
		# we must add it to all the IPs that we serve
		cip="$2"
		date=$(date '+%s')
		# x is intentionally ignored
		# shellcheck disable=SC2034
		$CTDB ip -X | tail -n +2 | while IFS="|" read -r x sip node x ; do
			[ "$node" = "$pnn" ] || continue # not us
			key="statd-state@${sip}@${cip}"
			printf '"%s" "%s"\n' "$key" "$date" >"$key"
		done
		;;

	del-client)
		# statd does not tell us from which IP the client disconnected
		# so we must record an empty value for it against all the IPs
		# that we serve
		cip="$2"
		# x is intentionally ignored
		# shellcheck disable=SC2034
		$CTDB ip -X | tail -n +2 | while IFS="|" read -r x sip node x ; do
			[ "$node" = "$pnn" ] || continue # not us
			key="statd-state@${sip}@${cip}"
			printf '"%s" ""\n' "$key" >"$key"
		done
		;;

	update)
		# An unmatched glob is left as the literal pattern
		files=$(echo statd-state@*)
		if [ "$files" = "statd-state@*" ] ; then
			# No files!
			exit 0
		fi

		# Filter out lines for any IP addresses that are not currently
		# hosted public IP addresses.
		ctdb_ips=$($CTDB ip | tail -n +2)
		sed_expr=$(printf '%s\n' "$ctdb_ips" |
			awk -v pnn="$pnn" 'pnn == $2 {
				ip = $1; gsub(/\./, "\\.", ip);
				printf "/statd-state@%s@/p\n", ip }')

		# Intentional multi-word expansion for multiple files
		# shellcheck disable=SC2086
		items=$(sed -n "$sed_expr" $files)

		if [ -n "$items" ] ; then
			# Only discard the files once the transaction has
			# succeeded
			if printf '%s\n' "$items" | $CTDB ptrans "ctdb.tdb" ; then
				# shellcheck disable=SC2086
				rm $files
			fi
		fi
		;;

	notify)
		# we must restart the lockmanager (on all nodes) so that we get
		# a clusterwide grace period (so other clients don't take out
		# conflicting locks through other nodes before all locks have been
		# reclaimed)

		# we need these settings to make sure that no tcp connections survive
		# across a very fast failover/failback
		#echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout
		#echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets
		#echo 0 > /proc/sys/net/ipv4/tcp_max_orphans

		# Delete the notification list for statd, we don't want it to
		# ping any clients
		rm -f /var/lib/nfs/statd/sm/*
		rm -f /var/lib/nfs/statd/sm.bak/*

		# We must also let some time pass between stopping and
		# restarting the lock manager.  Otherwise there is a window
		# where the lock manager will respond "strangely" immediately
		# after restarting it, which causes clients to fail to reclaim
		# their locks.
		nfs_callout_init
		"$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1
		sleep 2
		"$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1

		# we now need to send out additional statd notifications to ensure
		# that clients understand that the lockmanager has restarted.
		# we have three cases:
		# 1, clients that ignore the ip address the stat notification came from
		#    and ONLY care about the 'name' in the notify packet.
		#    these clients ONLY work with lock failover IFF that name
		#    can be resolved into an ipaddress that matches the one used
		#    to mount the share.  (==linux clients)
		#    This is handled when starting lockmanager above, but those
		#    packets are sent from the "wrong" ip address, something linux
		#    clients are ok with, but other clients will barf at.
		# 2, Some clients only accept statd packets IFF they come from the
		#    'correct' ip address.
		# 2a,Send out the notification using the 'correct' ip address and also
		#    specify the 'correct' hostname in the statd packet.
		#    Some clients require both the correct source address and also the
		#    correct name. (these clients also ONLY work if the ip addresses
		#    used to map the share can be resolved into the name returned in
		#    the notify packet.)
		# 2b,Other clients require that the source ip address of the notify
		#    packet matches the ip address used to take out the lock.
		#    I.e. that the correct source address is used.
		#    These clients also require that the statd notify packet contains
		#    the name as the ip address used when the lock was taken out.
		#
		# Both 2a and 2b are commonly used in lockmanagers since they maximize
		# probability that the client will accept the statd notify packet and
		# not just ignore it.

		# For all IPs we serve, collect info and push to the config database

		# Construct a sed expression to take catdb output and produce pairs of:
		#   server-IP client-IP
		# but only for the server-IPs that are hosted on this node.
		ctdb_all_ips=$($CTDB ip all | tail -n +2)
		sed_expr=$(printf '%s\n' "$ctdb_all_ips" |
			awk -v pnn="$pnn" 'pnn == $2 {
				ip = $1; gsub(/\./, "\\.", ip);
				printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }')

		statd_state=$($CTDB catdb ctdb.tdb | sed -n "$sed_expr" | sort)
		[ -n "$statd_state" ] || exit 0

		printf '%s\n' "$statd_state" | send_notifies
		printf '%s\n' "$statd_state" | delete_records

		# Remove any stale touch files (i.e. for IPs not currently
		# hosted on this node and created since the last "update").
		# There's nothing else we can do with them at this stage.
		printf '%s\n' "$ctdb_all_ips" |
			awk -v pnn="$pnn" 'pnn != $2 { print $1 }' |
			while read -r sip ; do
				rm -f "statd-state@${sip}@"*
			done
		;;
esac