#!/bin/sh # statd must be configured to use statd_callout, CTDB's binary # counterpart to this script, as its availability call-out. # # Modern NFS utils versions use /etc/nfs.conf: # # [statd] # name = mycluster # ha-callout = /usr/local/libexec/ctdb/statd_callout # # Older Linux versions may use something like the following... # # /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian): # STATD_HOSTNAME="mycluster -H /usr/local/libexec/ctdb/statd_callout" # # If using Linux kernel NFS then the following should also be set in # /etc/nfs.conf: # # [sm-notify] # lift-grace = n # # See sm-notify(8) for details. This doesn't matter when using # NFS-Ganesha because sm-notify's attempt to lift grace will fail # silently if /proc/fs/lockd/nlm_end_grace is not found. # if [ -z "$CTDB_BASE" ]; then export CTDB_BASE="/usr/local/etc/ctdb" fi . "${CTDB_BASE}/functions" # Overwrite this so we get some logging die() { script_log "statd_callout_helper" "$@" exit 1 } ############################################################ ctdb_setup_state_dir "service" "nfs" find_statd_sm_dir() { if [ -n "$CTDB_TEST_MODE" ]; then _f="${CTDB_TEST_TMP_DIR}/sm" mkdir -p "$_f" "${_f}.bak" echo "$_f" return fi for _sm_dir in /var/lib/nfs/statd/sm /var/lib/nfs/sm; do if [ -d "$_sm_dir" ]; then echo "$_sm_dir" break fi done } # Ensure the state directory exists and can be written when called as # a non-root user. Assume the user to run as is the owner of the # system statd sm directory, since both rpc.statd and sm-notify run as # this directory's owner, so it can read and modify the directory. create_add_del_client_dir() { _dir="$1" if [ ! -d "$_dir" ]; then mkdir -p "$_dir" || die "Failed to create directory \"${_dir}\"" ref=$(find_statd_sm_dir) [ -n "$ref" ] || die "Failed to find statd sm directory" chown --reference="$ref" "$_dir" fi } # script_state_dir set by ctdb_setup_state_dir() # shellcheck disable=SC2154 statd_callout_state_dir="${script_state_dir}/statd_callout" statd_callout_db="ctdb.tdb" statd_callout_queue_dir="${statd_callout_state_dir}/queue" ############################################################ # Read pairs of: # server-IP client-IP # from stdin and send associated SM_NOTIFY packets. send_notifies() { # State must monotonically increase, across the entire # cluster. Use seconds since epoch and assume the time is in # sync across nodes. Even numbers mean service is shut down, # odd numbers mean service is up. However, sm-notify always # reads the state and converts it to odd (if necessary, by # adding 1 when it is even) because it only sends "up" # notifications. Note that there is a 2038 issue here but we # will get to that later. _state=$(date '+%s') _helper="${CTDB_HELPER_BINDIR}/ctdb_smnotify_helper" _notify_dir="${statd_callout_state_dir}/sm-notify" mkdir -p "$_notify_dir" while read -r _sip _cip; do # Create a directory per server IP containing a file # for each client IP mkdir -p \ "${_notify_dir}/${_sip}/sm" \ "${_notify_dir}/${_sip}/sm.bak" _out="${_notify_dir}/${_sip}/sm/${_cip}" "$_helper" "monitor" "$_cip" "$_sip" >"$_out" done # Send notifications for server startup _ref=$(find_statd_sm_dir) for _sip_dir in "$_notify_dir"/*; do if [ "$_sip_dir" = "${_notify_dir}/*" ]; then break fi _sip="${_sip_dir##*/}" # basename # Write the state as a host order 32-bit integer. See # note at top of function about state. _out="${_sip_dir}/state" "$_helper" "state" "$_state" >"$_out" # The ownership of the directory and contents should # match the system's statd sm directory, so that # sm-notify drops privileges and switches to run as # the directory owner. chown -R --reference="$_ref" "$_sip_dir" timeout 10 sm-notify -d -f -m 0 -n -P "$_sip_dir" -v "$_sip" rm -rf "$_sip_dir" done } delete_records() { while read -r _sip _cip; do _key="statd-state@${_sip}@${_cip}" echo "\"${_key}\" \"\"" done | $CTDB ptrans "$statd_callout_db" } ############################################################ # Keep a file per server-IP/client-IP pair, to keep track of the last # "add-client" or "del-client'. These get pushed to a database during # "update", which will generally be run once each "monitor" cycle. In # this way we avoid scalability problems with flood of persistent # transactions after a "notify" when all the clients re-take their # locks. startup() { create_add_del_client_dir "$statd_callout_queue_dir" $CTDB attach "$statd_callout_db" persistent _default="${CTDB_SCRIPT_VARDIR}/statd_callout.conf" _config_file="${CTDB_STATD_CALLOUT_CONFIG_FILE:-"${_default}"}" cat >"$_config_file" < /proc/sys/net/ipv4/tcp_fin_timeout #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans # Delete the notification list for statd, we don't want it to # ping any clients dir=$(find_statd_sm_dir) rm -f "${dir}/"* "${dir}.bak/"* # We must also let some time pass between stopping and # restarting the lock manager. Otherwise there is a window # where the lock manager will respond "strangely" immediately # after restarting it, which causes clients to fail to reclaim # their locks. nfs_callout_init "$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1 sleep 2 "$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1 # Construct a sed expression to take catdb output and produce pairs of: # server-IP client-IP # but only for the server-IPs that are hosted on this node. sed_expr=$(awk '{ ip = $1; gsub(/\./, "\\.", ip); printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }' \ "$CTDB_MY_PUBLIC_IPS_CACHE") statd_state=$($CTDB catdb "$statd_callout_db" | sed -n "$sed_expr" | sort) [ -n "$statd_state" ] || exit 0 echo "$statd_state" | send_notifies echo "$statd_state" | delete_records # Remove any stale touch files (i.e. for IPs not currently # hosted on this node and created since the last "update"). # There's nothing else we can do with them at this stage. pnn=$(ctdb_get_pnn) $CTDB ip all | tail -n +2 | awk -v pnn="$pnn" 'pnn != $2 { print $1 }' | while read -r sip; do rm -f "${statd_callout_queue_dir}/statd-state@${sip}@"* done ;; esac