samba-mirror/60.nfs at 32fe247e37fe97c98c12f196d955d9be682eaa74

mirror of https://github.com/samba-team/samba.git synced 2025-01-14 19:24:43 +03:00

Martin Schwenke 1d71dd08e3 Eventscripts: change failure counts and behaviour for statd and nfsd.

We reduce the number of failures before attempting a restart.
However, after 6 failures we mark the cluster unhealthy and no longer
try to restart.  If the previous 2 attempts didn't work then there
isn't any use in bogging the system down with an attempted restart on
every monitor event.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit f654739080b40b7ac1b7f998cacc689d3d4e3193)

2011-08-12 14:16:17 +10:00

123 lines

2.8 KiB

Bash

Executable File

Raw Blame History

 #!/bin/sh
 # script to manage nfs in a clustered environment
 #
 # The event to handle is taken from the first argument ($1); the events
 # handled explicitly below are init, startup, shutdown, takeip, releaseip
 # and monitor.  Any other event is passed to ctdb_standard_event_handler.

 # Pull in the shared helper functions from the CTDB base directory.
 . $CTDB_BASE/functions
 # Name of the service handled by this script; presumably read by the
 # sourced helpers (not visible in this file) -- TODO confirm.
 service_name="nfs"
 # Start hook for the NFS service.
 #
 # Runs startstop_nfs with "stop" and then "start", so the service is always
 # stopped before it is started.  It then calls set_proc for
 # sys/net/ipv4/tcp_tw_recycle with the value 1 and touches the
 # "update-trigger" file in $service_state_dir; the monitor event in this
 # script reads that file's modification time.
 service_start()
 {
     startstop_nfs stop
     startstop_nfs start

     set_proc 'sys/net/ipv4/tcp_tw_recycle' 1

     touch "${service_state_dir}/update-trigger"
 }
 # Stop hook for the NFS service: delegates to startstop_nfs with the
 # "stop" action and returns whatever status that call returns.
 service_stop() { startstop_nfs stop; }
 # Reconfigure hook for the NFS service.
 #
 # Restarts NFS via startstop_nfs and, if an executable statd-callout helper
 # exists in $CTDB_BASE, launches "statd-callout notify" in the background
 # with its output discarded.
 #
 # Returns 0 whether or not the helper is installed: the helper is optional,
 # so its absence must not be reported as a reconfigure failure.  The path is
 # quoted so that a $CTDB_BASE containing whitespace still works.
 service_reconfigure ()
 {
     startstop_nfs restart

     # if the ips have been reallocated, we must restart the lockmanager
     # across all nodes and ping all statd listeners
     if [ -x "$CTDB_BASE/statd-callout" ] ; then
         "$CTDB_BASE/statd-callout" notify &
     fi >/dev/null 2>&1
 }
 # Load configuration first; NFS_SERVER_MODE is consulted immediately below
 # (presumably it is set by the loaded configuration -- TODO confirm).
 loadconfig

 # This script does nothing when NFS_SERVER_MODE is GANESHA.
 if [ "$NFS_SERVER_MODE" = "GANESHA" ] ; then
     exit 0
 fi

 ctdb_setup_service_state_dir

 ctdb_start_stop_service

 # Stop here, successfully, unless CTDB is managing this service.
 if ! is_ctdb_managed_service ; then
     exit 0
 fi

 ctdb_service_check_reconfigure
 # Dispatch on the event name given as the first argument.  Events without
 # an arm of their own fall through to ctdb_standard_event_handler.  The
 # script always finishes with exit status 0 unless the share directory
 # check in the monitor event fails, in which case it exits with that
 # check's status.
 case "$1" in
     init)
         # read statd from persistent database
         # (no action is taken here at present)
         ;;

     startup)
         ctdb_service_start
         ;;

     shutdown)
         ctdb_service_stop
         ;;

     takeip|releaseip)
         # An address was taken or released: flag the service for
         # reconfiguration.
         ctdb_service_set_reconfigure
         ;;

     monitor)
         # Check that directories for shares actually exist.
         if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
             # Keep the exportfs lines that start with a path, strip the
             # last whitespace-separated field from each, and feed the
             # result to ctdb_check_directories.  A failing check ends the
             # script with that status.
             exportfs | grep -v '^#' | grep '^/' |
             sed -e 's/[[:space:]]\+[^[:space:]]*$//' |
             ctdb_check_directories || exit $?
         fi

         update_tickles 2049

         # check that statd responds to rpc requests
         # if statd is not running we try to restart it
         # we only do this IF we have a rpc.statd command.
         # For platforms where rpc.statd does not exist, we skip
         # the check completely
         p="rpc.statd"
         # "command -v" is the POSIX way to test for a command; unlike
         # "which" it does not depend on an external program.
         if command -v "$p" >/dev/null 2>&1 ; then
             nfs_check_rpc_service "statd" \
                 -ge 6 "verbose unhealthy" \
                 -eq 4 "verbose restart" \
                 -eq 2 "restart:bs"
         fi

         # check that NFS responds to rpc requests
         if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
             nfs_check_rpc_service "knfsd" \
                 -ge 6 "verbose unhealthy" \
                 -eq 4 "verbose restart" \
                 -eq 2 "restart:bs"
         fi

         # check that lockd responds to rpc requests
         nfs_check_rpc_service "lockd" \
             -ge 15 "verbose restart unhealthy" \
             -eq 10 "restart:bs"

         # mountd is sometimes not started correctly on RHEL5
         nfs_check_rpc_service "mountd" \
             -ge 10 "verbose restart:b unhealthy" \
             -eq 5 "restart:b"

         # rquotad is sometimes not started correctly on RHEL5
         # not a critical service so we don't flag the node as unhealthy
         nfs_check_rpc_service "rquotad" \
             -gt 0 "verbose restart:b"

         # once every 600 seconds, update the statd state database for which
         # clients need notifications
         #
         # The modification time of the update-trigger file records the last
         # update.  If the file cannot be read, treat the last update as
         # time 0 so that an update runs now and the file is recreated;
         # stat's error message is deliberately discarded in that case.
         LAST_UPDATE=$(stat --printf="%Y" "$service_state_dir/update-trigger" 2>/dev/null)
         CURRENT_TIME=$(date +"%s")
         if [ "$CURRENT_TIME" -ge $((${LAST_UPDATE:-0} + 600)) ] ; then
             touch "$service_state_dir/update-trigger"
             # Both updates run in the background and are not waited for.
             "$CTDB_BASE/statd-callout" updatelocal &
             "$CTDB_BASE/statd-callout" updateremote &
         fi
         ;;

     *)
         ctdb_standard_event_handler "$@"
         ;;
 esac

 exit 0

123 lines 2.8 KiB Bash Executable File Raw Blame History

123 lines

2.8 KiB

Bash

Executable File

Raw Blame History