mirror of
https://github.com/samba-team/samba.git
synced 2025-01-14 19:24:43 +03:00
1d71dd08e3
We reduce the number of failures before attempting a restart. However, after 6 failures we mark the cluster unhealthy and no longer try to restart. If the previous 2 attempts didn't work then there isn't any use in bogging the system down with an attempted restart on every monitor event. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit f654739080b40b7ac1b7f998cacc689d3d4e3193)
123 lines
2.8 KiB
Bash
Executable File
123 lines
2.8 KiB
Bash
Executable File
#!/bin/sh
# Event script to manage NFS in a clustered (CTDB) environment.

# Pull in the common CTDB event-script helpers (startstop_nfs,
# set_proc, nfs_check_rpc_service, loadconfig, ctdb_* helpers, ...).
# Quoted: an unquoted $CTDB_BASE would word-split on unusual paths.
. "$CTDB_BASE/functions"

service_name="nfs"
service_start ()
{
    # Restart from a known-clean state: stop anything already
    # running, then bring the NFS services up.
    startstop_nfs stop
    startstop_nfs start

    # Allow rapid reuse of TIME_WAIT sockets so clients can
    # reconnect quickly after an IP takeover.
    set_proc "sys/net/ipv4/tcp_tw_recycle" 1

    # Reset the timestamp that throttles periodic statd
    # state-database updates in the monitor event.
    touch "$service_state_dir/update-trigger"
}
service_stop ()
{
    # Take all NFS services down.
    startstop_nfs stop
}
service_reconfigure ()
{
    startstop_nfs restart

    # If the public IPs have been reallocated then the lock manager
    # must be restarted across all nodes and all statd listeners
    # pinged, so clients can reclaim their locks.  Best effort:
    # runs in the background with output discarded.
    # $CTDB_BASE is quoted — unquoted it would word-split/glob.
    [ -x "$CTDB_BASE/statd-callout" ] && {
        "$CTDB_BASE/statd-callout" notify &
    } >/dev/null 2>&1
}
loadconfig

# When NFS-Ganesha is in use a different event script manages NFS,
# so this script has nothing to do.
if [ "$NFS_SERVER_MODE" = "GANESHA" ] ; then
    exit 0
fi

ctdb_setup_service_state_dir

ctdb_start_stop_service

is_ctdb_managed_service || exit 0

ctdb_service_check_reconfigure
case "$1" in
    init)
        # read statd from persistent database
        ;;

    startup)
        ctdb_service_start
        ;;

    shutdown)
        ctdb_service_stop
        ;;

    takeip)
        ctdb_service_set_reconfigure
        ;;

    releaseip)
        ctdb_service_set_reconfigure
        ;;

    monitor)
        # Check that directories for shares actually exist.
        [ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
            exportfs | grep -v '^#' | grep '^/' |
                sed -e 's/[[:space:]]\+[^[:space:]]*$//' |
                ctdb_check_directories
        } || exit $?

        update_tickles 2049

        # Check that statd responds to RPC requests.  If statd is
        # not running we try to restart it.  This is only done if an
        # rpc.statd command exists; on platforms without rpc.statd
        # the check is skipped completely.  command -v is the POSIX
        # replacement for the non-portable "which".
        p="rpc.statd"
        command -v "$p" >/dev/null 2>/dev/null && \
            nfs_check_rpc_service "statd" \
                -ge 6 "verbose unhealthy" \
                -eq 4 "verbose restart" \
                -eq 2 "restart:bs"

        # Check that NFS responds to RPC requests.
        if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
            nfs_check_rpc_service "knfsd" \
                -ge 6 "verbose unhealthy" \
                -eq 4 "verbose restart" \
                -eq 2 "restart:bs"
        fi

        # Check that lockd responds to RPC requests.
        nfs_check_rpc_service "lockd" \
            -ge 15 "verbose restart unhealthy" \
            -eq 10 "restart:bs"

        # mountd is sometimes not started correctly on RHEL5.
        nfs_check_rpc_service "mountd" \
            -ge 10 "verbose restart:b unhealthy" \
            -eq 5 "restart:b"

        # rquotad is sometimes not started correctly on RHEL5.
        # Not a critical service, so don't flag the node unhealthy.
        nfs_check_rpc_service "rquotad" \
            -gt 0 "verbose restart:b"

        # Once every 600 seconds, update the statd state database
        # of clients that need notifications.  $() replaces the
        # legacy backtick command substitution.
        LAST_UPDATE=$(stat --printf="%Y" "$service_state_dir/update-trigger")
        CURRENT_TIME=$(date +"%s")
        [ "$CURRENT_TIME" -ge $((LAST_UPDATE + 600)) ] && {
            touch "$service_state_dir/update-trigger"
            "$CTDB_BASE/statd-callout" updatelocal &
            "$CTDB_BASE/statd-callout" updateremote &
        }
        ;;

    *)
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0