1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-14 19:24:43 +03:00

123 lines
2.8 KiB
Plaintext
Raw Normal View History

#!/bin/sh
# script to manage nfs in a clustered environment
. $CTDB_BASE/functions
service_name="nfs"
service_start ()
{
startstop_nfs stop
startstop_nfs start
set_proc "sys/net/ipv4/tcp_tw_recycle" 1
touch "$service_state_dir/update-trigger"
}
service_stop ()
{
startstop_nfs stop
}
service_reconfigure ()
{
startstop_nfs restart
# if the ips have been reallocated, we must restart the lockmanager
# across all nodes and ping all statd listeners
[ -x $CTDB_BASE/statd-callout ] && {
$CTDB_BASE/statd-callout notify &
} >/dev/null 2>&1
}
loadconfig
[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0
ctdb_setup_service_state_dir
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
ctdb_service_check_reconfigure
case "$1" in
init)
# read statd from persistent database
;;
startup)
ctdb_service_start
;;
shutdown)
ctdb_service_stop
;;
takeip)
ctdb_service_set_reconfigure
;;
releaseip)
ctdb_service_set_reconfigure
;;
monitor)
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 16:25:04 +11:00
# Check that directories for shares actually exist.
[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
exportfs | grep -v '^#' | grep '^/' |
sed -e 's/[[:space:]]\+[^[:space:]]*$//' |
ctdb_check_directories
} || exit $?
update_tickles 2049
# check that statd responds to rpc requests
# if statd is not running we try to restart it
# we only do this IF we have a rpc.statd command.
# For platforms where rpc.statd does not exist, we skip
# the check completely
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 16:25:04 +11:00
p="rpc.statd"
which $p >/dev/null 2>/dev/null && \
nfs_check_rpc_service "statd" \
-ge 6 "verbose unhealthy" \
-eq 4 "verbose restart" \
-eq 2 "restart:bs"
# check that NFS responds to rpc requests
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 16:25:04 +11:00
if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
nfs_check_rpc_service "knfsd" \
-ge 6 "verbose unhealthy" \
-eq 4 "verbose restart" \
-eq 2 "restart:bs"
fi
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 16:25:04 +11:00
# check that lockd responds to rpc requests
nfs_check_rpc_service "lockd" \
-ge 15 "verbose restart unhealthy" \
-eq 10 "restart:bs"
# mountd is sometimes not started correctly on RHEL5
nfs_check_rpc_service "mountd" \
-ge 10 "verbose restart:b unhealthy" \
-eq 5 "restart:b"
# rquotad is sometimes not started correctly on RHEL5
# not a critical service so we dont flag the node as unhealthy
nfs_check_rpc_service "rquotad" \
-gt 0 "verbose restart:b"
# once every 600 seconds, update the statd state database for which
# clients need notifications
LAST_UPDATE=`stat --printf="%Y" "$service_state_dir/update-trigger"`
CURRENT_TIME=`date +"%s"`
[ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && {
touch "$service_state_dir/update-trigger"
$CTDB_BASE/statd-callout updatelocal &
$CTDB_BASE/statd-callout updateremote &
}
;;
*)
ctdb_standard_event_handler "$@"
;;
esac
exit 0