#!/bin/sh
# script to manage NFS in a clustered environment
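# CTDB_BASE defaults to the parent of the directory containing this
# script (typically /etc/ctdb) when it is not already set.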
[ -n "$CTDB_BASE" ] || \
export CTDB_BASE=$(cd -P $(dirname "$0") ; dirname "$PWD")
. $CTDB_BASE/functions
service_name="nfs"
service_start ()
{
    startstop_nfs stop
    startstop_nfs start
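    # Enable fast recycling of TIME_WAIT sockets so that clients can
    # re-establish connections quickly after a restart or IP takeover.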
    set_proc "sys/net/ipv4/tcp_tw_recycle" 1
}
service_stop ()
{
    startstop_nfs stop
}
service_reconfigure ()
{
    # if the IPs have been reallocated, we must restart the lock manager
    # across all nodes and ping all statd listeners
    [ -x $CTDB_BASE/statd-callout ] && {
        $CTDB_BASE/statd-callout notify &
    } >/dev/null 2>&1
}
loadconfig
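# Nothing to do here when the cluster is configured to use NFS-Ganesha
# instead of the kernel NFS server.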
[ "$NFS_SERVER_MODE" != "ganesha" ] || exit 0
ctdb_setup_service_state_dir
statd_update_trigger="$service_state_dir/update-trigger"
# We want this file to always exist. The corner case is when
# auto-start/stop is switched off, NFS is added as a managed service
# some time after ctdbd is started and someone else starts the NFS
# service for us. In this case this file might not otherwise exist
# when we get to a monitor event.
touch "$statd_update_trigger"
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
ctdb_service_check_reconfigure
case "$1" in
    init)
        # read statd from persistent database
        ;;
    startup)
        ctdb_service_start
        ;;
    shutdown)
        ctdb_service_stop
        ;;
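    # Assigning or releasing a public IP changes which clients this node
    # serves, so just flag a reconfigure; service_reconfigure() (run via
    # ctdb_service_check_reconfigure) then notifies the statd listeners.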
    takeip)
        ctdb_service_set_reconfigure
        ;;
    releaseip)
        ctdb_service_set_reconfigure
        ;;
    monitor)
        # Check that directories for shares actually exist.
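        # "exportfs -v" prints one "<path> <client>(<options>)" entry per
        # export; strip the trailing client(options) field, de-duplicate
        # the paths and verify that each exported directory exists.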
[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
exportfs -v | grep '^/' |
sed -r -e 's@[[:space:]]+[^[:space:]()]+\([^[:space:]()]+\)$@@' |
sort -u |
ctdb_check_directories
} || exit $?
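        # Record established client connections to the NFS port so that
        # CTDB can send tickle ACKs to reset them after IP failover.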
        update_tickles 2049
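        # Each nfs_check_rpc_service call below probes the named RPC
        # service with rpcinfo.  On failure a per-service counter is
        # incremented and compared against the "<op> <limit> <actions>"
        # triples; the first triple that matches is acted on, which is
        # why the limits appear in descending order.  "verbose" logs
        # details of the failure, "restart" restarts the service
        # ("restart:b" does so in the background) and "unhealthy" marks
        # the node unhealthy.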
        # check that statd responds to rpc requests
        nfs_check_rpc_service "statd" \
            -ge 6 "verbose unhealthy" \
            -eq 4 "verbose restart" \
            -eq 2 "restart:bs"
        # check that NFS responds to rpc requests
if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
nfs_check_rpc_service "knfsd" \
-ge 6 "verbose unhealthy" \
-eq 4 "verbose restart" \
-eq 2 "restart:bs"
fi
        # check that lockd responds to rpc requests
        nfs_check_rpc_service "lockd" \
            -ge 15 "verbose restart:b unhealthy" \
-eq 10 "restart:bs"
        # mountd is sometimes not started correctly on RHEL5
        nfs_check_rpc_service "mountd" \
            -ge 10 "verbose restart:b unhealthy" \
            -eq 5 "restart:b"
        # rquotad is sometimes not started correctly on RHEL5
        # not a critical service so we don't flag the node as unhealthy
        nfs_check_rpc_service "rquotad" \
            -gt 0 "verbose restart:b"
        # once every 600 seconds, update the statd state database for which
        # clients need notifications
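        # Both updates run in the background so that a slow statd-callout
        # does not hold up the monitor event.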
        LAST_UPDATE=`stat --printf="%Y" "$statd_update_trigger"`
        CURRENT_TIME=`date +"%s"`
        [ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && {
            touch "$statd_update_trigger"
            $CTDB_BASE/statd-callout updatelocal &
            $CTDB_BASE/statd-callout updateremote &
        }
        ;;
    *)
        ctdb_standard_event_handler "$@"
        ;;
esac
exit 0