#!/bin/sh
# script to manage nfs in a clustered environment

# Work out the CTDB base directory unless the caller already set it:
# take the physical (symlink-free) directory containing this script and
# use its parent.  The assignment must stay attached to the "||" so that
# a caller-supplied CTDB_BASE is never overwritten.
[ -n "$CTDB_BASE" ] || \
    CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; dirname "$PWD")

# Shared helper functions used by the rest of this script
. "${CTDB_BASE}/functions"

service_name="nfs"

load_system_config "nfs"

load_script_options

# Sets $script_state_dir, which is passed to nfs_callout_init later on
ctdb_setup_state_dir "service" "$service_name"

######################################################################

# Restart lock manager, notify clients
#
# Runs "${CTDB_BASE}/statd-callout notify" in the background if that
# callout exists and is executable; otherwise does nothing.  Output of
# the callout is discarded.  Takes no arguments and returns 0 without
# waiting for the callout to finish.
#
# NOTE(review): presumably invoked via ctdb_service_reconfigure from the
# sourced functions file - confirm against that file.
service_reconfigure ()
{
    if [ -x "${CTDB_BASE}/statd-callout" ] ; then
        "${CTDB_BASE}/statd-callout" notify &
    fi >/dev/null 2>&1
}

######################################################################

######################################################
# Check the health of NFS services
#
# Use .check files in $CTDB_NFS_CHECKS_DIR.
# Default is "${CTDB_BASE}/nfs-checks.d/"
#
# Each readable file named NN.<progname>.check (NN = two digits) is
# fed on stdin to nfs_check_service, with <progname> as its argument.
# Files are visited in glob (sorted) order.  nfs_check_service exits
# the script with status 1 when a service is unhealthy, so later
# files are not processed in that case.
######################################################
nfs_check_services ()
{
    _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"

    # Files must end with .check - avoids editor backups, RPM fu, ...
    for _f in "$_dir"/[0-9][0-9].*.check ; do
        # Skip unreadable files; this also skips the literal pattern
        # that is left behind when the glob matches nothing
        [ -r "$_f" ] || continue

        # Strip the ".check" suffix, then the directory and "NN." prefix
        _t="${_f%.check}"
        _progname="${_t##*/[0-9][0-9].}"

        nfs_check_service "$_progname" <"$_f"
    done
}

######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family             - "tcp" or "udp" or space separated list
#                        default: tcp, not used with "service_check_cmd"
# * version            - optional, RPC service version number
#                        default is to omit to check for any version,
#                        not used with "service_check_cmd"
# * unhealthy_after    - number of check fails before unhealthy
#                        default: 1
# * restart_every      - number of check fails before restart
#                        default: 0, meaning no restart
# * service_stop_cmd   - command to stop service
#                        default: no default, must be provided if
#                        restart_every > 0
# * service_start_cmd  - command to start service
#                        default: no default, must be provided if
#                        restart_every > 0
# * service_check_cmd  - command to check health of service
#                        default is to check RPC service using rpcinfo
# * service_debug_cmd  - command to debug a service after trying to stop it;
#                        for example, it can be useful to print stack
#                        traces of threads that have not exited, since
#                        they may be stuck doing I/O;
#                        no default, see also function program_stack_traces()
#
# Quoting in values is not preserved
#
# Behaviour:
#
# * Check passes: the failure counter for progname is reset (only when
#   counting is in use, i.e. unhealthy_after != 1 or restart_every != 0)
#   and the function returns 0.
# * Check fails: the failure counter is incremented.  Once the count
#   reaches unhealthy_after (and unhealthy_after > 0) an "ERROR:" line is
#   printed and the calling script exits with status 1.  Whenever the
#   count is a multiple of restart_every (and restart_every > 0) the
#   service is restarted via nfs_restart_service, with a "WARNING:" line
#   if the service is not (yet) considered unhealthy.
# * An unrecognised line on stdin prints an "ERROR:" line and the
#   calling script exits with status 1.
######################################################
nfs_check_service ()
{
    _progname="$1"

    # This sub-shell is created to intentionally limit the scope of
    # variable values read from the .check files.
    # shellcheck disable=SC2030
    (
        # Defaults
        family="tcp"
        version=""
        unhealthy_after=1
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
        service_check_cmd=""
        service_debug_cmd=""

        # Eval line-by-line.  Expands variable references in values.
        # Also allows variable name checking, which seems useful.
        # read is used without -r, as before, so that the way
        # backslashes in existing .check files are handled is unchanged.
        while read _line ; do
            case "$_line" in
            \#*|"") : ;;  # Ignore comments, blank lines

            family=*|version=*|\
            unhealthy_after=*|restart_every=*|\
            service_stop_cmd=*|service_start_cmd=*|\
            service_check_cmd=*|service_debug_cmd=*)
                eval "$_line"
                ;;
            *)
                echo "ERROR: Unknown variable for ${_progname}: ${_line}"
                exit 1
                ;;
            esac
        done

        _ok=false
        if [ -n "$service_check_cmd" ] ; then
            # Using eval means variables can contain
            # semicolon separated commands
            if eval "$service_check_cmd" ; then
                _ok=true
            else
                _err="monitoring service \"${_progname}\" failed"
            fi
        else
            # Output is dropped here; the failure text is picked up
            # from $ctdb_check_rpc_out instead
            if nfs_check_rpcinfo \
                   "$_progname" "$version" "$family" >/dev/null ; then
                _ok=true
            else
                _err="$ctdb_check_rpc_out"
            fi
        fi

        if $_ok ; then
            # Only touch the counter when failure counting is in use
            if [ "$unhealthy_after" -ne 1 ] || \
               [ "$restart_every" -ne 0 ] ; then
                ctdb_counter_init "$_progname"
            fi
            exit 0
        fi

        ctdb_counter_incr "$_progname"
        _failcount=$(ctdb_counter_get "$_progname")

        _unhealthy=false
        if [ "$unhealthy_after" -gt 0 ] ; then
            if [ "$_failcount" -ge "$unhealthy_after" ] ; then
                _unhealthy=true
                echo "ERROR: $_err"
            fi
        fi

        if [ "$restart_every" -gt 0 ] ; then
            if [ $((_failcount % restart_every)) -eq 0 ] ; then
                # Avoid reporting the same failure twice: the
                # ERROR line above already covers the unhealthy case
                if ! $_unhealthy ; then
                    echo "WARNING: $_err"
                fi
                nfs_restart_service
            fi
        fi

        if $_unhealthy ; then
            exit 1
        fi

        # Leave the sub-shell explicitly, like every other exit path
        exit 0
    ) || exit 1
}

# Restart the service currently being checked
#
# Uses: service_stop_cmd, service_start_cmd, service_debug_cmd
# Also uses _progname for messages.
#
# Calls die() if either the stop or the start command is missing.
# Otherwise runs the stop command, then the optional debug command,
# and finally hands the start command to background_with_logging.
#
# This function is called within the sub-shell that shellcheck thinks
# loses the above variable values.
# shellcheck disable=SC2031
nfs_restart_service ()
{
    if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
        die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
        eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}

######################################################
# Check an RPC service with rpcinfo
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - version, optional, not passed to rpcinfo if empty/unset
# $3 - family (rpcinfo -T transport), optional, default is "tcp"
#
# The host queried is $CTDB_RPCINFO_LOCALHOST6 (default ::1) for the
# tcp6/udp6 families and $CTDB_RPCINFO_LOCALHOST (default 127.0.0.1)
# for everything else.
#
# Sets the global ctdb_check_rpc_out: the rpcinfo output (stdout and
# stderr) on success, or that output prefixed with
# "<progname> failed RPC check:" on failure.  On failure the message is
# also printed and 1 is returned; on success nothing is printed.
######################################################
ctdb_check_rpc ()
{
    _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
    _version="$2"         # optional, not passed if empty/unset
    _family="${3:-tcp}"   # optional, default is "tcp"

    case "$_family" in
    tcp6|udp6)
        _localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
        ;;
    *)
        _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
        ;;
    esac

    # $_version is not quoted because it is optional
    # shellcheck disable=SC2086
    if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
                                      "$_progname" $_version 2>&1) ; then
        ctdb_check_rpc_out="$_progname failed RPC check:
$ctdb_check_rpc_out"
        echo "$ctdb_check_rpc_out"
        return 1
    fi
}

######################################################
# Check an RPC service for several versions/families
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - versions, optional, space separated, not passed if empty/unset
# $3 - families, optional, space separated, default is "tcp"
#
# Calls ctdb_check_rpc for every family/version combination (or once
# per family, without a version, when no versions are given).  Stops at
# the first failing check and returns its status; returns 0 when every
# check passes.
######################################################
nfs_check_rpcinfo ()
{
    _progname="$1"         # passed to rpcinfo (looked up in /etc/rpc)
    _versions="$2"         # optional, space separated, not passed if empty/unset
    _families="${3:-tcp}"  # optional, space separated, default is "tcp"

    # $_families and $_versions are deliberately unquoted: they are
    # space separated lists that must be split into words
    for _family in $_families ; do
        if [ -n "$_versions" ] ; then
            for _version in $_versions ; do
                ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
            done
        else
            ctdb_check_rpc "$_progname" "" "$_family" || return $?
        fi
    done
}

##################################################################
# Refresh NFS lock information through statd-callout
#
# Runs "$CTDB_BASE/statd-callout update" and returns its status.  When
# the callout is missing or not executable nothing is run and 0 is
# returned.
##################################################################
nfs_update_lock_info ()
{
    # Nothing to do without an executable callout
    [ -x "${CTDB_BASE}/statd-callout" ] || return 0

    "${CTDB_BASE}/statd-callout" update
}
######################################################################

# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
nfs_callout_init "$script_state_dir"

# Dispatch on the event name in $1.  Events not listed here are
# ignored and the script exits 0.  A failing nfs_callout aborts the
# script with the callout's exit status.
case "$1" in
startup|shutdown)
    # Pass the event and its arguments straight to the callout
    nfs_callout "$@" || exit $?
    ;;

takeip|releaseip)
    nfs_callout "$@" || exit $?
    # Flag that a reconfigure is needed; it is acted on in the
    # "ipreallocated" event below
    ctdb_service_set_reconfigure
    ;;

ipreallocated)
    if ctdb_service_needs_reconfigure ; then
        ctdb_service_reconfigure
    fi
    ;;

monitor)
    nfs_callout "monitor-pre" || exit $?

    # Check that directories for shares actually exist
    if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
        nfs_callout "monitor-list-shares" | ctdb_check_directories || \
            exit $?
    fi

    update_tickles 2049
    nfs_update_lock_info

    # Exits the script with status 1 if a service is unhealthy
    nfs_check_services

    nfs_callout "monitor-post" || exit $?
    ;;
esac

exit 0