mirror of
https://github.com/samba-team/samba.git
synced 2025-01-06 13:18:07 +03:00
4766d4568b
This one does an rpcinfo check, along with statistics mitigation. It can be used in combination with the existing 20.nfs_ganesha.check. The equivalent kernel NFS file only restarts every 10 failures. This one can be a little more proactive given that false positives are less likely with the statistics mitigation. Signed-off-by: Martin Schwenke <mschwenke@ddn.com> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
463 lines
9.7 KiB
Bash
Executable File
463 lines
9.7 KiB
Bash
Executable File
#!/bin/sh

# This is an example CTDB NFS callout script for Ganesha.  It is based
# on the last version of 60.ganesha shipped with CTDB.  As such, it
# does not try to monitor RPC services that were not monitored by
# 60.ganesha - this might be a useful improvement.  It has also not
# been properly tested.

# You should check your version of NFS Ganesha to see if it ships with
# a newer callout.

# To use this:
#
# * Set CTDB_NFS_CALLOUT in your CTDB configuration to point to (a
#   copy of) this script, making sure it is executable.
#
# * Create a new directory alongside the nfs-checks.d directory, for
#   example nfs-checks-ganesha.d.  Install 20.nfs_ganesha.check and
#   21.nfs.check in this directory.  Symlink to any other check files
#   from nfs-checks.d that should still be used, such as
#   00.portmapper.check.  Set CTDB_NFS_CHECKS_DIR to point to this new
#   directory of check files.
#
# * It is recommended, but not required, to install the grace_period
#   script (usually shipped in a utils package for NFS-Ganesha) to
#   /usr/bin/grace_period

# I (Martin Schwenke) hereby relicense all of my contributions to this
# callout (and, previously, to 60.ganesha) to a license compatible
# with NFS Ganesha (right now this is LGPLv3, but I'm flexible).
# There may be other contributions to be considered for relicensing,
# particularly those in commit 28cbe527d47822f870e8252495ab2a1c8fddd12f.

######################################################################

# Exit on 1st error
set -e

# Filesystem type and mount point for the (typically clustered)
# volume that will contain the NFS-Ganesha state.
state_fs="${CTDB_NFS_STATE_FS_TYPE:-gpfs}"
state_dir="${CTDB_NFS_STATE_MNT:-}" # No sane default.

# Location of exports file
nfs_exports_file="${CTDB_NFS_EXPORTS_FILE:-/etc/ganesha/ganesha.conf}"

# To change the following, edit the default values below.  Do not set
# these - they aren't configuration variables, just hooks for testing.
nfs_service="${CTDB_NFS_SERVICE:-nfs-ganesha}"
ganesha_rec_subdir=${CTDB_GANESHA_REC_SUBDIR:-.ganesha}
procfs=${PROCFS_PATH:-/proc}

# Derive the filesystem-specific state locations used by the functions
# below.  Other values of $state_fs set nothing here; they are rejected
# later by get_cluster_fs_state().
case "$state_fs" in
gpfs)
	GANRECDIR="/var/lib/nfs/ganesha"
	;;
glusterfs)
	if [ -z "${state_dir}" ]; then
		echo "CTDB_NFS_STATE_MNT not defined for GlusterFS"
		exit 1
	fi
	host=$(hostname)
	NODESTATEDIR="$state_dir/nfs-ganesha/$host"
	GANSTATEDIR="$state_dir/nfs-ganesha/.noderefs"
	NODESTATELN="$GANSTATEDIR/$host"
	;;
esac

##################################################

# Print a summary of the supported invocations on stdout and then exit
# the script with status 1.  This function never returns.
usage()
{
	_c=$(basename "$0")
	cat <<EOF
usage: $_c { shutdown | startup }
       $_c { stop | start | check | stats } nfs
       $_c { releaseip | takeip } <iface> <ip> <maskbits>
       $_c { monitor-list-shares }
EOF
	exit 1
}

##################################################
# Basic service stop and start

# Stop a service via the "service" command.
# $1 - service name; only "nfs" is handled, which maps to $nfs_service.
# Any other value prints usage, which exits the script.
basic_stop()
{
	case "$1" in
	nfs)
		service "$nfs_service" stop
		;;
	*)
		usage
		;;
	esac
}

# Start a service via the "service" command.
# $1 - service name; only "nfs" is handled, which maps to $nfs_service.
# Any other value prints usage, which exits the script.
basic_start()
{
	case "$1" in
	nfs)
		service "$nfs_service" start
		;;
	*)
		usage
		;;
	esac
}

##################################################
# "stop" and "start" options for restarting

# Handler for the "stop" action.
# $1 - service name: "nfs" is stopped via basic_stop, "nlockmgr" is
#      accepted but ignored, anything else prints usage and exits.
service_stop()
{
	case "$1" in
	nfs)
		basic_stop "nfs"
		;;
	nlockmgr)
		# Do nothing - used by statd-callout
		:
		;;
	*)
		usage
		;;
	esac
}

# Handler for the "start" action.
# $1 - service name: "nfs" is started via basic_start, "nlockmgr" is
#      accepted but ignored, anything else prints usage and exits.
service_start()
{
	case "$1" in
	nfs)
		basic_start "nfs"
		;;
	nlockmgr)
		# Do nothing - used by statd-callout
		:
		;;
	*)
		usage
		;;
	esac
}

##################################################
# Nitty gritty - monitoring and IP handling

# Check that a symlink exists, create it otherwise.
# Usage: check_ln <TARGET> <LINK>
#
# If <LINK> is already a symlink whose value is exactly <TARGET> then
# nothing is done, whether or not <TARGET> currently exists.  Anything
# else found at <LINK> (a file, a directory tree, a symlink pointing
# elsewhere) is removed and replaced.  Removals and link creation are
# reported on stdout by the -v options.
check_ln()
{
	if [ -L "${2}" ]; then
		_t=$(readlink "${2}")
		if [ "$_t" = "${1}" ]; then
			# Already correct.  Test the link itself rather
			# than using -e, which follows the link and so
			# would re-create a correct link every time its
			# target is missing.
			return 0
		fi
		rm -v "${2}"
	else
		# Not a symlink: clear whatever is in the way, if anything
		rm -vrf "${2}"
	fi

	ln -sfv "${1}" "${2}"
}

# Return 'active' if the shared filesystem is accessible.
#
# The state is printed on stdout: for gpfs it is the 3rd field of the
# 4th line of mmgetstate output, for glusterfs it is always "active".
# For any other $state_fs an error is printed on stderr and the
# (sub)shell exits with status 1.  The error goes to stderr because
# callers capture stdout with $(...), which would otherwise hide it.
get_cluster_fs_state()
{
	case "$state_fs" in
	gpfs)
		/usr/lpp/mmfs/bin/mmgetstate | awk 'NR == 4 { print $3 }'
		;;
	glusterfs)
		# Since we're past create_ganesha_recdirs(), we're active.
		echo "active"
		;;
	*)
		echo "File system $state_fs not supported" >&2
		exit 1
		;;
	esac
}

# Ensure the NFS-Ganesha recovery/state directories and symlinks exist
# on the shared filesystem.
#
# Exits the script with status 1 if "mount" output contains no line
# matching $state_fs.
#
# gpfs:      uses the mount point from the first (sorted) matching
#            mount line, creates $ganesha_rec_subdir there and points
#            $GANRECDIR at it.
# glusterfs: moves /var/lib/nfs aside once (to /var/lib/nfs.backup),
#            links it to $NODESTATEDIR, creates the per-node state
#            tree, and cross-links every other node found under
#            $GANSTATEDIR into this node's ganesha/ and statd/
#            directories.
create_ganesha_recdirs()
{
	if ! _mounts=$(mount | grep "$state_fs"); then
		echo "Failed to find mounts of type $state_fs"
		exit 1
	fi
	# NOTE(review): grep fails when nothing matches, so the check
	# above already exits in that case and this branch looks
	# unreachable.  Kept unchanged as a safety net.
	if [ -z "$_mounts" ]; then
		echo "startup $state_fs not ready"
		exit 0
	fi

	case "$state_fs" in
	gpfs)
		_mntpt=$(echo "$_mounts" | sort | awk 'NR == 1 {print $3}')
		_link_dst="${_mntpt}/${ganesha_rec_subdir}"
		mkdir -vp "$_link_dst"
		check_ln "$_link_dst" "$GANRECDIR"
		;;
	glusterfs)
		[ -d /var/lib/nfs.backup ] ||
			mv /var/lib/nfs /var/lib/nfs.backup
		check_ln "$NODESTATEDIR" /var/lib/nfs

		mkdir -p "${NODESTATEDIR}/ganesha/v4recov"
		mkdir -p "${NODESTATEDIR}/ganesha/v4old"
		mkdir -p "${NODESTATEDIR}/statd/sm"
		mkdir -p "${NODESTATEDIR}/statd/sm.bak"
		touch "${NODESTATEDIR}/state"
		touch "${NODESTATEDIR}/statd/state"

		mkdir -p "$GANSTATEDIR"
		check_ln "$NODESTATEDIR" "$NODESTATELN"
		for _dir in "${GANSTATEDIR}/"*; do
			# Skip anything that is not a directory.  This
			# covers the unexpanded glob when there are no
			# entries, and also stray files or dangling
			# links, without abandoning the entries that
			# sort after them.
			if [ ! -d "$_dir" ]; then
				continue
			fi

			_node="${_dir##*/}" # basename
			if [ "${_node}" != "${host}" ]; then
				check_ln "${GANSTATEDIR}/${_node}/ganesha" \
					"${NODESTATEDIR}/ganesha/${_node}"
				check_ln "${GANSTATEDIR}/${_node}/statd" \
					"${NODESTATEDIR}/statd/${_node}"
			fi
		done
		;;
	esac
}

# Succeed (return 0) if the PID recorded in the Ganesha PID file
# belongs to a process whose command name is ganesha.nfsd; return 1
# otherwise, including when the PID file cannot be read.  Produces no
# output.
is_ganesha_running()
{
	# Check that NFS Ganesha is running, according to PID file
	_pidfile="/var/run/ganesha/ganesha.pid"
	_ganesha="ganesha.nfsd"
	if ! {
		read -r _pid <"$_pidfile" &&
			[ "$(ps -p "$_pid" -o comm=)" = "$_ganesha" ]
	} >/dev/null 2>&1; then
		return 1
	fi

	return 0
}

# Handler for the "check" action.
#
# Makes sure the recovery directories exist, then reports failure
# (return 1, with an error message) only when the cluster filesystem
# is active and Ganesha is not running according to its PID file.
service_check()
{
	create_ganesha_recdirs

	# Always succeed if cluster filesystem is not active
	_cluster_fs_state=$(get_cluster_fs_state)
	if [ "$_cluster_fs_state" != "active" ]; then
		return 0
	fi

	if ! is_ganesha_running; then
		echo "ERROR: NFS Ganesha not running according to PID file"
		return 1
	fi

	return 0
}

# Handler for the "stats" action.
# $1 - service name.  For "nfs", print the "Total NFSv... ops:" lines
#      from ganesha_stats, which is given at most 5 seconds to run.
#      For anything else, print "Not implemented" on stderr and exit
#      with status 1.
nfs_stats()
{
	_service="$1"

	case "$_service" in
	nfs)
		timeout -v 5 ganesha_stats | grep '^Total NFSv.* ops:'
		;;
	*)
		# This will never change, so is intentionally
		# unhelpful for avoiding an unhealthy service
		echo "Not implemented" >&2
		exit 1
		;;
	esac
}

#-------------------------------------------------

# Ask NFS-Ganesha to start a grace period.
# $1 - argument passed through unchanged to the grace request.
#
# Uses /usr/bin/grace_period when it is executable, otherwise sends
# the request directly with dbus-send.  Any output from the command
# (stdout and stderr) is echoed on stdout.
#
# Returns 0 on success, 3 if the command failed and its output
# contains the relevant "service is down" message, otherwise the
# command's own exit status.
grace_period()
{
	_arg="$1"

	_gp_status=0
	if [ -x "/usr/bin/grace_period" ]; then
		_out=$(/usr/bin/grace_period "$_arg" 2>&1) ||
			_gp_status=$?
		_down_msg="Error: Can't talk to ganesha service on d-bus"
	else
		_out=$(dbus-send \
			--print-reply --system --dest=org.ganesha.nfsd \
			/org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace \
			string:"$_arg" 2>&1) ||
			_gp_status=$?
		_down_msg="Error org.freedesktop.DBus.Error.ServiceUnknown"
	fi

	if [ -n "$_out" ]; then
		echo "$_out"
	fi

	if [ "$_gp_status" -ne 0 ]; then
		# If $_out contains $_down_msg then NFS-Ganesha is
		# either down or is starting, so will be in grace
		# anyway.
		if [ "${_out#*"${_down_msg}"}" != "$_out" ]; then
			return 3 # ESRCH - No such process
		fi
	fi

	return "$_gp_status"
}

# Start a grace period, but only if Ganesha is running.
# $1 - argument passed through to grace_period.
#
# If Ganesha is not running according to its PID file, print a warning
# and return 0.  Otherwise return the status of grace_period, except
# that status 3 (service down or starting) is treated as success.
grace_period_if_running()
{
	_arg="$1"

	if ! is_ganesha_running; then
		echo "WARNING: NFS Ganesha not running according to PID file"
		return 0
	fi

	_status=0
	grace_period "$_arg" || _status=$?
	case $_status in
	3)
		# Convert to success
		return 0
		;;
	*)
		return $_status
		;;
	esac
}

# Handler for the "startipreallocate" action: request a grace period
# with argument "0:" if Ganesha is running.
nfs_startipreallocate()
{
	grace_period_if_running "0:"
}

# Handler for the "releaseip" action.
# Usage: nfs_releaseip <iface> <ip> <maskbits>
# Only <ip> is used: a grace period with argument "2:<ip>" is
# requested if Ganesha is running.
nfs_releaseip()
{
	_ip="$2"

	# NFS-Ganesha recovery code only processes items matching $_ip
	grace_period_if_running "2:${_ip}"
}

# Handler for the "takeip" action.
# Usage: nfs_takeip <iface> <ip> <maskbits>
# Only <ip> is used.  On glusterfs, first make sure
# $GANSTATEDIR/<ip> is a symlink to this node's state directory.
# Then request a grace period with argument "5:<ip>".  Unlike
# releaseip, this does not first check whether Ganesha is running.
nfs_takeip()
{
	_ip="$2"

	case "$state_fs" in
	glusterfs)
		check_ln "$NODESTATEDIR" "${GANSTATEDIR}/${_ip}"
		;;
	esac

	grace_period "5:${_ip}"
}

##################################################
# service init startup and final shutdown

# Handler for the "shutdown" action: stop the NFS service.
nfs_shutdown()
{
	basic_stop "nfs"
}

# Handler for the "startup" action.
#
# Stops the NFS service (ignoring failure), ensures the recovery
# directories exist, starts the service, and finally writes 1 to
# $procfs/sys/net/ipv4/tcp_tw_recycle if that file exists.
nfs_startup()
{
	basic_stop "nfs" || true

	create_ganesha_recdirs

	basic_start "nfs"
	_f="${procfs}/sys/net/ipv4/tcp_tw_recycle"
	if [ -f "$_f" ]; then
		echo 1 >"$_f"
	fi
}

##################################################
# list share directories

# Handler for the "monitor-list-shares" action: print, one per line,
# the value of every "Path = ..." setting found in $nfs_exports_file,
# with any surrounding double-quotes removed.
nfs_monitor_list_shares()
{
	# The 1st sed command prints anything after "Path = ", where
	# Path is matched case-insensitively, and must be on a word
	# boundary.  This also deletes any semicolon-terminated items
	# before Path.  Each output line now starts with a value for
	# Path, but may have other settings after a semicolon.
	_s1='s/.*;*[[:space:]]*\<path\>[[:space:]]*=[[:space:]]*//ip'

	# The 2nd sed command has 2 steps:
	#
	# a. Attempt to match an unquoted value not containing
	#    semicolon or double-quote, followed by an optional
	#    line-terminating semicolon or a semicolon followed by
	#    anything else.  Keep the value and double-quote it.  If
	#    the value was already quoted then the line will be
	#    unchanged.  The pattern space now starts with a
	#    double-quoted value.
	_s2a='s/^\([^";][^";]*\)[[:space:]]*\(;*[[:space:]]*$\|;.*\)/"\1"/'
	# b. Finally, print the contents of double-quotes at the
	#    beginning of the pattern space, discarding anything
	#    that follows.
	_s2b='s/^"\([^"][^"]*\)".*/\1/p'

	sed -n -e "$_s1" "$nfs_exports_file" | sed -n -e "$_s2a" -e "$_s2b"
}

##################################################

# Handler for the "register" action: print the names of the actions
# this callout registers, one per line.
nfs_register()
{
	cat <<EOF
shutdown
startup
stop
start
check
stats
startipreallocate
releaseip
takeip
monitor-list-shares
EOF
}

##################################################

# An action is mandatory.  Check for it before "shift", which fails
# when there are no positional parameters and would end the script
# (because of "set -e") before usage could be shown.
if [ $# -eq 0 ]; then
	usage
fi

action="$1"
shift

# Dispatch the action to its handler.  The remaining positional
# parameters are the action's own arguments.
case "$action" in
shutdown) nfs_shutdown ;;
startup) nfs_startup ;;
stop) service_stop "$1" ;;
start) service_start "$1" ;;
check) service_check "$1" ;;
stats) nfs_stats "$1" ;;
startipreallocate) nfs_startipreallocate ;;
releaseip) nfs_releaseip "$@" ;;
takeip) nfs_takeip "$@" ;;
monitor-list-shares) nfs_monitor_list_shares ;;
register) nfs_register ;;
monitor-pre | monitor-post)
	# Not required/implemented
	:
	;;
*)
	usage
	;;
esac