1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-05 09:18:06 +03:00
samba-mirror/ctdb/doc/examples/nfs-ganesha-callout
Martin Schwenke 4766d4568b ctdb-doc: Add example for NFS-Ganesha RPC checking
This one does an rpcinfo check, along with statistics mitigation.  It
can be used in combination with the existing 20.nfs_ganesha.check.

The equivalent kernel NFS file only restarts every 10 failures.  This
one can be a little more proactive given that false positives are less
likely with the statistics mitigation.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
2024-06-25 03:16:37 +00:00

463 lines
9.7 KiB
Bash
Executable File

#!/bin/sh
# This is an example CTDB NFS callout script for Ganesha. It is based
# on the last version of 60.ganesha shipped with CTDB. As such, it
# does not try to monitor RPC services that were not monitored by
# 60.ganesha - this might be a useful improvement. It has also not
# been properly tested.
# You should check your version of NFS Ganesha to see if it ships with
# a newer callout.
# To use this:
#
# * Set CTDB_NFS_CALLOUT in your CTDB configuration to point to (a
# copy of) this script, making sure it is executable.
#
# * Create a new directory alongside the nfs-checks.d directory, for
# example nfs-checks-ganesha.d. Install 20.nfs_ganesha.check and
# 21.nfs.check in this directory. Symlink to any other check files
# from nfs-checks.d that should still be used, such as
# 00.portmapper.check. Set CTDB_NFS_CHECKS_DIR to point to this new
# directory of check files.
#
# * It is recommended, but not required, to install the grace_period
# script (usually shipped in a utils package for NFS-Ganesha) to
# /usr/bin/grace_period
# I (Martin Schwenke) hereby relicense all of my contributions to this
# callout (and, previously, to 60.ganesha) to a license compatible
# with NFS Ganesha (right now this is LGPLv3, but I'm flexible).
# There may be other contributions to be considered for relicensing,
# particularly those in commit 28cbe527d47822f870e8252495ab2a1c8fddd12f.
######################################################################
# Abort the callout as soon as any unchecked command fails.
set -e

# Type of the (typically clustered) filesystem holding the
# NFS-Ganesha state, and where it is mounted.  The mount point has no
# sane default, so it stays empty unless configured.
state_fs=${CTDB_NFS_STATE_FS_TYPE:-gpfs}
state_dir=${CTDB_NFS_STATE_MNT}

# File from which the exported paths are read.
nfs_exports_file=${CTDB_NFS_EXPORTS_FILE:-/etc/ganesha/ganesha.conf}

# The remaining variables are hooks for testing rather than
# configuration: change the defaults here instead of setting them.
nfs_service=${CTDB_NFS_SERVICE:-nfs-ganesha}
ganesha_rec_subdir=${CTDB_GANESHA_REC_SUBDIR:-.ganesha}
procfs=${PROCFS_PATH:-/proc}

# Derive the per-filesystem state locations.  Any other filesystem
# type sets nothing here.
if [ "$state_fs" = "gpfs" ]; then
	GANRECDIR="/var/lib/nfs/ganesha"
elif [ "$state_fs" = "glusterfs" ]; then
	if [ -z "$state_dir" ]; then
		echo "CTDB_NFS_STATE_MNT not defined for GlusterFS"
		exit 1
	fi
	host=$(hostname)
	NODESTATEDIR="$state_dir/nfs-ganesha/$host"
	GANSTATEDIR="$state_dir/nfs-ganesha/.noderefs"
	NODESTATELN="$GANSTATEDIR/$host"
fi
##################################################
# Print a summary of every action the dispatcher at the bottom of this
# script accepts, then exit with status 1.  Never returns.
usage()
{
	# Script name without its directory; no need to fork basename.
	_c="${0##*/}"
	cat <<EOF
usage: $_c { shutdown | startup }
       $_c { stop | start | check | stats } nfs
       $_c { startipreallocate }
       $_c { releaseip | takeip } <iface> <ip> <maskbits>
       $_c { monitor-list-shares | register }
EOF
	exit 1
}
##################################################
# Basic service stop and start
# Stop the named service via service(8).
#   $1 - service name; only "nfs" is understood, anything else ends
#        up in usage.
basic_stop()
{
	if [ "$1" = "nfs" ]; then
		service "$nfs_service" stop
	else
		usage
	fi
}
# Start the named service via service(8).
#   $1 - service name; only "nfs" is understood, anything else ends
#        up in usage.
basic_start()
{
	if [ "$1" = "nfs" ]; then
		service "$nfs_service" start
	else
		usage
	fi
}
##################################################
# "stop" and "start" options for restarting
# "stop" action used when restarting a service.
#   $1 - "nfs" stops NFS-Ganesha; "nlockmgr" is accepted but is a
#        no-op (used by statd-callout); anything else ends up in usage.
service_stop()
{
	if [ "$1" = "nfs" ]; then
		basic_stop "nfs"
	elif [ "$1" = "nlockmgr" ]; then
		# Nothing to do - used by statd-callout
		:
	else
		usage
	fi
}
# "start" action used when restarting a service.
#   $1 - "nfs" starts NFS-Ganesha; "nlockmgr" is accepted but is a
#        no-op (used by statd-callout); anything else ends up in usage.
service_start()
{
	if [ "$1" = "nfs" ]; then
		basic_start "nfs"
	elif [ "$1" = "nlockmgr" ]; then
		# Nothing to do - used by statd-callout
		:
	else
		usage
	fi
}
##################################################
# Nitty gritty - monitoring and IP handling
# Make sure <LINK> is a symlink pointing at <TARGET>, (re)creating it
# when it is missing, is not a symlink, or points somewhere else.
# Usage: check_ln <TARGET> <LINK>
check_ln()
{
	_ln_target="${1}"
	_ln_path="${2}"

	if [ -L "$_ln_path" ]; then
		# Existing symlink: drop it only if it points elsewhere
		_ln_current=$(readlink "$_ln_path")
		[ "$_ln_current" = "$_ln_target" ] || rm -v "$_ln_path"
	else
		# Anything that is not a symlink is in the way
		rm -vrf "$_ln_path"
	fi

	# Deliberately a separate test: this also covers the case where
	# the old entry has just been removed above.
	[ -e "$_ln_path" ] || ln -sfv "$_ln_target" "$_ln_path"
}
# Print the state of the shared filesystem on stdout; 'active' means
# it is accessible.
#
# For an unsupported $state_fs the error goes to stderr and the shell
# exits 1.  Callers capture stdout with $(...), so a message written
# to stdout would be swallowed and never reach the log.
get_cluster_fs_state()
{
	case "$state_fs" in
	gpfs)
		# The state is taken from the 3rd field of the 4th line
		# of mmgetstate output
		/usr/lpp/mmfs/bin/mmgetstate | awk 'NR == 4 { print $3 }'
		;;
	glusterfs)
		# Since we're past create_ganesha_recdirs(), we're active.
		echo "active"
		;;
	*)
		echo "File system $state_fs not supported" >&2
		exit 1
		;;
	esac
}
# Prepare the NFS-Ganesha recovery/state directories on the shared
# filesystem and point the local paths at them.
#
# Globals read:
#   state_fs, ganesha_rec_subdir, GANRECDIR          (gpfs)
#   NODESTATEDIR, GANSTATEDIR, NODESTATELN, host      (glusterfs)
#
# Exits 1 when mount(8) lists nothing matching $state_fs.
create_ganesha_recdirs()
{
	if ! _mounts=$(mount | grep "$state_fs"); then
		echo "Failed to find mounts of type $state_fs"
		exit 1
	fi
	# NOTE(review): grep only succeeds after printing a match, so
	# $_mounts should never be empty here and this branch looks
	# unreachable.  Kept as a safety net - confirm whether "not
	# mounted" was meant to be a quiet success instead of a failure.
	if [ -z "$_mounts" ]; then
		echo "startup $state_fs not ready"
		exit 0
	fi

	case "$state_fs" in
	gpfs)
		# Use the mount point (3rd field) of the first mount
		# in sorted order
		_mntpt=$(echo "$_mounts" | sort | awk 'NR == 1 {print $3}')
		_link_dst="${_mntpt}/${ganesha_rec_subdir}"
		mkdir -vp "$_link_dst"
		check_ln "$_link_dst" "$GANRECDIR"
		;;
	glusterfs)
		# Keep a one-off backup of the local state directory
		# before it is replaced by a link to the shared one
		[ -d /var/lib/nfs.backup ] ||
			mv /var/lib/nfs /var/lib/nfs.backup
		check_ln "$NODESTATEDIR" /var/lib/nfs

		mkdir -p "${NODESTATEDIR}/ganesha/v4recov"
		mkdir -p "${NODESTATEDIR}/ganesha/v4old"
		mkdir -p "${NODESTATEDIR}/statd/sm"
		mkdir -p "${NODESTATEDIR}/statd/sm.bak"
		touch "${NODESTATEDIR}/state"
		touch "${NODESTATEDIR}/statd/state"

		mkdir -p "$GANSTATEDIR"
		check_ln "$NODESTATEDIR" "$NODESTATELN"

		# Link the state of every other entry under
		# $GANSTATEDIR into this node's state directory
		for _dir in "${GANSTATEDIR}/"*; do
			# Skip anything that is not a directory.  This
			# covers the unmatched glob (no entries at all)
			# and also a stray file or dangling link, which
			# must not stop later entries being processed.
			if [ ! -d "$_dir" ]; then
				continue
			fi
			_node="${_dir##*/}" # basename
			if [ "${_node}" != "${host}" ]; then
				check_ln "${GANSTATEDIR}/${_node}/ganesha" \
					"${NODESTATEDIR}/ganesha/${_node}"
				check_ln "${GANSTATEDIR}/${_node}/statd" \
					"${NODESTATEDIR}/statd/${_node}"
			fi
		done
		;;
	esac
}
# Succeed (0) when the PID recorded in the NFS-Ganesha PID file
# belongs to a process whose command name is ganesha.nfsd, fail (1)
# otherwise - including when the PID file cannot be read.
is_ganesha_running()
{
	_ganesha_pidfile="/var/run/ganesha/ganesha.pid"
	_ganesha_comm="ganesha.nfsd"

	# All diagnostics from the lookup are discarded; only the
	# verdict matters.
	{
		read -r _ganesha_pid <"$_ganesha_pidfile" &&
			[ "$(ps -p "$_ganesha_pid" -o comm=)" = "$_ganesha_comm" ]
	} >/dev/null 2>&1 || return 1

	return 0
}
# "check" action: make sure the recovery directories are in place and
# report whether NFS-Ganesha is running.
#
# Returns 0 when the cluster filesystem is not active (nothing can be
# checked then) or when Ganesha is running, 1 otherwise.
service_check()
{
	create_ganesha_recdirs

	# An inactive cluster filesystem always counts as success
	_fs_state=$(get_cluster_fs_state)
	[ "$_fs_state" = "active" ] || return 0

	is_ganesha_running && return 0

	echo "ERROR: NFS Ganesha not running according to PID file"
	return 1
}
# "stats" action: print the "Total NFSv... ops:" lines reported by
# ganesha_stats, giving up after 5 seconds.
#   $1 - service name; only "nfs" is implemented
nfs_stats()
{
	if [ "$1" != "nfs" ]; then
		# This output never changes, so it is intentionally
		# unhelpful for avoiding an unhealthy service
		echo "Not implemented" >&2
		exit 1
	fi

	timeout -v 5 ganesha_stats | grep '^Total NFSv.* ops:'
}
#-------------------------------------------------
# Send a grace request to NFS-Ganesha, using /usr/bin/grace_period
# when it is installed and a direct D-Bus call otherwise.
#   $1 - request string handed to the grace interface
# Any output from the command is echoed.
# Returns 0 on success, 3 (ESRCH) when the command failed because the
# service could not be reached, else the command's own exit status.
grace_period()
{
	_gp_request="$1"
	_gp_tool="/usr/bin/grace_period"
	_gp_rc=0

	if [ ! -x "$_gp_tool" ]; then
		_gp_unreachable="Error org.freedesktop.DBus.Error.ServiceUnknown"
		_gp_reply=$(dbus-send \
			--print-reply --system --dest=org.ganesha.nfsd \
			/org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace \
			string:"$_gp_request" 2>&1) ||
			_gp_rc=$?
	else
		_gp_unreachable="Error: Can't talk to ganesha service on d-bus"
		_gp_reply=$("$_gp_tool" "$_gp_request" 2>&1) ||
			_gp_rc=$?
	fi

	[ -z "$_gp_reply" ] || echo "$_gp_reply"

	if [ "$_gp_rc" -eq 0 ]; then
		return 0
	fi

	# A failure mentioning the "unreachable" message means
	# NFS-Ganesha is either down or starting, so it will be in
	# grace anyway.
	case "$_gp_reply" in
	*"$_gp_unreachable"*)
		return 3 # ESRCH - No such process
		;;
	esac

	return "$_gp_rc"
}
# Like grace_period, but tolerant of NFS-Ganesha being down:
#  - not running according to the PID file: warn and succeed;
#  - grace_period returning 3 (service unreachable): succeed;
#  - any other status is passed through unchanged.
#   $1 - request string handed on to grace_period
grace_period_if_running()
{
	_gpir_request="$1"

	if is_ganesha_running; then
		_gpir_rc=0
		grace_period "$_gpir_request" || _gpir_rc=$?

		# 3 is converted to success
		if [ "$_gpir_rc" -eq 3 ]; then
			return 0
		fi
		return "$_gpir_rc"
	fi

	echo "WARNING: NFS Ganesha not running according to PID file"
	return 0
}
# "startipreallocate" action: send the fixed grace request "0:" if
# NFS-Ganesha is running.
nfs_startipreallocate()
{
	_sipr_request="0:"
	grace_period_if_running "$_sipr_request"
}
# "releaseip" action.
#   $1 - interface (unused), $2 - IP address being released
# Sends grace request "2:<ip>" if NFS-Ganesha is running; the
# NFS-Ganesha recovery code only processes items matching that IP.
nfs_releaseip()
{
	grace_period_if_running "2:${2}"
}
# "takeip" action.
#   $1 - interface (unused), $2 - IP address being taken over
# On GlusterFS an entry named after the IP is first linked to this
# node's state directory; then grace request "5:<ip>" is sent.
nfs_takeip()
{
	_takeip_addr="$2"

	if [ "$state_fs" = "glusterfs" ]; then
		check_ln "$NODESTATEDIR" "${GANSTATEDIR}/${_takeip_addr}"
	fi

	grace_period "5:${_takeip_addr}"
}
##################################################
# service init startup and final shutdown
# "shutdown" action: final stop of the NFS service.  The status of
# the stop is the status of this function.
nfs_shutdown()
{
	basic_stop nfs
}
# "startup" action: restart the NFS service from a clean state.
# A failing stop is ignored, the recovery directories are prepared,
# the service is started and, if the tcp_tw_recycle control exists
# under $procfs, 1 is written to it.
nfs_startup()
{
	# The service may legitimately not be running yet
	if ! basic_stop "nfs"; then
		:
	fi

	create_ganesha_recdirs
	basic_start "nfs"

	_tw_recycle="${procfs}/sys/net/ipv4/tcp_tw_recycle"
	[ ! -f "$_tw_recycle" ] || echo 1 >"$_tw_recycle"
}
##################################################
# list share directories
# "monitor-list-shares" action: print, one per line, the value of
# every "Path = ..." setting found in $nfs_exports_file.
nfs_monitor_list_shares()
{
	# Stage 1: keep only lines containing "Path =" (Path matched
	# case-insensitively, on a word boundary) and strip everything
	# up to and including the "=" and surrounding whitespace, which
	# also drops any semicolon-terminated settings before Path.
	# What is left starts with the Path value, possibly followed by
	# further settings after a semicolon.
	_shares_extract='s/.*;*[[:space:]]*\<path\>[[:space:]]*=[[:space:]]*//ip'

	# Stage 2, step a: if the value is unquoted (no semicolon or
	# double-quote in it), and is followed either by an optional
	# semicolon at end of line or by a semicolon and anything else,
	# keep just the value and wrap it in double-quotes.  A value
	# that was already quoted is left unchanged.  Either way the
	# pattern space now begins with a double-quoted value.
	_shares_quote='s/^\([^";][^";]*\)[[:space:]]*\(;*[[:space:]]*$\|;.*\)/"\1"/'

	# Stage 2, step b: print what is inside the leading
	# double-quotes and discard the rest of the line.
	_shares_unquote='s/^"\([^"][^"]*\)".*/\1/p'

	sed -n -e "$_shares_extract" "$nfs_exports_file" |
		sed -n -e "$_shares_quote" -e "$_shares_unquote"
}
##################################################
# "register" action: print the actions implemented by this callout,
# one per line.
nfs_register()
{
	printf '%s\n' \
		shutdown \
		startup \
		stop \
		start \
		check \
		stats \
		startipreallocate \
		releaseip \
		takeip \
		monitor-list-shares
}
##################################################
# Dispatch on the action given as the first argument; the remaining
# arguments are handed to the action's handler.
#
# Only shift when there is something to shift.  With no arguments a
# bare "shift" fails, which under "set -e" in a POSIX shell aborts
# the script before usage can be shown.  An empty action falls
# through to usage below.
action="${1:-}"
if [ $# -gt 0 ]; then
	shift
fi

case "$action" in
shutdown) nfs_shutdown ;;
startup) nfs_startup ;;
stop) service_stop "$1" ;;
start) service_start "$1" ;;
check) service_check "$1" ;;
stats) nfs_stats "$1" ;;
startipreallocate) nfs_startipreallocate ;;
releaseip) nfs_releaseip "$@" ;;
takeip) nfs_takeip "$@" ;;
monitor-list-shares) nfs_monitor_list_shares ;;
register) nfs_register ;;
monitor-pre | monitor-post)
	# Not required/implemented
	:
	;;
*)
	usage
	;;
esac