mirror of
https://github.com/samba-team/samba.git
synced 2025-01-26 10:04:02 +03:00
500c6e194b
Updating ctdb.tdb on each add-client, del-client and each delete during notify was too ambitious. Persistent transactions do not perform well enough to do this. Revert to having add-client and del-client create touch files. Each monitor event calls "statd-callout update" to convert touch files into ctdb.tdb records. Update testcases to do the "update" and add an extra test. Signed-off-by: Martin Schwenke <martin@meltin.net> Pair-programmed-with: Amitay Isaacs <amitay@gmail.com> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
255 lines
6.0 KiB
Bash
Executable File
255 lines
6.0 KiB
Bash
Executable File
#!/bin/sh
|
|
# script to manage nfs in a clustered environment
|
|
|
|
# Unless the caller already provided CTDB_BASE, derive it as the parent
# of the directory containing this script.  All expansions are quoted
# so that an installation path containing whitespace still works.
[ -n "$CTDB_BASE" ] || \
	export CTDB_BASE="$(cd -P "$(dirname "$0")" ; dirname "$PWD")"

# Source the helper functions used throughout this script.
. "$CTDB_BASE/functions"

# Ganesha recovery directory.  create_ganesha_recdirs turns this into
# a symlink pointing into the cluster filesystem.
GANRECDIR="/var/lib/nfs/ganesha"
# Directory in which the takeip/releaseip events touch event files.
GANRECDIR2="/var/lib/nfs/ganesha/recevents"
# Command used by get_cluster_fs_state to query the GPFS state.
GPFS_STATE="/usr/lpp/mmfs/bin/mmgetstate"
# Directory scanned by monitor_ganesha_nfsd for "red" files and for
# the name of the most recently modified file.
GANRECDIR3="/var/lib/nfs/ganesha_local"
|
|
|
|
|
|
# Start the Ganesha service: stop whatever may already be running,
# start it again, then set sys/net/ipv4/tcp_tw_recycle to 1 through
# the set_proc helper.
service_start ()
{
	for _op in stop start ; do
		startstop_ganesha "$_op"
	done

	set_proc "sys/net/ipv4/tcp_tw_recycle" 1
}
|
|
|
|
# Stop the Ganesha service by delegating to startstop_ganesha.
service_stop ()
{
	startstop_ganesha "stop"
}
|
|
|
|
# Reconfigure hook, run after the public IPs have been reallocated.
#
# If $CTDB_BASE/statd-callout is executable, run "statd-callout notify"
# in the background with all of its output discarded, so that statd
# listeners get pinged without blocking the caller.
#
# Returns: 0 when the callout was launched, non-zero when the callout
#          is missing or not executable (same as before).
service_reconfigure ()
{
	# Quoted so that a CTDB_BASE containing whitespace does not break
	# the test or the command invocation.
	[ -x "$CTDB_BASE/statd-callout" ] && {
		"$CTDB_BASE/statd-callout" notify &
	} >/dev/null 2>&1
}
|
|
|
|
# Load the "nfs" configuration through the loadconfig helper.
loadconfig "nfs"

# Fall back to GPFS when no cluster filesystem type is configured.
: "${CTDB_CLUSTER_FILESYSTEM_TYPE:=gpfs}"

service_name="nfs-ganesha-${CTDB_CLUSTER_FILESYSTEM_TYPE}"

# This script only applies when the NFS server mode is "ganesha".
if [ "${CTDB_NFS_SERVER_MODE:-${NFS_SERVER_MODE}}" != "ganesha" ] ; then
	exit 0
fi

ctdb_setup_service_state_dir

ctdb_start_stop_service

# Nothing more to do when the service is not managed by CTDB.
if ! is_ctdb_managed_service ; then
	exit 0
fi

ctdb_service_check_reconfigure

# File in which the GPFS node number is cached (see get_nodenum).
nodenum_file="${service_state_dir}/gpfs_nodenum"
|
|
|
|
# Print the state of the cluster filesystem on stdout.
#
# For "gpfs" the $GPFS_STATE command is run; its first three output
# lines are skipped and the third field of every remaining line is
# printed, left-justified in a 6 character column.  The result is also
# left in the global STATE.  Any other filesystem type is rejected
# via die.
get_cluster_fs_state ()
{
	if [ "$CTDB_CLUSTER_FILESYSTEM_TYPE" = "gpfs" ] ; then
		STATE=$($GPFS_STATE | awk 'NR <= 3 {next} {printf "%-6s", $3}')
		# Unquoted on purpose: echo drops the padding added by awk
		echo $STATE
	else
		die "File system $CTDB_CLUSTER_FILESYSTEM_TYPE not supported"
	fi
}
|
|
|
|
# Query GPFS for this node's configuration number (second field of the
# "mmlsconfig myNodeConfigNumber" output) and cache it in $nodenum_file.
#
# Globals: nodenum_file (read), NNUM (written)
# Returns: 0 when the file was written; 1 when no node number could be
#          obtained, in which case $nodenum_file is left untouched so
#          that an empty value never gets cached.
create_nodenum_file()
{
	NNUM=$(/usr/lpp/mmfs/bin/mmlsconfig myNodeConfigNumber | awk '{print $2}')
	if [ -z "$NNUM" ] ; then
		echo "Unable to determine GPFS node number" >&2
		return 1
	fi

	# $NNUM is deliberately unquoted so that any line breaks in the
	# command output are collapsed, as before.  The redirect target
	# is quoted to survive whitespace in the state directory path.
	echo $NNUM > "$nodenum_file"
}
|
|
|
|
# Print the cached GPFS node number on stdout, creating the cache file
# via create_nodenum_file first when it does not exist yet.
#
# Globals: nodenum_file (read)
get_nodenum()
{
	# Quoted: an unquoted path containing whitespace makes the test
	# fail with a syntax error and cat look at the wrong files.
	if [ ! -f "$nodenum_file" ]; then
		create_nodenum_file
	fi
	cat "$nodenum_file"
}
|
|
|
|
|
|
# Make sure the Ganesha recovery directories exist.
#
# $GANRECDIR is made a symlink to the $CTDB_GANESHA_REC_SUBDIR
# directory (default ".ganesha") on the first mount point, in sorted
# order of the "mount -t <type>" output, of the cluster filesystem.
# Anything else found at $GANRECDIR - a plain file or directory, a
# symlink to a different target, or a dangling symlink - is removed
# and replaced.  $GANRECDIR2 and $GANRECDIR3 are then created.
#
# NOTE: exits the whole script with status 0 when the cluster
# filesystem is not mounted.
create_ganesha_recdirs ()
{
	[ -n "$CTDB_GANESHA_REC_SUBDIR" ] || CTDB_GANESHA_REC_SUBDIR=".ganesha"

	_mounts=$(mount -t "$CTDB_CLUSTER_FILESYSTEM_TYPE")
	if [ -z "$_mounts" ]; then
		echo "startup $CTDB_CLUSTER_FILESYSTEM_TYPE not ready"
		exit 0
	fi
	_mntpt=$(echo "$_mounts" | sort | awk 'NR == 1 {print $3}')
	_link_dst="${_mntpt}/${CTDB_GANESHA_REC_SUBDIR}"
	mkdir -vp "$_link_dst"

	# Test for a symlink first: "-e" follows links, so it is false
	# for a dangling one, which would otherwise never be cleaned up
	# and would make the "ln" below fail.
	if [ -L "$GANRECDIR" ] ; then
		_t=$(readlink "$GANRECDIR")
		if [ "$_t" != "$_link_dst" ] ; then
			rm -v "$GANRECDIR"
		fi
	elif [ -e "$GANRECDIR" ] ; then
		rm -vrf "$GANRECDIR"
	fi

	# This is not an "else".  It also re-creates the link if it was
	# removed above!
	if [ ! -e "$GANRECDIR" ] && [ ! -L "$GANRECDIR" ] ; then
		ln -sv "$_link_dst" "$GANRECDIR"
	fi

	mkdir -p "$GANRECDIR2"
	mkdir -p "$GANRECDIR3"
}
|
|
|
|
# Monitor the Ganesha NFS daemon.
#
# Two checks are made, each with its own failure counter:
#
# 1. Process check (counter "<service_name>_process"): the PID read
#    from /var/run/ganesha.pid must belong to a process whose command
#    line mentions the Ganesha binary.  If not, and the cluster
#    filesystem state is "active", Ganesha is restarted and the
#    counter incremented.
#
# 2. Progress check (counter "nfs-ganesha-<fstype>_service"): only
#    done when the cluster filesystem is "active" and
#    CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK is not "yes".  Ganesha is
#    restarted when $GANRECDIR3 holds at least MAXREDS entries whose
#    name contains "red", or when the leading "_"-separated field of
#    the most recently modified entry there - used as a timestamp in
#    seconds since the epoch - is at least MAXSTALL seconds old.
#
# Globals: service_name is modified and is left set to
#          "nfs-ganesha-<fstype>_service" on return.
monitor_ganesha_nfsd ()
{
	create_ganesha_recdirs
	service_name=${service_name}_process

	PIDFILE="/var/run/ganesha.pid"
	CUR_STATE=$(get_cluster_fs_state)
	GANESHA="/usr/bin/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.nfsd"
	if { read PID < "$PIDFILE" && \
		grep "$GANESHA" "/proc/$PID/cmdline" ; } >/dev/null 2>&1 ; then
		ctdb_counter_init "$service_name"
	else
		# $CUR_STATE is quoted: it is empty when the state could
		# not be determined, which must not upset the test.
		if [ "$CUR_STATE" = "active" ]; then
			echo "Trying fast restart of NFS service"
			startstop_ganesha restart
			ctdb_counter_incr "$service_name"
			ctdb_check_counter "error" "-ge" "6" "$service_name"
		fi
	fi

	service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"_service
	# check that NFS is posting forward progress
	if [ "$CUR_STATE" = "active" ] && \
	   [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
		MAXREDS=2
		MAXSTALL=120
		RESTART=0

		NUMREDS=$(ls "$GANRECDIR3" | grep -c "red")
		# Leading field of the newest entry.  An explicit "%s"
		# keeps the file name from being used as a format string.
		LASTONE=$(ls -t "$GANRECDIR3" | sed 's/_/ /' | \
			awk 'NR > 1 {next} {printf "%s", $1}')
		# Beware of startup
		if [ -z "$LASTONE" ] ; then
			LASTONE=$(date +"%s")
		fi
		TNOW=$(date +"%s")
		TSTALL=$(($TNOW - $LASTONE))
		if [ "$NUMREDS" -ge "$MAXREDS" ] ; then
			echo "restarting because of $NUMREDS red conditions"
			RESTART=1
			ctdb_counter_incr "$service_name"
			ctdb_check_counter "error" "-ge" "6" "$service_name"
		fi
		if [ "$TSTALL" -ge "$MAXSTALL" ] ; then
			echo "restarting because of $TSTALL second stall"
			RESTART=1
			ctdb_counter_incr "$service_name"
			ctdb_check_counter "error" "-ge" "6" "$service_name"
		fi
		if [ "$RESTART" -gt 0 ] ; then
			startstop_ganesha restart
		else
			ctdb_counter_init "$service_name"
		fi
	fi
}
|
|
|
|
############################################################
|
|
|
|
# Dispatch on the CTDB event name passed as $1.
#
# For takeip and releaseip the remaining arguments are used as given:
# an event file named
#   <event>_<epoch seconds>_<node number>_<$3>_<$4>_<$2>
# is touched in $GANRECDIR2 (GPFS only, and only when $2 is set).
case "$1" in
init)
	# No work is done for this event.
	;;
startup)
	create_ganesha_recdirs
	ctdb_service_start
	create_nodenum_file
	;;

shutdown)
	ctdb_service_stop
	;;

takeip)
	if [ -n "$2" ] ; then
		case $CTDB_CLUSTER_FILESYSTEM_TYPE in
		gpfs)
			NNUM=$(get_nodenum)
			TDATE=$(date +"%s")
			TOUCHTGT="${1}_${TDATE}_${NNUM}_${3}_${4}_${2}"
			touch "$GANRECDIR2/$TOUCHTGT"
			;;
		esac
	fi
	ctdb_service_set_reconfigure
	;;

releaseip)
	if [ -n "$2" ] ; then
		case $CTDB_CLUSTER_FILESYSTEM_TYPE in
		gpfs)
			NNUM=$(get_nodenum)
			TDATE=$(date +"%s")
			TOUCHTGT="${1}_${TDATE}_${NNUM}_${3}_${4}_${2}"
			touch "$GANRECDIR2/$TOUCHTGT"
			# A second event file with a "my" prefix is
			# created for release events.
			TOUCHTGT="my${TOUCHTGT}"
			touch "$GANRECDIR2/$TOUCHTGT"
			;;
		esac
	fi
	ctdb_service_set_reconfigure
	;;

monitor)
	# Check that directories for shares actually exist.
	[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
		grep Path "/etc/ganesha/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.exports.conf" |
		cut -f2 -d\" | sort -u | ctdb_check_directories
	} || exit $?

	update_tickles 2049
	nfs_update_lock_info

	# check that statd responds to rpc requests
	# if statd is not running we try to restart it
	# we only do this IF we have a rpc.statd command.
	# For platforms where rpc.statd does not exist, we skip
	# the check completely
	#
	# "command -v" is the POSIX way to look up a command; unlike
	# "which" it does not depend on an external utility.
	p="rpc.statd"
	command -v "$p" >/dev/null 2>&1 && \
		nfs_check_rpc_service "statd" \
			% 10 "verbose restart:b unhealthy" \
			-ge 6 "verbose unhealthy" \
			-eq 4 "verbose restart" \
			-eq 2 "restart:b"

	if [ "$CTDB_SKIP_GANESHA_NFSD_CHECK" != "yes" ] ; then
		monitor_ganesha_nfsd
	fi

	# rquotad is sometimes not started correctly on RHEL5
	# not a critical service so we don't flag the node as unhealthy
	nfs_check_rpc_service "rquotad" \
		-gt 0 "verbose restart:b"
	;;

*)
	ctdb_standard_event_handler "$@"
	;;
esac

exit 0
|