samba-mirror/functions at 205c7c7663210abee37521edb2742cf6be54b11e

mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00

Martin Schwenke 205c7c7663 Eventscripts - enhance ctdb_replay_monitor_status()

Print useful output and return a suitable exit code.

The DISABLED and TIMEDOUT statuses use fake negative return codes, and
these can't be faked from the shell.  So we map DISABLED to OK and
TIMEDOUT to ERROR - this should avoid nearly all surprises.  When we
do this we add a note to the beginning of the output.  The alternative
is to "fix" ctdbd to use only codes that can actually be returned by
shell scripts.  However, the reason for using negative codes is
probably to distinguish them from real ones...

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit dda44d026e0c1b02feb02185b8c200a542be341a)

2011-08-31 15:34:43 +10:00

1428 lines

36 KiB

Bash

Executable File

Raw Blame History

 # Hey Emacs, this is a -*- shell-script -*- !!!
 # utility functions for ctdb event scripts
 # Search the standard system binary directories ahead of the inherited PATH.
 PATH="/bin:/usr/bin:/usr/sbin:/sbin:$PATH"
 # Supply default locations only when the environment does not provide them.
 if [ -z "$CTDB_VARDIR" ] ; then
     export CTDB_VARDIR="/var/ctdb"
 fi
 if [ -z "$CTDB_ETCDIR" ] ; then
     export CTDB_ETCDIR="/etc"
 fi
 #######################################
 # pull in a system config file, if any
 # Source the configuration file for the service named by $1.
 #
 # With no argument the name is taken from $service_config, falling back
 # to $service_name, and loading is delegated to loadconfig().  For any
 # name other than "ctdb" the "ctdb" configuration is loaded first.
 # Afterwards the first existing file of
 #   $CTDB_ETCDIR/sysconfig/NAME
 #   $CTDB_ETCDIR/default/NAME
 #   $CTDB_BASE/sysconfig/NAME
 # is sourced.  All paths are quoted so that directories containing
 # whitespace or glob characters are handled correctly.
 _loadconfig() {
     if [ -z "$1" ] ; then
 	foo="${service_config:-${service_name}}"
 	if [ -n "$foo" ] ; then
 	    loadconfig "$foo"
 	fi
     elif [ "$1" != "ctdb" ] ; then
 	loadconfig "ctdb"
     fi
     if [ -f "$CTDB_ETCDIR/sysconfig/$1" ]; then
 	. "$CTDB_ETCDIR/sysconfig/$1"
     elif [ -f "$CTDB_ETCDIR/default/$1" ]; then
 	. "$CTDB_ETCDIR/default/$1"
     elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
 	. "$CTDB_BASE/sysconfig/$1"
     fi
 }
 # Public entry point for loading configuration.  It only forwards to
 # _loadconfig(), which keeps this name free to be redefined elsewhere.
 loadconfig ()
 {
     _loadconfig "$@"
 }
 ##############################################################
 # make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level
 # (integer)
 #
 # If it is already set then do nothing, since it might have been set
 # via a file in rc.local.d/.  If it is not set then set it by sourcing
 # /var/ctdb/eventscript_debuglevel. If this file does not exist then
 # create it using output from "ctdb getdebug".  If the optional 1st arg
 # is "create" then don't source an existing file but create a new one
 # instead - this is useful for creating the file just once in each
 # event run in 00.ctdb.  If there's a problem getting the debug level
 # from ctdb then it is silently set to 0 - no use spamming logs if our
 # debug code is broken...
 # Ensure CTDB_CURRENT_DEBUGLEVEL is set, using a cache file under
 # $CTDB_VARDIR.  $1 = "create" forces the cache file to be regenerated
 # from "ctdb getdebug -Y" even if it already exists.
 ctdb_set_current_debuglevel ()
 {
     # A level that is already set is left alone.
     if [ -n "$CTDB_CURRENT_DEBUGLEVEL" ] ; then
         return 0
     fi
     _f="$CTDB_VARDIR/eventscript_debuglevel"
     if [ "$1" = "create" ] || [ ! -r "$_f" ] ; then
         _t=$(ctdb getdebug -Y 2>/dev/null)
         # Drop the trailing colon, then keep only the final field.
         _t="${_t%:}"
         _t="${_t##*:}"
         # An empty result (e.g. ctdb failed) is recorded as level 0.
         printf 'export CTDB_CURRENT_DEBUGLEVEL="%s"\n' "${_t:-0}" >"$_f"
     fi
     . "$_f"
 }
 # Emit debug output when CTDB_CURRENT_DEBUGLEVEL is at least 4.
 #
 # With arguments, they are printed on one line prefixed with "DEBUG: ".
 # Without arguments, and when stdin is not a terminal, every line of
 # stdin is prefixed with "DEBUG: " (handy with a here document).
 #
 # An unset, empty or non-numeric CTDB_CURRENT_DEBUGLEVEL is treated as
 # "debugging off" instead of making test(1) print an error.
 debug ()
 {
     if [ "${CTDB_CURRENT_DEBUGLEVEL:-0}" -ge 4 ] 2>/dev/null ; then
 	# If there are arguments then echo them.  Otherwise expect to
 	# use stdin, which allows us to pass lots of debug using a
 	# here document.
 	if [ -n "$1" ] ; then
 	    echo "DEBUG: $*"
 	elif ! tty -s ; then
 	    sed -e 's@^@DEBUG: @'
 	fi
     fi
 }
 ##############################################################
 # check number of args for different events
 # Validate the number of arguments ("$@" of the event script, event
 # name first) for the events that take parameters.  Prints an error
 # and exits 1 on a mismatch; other events are not checked.
 ctdb_check_args ()
 {
     case "$1" in
         takeip|releaseip)
             # event name + interface + IP + maskbits
             [ $# = 4 ] || {
                 echo "ERROR: must supply interface, IP and maskbits"
                 exit 1
             }
             ;;
         updateip)
             # event name + old interface + new interface + IP + maskbits
             [ $# = 5 ] || {
                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
                 exit 1
             }
             ;;
     esac
 }
 ##############################################################
 # determine on what type of system (init style) we are running
 # Set CTDB_INIT_STYLE to "suse", "debian" or "redhat" depending on which
 # init helper binary is present.  A value that is already set is kept.
 detect_init_style() {
     if [ -n "$CTDB_INIT_STYLE" ] ; then
         return 0
     fi
     # Start from the fallback and let more specific matches override it;
     # startproc is checked last so that it takes precedence.
     CTDB_INIT_STYLE="redhat"
     if [ -x /sbin/start-stop-daemon ] ; then
         CTDB_INIT_STYLE="debian"
     fi
     if [ -x /sbin/startproc ] ; then
         CTDB_INIT_STYLE="suse"
     fi
     return 0
 }
 ######################################################
 # simulate /sbin/service on platforms that don't have it
 # _service() makes it easier to hook the service() function for
 # testing.
 # Run operation $2 on service $1, optionally prefixed by the command in
 # $_nice.  Uses /sbin/service when available, otherwise the init script
 # under $CTDB_ETCDIR/init.d or $CTDB_ETCDIR/rc.d/init.d.
 _service ()
 {
   _service_name="$1"
   _op="$2"
   # Without a service name there is nothing to do.
   if test -z "$_service_name" ; then
       return 0
   fi
   # $_nice is deliberately unquoted: when empty it must vanish.
   if test -x /sbin/service ; then
       $_nice /sbin/service "$_service_name" "$_op"
   elif test -x $CTDB_ETCDIR/init.d/$_service_name ; then
       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
   elif test -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ; then
       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
   fi
 }
 # Run a service operation at normal priority: clears $_nice and hands
 # all arguments to _service().
 service ()
 {
     _nice=
     _service "$@"
 }
 ######################################################
 # simulate /sbin/service (niced) on platforms that don't have it
 # Run a service operation under nice(1): sets $_nice to "nice" and hands
 # all arguments to _service().
 nice_service ()
 {
     _nice='nice'
     _service "$@"
 }
 ######################################################
 # wrapper around /proc/ settings to allow them to be hooked
 # for testing
 # 1st arg is relative path under /proc/, 2nd arg is value to set
 # Write value $2 to the file /proc/$1.
 set_proc ()
 {
     # $1: path relative to /proc/    $2: value to store
     echo "$2" >"/proc/$1"
 }
 ######################################################
 # wrapper around getting file contents from /proc/ to allow
 # this to be hooked for testing
 # 1st arg is relative path under /proc/
 # Print the contents of the file /proc/$1 on stdout.
 get_proc ()
 {
     # $1: path relative to /proc/
     cat "/proc/$1"
 }
 ######################################################
 # Check that an RPC service is healthy -
 # this includes allowing a certain number of failures
 # before marking the NFS service unhealthy.
 #
 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 #
 # each triple is a set of 3 arguments: an operator, a
 # fail count limit and an action string.
 #
 # For example:
 #
 # 	nfs_check_rpc_service "lockd" \
 #	    -ge 15 "verbose restart unhealthy" \
 #	    -eq 10 "restart:bs"
 #
 # says that if lockd is down for 15 iterations then do
 # a verbose restart of lockd and mark the node unhealthy.
 # Before this, after 10 iterations of failure, the
 # service is restarted silently in the background.
 # Order is important: the number of failures need to be
 # specified in reverse order because processing stops
 # after the first condition that is true.
 ######################################################
 # Check RPC service $1 via ctdb_check_rpc and, while it keeps failing,
 # run the actions of the first (operator, limit, actions) triple whose
 # condition matches the current failure count.
 #
 # $1 is one of knfsd, mountd, rquotad, lockd or statd; anything else is
 # an internal error (exit 1).  The remaining arguments are consumed
 # three at a time.  Recognised action words are "verbose", "restart"
 # (optionally "restart:FLAGS" with flags q, s and b) and "unhealthy".
 #
 # Returns 0 and resets the failure counter when the service responds.
 # The "unhealthy" action exits the calling script with status 1.
 nfs_check_rpc_service ()
 {
     _prog_name="$1" ; shift
     # Defaults; the per-program cases below override what differs.
     _version=1
     _rpc_prog="$_prog_name"
     _restart=""
     _opts=""
     case "$_prog_name" in
 	knfsd)
 	    _rpc_prog=nfs
 	    _version=3
 	    _restart="echo 'Trying to restart NFS service'"
 	    _restart="${_restart}; startstop_nfs restart"
 	    ;;
 	mountd)
 	    # Only add "-p PORT" when MOUNTD_PORT is set.
 	    _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 	    ;;
 	rquotad)
 	    _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
 	    ;;
 	lockd)
 	    _rpc_prog=nlockmgr
 	    _version=4
 	    _restart="echo 'Trying to restart lock manager service'"
 	    _restart="${_restart}; startstop_nfslock restart"
 	    ;;
 	statd)
 	    _rpc_prog=status
 	    _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
 	    _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
 	    _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
 	    ;;
 	*)
 	    echo "Internal error: unknown RPC program \"$_prog_name\"."
 	    exit 1
     esac
     # The failure counter is kept per RPC program.
     _service_name="nfs_${_prog_name}"
     # Healthy: reset the counter and stop here.  The check's own output
     # is discarded; it remains available in $ctdb_check_rpc_out.
     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 	ctdb_counter_init "$_service_name"
 	return 0
     fi
     ctdb_counter_incr "$_service_name"
     # Walk the triples: $1 = operator, $2 = limit, $3 = action words.
     while [ -n "$3" ] ; do
 	# In "quiet" mode ctdb_check_counter returns non-zero when the
 	# condition holds, so the block below runs on a match.
 	ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
 	    for _action in $3 ; do
 		case "$_action" in
 		    verbose)
 			echo "$ctdb_check_rpc_out"
 			;;
 		    restart|restart:*)
 			# No explicit command specified, construct rpc command.
 			if [ -z "$_restart" ] ; then
 			    _p="rpc.${_prog_name}"
 			    _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
 			    _restart="${_restart}; killall -q -9 $_p"
 			    _restart="${_restart}; $_p $_opts"
 			fi
 			# Process restart flags...
 			_flags="${_action#restart:}"
 			# There may not have been a colon...
 			[ "$_flags" != "$_action" ] || _flags=""
 			# q=quiet - everything to /dev/null
 			if [ "${_flags#*q}" != "$_flags" ] ; then
 			    _restart="{ ${_restart} ; } >/dev/null 2>&1"
 			fi
 			# s=stealthy - last command to /dev/null
 			if [ "${_flags#*s}" != "$_flags" ] ; then
 			    _restart="${_restart} >/dev/null 2>&1"
 			fi
 			# b=background - the whole thing, easy and reliable
 			if [ "${_flags#*b}" != "$_flags" ] ; then
 			    _restart="{ ${_restart} ; } &"
 			fi
 			# Do it!
 			eval "${_restart}"
 			;;
 		    unhealthy)
 			exit 1
 			;;
 		    *)
 			echo "Internal error: unknown action \"$_action\"."
 			exit 1
 		esac
 	    done
 	    # Only process the first action group.
 	    break
 	}
 	shift 3
     done
 }
 ######################################################
 # check that a rpc server is registered with portmap
 # and responding to requests
 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 ######################################################
 # Probe RPC program $1, version $2, on localhost using "rpcinfo -u".
 # The probe's output is left in $ctdb_check_rpc_out.  On failure that
 # output is prefixed with an error line, printed, and 1 is returned.
 ctdb_check_rpc ()
 {
     progname="$1"
     version="$2"
     # Success needs no further processing.
     ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) && return 0
     ctdb_check_rpc_out="ERROR: $progname failed RPC check:
$ctdb_check_rpc_out"
     echo "$ctdb_check_rpc_out"
     return 1
 }
 ######################################################
 # check a set of directories is available
 # return 1 on a missing directory
 # usage: ctdb_check_directories_probe  (directories are read from stdin, one per line)
 ######################################################
 # Read directory names from stdin, one per line, and return 1 at the
 # first one that is not an accessible directory.  Names containing '%'
 # are skipped.  The name read last is left in $d for the caller.
 ctdb_check_directories_probe() {
     while IFS="" read d ; do
         # Skip entries containing a '%'.
         if [ "${d#*%}" != "$d" ] ; then
             continue
         fi
         [ -d "${d}/." ] || return 1
     done
 }
 ######################################################
 # check a set of directories is available
 # usage: ctdb_check_directories [SERVICE_NAME]  (directories are read from stdin, one per line)
 ######################################################
 # Run ctdb_check_directories_probe on stdin and, if it reports a
 # problem, print an error naming the service ($1, default
 # $service_name) and the directory left in $d, then exit 1.
 ctdb_check_directories() {
     n="${1:-${service_name}}"
     if ! ctdb_check_directories_probe ; then
         echo "ERROR: $n directory \"$d\" not available"
         exit 1
     fi
 }
 ######################################################
 # check a set of tcp ports
 # usage: ctdb_check_tcp_ports <ports...>
 ######################################################
 # This flag file is created when a service is initially started.  It
 # is deleted the first time TCP port checks for that service succeed.
 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 # message if a port check fails.
 # Compute the path of the "service started" flag file for
 # $service_name and leave it in $_ctdb_service_started_file.
 _ctdb_check_tcp_common ()
 {
     _ctdb_service_started_file="${ctdb_fail_dir}/${service_name}.started"
 }
 # Create the "service started" flag file (and its directory) for the
 # current service.
 ctdb_check_tcp_init ()
 {
     _ctdb_check_tcp_common
     # Strip the final path component to get the containing directory.
     mkdir -p "${_ctdb_service_started_file%/*}"
     touch "$_ctdb_service_started_file"
 }
 # Check that every TCP port given as an argument is being listened on.
 #
 # Each checker named in $CTDB_TCP_PORT_CHECKERS is tried in turn by
 # calling ctdb_check_tcp_ports_<checker> with the port list.  A checker
 # reports 0 (all ports OK), 1 (a port, left in $_p, is not listening)
 # or 127 (checker not usable, try the next one).  Any other status also
 # falls through to the next checker.
 #
 # Returns 0 on success, 1 when a port is not responding (an ERROR is
 # printed, or only an INFO while the service's "started" flag file
 # still exists) and 127 when no checker worked.  Exits 1 when called
 # without ports.
 ctdb_check_tcp_ports()
 {
     if [ -z "$1" ] ; then
 	echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 	exit 1
     fi
     # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
     # If any of these defaults are unsupported then this variable can
     # be overridden in /etc/sysconfig/ctdb or via a file in
     # /etc/ctdb/rc.local.d/.
     : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
     for _c in $CTDB_TCP_PORT_CHECKERS ; do
 	ctdb_check_tcp_ports_$_c "$@"
 	case "$?" in
 	    0)
 		# All ports answered: the service is fully up, so drop
 		# the "just started" flag.
 		_ctdb_check_tcp_common
 		rm -f "$_ctdb_service_started_file"
 		return 0
 		;;
 	    1)
 		_ctdb_check_tcp_common
 		if [ ! -f "$_ctdb_service_started_file" ] ; then
 		    echo "ERROR: $service_name tcp port $_p is not responding"
 		    debug <<EOF
$ctdb_check_tcp_ports_debug
EOF
 		else
 		    # Still starting up: be more subtle about it.
 		    echo "INFO: $service_name tcp port $_p is not responding"
 		fi
 		return 1
 		;;
 	    127)
 		debug <<EOF
ctdb_check_ports - checker $_c not implemented
output from checker was:
$ctdb_check_tcp_ports_debug
EOF
 		;;
 	    *)
 		# Unexpected status: try the next checker.
 		:
 		;;
 	esac
     done
     echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
     return 127
 }
 # Port checker based on "netstat -l -t -n".
 # Returns 0 when every port argument has a LISTEN entry on 0.0.0.0 or
 # ::, 1 at the first port (left in $_p) that has none, and 127 when
 # netstat itself could not be run.  Diagnostics are left in
 # $ctdb_check_tcp_ports_debug.
 ctdb_check_tcp_ports_netstat ()
 {
     _cmd='netstat -l -t -n'
     _ns=$($_cmd 2>&1)
     case "$?" in
         127)
             # The shell could not find netstat.
             ctdb_check_tcp_ports_debug="$_ns"
             return 127
             ;;
     esac
     for _p in "$@" ; do
         # A single pattern covers both wildcard listen addresses.
         _pat="[[:space:]](0\.0\.0\.0|::):${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
         if ! echo "$_ns" | grep -E -q "$_pat" ; then
             # Nothing is listening on this port.
             ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_ns"
             return 1
         fi
     done
     return 0
 }
 # Port checker based on nmap's greppable output for 127.0.0.1.
 # Returns 0 when every port argument is reported as open/tcp, 1 at the
 # first port (left in $_p) that is not, and 127 when nmap itself could
 # not be run.  Diagnostics are left in $ctdb_check_tcp_ports_debug.
 ctdb_check_tcp_ports_nmap ()
 {
     # Join the port arguments with commas, as nmap's -p option expects.
     _ports=$(IFS="," ; echo "$*")
     _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
     _nmap_out=$($_cmd 2>&1)
     case "$?" in
         127)
             # The shell could not find nmap.
             ctdb_check_tcp_ports_debug="$_nmap_out"
             return 127
             ;;
     esac
     # Keep only what follows "Ports:" on the line that carries it.
     _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
     for _p in "$@" ; do
         # An open port shows up like:  445/open/tcp//microsoft-ds///
         _t="$_p/open/tcp//"
         # With a space prepended, the entry must always follow a space,
         # whether it is first in the list or not.
         case " $_port_info" in
             *\ $_t*)
                 continue
                 ;;
         esac
         # This port is not open.
         ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_nmap_out"
         return 1
     done
     return 0
 }
 # Use the new "ctdb checktcpport" command to check the port.
 # This is very cheap.
 # Port checker based on "ctdb checktcpport PORT", which tries to bind
 # to the port.
 #
 # Exit status of the command, per port:
 #   0   - the bind succeeded, so nothing is listening  -> return 1
 #   98  - bind failed because the address is in use (EADDRINUSE on
 #         Linux), i.e. something is listening           -> next port
 #   any other status - assume the command is not implemented
 #                                                       -> return 127
 # Returns 0 when every port is in use.  The failing port is left in
 # $_p and diagnostics in $ctdb_check_tcp_ports_debug.
 ctdb_check_tcp_ports_ctdb ()
 {
     for _p ; do  # process each function argument (port)
 	_cmd="ctdb checktcpport $_p"
 	_out=$($_cmd 2>&1)
 	_ret=$?
 	case "$_ret" in
 	    0)
 		ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
 		return 1
 		;;
 	    98)
 		# Couldn't bind, something already listening, next port...
 		continue
 		;;
 	    *)
 		ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
$_out"
 		# assume not implemented
 		return 127
 		;;
 	esac
     done
     return 0
 }
 ######################################################
 # check a unix socket
 # usage: ctdb_check_unix_socket <socket_path>
 ######################################################
 # Check that a listening unix socket with path $1 appears in the
 # output of "netstat --unix -a -n".  Prints an error mentioning
 # $service_name and returns 1 when it does not.  An empty path is
 # accepted without any check.
 ctdb_check_unix_socket() {
     socket_path="$1"
     if [ -z "$socket_path" ] ; then
         return 0
     fi
     netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$" && return 0
     echo "ERROR: $service_name socket $socket_path not found"
     return 1
 }
 ######################################################
 # check a command returns zero status
 # usage: ctdb_check_command SERVICE_NAME <command>
 ######################################################
 # Run the command line in $2 (word-split, output discarded) and exit 1
 # with an error naming service $1 if it fails.  An empty command is
 # accepted without running anything.  Note that $service_name is
 # overwritten with $1.
 ctdb_check_command() {
   service_name="$1"
   wait_cmd="$2"
   if [ -z "$wait_cmd" ] ; then
       return 0
   fi
   if ! $wait_cmd >/dev/null 2>&1 ; then
       echo "ERROR: $service_name - $wait_cmd returned error"
       exit 1
   fi
 }
 ################################################
 # kill off any TCP connections with the given IP
 ################################################
 # Kill all ESTABLISHED TCP connections involving IP address $1.
 #
 # The connections are collected from "netstat -tn" (plain and
 # ::ffff:-mapped form) into a scratch file under $CTDB_VARDIR/state and
 # each one is reset with "ctdb killtcp".  Connections to the CIFS ports
 # (139 and 445) are only killed in one direction, all others in both.
 # Afterwards the function waits a few seconds for the connections to
 # disappear and reports the outcome on stdout.
 kill_tcp_connections() {
     _IP="$1"
     _failed=0
     _killcount=0
     connfile="$CTDB_VARDIR/state/connections.$_IP"
     # netstat columns 4 and 5 are the local and the foreign address.
     netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
     netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"
     while read -r dest src; do
 	# Split ADDRESS:PORT at the last colon.
 	srcip="${src%:*}"
 	srcport="${src##*:}"
 	destip="${dest%:*}"
 	destport="${dest##*:}"
 	echo "Killing TCP connection $srcip:$srcport $destip:$destport"
 	ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
 	case "$destport" in
 	    # we only do one-way killtcp for CIFS
 	    139|445) : ;;
 	    # for all others we do 2-way
 	    *)
 		ctdb killtcp "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
 		;;
 	esac
 	_killcount=$((_killcount + 1))
     done < "$connfile"
     rm -f "$connfile"
     [ $_failed = 0 ] || {
 	echo "Failed to send killtcp control"
 	return;
     }
     [ $_killcount -gt 0 ] || {
 	return;
     }
     # Give the connections a few seconds to actually go away.
     _count=0
     while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 	sleep 1
 	_count=$((_count + 1))
 	[ $_count -gt 3 ] && {
 	    echo "Timed out killing tcp connections for IP $_IP"
 	    return;
 	}
     done
     echo "killed $_killcount TCP connections to released IP $_IP"
 }
 ##################################################################
 # kill off the local end for any TCP connections with the given IP
 ##################################################################
 # Kill the local end of all ESTABLISHED TCP connections involving IP
 # address $1.
 #
 # The connections are collected from "netstat -tn" (plain and
 # ::ffff:-mapped form) into a scratch file under $CTDB_VARDIR/state and
 # each one is reset, in one direction only, with "ctdb killtcp".
 # Afterwards the function waits a few seconds for the connections to
 # disappear and reports the outcome on stdout.
 kill_tcp_connections_local_only() {
     _IP="$1"
     _failed=0
     _killcount=0
     connfile="$CTDB_VARDIR/state/connections.$_IP"
     # netstat columns 4 and 5 are the local and the foreign address.
     netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
     netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"
     while read -r dest src; do
 	# Split ADDRESS:PORT at the last colon.
 	srcip="${src%:*}"
 	srcport="${src##*:}"
 	destip="${dest%:*}"
 	destport="${dest##*:}"
 	echo "Killing TCP connection $srcip:$srcport $destip:$destport"
 	ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
 	_killcount=$((_killcount + 1))
     done < "$connfile"
     rm -f "$connfile"
     [ $_failed = 0 ] || {
 	echo "Failed to send killtcp control"
 	return;
     }
     [ $_killcount -gt 0 ] || {
 	return;
     }
     # Give the connections a few seconds to actually go away.
     _count=0
     while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 	sleep 1
 	_count=$((_count + 1))
 	[ $_count -gt 3 ] && {
 	    echo "Timed out killing tcp connections for IP $_IP"
 	    return;
 	}
     done
     echo "killed $_killcount TCP connections to released IP $_IP"
 }
 ##################################################################
 # tickle any TCP connections with the given IP
 ##################################################################
 # Send a "ctdb tickle" in both directions for every ESTABLISHED TCP
 # connection involving IP address $1.
 #
 # The connections are collected from "netstat -tn" (plain and
 # ::ffff:-mapped form) into a scratch file under $CTDB_VARDIR/state.
 # A message is printed if any tickle could not be sent.
 tickle_tcp_connections() {
     _IP="$1"
     _failed=0
     connfile="$CTDB_VARDIR/state/connections.$_IP"
     # netstat columns 4 and 5 are the local and the foreign address.
     netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
     netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"
     while read -r dest src; do
 	# Split ADDRESS:PORT at the last colon.
 	srcip="${src%:*}"
 	srcport="${src##*:}"
 	destip="${dest%:*}"
 	destport="${dest##*:}"
 	echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
 	ctdb tickle "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
 	echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
 	ctdb tickle "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
     done < "$connfile"
     rm -f "$connfile"
     [ $_failed = 0 ] || {
 	echo "Failed to send tickle control"
 	return;
     }
 }
 ########################################################
 # start/stop the nfs service on different platforms
 ########################################################
 # Start, stop or restart ($1) the NFS service.
 # The platform is "sles" when $CTDB_ETCDIR/init.d/nfsserver is
 # executable and "rhel" when $CTDB_ETCDIR/init.d/nfslock is (the latter
 # wins if both are).  On any other platform an error is printed and
 # the script exits 1.
 startstop_nfs() {
 	PLATFORM="unknown"
 	if [ -x $CTDB_ETCDIR/init.d/nfsserver ] ; then
 		PLATFORM="sles"
 	fi
 	if [ -x $CTDB_ETCDIR/init.d/nfslock ] ; then
 		PLATFORM="rhel"
 	fi
 	# Dispatch on platform and operation together.
 	case "$PLATFORM:$1" in
 	sles:start)
 		service nfsserver start
 		;;
 	sles:stop)
 		service nfsserver stop > /dev/null 2>&1
 		;;
 	sles:restart)
 		set_proc "fs/nfsd/threads" 0
 		service nfsserver stop > /dev/null 2>&1
 		pkill -9 nfsd
 		service nfsserver start
 		;;
 	rhel:start)
 		service nfslock start
 		service nfs start
 		;;
 	rhel:stop)
 		service nfs stop
 		service nfslock stop
 		;;
 	rhel:restart)
 		set_proc "fs/nfsd/threads" 0
 		service nfs stop > /dev/null 2>&1
 		service nfslock stop > /dev/null 2>&1
 		pkill -9 nfsd
 		service nfslock start
 		service nfs start
 		;;
 	unknown:*)
 		echo "Unknown platform. NFS is not supported with ctdb"
 		exit 1
 		;;
 	esac
 }
 ########################################################
 # start/stop the nfs lockmanager service on different platforms
 ########################################################
 # Start, stop or restart ($1) the NFS lock manager.
 # The platform is "sles" when $CTDB_ETCDIR/init.d/nfsserver is
 # executable and "rhel" when $CTDB_ETCDIR/init.d/nfslock is (the latter
 # wins if both are).  On any other platform an error is printed and
 # the script exits 1.
 startstop_nfslock() {
 	PLATFORM="unknown"
 	if [ -x $CTDB_ETCDIR/init.d/nfsserver ] ; then
 		PLATFORM="sles"
 	fi
 	if [ -x $CTDB_ETCDIR/init.d/nfslock ] ; then
 		PLATFORM="rhel"
 	fi
 	# Dispatch on platform and operation together.
 	case "$PLATFORM:$1" in
 	# for sles there is no service for lockmanager
 	# so we instead just shutdown/restart nfs
 	sles:start)
 		service nfsserver start
 		;;
 	sles:stop)
 		service nfsserver stop > /dev/null 2>&1
 		;;
 	sles:restart)
 		service nfsserver stop
 		service nfsserver start
 		;;
 	rhel:start)
 		service nfslock start
 		;;
 	rhel:stop)
 		service nfslock stop > /dev/null 2>&1
 		;;
 	rhel:restart)
 		service nfslock stop
 		service nfslock start
 		;;
 	unknown:*)
 		echo "Unknown platform. NFS locking is not supported with ctdb"
 		exit 1
 		;;
 	esac
 }
 # Add IP $2 with netmask bits $3 to interface $1 by running
 # "$CTDB_BASE/interface_modify.sh add" under a per-interface flock
 # (30 second timeout).  The lock file and the re-add directory live
 # under $CTDB_VARDIR/state/interface_modify.
 # Returns the status of flock, or of mkdir if the state directory
 # cannot be created.  All paths are quoted so whitespace is safe.
 add_ip_to_iface()
 {
 	local _iface="$1"
 	local _ip="$2"
 	local _maskbits="$3"
 	local _state_dir="$CTDB_VARDIR/state/interface_modify"
 	local _lockfile="$_state_dir/$_iface.flock"
 	local _readd_base="$_state_dir/$_iface.readd.d"
 	mkdir -p "$_state_dir" || {
 		ret=$?
 		echo "Failed to mkdir -p $_state_dir - $ret"
 		return $ret
 	}
 	# flock needs an existing file to lock.
 	[ -f "$_lockfile" ] || touch "$_lockfile"
 	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
 }
 # Remove IP $2 with netmask bits $3 from interface $1 by running
 # "$CTDB_BASE/interface_modify.sh delete" under a per-interface flock
 # (30 second timeout).  The lock file and the re-add directory live
 # under $CTDB_VARDIR/state/interface_modify.
 # Returns the status of flock, or of mkdir if the state directory
 # cannot be created.  All paths are quoted so whitespace is safe.
 delete_ip_from_iface()
 {
 	local _iface="$1"
 	local _ip="$2"
 	local _maskbits="$3"
 	local _state_dir="$CTDB_VARDIR/state/interface_modify"
 	local _lockfile="$_state_dir/$_iface.flock"
 	local _readd_base="$_state_dir/$_iface.readd.d"
 	mkdir -p "$_state_dir" || {
 		ret=$?
 		echo "Failed to mkdir -p $_state_dir - $ret"
 		return $ret
 	}
 	# flock needs an existing file to lock.
 	[ -f "$_lockfile" ] || touch "$_lockfile"
 	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
 }
 # Register re-add script $4 for IP $2/$3 on interface $1 by running
 # "$CTDB_BASE/interface_modify.sh readd_script" under a per-interface
 # flock (30 second timeout).  The lock file and the re-add directory
 # live under $CTDB_VARDIR/state/interface_modify.
 # Returns the status of flock, or of mkdir if the state directory
 # cannot be created.  All paths are quoted so whitespace is safe.
 setup_iface_ip_readd_script()
 {
 	local _iface="$1"
 	local _ip="$2"
 	local _maskbits="$3"
 	local _readd_script="$4"
 	local _state_dir="$CTDB_VARDIR/state/interface_modify"
 	local _lockfile="$_state_dir/$_iface.flock"
 	local _readd_base="$_state_dir/$_iface.readd.d"
 	mkdir -p "$_state_dir" || {
 		ret=$?
 		echo "Failed to mkdir -p $_state_dir - $ret"
 		return $ret
 	}
 	# flock needs an existing file to lock.
 	[ -f "$_lockfile" ] || touch "$_lockfile"
 	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
 }
 ########################################################
 # some simple logic for counting events - per eventscript
 # usage: ctdb_counter_init
 #        ctdb_counter_incr
 #        ctdb_check_counter_limit <limit>
 # ctdb_check_counter_limit prints an error and exits 1 when count >= <limit>
 ########################################################
 # Work out the counter file for service $1 (default $service_name),
 # leaving the name in $_service_name and the path in $_counter_file,
 # and make sure the file's directory exists.
 _ctdb_counter_common ()
 {
     if [ -n "$1" ] ; then
         _service_name="$1"
     else
         _service_name="${service_name}"
     fi
     _counter_file="${ctdb_fail_dir}/${_service_name}"
     # Everything up to the last '/' is the directory part.
     mkdir -p "${_counter_file%/*}"
 }
 # Reset the failure counter of service $1 (default $service_name) by
 # truncating its counter file.
 ctdb_counter_init ()
 {
     _ctdb_counter_common "$1"
     : >"$_counter_file"
 }
 # Increment the failure counter of service $1 (default $service_name).
 # The counter is unary: the count is the size of the file in bytes.
 ctdb_counter_incr ()
 {
     _ctdb_counter_common "$1"
     printf '1' >>"$_counter_file"
 }
 # Compare the failure count of $service_name with limit $1 (default
 # $service_fail_limit).  At or above the limit an error is printed and
 # the script exits 1.  Below it, a warning is printed for a non-zero
 # count unless $2 is non-empty.
 ctdb_check_counter_limit ()
 {
     _ctdb_counter_common
     _limit="${1:-${service_fail_limit}}"
     _quiet="$2"
     # The count is the counter file's size; a missing file counts as 0.
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
     if [ $_size -ge $_limit ] ; then
         echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
         exit 1
     fi
     if [ $_size -gt 0 ] && [ -z "$_quiet" ] ; then
         echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
     fi
     return 0
 }
 # Return 1 when the failure count of $service_name equals $1, and 0
 # otherwise.
 ctdb_check_counter_equal ()
 {
     _ctdb_counter_common
     _limit=$1
     # The count is the counter file's size; a missing file counts as 0.
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
     # Invert the comparison: equality is reported as failure.
     ! [ $_size -eq $_limit ]
 }
 # Test the failure count of a service against a limit.
 #
 # $1 - message type: "error" (default) prints an error and exits 1
 #      when the condition holds; anything else just returns 1.
 # $2 - integer comparison operator understood by test (default -ge).
 # $3 - limit (default $service_fail_limit).
 # $4 - service name (default $service_name).
 #
 # Returns 0 when the condition does not hold.
 ctdb_check_counter () {
     _msg="${1:-error}"  # "error"  - anything else is silent on fail
     _op="${2:--ge}"  # an integer operator supported by test
     _limit="${3:-${service_fail_limit}}"
     # "shift 3" fails and shifts nothing when fewer than 3 arguments
     # were given, which would leave the message type in $1 and have it
     # mistaken for the service name.  Drop whatever is there instead.
     if [ $# -ge 3 ] ; then
 	shift 3
     else
 	set --
     fi
     _ctdb_counter_common "$1"
     # unary counting!
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
     if [ "$_size" "$_op" "$_limit" ] ; then
 	if [ "$_msg" = "error" ] ; then
 	    echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
 	    exit 1
 	else
 	    return 1
 	fi
     fi
 }
 ########################################################
 # Directories below $CTDB_VARDIR holding per-script status files and
 # per-service failure counters.
 ctdb_status_dir="${CTDB_VARDIR}/status"
 ctdb_fail_dir="${CTDB_VARDIR}/failcount"
 # Create the state directory for service $1 (default $service_name)
 # under $CTDB_VARDIR/state and leave its path in $service_state_dir.
 # Exits 1 with a message if the directory cannot be created.
 ctdb_setup_service_state_dir ()
 {
     service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
     if ! mkdir -p "$service_state_dir" ; then
         echo "Error creating state dir \"$service_state_dir\""
         exit 1
     fi
 }
 ########################################################
 # Managed status history, for auto-start/stop
 # Directory holding one flag file per service that has been managed.
 ctdb_managed_dir="${CTDB_VARDIR}/managed_history"
 # Work out the "managed" flag file for service $1 (default
 # $service_name), leaving the name in $_service_name and the path in
 # $_ctdb_managed_file.
 _ctdb_managed_common ()
 {
     if [ -n "$1" ] ; then
         _service_name="$1"
     else
         _service_name="${service_name}"
     fi
     _ctdb_managed_file="${ctdb_managed_dir}/${_service_name}"
 }
 # Record that service $1 (default $service_name) is managed by
 # creating its flag file, and the flag directory if necessary.
 ctdb_service_managed ()
 {
     _ctdb_managed_common "$@"
     mkdir -p "${ctdb_managed_dir}"
     touch "${_ctdb_managed_file}"
 }
 # Record that service $1 (default $service_name) is no longer managed
 # by removing its flag file.
 ctdb_service_unmanaged ()
 {
     _ctdb_managed_common "$@"
     rm -f "${_ctdb_managed_file}"
 }
 # Succeed when the "managed" flag file of service $1 (default
 # $service_name) exists.
 is_ctdb_previously_managed_service ()
 {
     _ctdb_managed_common "$@"
     test -f "${_ctdb_managed_file}"
 }
 ########################################################
 # Check and set status
 # Print a one-line report that the node is in state $1, quoting
 # $script_name and the contents of file $2 as the reason.
 log_status_cat ()
 {
     printf 'node is "%s", "%s" reports problem: %s\n' \
         "$1" "${script_name}" "$(cat $2)"
 }
 # Report the status recorded for $script_name under $ctdb_status_dir:
 # returns 1 (and logs) if an "unhealthy" file is readable, 2 (and
 # logs) if a "banned" file is readable, otherwise 0.
 ctdb_checkstatus ()
 {
     if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
         log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
         return 1
     fi
     if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
         log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
         return 2
     fi
     return 0
 }
 # Record a status for $script_name under $ctdb_status_dir.
 # For $1 = "unhealthy" or "banned" the contents of file $2 are stored
 # as the reason; any other value clears both recorded states.
 ctdb_setstatus ()
 {
     d="$ctdb_status_dir/$script_name"
     if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
         mkdir -p "$d"
         cat "$2" >"$d/$1"
     else
         for i in "banned" "unhealthy" ; do
             rm -f "$d/$i"
         done
     fi
 }
 ##################################################################
 # Reconfigure a service on demand
 # Create the status directory of service $1 (default $service_name),
 # leaving its path in $_d and the path of its "reconfigure" flag file
 # in $_ctdb_service_reconfigure_flag.
 _ctdb_service_reconfigure_common ()
 {
     _d="${ctdb_status_dir}/${1:-${service_name}}"
     mkdir -p "${_d}"
     _ctdb_service_reconfigure_flag="${_d}/reconfigure"
 }
 # Succeed when a reconfigure has been requested for service $1
 # (default $service_name), i.e. its flag file exists.
 ctdb_service_needs_reconfigure ()
 {
     _ctdb_service_reconfigure_common "$@"
     test -e "${_ctdb_service_reconfigure_flag}"
 }
 # Request a reconfigure of service $1 (default $service_name) by
 # creating (or truncating) its flag file.
 ctdb_service_set_reconfigure ()
 {
     _ctdb_service_reconfigure_common "$@"
     : >"${_ctdb_service_reconfigure_flag}"
 }
 # Cancel a pending reconfigure request for service $1 (default
 # $service_name) by removing its flag file.
 ctdb_service_unset_reconfigure ()
 {
     _ctdb_service_reconfigure_common "$@"
     rm -f "${_ctdb_service_reconfigure_flag}"
 }
 # Reconfigure a service: announce it, clear the pending-reconfigure
 # flag, call service_reconfigure() and, if that succeeds, reset the
 # failure counter.  A failing service_reconfigure() status is returned
 # unchanged.
 ctdb_service_reconfigure ()
 {
     echo "Reconfiguring service \"$@\"..."
     ctdb_service_unset_reconfigure "$@"
     # A bare "return" hands back the status of the failed command.
     service_reconfigure "$@" || return
     ctdb_counter_init "$@"
 }
 # Default service_reconfigure() function.
 # Default reconfigure action: restart service $1 (default
 # $service_name) through service().
 service_reconfigure ()
 {
     service "${1:-${service_name}}" restart
 }
 # Try to claim the reconfigure lock of service $1 (default
 # $service_name) for the current process.
 #
 # The lock file lives in the service's status directory and records
 # the event name on its first line followed by one PID per line.
 # Returns 1 when the file names another PID that is still alive (as
 # judged by "kill -0"); otherwise rewrites the file with $event_name
 # and this shell's PID and returns 0.
 ctdb_reconfigure_try_lock ()
 {
     _ctdb_service_reconfigure_common "$@"
     _lock="${_d}/reconfigure_lock"
     touch "$_lock"
     # The subshell reads the lock file on stdin; its exit status is the
     # function's return value.
     (
 	# Lock file descriptor 0, i.e. the lock file itself, while the
 	# file is inspected and rewritten.
 	flock 0
 	# This is overkill but will work if we need to extend this to
 	# allow certain events to run multiple times in parallel
 	# (e.g. takeip) and write multiple PIDs to the file.
 	read _locker_event
 	if [ -n "$_locker_event" ] ; then
 	    # Any live PID other than our own means the lock is held.
 	    while read _pid ; do
 		if [ -n "$_pid" -a "$_pid" != $$ ] && \
 		    kill -0 "$_pid" 2>/dev/null ; then
 		    exit 1
 		fi
 	    done
 	fi
 	# Nobody else holds it: record ourselves as the holder.
 	printf "%s\n%s\n" "$event_name" $$ >"$_lock"
 	exit 0
     ) <"$_lock"
 }
 # Re-emit the previous "monitor" result of $script_name, as reported by
 # "ctdb scriptstatus -Y": print its output and exit the script with
 # its return code.  This function does not return.
 #
 # TIMEDOUT is replayed as exit code 1 and DISABLED as exit code 0, each
 # with a note prepended to the output.
 ctdb_replay_monitor_status ()
 {
     echo "Replaying previous status for this script due to reconfigure..."
     # Leading colon (':') is missing in some versions...
     _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
     # Output looks like this:
     # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
     # This is the cheapest way of getting fields in the middle.
     # The colon-separated fields become positional parameters; an empty
     # leading field (from a leading colon) disappears in the second,
     # whitespace-based split, so $3 and $4 are code and status either way.
     set -- $(IFS=":" ; echo $_out)
     _code="$3"
     _status="$4"
     # The error output field can include colons so we'll try to
     # preserve them.  The weak checking at the beginning tries to make
     # this work for both broken (no leading ':') and fixed output.
     _out="${_out%:}"
     # Strip everything up to and including the 4 fields that follow the
     # script name (code, status and the two timestamps).
     _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
     case "$_status" in
 	OK) : ;;  # Do nothing special.
 	TIMEDOUT)
 	    # Recast this as an error, since we can't exit with the
 	    # correct negative number.
 	    _code=1
 	    _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
 	    ;;
 	DISABLED)
 	    # Recast this as an OK, since we can't exit with the
 	    # correct negative number.
 	    _code=0
 	    _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
 	    ;;
 	*) : ;;  # Must be ERROR, do nothing special.
     esac
     echo "$_err_out"
     exit $_code
 }
 # Handle pending reconfigure requests for service $1 (default
 # $service_name) during the "monitor", "ipreallocated" and
 # "reconfigure" events; all other events return 0 immediately.
 #
 # Summary of outcomes, depending on whether the reconfigure lock could
 # be taken:
 #   reconfigure   - lock taken: reconfigure in a subshell and exit
 #                   with its status; otherwise exit 2.
 #   ipreallocated - lock taken: reconfigure if one is pending;
 #                   otherwise return 0 without reconfiguring.
 #   monitor       - lock taken: if a reconfigure is pending, do it and
 #                   replay the previous monitor status (which exits);
 #                   otherwise replay the previous monitor status.
 ctdb_service_check_reconfigure ()
 {
     # Make the service name explicit for the helpers called below.
     [ -n "$1" ] || set -- "$service_name"
     # We only care about some events in this function.  For others we
     # return now.
     case "$event_name" in
 	monitor|ipreallocated|reconfigure) : ;;
 	*) return 0 ;;
     esac
     if ctdb_reconfigure_try_lock "$@" ; then
 	# No events covered by this function are running, so proceed
 	# with gay abandon.
 	case "$event_name" in
 	    reconfigure)
 		# Run in a subshell so that the status can be collected
 		# here even if something inside calls exit.
 		(ctdb_service_reconfigure "$@")
 		exit $?
 		;;
 	    ipreallocated)
 		if ctdb_service_needs_reconfigure "$@" ; then
 		    ctdb_service_reconfigure "$@"
 		fi
 		;;
 	    monitor)
 		if ctdb_service_needs_reconfigure "$@" ; then
 		    ctdb_service_reconfigure "$@"
 		    # Given that the reconfigure might not have
 		    # resulted in the service being stable yet, we
 		    # replay the previous status since that's the best
 		    # information we have.
 		    ctdb_replay_monitor_status
 		fi
 		;;
 	esac
     else
 	# Somebody else is running an event we don't want to collide
 	# with.  We proceed with caution.
 	case "$event_name" in
 	    reconfigure)
 		# Tell whoever called us to retry.
 		exit 2
 		;;
 	    ipreallocated)
 		# Defer any scheduled reconfigure and just run the
 		# rest of the ipreallocated event, as per the
 		# eventscript.  There's an assumption here that the
 		# event doesn't depend on any scheduled reconfigure.
 		# This is true in the current code.
 		return 0
 		;;
 	    monitor)
 		# There is most likely a reconfigure in progress so
 		# the service is possibly unstable.  As above, we
 		# defer any scheduled reconfigured.  We also replay
 		# the previous monitor status since that's the best
 		# information we have.
 		ctdb_replay_monitor_status
 		;;
 	esac
     fi
 }
 ##################################################################
 # Does CTDB manage this service? - and associated auto-start/stop
 # Backward-compatibility helper: when legacy setting $1 is "yes" and
 # service $2 is the one currently being looked up ($_service_name),
 # append $2 to $CTDB_MANAGED_SERVICES.
 ctdb_compat_managed_service ()
 {
     if [ "$1" = "yes" ] && [ "$2" = "$_service_name" ] ; then
         CTDB_MANAGED_SERVICES="${CTDB_MANAGED_SERVICES} $2"
     fi
 }
 # Succeed when service $1 (default $service_name) is listed in
 # $CTDB_MANAGED_SERVICES, either directly or after the legacy
 # CTDB_MANAGES_* settings have been folded in.
 is_ctdb_managed_service ()
 {
     _service_name="${1:-${service_name}}"
     # Pad with spaces so that only whole words can match.
     t=" $CTDB_MANAGED_SERVICES "
     case "$t" in
         *\ ${_service_name}\ *)
             return 0
             ;;
     esac
     # No direct match: update $CTDB_MANAGED_SERVICES from the legacy
     # variables for backward compatibility and look again.
     ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
     ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
     ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
     ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
     ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"
     t=" $CTDB_MANAGED_SERVICES "
     case "$t" in
         *\ ${_service_name}\ *)
             return 0
             ;;
         *)
             return 1
             ;;
     esac
 }
 # Automatically start or stop a service whose "managed" state has
 # changed.  $1 is the service name (default: $service_name).
 #
 # Only acts when $CTDB_SERVICE_AUTOSTARTSTOP is "yes" and the current
 # event is "monitor".  When a start or stop is attempted the calling
 # script exits with the status of that attempt; otherwise returns 0.
 ctdb_start_stop_service ()
 {
     # Do nothing unless configured to...
     if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
         return 0
     fi

     _service_name="${1:-${service_name}}"

     # ... and only ever during a monitor event.
     case "$event_name" in
         monitor) : ;;
         *) return 0 ;;
     esac

     if is_ctdb_managed_service "$_service_name" ; then
         # Managed now and managed before: nothing has changed.
         if is_ctdb_previously_managed_service "$_service_name" ; then
             return 0
         fi
         echo "Starting service \"$_service_name\" - now managed"
         ctdb_service_start "$_service_name"
         exit $?
     fi

     # Unmanaged now and unmanaged before: nothing has changed.
     if ! is_ctdb_previously_managed_service "$_service_name" ; then
         return 0
     fi
     echo "Stopping service \"$_service_name\" - no longer managed"
     ctdb_service_stop "$_service_name"
     exit $?
 }
 # Mark a service as managed and start it.  All arguments are handed
 # on unchanged to the helpers.  Returns the status of service_start()
 # if that fails, otherwise the status of ctdb_check_tcp_init().
 ctdb_service_start ()
 {
     # A service counts as managed from the first time we try to
     # start it, whether or not the start succeeds.
     ctdb_service_managed "$@"

     # "$@" rather than a defaulted name is passed on so that
     # service_start() can tell when no argument was given.
     if service_start "$@" ; then
         ctdb_counter_init "$@"
         ctdb_check_tcp_init
     else
         return $?
     fi
 }
 # Mark a service as no longer managed and then stop it.  All arguments
 # are handed on unchanged; the result is the status of service_stop().
 ctdb_service_stop ()
 {
     ctdb_service_unmanaged "$@"

     service_stop "$@"
     return $?
 }
 # Default service_start() and service_stop() functions.
 # These may be overridden in an eventscript.  When overriding, the
 # following convention must be followed.  If these functions are
 # called with no arguments then they may use internal logic to
 # determine whether the service is managed and, therefore, whether
 # they should take any action.  However, if the service name is
 # specified as an argument then an attempt must be made to start or
 # stop the service.  This is because the auto-start/stop code calls
 # them with the service name as an argument.
 service_start ()
 {
     # Default implementation: start the service named by $1, falling
     # back to $service_name, via the "service" command.
     set -- "${1:-${service_name}}"
     service "$1" start
 }
 service_stop ()
 {
     # Default implementation: stop the service named by $1, falling
     # back to $service_name, via the "service" command.
     set -- "${1:-${service_name}}"
     service "$1" stop
 }
 ##################################################################
 # Handle the events that are treated the same way by every script.
 #   status    - run ctdb_checkstatus and exit with its status
 #   setstatus - run ctdb_setstatus with the remaining arguments and
 #               exit with its status
 # For any other event this returns 0 so that the caller carries on.
 ctdb_standard_event_handler ()
 {
     if [ "$1" = "status" ] ; then
         ctdb_checkstatus
         exit $?
     elif [ "$1" = "setstatus" ] ; then
         shift
         ctdb_setstatus "$@"
         exit $?
     fi
 }
 # Print the network address that an IPv4 host address belongs to.
 #   $1 - host address in dotted-quad notation, e.g. 192.168.1.77;
 #        it is not validated here (see ipv4_is_valid_addr)
 #   $2 - number of mask bits (prefix length), 0..32
 # Output: the dotted-quad network address on stdout, e.g. 192.168.1.0
 # Returns: non-zero, with a message on stderr and nothing on stdout,
 #          if $2 is not an integer in the range 0..32.
 #
 # Only shell builtins are used: the octets are split with parameter
 # expansion and the arithmetic uses shifts rather than the "**"
 # extension, so no external process is forked.
 ipv4_host_addr_to_net_addr()
 {
 	local HOST="$1"
 	local MASKBITS="$2"
 	local REST OCT0 OCT1 OCT2 OCT3 HOST_NUM MASK_NUM NET_NUM

 	# A shift by an out-of-range count would silently give garbage,
 	# so insist on a plain decimal prefix length.
 	case "$MASKBITS" in
 	    [0-9]|[12][0-9]|3[0-2]) : ;;
 	    *)
 		echo "ipv4_host_addr_to_net_addr: invalid mask bits '$MASKBITS'" >&2
 		return 1
 		;;
 	esac

 	# Peel the octets off from the left; OCT3 is the most significant.
 	REST="$HOST"
 	OCT3="${REST%%.*}" ; REST="${REST#*.}"
 	OCT2="${REST%%.*}" ; REST="${REST#*.}"
 	OCT1="${REST%%.*}" ; REST="${REST#*.}"
 	OCT0="${REST%%.*}"

 	# 4294967295 is 2^32 - 1; the final "&" discards anything shifted
 	# above bit 31.
 	HOST_NUM=$(( ($OCT3 << 24) + ($OCT2 << 16) + ($OCT1 << 8) + $OCT0 ))
 	MASK_NUM=$(( (4294967295 << (32 - $MASKBITS)) & 4294967295 ))
 	NET_NUM=$(( $HOST_NUM & $MASK_NUM ))

 	echo "$(( ($NET_NUM >> 24) & 255 )).$(( ($NET_NUM >> 16) & 255 )).$(( ($NET_NUM >> 8) & 255 )).$(( $NET_NUM & 255 ))"
 }
 # Convert a prefix length into a dotted-quad netmask.
 #   $1 - number of mask bits, 0..32
 # Output: the netmask on stdout, e.g. 24 gives 255.255.255.0
 # Returns: non-zero, with a message on stderr and nothing on stdout,
 #          if $1 is not an integer in the range 0..32.
 #
 # The arithmetic uses shifts rather than the "**" extension.
 ipv4_maskbits_to_net_mask()
 {
 	local MASKBITS="$1"
 	local MASK_NUM

 	# Without this check a non-numeric argument would be evaluated as
 	# an (unset) variable name, i.e. as 0, and quietly yield 0.0.0.0.
 	case "$MASKBITS" in
 	    [0-9]|[12][0-9]|3[0-2]) : ;;
 	    *)
 		echo "ipv4_maskbits_to_net_mask: invalid mask bits '$MASKBITS'" >&2
 		return 1
 		;;
 	esac

 	# 4294967295 is 2^32 - 1; the final "&" discards anything shifted
 	# above bit 31.
 	MASK_NUM=$(( (4294967295 << (32 - $MASKBITS)) & 4294967295 ))

 	echo "$(( ($MASK_NUM >> 24) & 255 )).$(( ($MASK_NUM >> 16) & 255 )).$(( ($MASK_NUM >> 8) & 255 )).$(( $MASK_NUM & 255 ))"
 }
 # Check whether a string is an IPv4 address in dotted-quad notation.
 #   $1 - candidate address
 # Returns 0 if $1 consists of exactly four dot-separated decimal
 # numbers, each in the range 0..255 (leading zeros are tolerated),
 # and 1 otherwise.  Nothing is printed.
 #
 # The check is done with shell builtins only.  In particular an empty
 # string and octets too long to be parsed as integers are rejected
 # rather than slipping through a failed numeric comparison.
 ipv4_is_valid_addr()
 {
 	local ADDR="$1"
 	local rest octet count

 	# Must be non-empty and contain nothing but digits and dots.
 	case "$ADDR" in
 	    ''|*[!0-9.]*) return 1 ;;
 	esac

 	# The extra trailing dot makes every octet, including the last,
 	# end in a dot so that they can all be peeled off the same way.
 	rest="${ADDR}."
 	count=0
 	while [ -n "$rest" ] ; do
 		octet="${rest%%.*}"
 		rest="${rest#*.}"
 		count=$(( $count + 1 ))

 		[ -n "$octet" ] || return 1
 		[ "$count" -le 4 ] || return 1

 		# Drop leading zeros, then bound the length before the
 		# numeric comparison so that it can not overflow.
 		octet="${octet#"${octet%%[!0]*}"}"
 		[ "${#octet}" -le 3 ] || return 1
 		[ "${octet:-0}" -le 255 ] || return 1
 	done

 	[ "$count" -eq 4 ]
 }
 # iptables doesn't like being re-entered, so flock-wrap it.
 # Every call in the event scripts goes through this wrapper, which runs
 # /sbin/iptables with the given arguments while holding the lock file
 # $CTDB_VARDIR/iptables-ctdb.flock, waiting at most 30 seconds for it.
 # The lock path is quoted so that a $CTDB_VARDIR containing whitespace
 # is still passed to flock as a single argument.
 iptables()
 {
 	flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
 }
 ########################################################
 # tickle handling
 ########################################################
 # Directory, below $CTDB_VARDIR, that holds the temporary files written
 # by update_tickles().  It is created as soon as this file is sourced.
 tickledir="${CTDB_VARDIR}/state/tickles"
 mkdir -p "${tickledir}"
 # Bring the tickles registered with ctdb for one port into line with
 # the TCP connections currently established to our public IPs.
 #   $1 - the port to process
 # Two sorted scratch files, ${tickledir}/<port>.connections and
 # ${tickledir}/<port>.tickles, are built and compared with comm(1);
 # both are removed again at the end.  All the variables used here are
 # ordinary (global) shell variables, not locals.
 update_tickles ()
 {
 	_port="$1"
 	mkdir -p "$tickledir" # Just in case
 	# Who am I?  Strip the "PNN:" prefix from the output of "ctdb pnn".
 	_pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
 	# What public IPs do I hold?  From the colon-separated output of
 	# "ctdb -Y ip" take field 2 of every line whose field 3 is our PNN.
 	_ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
 	# IPs as a regexp choice: "(ip1|ip2|...)" with each dot preceded by
 	# two backslashes.  NOTE(review): presumably awk's -v escape
 	# processing reduces these to one, leaving "\." in the regexp -
 	# confirm before touching the quoting.
 	_ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
 	# Record connections to our public IPs in a temporary file.  Of the
 	# "netstat -tn" lines, keep those with "tcp" in field 1,
 	# "ESTABLISHED" in field 6 and field 4 matching one of our IPs
 	# followed by ":<port>"; write "<field 5> <field 4>", sorted.
 	_my_connections="${tickledir}/${_port}.connections"
 	rm -f "$_my_connections"
 	netstat -tn |
 	awk -v destpat="^${_ipschoice}:${_port}\$" \
 	  '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
 	sort >"$_my_connections"
 	# Record our current tickles in a temporary file.  The first line
 	# of each "ctdb -Y gettickles" output is skipped and fields 2-5 of
 	# the rest are rewritten as "f2:f3 f4:f5", the same layout as the
 	# connections file, then sorted.
 	_my_tickles="${tickledir}/${_port}.tickles"
 	rm -f "$_my_tickles"
 	for _i in $_ips ; do
 		ctdb -Y gettickles $_i $_port |
 		awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
 	done |
 	sort >"$_my_tickles"
 	# Add tickles for connections that we haven't already got tickles
 	# for (comm -23: lines found only in the connections file).
 	comm -23 "$_my_connections" "$_my_tickles" |
 	while read _src _dst ; do
 		ctdb addtickle $_src $_dst
 	done
 	# Remove tickles for connections that are no longer there
 	# (comm -13: lines found only in the tickles file).
 	comm -13 "$_my_connections" "$_my_tickles" |
 	while read _src _dst ; do
 		ctdb deltickle $_src $_dst
 	done
 	rm -f "$_my_connections" "$_my_tickles"
 }
 ########################################################
 # load a site local config file
 ########################################################
 # Source the site-local configuration, in this order:
 #   1. the file named by $CTDB_RC_LOCAL, if that variable is set
 #   2. $CTDB_BASE/rc.local
 #   3. every file in $CTDB_BASE/rc.local.d/
 # A file is only sourced if it has the execute bit set.  All paths are
 # quoted so that a $CTDB_BASE containing whitespace or glob characters
 # is handled correctly.
 if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
     . "$CTDB_RC_LOCAL"
 fi
 if [ -x "$CTDB_BASE/rc.local" ] ; then
     . "$CTDB_BASE/rc.local"
 fi
 if [ -d "$CTDB_BASE/rc.local.d" ] ; then
     # If the directory is empty the pattern stays unexpanded; the -x
     # test then fails for it and nothing is sourced.
     for i in "$CTDB_BASE"/rc.local.d/* ; do
         [ -x "$i" ] && . "$i"
     done
 fi
 # Make sure $CTDB_CURRENT_DEBUGLEVEL is set.  This is done only now,
 # after the site-local files have been sourced, so that a file in
 # $CTDB_BASE/rc.local.d/ gets the chance to override the debug level.
 ctdb_set_current_debuglevel

 # Basename of the script that sourced this file.
 script_name="${0##*/}"
 # The service name defaults to the script name.
 service_name="${script_name}"
 service_fail_limit=1
 # The first argument of the calling script is the event being handled.
 event_name="${1}"

1428 lines 36 KiB Bash Executable File Raw Blame History

1428 lines

36 KiB

Bash

Executable File

Raw Blame History