# Hey Emacs, this is a -*- shell-script -*- !!!

# utility functions for ctdb event scripts

PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH

# Fall back to default state/configuration directories when the caller
# has not provided them in the environment.
[ -z "$CTDB_VARDIR" ] && {
    export CTDB_VARDIR="/var/ctdb"
}
[ -z "$CTDB_ETCDIR" ] && {
    export CTDB_ETCDIR="/etc"
}

#######################################
# pull in a system config file, if any
#
# usage: loadconfig [NAME]
#
# With no NAME, the name is taken from $service_config, falling back to
# $service_name, and loadconfig is re-invoked with it.  For any NAME
# other than "ctdb" the "ctdb" configuration is loaded first.
#
# The first existing file among
#   $CTDB_ETCDIR/sysconfig/NAME
#   $CTDB_ETCDIR/default/NAME
#   $CTDB_BASE/sysconfig/NAME
# is sourced into the current shell.  A missing file is not an error.
_loadconfig()
{
    if [ -z "$1" ] ; then
        foo="${service_config:-${service_name}}"
        if [ -n "$foo" ] ; then
            loadconfig "$foo"
        fi
    elif [ "$1" != "ctdb" ] ; then
        loadconfig "ctdb"
    fi

    # Paths are quoted so that directories containing whitespace work.
    # When $1 is empty these paths name directories, so nothing is sourced.
    if [ -f "$CTDB_ETCDIR/sysconfig/$1" ] ; then
        . "$CTDB_ETCDIR/sysconfig/$1"
    elif [ -f "$CTDB_ETCDIR/default/$1" ] ; then
        . "$CTDB_ETCDIR/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ] ; then
        . "$CTDB_BASE/sysconfig/$1"
    fi
}

# Public entry point; kept as a thin wrapper around _loadconfig().
loadconfig ()
{
    _loadconfig "$@"
}

##############################################################
# determine on what type of system (init style) we are running
#
# Sets CTDB_INIT_STYLE to "suse", "debian" or "redhat" depending on
# which helper binary is present.  A value that is already set is left
# untouched.
detect_init_style()
{
    # only do detection if not already set:
    [ -n "$CTDB_INIT_STYLE" ] && return 0

    if [ -x /sbin/startproc ] ; then
        CTDB_INIT_STYLE="suse"
    elif [ -x /sbin/start-stop-daemon ] ; then
        CTDB_INIT_STYLE="debian"
    else
        CTDB_INIT_STYLE="redhat"
    fi
}

######################################################
# simulate /sbin/service on platforms that don't have it
# _service() makes it easier to hook the service() function for
# testing.
_service ()
{
    # $1 is the service name, $2 the operation to pass to the init script.
    # $_nice is set by the service()/nice_service() wrappers and is left
    # unquoted on purpose: when empty it must expand to nothing.
    _service_name="$1"
    _op="$2"

    # do nothing, when no service was specified
    [ -z "$_service_name" ] && return

    if [ -x /sbin/service ] ; then
        $_nice /sbin/service "$_service_name" "$_op"
    elif [ -x "$CTDB_ETCDIR/init.d/$_service_name" ] ; then
        $_nice "$CTDB_ETCDIR/init.d/$_service_name" "$_op"
    elif [ -x "$CTDB_ETCDIR/rc.d/init.d/$_service_name" ] ; then
        $_nice "$CTDB_ETCDIR/rc.d/init.d/$_service_name" "$_op"
    fi
}

# usage: service SERVICE_NAME OPERATION
service()
{
    _nice=""
    _service "$@"
}

######################################################
# simulate /sbin/service (niced) on platforms that don't have it
# usage: nice_service SERVICE_NAME OPERATION
nice_service()
{
    _nice="nice"
    _service "$@"
}

######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
# 1st arg is relative path under /proc/, 2nd arg is value to set
set_proc ()
{
    echo "$2" >"/proc/$1"
}

######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
# 1st arg is relative path under /proc/
get_proc ()
{
    cat "/proc/$1"
}

######################################################
# check that a rpc server is registered with portmap
# and responding to requests
# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
#
# The rpcinfo output is left in $ctdb_check_rpc_out.  On failure that
# variable is replaced by the error message, the message is printed
# and 1 is returned.
######################################################
ctdb_check_rpc()
{
    progname="$1"
    prognum="$2"
    version="$3"

    # An empty VERSION is omitted entirely rather than passed as "".
    if ! ctdb_check_rpc_out=$(rpcinfo -u localhost "$prognum" ${version:+"$version"} 2>&1) ; then
        ctdb_check_rpc_out="ERROR: $progname failed RPC check: $ctdb_check_rpc_out"
        echo "$ctdb_check_rpc_out"
        return 1
    fi
}

######################################################
# check a set of directories is available
# return 1 on a missing directory
# usage: ctdb_check_directories_probe <LIST
#
# Reads one directory per line from stdin.  Lines containing "%" are
# skipped.  The directory being examined is left in $d so that the
# caller can report which one failed.
######################################################
ctdb_check_directories_probe()
{
    while IFS="" read -r d ; do
        case "$d" in
            *%*)
                continue
                ;;
            *)
                [ -d "${d}/." ] || return 1
                ;;
        esac
    done
}

######################################################
# check a set of directories is available
# usage: ctdb_check_directories [SERVICE_NAME] <LIST
#
# SERVICE_NAME defaults to $service_name and is only used in the error
# message.  Exits the calling script with status 1 on the first missing
# directory.
######################################################
ctdb_check_directories()
{
    n="${1:-${service_name}}"
    ctdb_check_directories_probe || {
        echo "ERROR: $n directory \"$d\" not available"
        exit 1
    }
}

######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports PORT...
#
# Returns 1 (after printing an error naming $service_name) for the
# first port that has no listener on 0.0.0.0 or on ::.
######################################################
ctdb_check_tcp_ports()
{
    [ $# -gt 0 ] || return 0

    # Take one snapshot instead of running netstat up to twice per port.
    _listeners=$(netstat -a -t -n)

    for p ; do
        if ! printf '%s\n' "$_listeners" |
                grep -q -e "0\.0\.0\.0:$p .*LISTEN" -e ":::$p .*LISTEN" ; then
            echo "ERROR: $service_name tcp port $p is not responding"
            return 1
        fi
    done
}

######################################################
# check a unix socket
# usage: ctdb_check_unix_socket SOCKET_PATH
#
# Does nothing when SOCKET_PATH is empty.  Returns 1 when netstat shows
# no listening unix socket whose line ends in SOCKET_PATH.
######################################################
ctdb_check_unix_socket()
{
    socket_path="$1"
    [ -z "$socket_path" ] && return

    if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$" ; then
        echo "ERROR: $service_name socket $socket_path not found"
        return 1
    fi
}

######################################################
# check a command returns zero status
# usage: ctdb_check_command SERVICE_NAME COMMAND
#
# NOTE: this overwrites the global $service_name with SERVICE_NAME.
# COMMAND is expanded unquoted so that it may carry arguments.
# Exits the calling script with status 1 when COMMAND fails.
######################################################
ctdb_check_command()
{
    service_name="$1"
    wait_cmd="$2"
    [ -z "$wait_cmd" ] && return

    $wait_cmd > /dev/null 2>&1 || {
        echo "ERROR: $service_name - $wait_cmd returned error"
        exit 1
    }
}

################################################
# kill off any TCP connections with the given IP
# usage: kill_tcp_connections IP
#
# Established connections are collected from netstat (plain and
# ::ffff:-mapped form) into $CTDB_VARDIR/state/connections.IP and
# "ctdb killtcp" is issued for each of them.  The function then waits
# up to about four seconds for the connections to disappear.
################################################
kill_tcp_connections()
{
    _IP="$1"
    _failed=0

    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src ; do
        # Split "address:port" at the last colon.
        srcip=${src%:*}
        srcport=${src##*:}
        destip=${dest%:*}
        destport=${dest##*:}
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        case "$destport" in
            # we only do one-way killtcp for CIFS
            139|445)
                :
                ;;
            # for all others we do 2-way
            *)
                ctdb killtcp "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
                ;;
        esac
        _killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send killtcp control"
        return
    }
    [ "$_killcount" -gt 0 ] || {
        return
    }

    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null ; do
        sleep 1
        _count=$((_count + 1))
        [ "$_count" -gt 3 ] && {
            echo "Timed out killing tcp connections for IP $_IP"
            return
        }
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}

##################################################################
# kill off the local end for any TCP connections with the given IP
# usage: kill_tcp_connections_local_only IP
#
# Same as kill_tcp_connections but "ctdb killtcp" is only issued in one
# direction for every connection.
##################################################################
kill_tcp_connections_local_only()
{
    _IP="$1"
    _failed=0

    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src ; do
        # Split "address:port" at the last colon.
        srcip=${src%:*}
        srcport=${src##*:}
        destip=${dest%:*}
        destport=${dest##*:}
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        _killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send killtcp control"
        return
    }
    [ "$_killcount" -gt 0 ] || {
        return
    }

    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null ; do
        sleep 1
        _count=$((_count + 1))
        [ "$_count" -gt 3 ] && {
            echo "Timed out killing tcp connections for IP $_IP"
            return
        }
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}

##################################################################
# tickle any TCP connections with the given IP
# usage: tickle_tcp_connections IP
#
# "ctdb tickle" is issued in both directions for every established
# connection that netstat reports for IP.
##################################################################
tickle_tcp_connections()
{
    _IP="$1"
    _failed=0

    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" |
        awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src ; do
        srcip=${src%:*}
        srcport=${src##*:}
        destip=${dest%:*}
        destport=${dest##*:}
        echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
        ctdb tickle "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
        ctdb tickle "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send tickle control"
        return
    }
}

########################################################
# start/stop the nfs service on different platforms
# usage: startstop_nfs start|stop|restart
#
# The platform is guessed from which init script exists under
# $CTDB_ETCDIR/init.d; nfslock ("rhel") wins over nfsserver ("sles")
# when both are present.  Exits with status 1 on any other platform.
########################################################
startstop_nfs()
{
    PLATFORM="unknown"
    [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] && {
        PLATFORM="sles"
    }
    [ -x "$CTDB_ETCDIR/init.d/nfslock" ] && {
        PLATFORM="rhel"
    }

    case $PLATFORM in
        sles)
            case $1 in
                start)
                    service nfsserver start
                    ;;
                stop)
                    service nfsserver stop > /dev/null 2>&1
                    ;;
                restart)
                    set_proc "fs/nfsd/threads" 0
                    service nfsserver stop > /dev/null 2>&1
                    pkill -9 nfsd
                    service nfsserver start
                    ;;
            esac
            ;;
        rhel)
            case $1 in
                start)
                    service nfslock start
                    service nfs start
                    ;;
                stop)
                    service nfs stop > /dev/null 2>&1
                    service nfslock stop > /dev/null 2>&1
                    ;;
                restart)
                    set_proc "fs/nfsd/threads" 0
                    service nfs stop > /dev/null 2>&1
                    service nfslock stop > /dev/null 2>&1
                    pkill -9 nfsd
                    service nfslock start
                    service nfs start
                    ;;
            esac
            ;;
        *)
            echo "Unknown platform. NFS is not supported with ctdb"
            exit 1
            ;;
    esac
}

########################################################
# start/stop the nfs lockmanager service on different platforms
# usage: startstop_nfslock start|stop|restart
########################################################
startstop_nfslock()
{
    PLATFORM="unknown"
    [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] && {
        PLATFORM="sles"
    }
    [ -x "$CTDB_ETCDIR/init.d/nfslock" ] && {
        PLATFORM="rhel"
    }

    case $PLATFORM in
        sles)
            # for sles there is no service for lockmanager
            # so we instead just shutdown/restart nfs
            case $1 in
                start)
                    service nfsserver start
                    ;;
                stop)
                    service nfsserver stop > /dev/null 2>&1
                    ;;
                restart)
                    service nfsserver stop
                    service nfsserver start
                    ;;
            esac
            ;;
        rhel)
            case $1 in
                start)
                    service nfslock start
                    ;;
                stop)
                    service nfslock stop > /dev/null 2>&1
                    ;;
                restart)
                    service nfslock stop
                    service nfslock start
                    ;;
            esac
            ;;
        *)
            echo "Unknown platform. NFS locking is not supported with ctdb"
            exit 1
            ;;
    esac
}

# Shared implementation of add_ip_to_iface, delete_ip_from_iface and
# setup_iface_ip_readd_script: run
#   $CTDB_BASE/interface_modify.sh ACTION IFACE IP MASKBITS READD_BASE [EXTRA...]
# under a per-interface flock (30 second timeout), creating the state
# directory and lock file first if necessary.
# usage: _ctdb_interface_modify ACTION IFACE IP MASKBITS [EXTRA...]
_ctdb_interface_modify()
{
    local _action=$1
    local _iface=$2
    local _ip=$3
    local _maskbits=$4
    shift 4

    local _state_dir="$CTDB_VARDIR/state/interface_modify"
    local _lockfile="$_state_dir/$_iface.flock"
    local _readd_base="$_state_dir/$_iface.readd.d"

    mkdir -p "$_state_dir" || {
        # $ret is deliberately left global, as it was before.
        ret=$?
        echo "Failed to mkdir -p $_state_dir - $ret"
        return $ret
    }

    test -f "$_lockfile" || {
        touch "$_lockfile"
    }

    flock --timeout 30 "$_lockfile" \
        "$CTDB_BASE/interface_modify.sh" "$_action" \
        "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$@"
}

# better use delete_ip_from_iface() together with add_ip_to_iface
# remove_ip should be removed in future
# usage: remove_ip IP/MASKBITS IFACE
remove_ip()
{
    local _ip_maskbits=$1
    local _iface=$2
    # First and second "/"-separated fields of the argument.
    local _ip=${_ip_maskbits%%/*}
    local _maskbits=${_ip_maskbits#*/}
    _maskbits=${_maskbits%%/*}

    delete_ip_from_iface "$_iface" "$_ip" "$_maskbits"
}

# usage: add_ip_to_iface IFACE IP MASKBITS
add_ip_to_iface()
{
    _ctdb_interface_modify add "$1" "$2" "$3"
}

# usage: delete_ip_from_iface IFACE IP MASKBITS
delete_ip_from_iface()
{
    _ctdb_interface_modify delete "$1" "$2" "$3"
}

# usage: setup_iface_ip_readd_script IFACE IP MASKBITS READD_SCRIPT
setup_iface_ip_readd_script()
{
    _ctdb_interface_modify readd_script "$1" "$2" "$3" "$4"
}

########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init
#        ctdb_counter_incr
#        ctdb_check_counter_limit [LIMIT [QUIET]]
#        ctdb_check_counter_equal LIMIT
#
# The counter is the size in bytes of $ctdb_fail_dir/$service_name
# (one byte is appended per increment).
# ctdb_check_counter_limit exits with status 1 when count >= LIMIT
# (default $service_fail_limit) and prints a warning for a smaller
# non-zero count unless QUIET is non-empty.
# ctdb_check_counter_equal returns 1 when count == LIMIT and 0
# otherwise - note the inverted sense.
########################################################
_ctdb_counter_common ()
{
    _counter_file="$ctdb_fail_dir/$service_name"
    mkdir -p "${_counter_file%/*}" # dirname
}

ctdb_counter_init ()
{
    _ctdb_counter_common

    >"$_counter_file"
}

ctdb_counter_incr ()
{
    _ctdb_counter_common

    # unary counting!
    printf 1 >> "$_counter_file"
}

ctdb_check_counter_limit ()
{
    _ctdb_counter_common

    _limit="${1:-${service_fail_limit}}"
    _quiet="$2"

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    if [ "$_size" -ge "$_limit" ] ; then
        echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
        exit 1
    elif [ "$_size" -gt 0 ] && [ -z "$_quiet" ] ; then
        echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
    fi
}

ctdb_check_counter_equal ()
{
    _ctdb_counter_common

    _limit=$1

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    if [ "$_size" -eq "$_limit" ] ; then
        return 1
    fi
    return 0
}

########################################################

ctdb_status_dir="$CTDB_VARDIR/status"
ctdb_fail_dir="$CTDB_VARDIR/failcount"

########################################################
# Managed status history, for auto-start/stop
#
# A service counts as "previously managed" while a file named after it
# exists in $ctdb_managed_dir.  All three functions take an optional
# service name that defaults to $service_name.

ctdb_managed_dir="$CTDB_VARDIR/managed_history"

_ctdb_managed_common ()
{
    _service_name="${1:-${service_name}}"
    _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
}

ctdb_service_managed ()
{
    _ctdb_managed_common "$@"
    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}

ctdb_service_unmanaged ()
{
    _ctdb_managed_common "$@"
    rm -f "$_ctdb_managed_file"
}

is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common "$@"
    [ -f "$_ctdb_managed_file" ]
}

########################################################
# Check and set status

# usage: log_status_cat STATE FILE
log_status_cat ()
{
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}

# Returns 1 if an "unhealthy" file is readable for $script_name, else 2
# if a "banned" file is readable, else 0.  The file contents are logged.
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    else
        return 0
    fi
}

# usage: ctdb_setstatus unhealthy|banned FILE
#        ctdb_setstatus ANYTHING_ELSE
# The first form copies FILE into the status file of that name; any
# other first argument removes both status files.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"
    case "$1" in
        unhealthy|banned)
            mkdir -p "$d"
            cat "$2" >"$d/$1"
            ;;
        *)
            for i in "banned" "unhealthy" ; do
                rm -f "$d/$i"
            done
            ;;
    esac
}

# The "reconfigure" flag is an empty file under
# $ctdb_status_dir/$service_name.
ctdb_service_needs_reconfigure ()
{
    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
}

ctdb_service_set_reconfigure ()
{
    d="$ctdb_status_dir/$service_name"
    mkdir -p "$d"
    >"$d/reconfigure"
}

ctdb_service_unset_reconfigure ()
{
    rm -f "$ctdb_status_dir/$service_name/reconfigure"
}

# Runs $service_reconfigure if set, otherwise restarts the service, then
# clears the reconfigure flag and resets the failure counter.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"$service_name\"..."
    if [ -n "$service_reconfigure" ] ; then
        # NOTE(review): evaluates a string taken from a shell variable;
        # it must only ever come from trusted configuration.
        eval "$service_reconfigure"
    else
        service "$service_name" restart
    fi
    ctdb_service_unset_reconfigure
    ctdb_counter_init
}

# usage: ctdb_compat_managed_service FLAG SERVICE
# Appends SERVICE to the list in $t when FLAG is "yes".
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" ] ; then
        t="$t $2 "
    fi
}

# usage: is_ctdb_managed_service [SERVICE]
# Succeeds when SERVICE (default $service_name) is listed in
# $CTDB_MANAGED_SERVICES or enabled through one of the legacy
# CTDB_MANAGES_* variables.
is_ctdb_managed_service ()
{
    _service_name="${1:-${service_name}}"

    t=" $CTDB_MANAGED_SERVICES "

    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"  "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"   "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"     "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"   "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"   "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs-ganesha-gpfs"

    # Returns 0 if "$_service_name" appears in $t
    [ "${t#* ${_service_name} }" != "${t}" ]
}

# usage: ctdb_start_stop_service [SERVICE]
# Only acts during the "monitor" event.  Starts a service that has
# become managed, or stops one that is no longer managed, records the
# new state and then exits the calling script.
ctdb_start_stop_service ()
{
    _service_name="${1:-${service_name}}"

    [ "$event_name" = "monitor" ] || return 0

    if is_ctdb_managed_service "$_service_name" ; then
        if ! is_ctdb_previously_managed_service "$_service_name" ; then
            echo "Starting service $_service_name"
            ctdb_service_start || exit $?
            ctdb_service_managed "$_service_name"
            exit 0
        fi
    else
        if is_ctdb_previously_managed_service "$_service_name" ; then
            echo "Stopping service $_service_name"
            ctdb_service_stop || exit $?
            ctdb_service_unmanaged "$_service_name"
            exit 0
        fi
    fi
}

# Runs $service_start if set, otherwise "service $service_name start";
# the failure counter is reset on success.
ctdb_service_start ()
{
    if [ -n "$service_start" ] ; then
        # NOTE(review): trusted configuration only, see above.
        eval "$service_start" || return $?
    else
        service "$service_name" start || return $?
    fi
    ctdb_counter_init
}

# Runs $service_stop if set, otherwise "service $service_name stop".
ctdb_service_stop ()
{
    if [ -n "$service_stop" ] ; then
        # NOTE(review): trusted configuration only, see above.
        eval "$service_stop"
    else
        service "$service_name" stop
    fi
}

# Handles the "status" and "setstatus" events and exits; returns to the
# caller for every other event.
ctdb_standard_event_handler ()
{
    case "$1" in
        status)
            ctdb_checkstatus
            exit
            ;;
        setstatus)
            shift
            ctdb_setstatus "$@"
            exit
            ;;
    esac
}

# usage: ipv4_host_addr_to_net_addr HOST MASKBITS
# Prints the dotted-quad network address of HOST for the given prefix
# length.
ipv4_host_addr_to_net_addr()
{
    local _host=$1
    local _maskbits=$2
    local _o1 _o2 _o3 _o4 _rest

    # Split the dotted quad without forking.
    IFS=. read -r _o1 _o2 _o3 _o4 _rest <<EOF
$_host
EOF

    local _host_num=$(( ($_o1 << 24) + ($_o2 << 16) + ($_o3 << 8) + $_o4 ))
    local _mask_num=$(( (0xffffffff << (32 - $_maskbits)) & 0xffffffff ))
    local _net_num=$(( $_host_num & $_mask_num ))

    echo "$(( ($_net_num >> 24) & 255 )).$(( ($_net_num >> 16) & 255 )).$(( ($_net_num >> 8) & 255 )).$(( $_net_num & 255 ))"
}

# usage: ipv4_maskbits_to_net_mask MASKBITS
# Prints the dotted-quad netmask for the given prefix length.
ipv4_maskbits_to_net_mask()
{
    local _maskbits=$1
    local _mask_num=$(( (0xffffffff << (32 - $_maskbits)) & 0xffffffff ))

    echo "$(( ($_mask_num >> 24) & 255 )).$(( ($_mask_num >> 16) & 255 )).$(( ($_mask_num >> 8) & 255 )).$(( $_mask_num & 255 ))"
}

# usage: ipv4_is_valid_addr ADDR
# Returns 0 when ADDR consists of exactly four dot-separated decimal
# numbers, none greater than 255, and 1 otherwise (including for an
# empty ADDR).
ipv4_is_valid_addr()
{
    local _addr=$1

    # Only digits and dots, no empty octets.
    case "$_addr" in
        ""|*[!0-9.]*|.*|*.|*..*)
            return 1
            ;;
    esac
    # Exactly four octets.
    case "$_addr" in
        *.*.*.*.*)
            return 1
            ;;
        *.*.*.*)
            ;;
        *)
            return 1
            ;;
    esac

    local _rest="$_addr."
    local _octet
    while [ -n "$_rest" ] ; do
        _octet=${_rest%%.*}
        _rest=${_rest#*.}
        # A number too large for test(1) is an error, hence invalid too.
        [ "$_octet" -le 255 ] 2>/dev/null || {
            #echo "IPv4: '$_addr' is not a valid address"
            return 1
        }
    done

    return 0
}

# iptables doesn't like being re-entered, so flock-wrap it.
iptables()
{
    flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}

########################################################
# tickle handling
########################################################

# Temporary directory for tickles.
tickledir="$CTDB_VARDIR/state/tickles"
mkdir -p "$tickledir"

# usage: update_tickles PORT
# Brings the tickle list held by ctdb in line with the established
# connections to PORT on the public IPs that "ctdb -Y ip" reports for
# this node.
update_tickles ()
{
    _port="$1"

    mkdir -p "$tickledir" # Just in case

    # Who am I?
    _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}

    # What public IPs do I hold?
    _ips=$(ctdb -Y ip | awk -F: -v pnn="$_pnn" '$3 == pnn {print $2}')

    # IPs as a regexp choice ($_ips is unquoted on purpose so that the
    # newline-separated list is joined with single spaces)
    _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

    # Record connections to our public IPs in a temporary file
    _my_connections="${tickledir}/${_port}.connections"
    rm -f "$_my_connections"
    netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
            '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

    # Record our current tickles in a temporary file
    _my_tickles="${tickledir}/${_port}.tickles"
    rm -f "$_my_tickles"
    for _i in $_ips ; do
        ctdb -Y gettickles $_i $_port |
            awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
    done |
        sort >"$_my_tickles"

    # Add tickles for connections that we haven't already got tickles for
    comm -23 "$_my_connections" "$_my_tickles" |
        while read -r _src _dst ; do
            ctdb addtickle $_src $_dst
        done

    # Remove tickles for connections that are no longer there
    comm -13 "$_my_connections" "$_my_tickles" |
        while read -r _src _dst ; do
            ctdb deltickle $_src $_dst
        done

    rm -f "$_my_connections" "$_my_tickles"
}

########################################################
# load a site local config file
########################################################

[ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] && {
    . "$CTDB_RC_LOCAL"
}

[ -x "$CTDB_BASE/rc.local" ] && {
    . "$CTDB_BASE/rc.local"
}

[ -d "$CTDB_BASE/rc.local.d" ] && {
    for i in "$CTDB_BASE"/rc.local.d/* ; do
        [ -x "$i" ] && . "$i"
    done
}

script_name="${0##*/}"       # basename
service_name="$script_name"  # default is just the script name
service_fail_limit=1
event_name="$1"