2025-03-23 06:50:21 +03:00 · 2009-11-19 15:00:17 +11:00 · 2009-11-19 15:00:17 +11:00 · ee513c1ba2
commit ee513c1ba2
parent 73cb65bf1a
16 changed files with 160 additions and 240 deletions
--- a/ctdb/config/events.d/00.ctdb
+++ b/ctdb/config/events.d/00.ctdb
@ -10,13 +10,7 @@
 #     recovered  : called when ctdb has finished a recovery event

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-# ensure we have /bin and /usr/bin in the path
-PATH=/bin:/usr/bin:$PATH
-
-cmd="$1"
-shift
+loadconfig

 case $cmd in 
     startup)
--- a/ctdb/config/events.d/01.reclock
+++ b/ctdb/config/events.d/01.reclock
@ -2,55 +2,43 @@
 # script to check accessibility to the reclock file on a node

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-# Count the number of intervals that have passed when we have tried to
-# but failed to stat the reclock file.  after third failure the node
-# becomes unhealthy after the twentieth failure the node we shutdown
-# ctdbd
-RECLOCKCOUNT="fail-count"
+loadconfig

 case $cmd in 
-     startup)
-	ctdb_counter_init "$RECLOCKCOUNT"
+    startup)
+	ctdb_counter_init
 	;;
-
-      monitor)
-	ctdb_counter_incr "$RECLOCKCOUNT"
-	ctdb_counter_limit "$RECLOCKCOUNT" 200 && {
-		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
-		df
-		sleep 1
-		ctdb shutdown
+    
+    monitor)
+	ctdb_counter_incr
+	(ctdb_counter_limit 200 >/dev/null 2>&1) || {
+	    echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
+	    df
+	    sleep 1
+	    ctdb shutdown
 	}

-	RECLOCKFILE=`ctdb -Y getreclock`
+	RECLOCKFILE=$(ctdb -Y getreclock)
 	[ -z "$RECLOCKFILE" ] && {
-		# we are not using a reclock file
-		ctdb_counter_init "$RECLOCKCOUNT"
-		exit 0
+	    # we are not using a reclock file
+	    ctdb_counter_init
+	    exit 0
 	}

 	# try stat the reclock file as a background process
 	# so that we dont block in case the cluster filesystem is unavailable
 	(
-		stat $RECLOCKFILE && {
-			# we could stat the file, reset the counter
-			ctdb_counter_init "$RECLOCKCOUNT"
-		}
+	    stat $RECLOCKFILE && {
+		# we could stat the file, reset the counter
+		ctdb_counter_init
+	    }
 	) >/dev/null 2>/dev/null &


-	ctdb_counter_limit "$RECLOCKCOUNT" 3 && {
-		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Mark node UNHEALTHY."
-		df
-		exit 1;
-	}
+	ctdb_counter_limit 3
+	;;
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac

--- a/ctdb/config/events.d/10.interface
+++ b/ctdb/config/events.d/10.interface
@ -6,10 +6,7 @@
 # public interface

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-cmd="$1"
-shift
+loadconfig

 [ -z "$CTDB_PUBLIC_ADDRESSES" ] && {
 	CTDB_PUBLIC_ADDRESSES=$CTDB_BASE/public_addresses
@ -177,10 +174,10 @@ case $cmd in
 	    esac
 	done
 	;;
-
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0

-
-
--- a/ctdb/config/events.d/11.natgw
+++ b/ctdb/config/events.d/11.natgw
@ -6,15 +6,10 @@
 #

 . $CTDB_BASE/functions
-loadconfig ctdb
+loadconfig

 [ -z "$CTDB_NATGW_PUBLIC_IFACE" ] && exit 0

-cmd="$1"
-shift
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-
 delete_all() {
 	remove_ip $CTDB_NATGW_PUBLIC_IP $CTDB_NATGW_PUBLIC_IFACE
 	remove_ip $CTDB_NATGW_PUBLIC_IP_HOST lo
@ -28,7 +23,7 @@ delete_all() {
 }

 case $cmd in 
-     startup)
+    startup)
 	# do not respond to ARPs that are for ip addresses with scope 'host'
 	echo 3 > /proc/sys/net/ipv4/conf/all/arp_ignore
 	# do not send out arp requests from loopback addresses
@ -37,13 +32,13 @@ case $cmd in
 	ctdb setnatgwstate on
 	;;

-     recovered|updatenatgw)
+    recovered|updatenatgw)
 	MYPNN=`ctdb pnn | cut -d: -f2`
 	NATGWMASTER=`ctdb natgwlist | head -1 | sed -e "s/ .*//"`
 	NATGWIP=`ctdb natgwlist | head -1 | sed -e "s/^[^ ]* *//"`

 	CTDB_NATGW_PUBLIC_IP_HOST=`echo $CTDB_NATGW_PUBLIC_IP | sed -e "s/\/.*/\/32/"`
-	if [ "$NATGWMASTER" = "-1" ]; then
+	if [ "$NATGWMASTER" = "-1" ]; then
 		echo "There is not NATGW master node"
 		exit 1
 	fi
@ -71,7 +66,7 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-     shutdown|removenatgw)
+    shutdown|removenatgw)
 	delete_all
 	;;

--- a/ctdb/config/events.d/11.routing
+++ b/ctdb/config/events.d/11.routing
@ -13,16 +13,12 @@
 # bond1 10.3.3.0/24 10.0.0.1

 . $CTDB_BASE/functions
-loadconfig ctdb
+loadconfig

 [ -f $CTDB_BASE/static-routes ] || {
    exit 0
 }

-cmd="$1"
-shift
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
 case $cmd in 
     takeip|releaseip)
 	iface=$1
--- a/ctdb/config/events.d/20.multipathd
+++ b/ctdb/config/events.d/20.multipathd
@ -6,14 +6,10 @@
 #   CTDB_MONITOR_MPDEVICES="device1 device2 ..."
 #

-PATH=/bin:/usr/bin:$PATH
+service_name="multipathd"

 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig multipathd
-
-cmd="$1"
-shift
+loadconfig 

 [ -z "$CTDB_MONITOR_MPDEVICES" ] && {
 	exit 0
--- a/ctdb/config/events.d/31.clamd
+++ b/ctdb/config/events.d/31.clamd
@ -2,52 +2,45 @@
 # event script to manage clamd in a cluster environment

 . $CTDB_BASE/functions
-loadconfig ctdb

 detect_init_style

 case $CTDB_INIT_STYLE in
 	redhat)
-		CTDB_SERVICE_CLAMD="clamd"
-		CTDB_CONFIG_CLAMD="clamd"
-		;;
-	suse)
-		CTDB_SERVICE_CLAMD="clamav"
-		CTDB_CONFIG_CLAMD="clamav"
-		;;
-	debian)
-		CTDB_SERVICE_CLAMD="clamav"
-		CTDB_CONFIG_CLAMD="clamav"
+		service_name="clamd"
+		service_config="clamd"
 		;;
 	*)
-		# should not happen.
-		# for now use red hat style as default
-		CTDB_SERVICE_CLAMD="clamd"
-		CTDB_CONFIG_CLAMD="clamd"
+		service_name="clamav"
+		service_config="clamav"
 		;;
 esac

-loadconfig "${CTDB_CONFIG_CLAMD}"
+service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
+service_stop="service $service_name stop"

-[ "$CTDB_MANAGES_CLAMD" = "yes" ] || exit 0
+loadconfig

-cmd="$1"
-shift
+ctdb_start_stop_service
+
+is_ctdb_managed_service || exit 0

 case $cmd in 
     startup)
-        service "${CTDB_SERVICE_CLAMD}" stop > /dev/null 2>&1
-        service "${CTDB_SERVICE_CLAMD}" start
+	ctdb_service_start
        ;;

     shutdown)
-        service "${CTDB_SERVICE_CLAMD}" stop
+        ctdb_service_stop
        ;;

     monitor)
-        ctdb_check_unix_socket "clamd" ${CTDB_CLAMD_SOCKET}
+        ctdb_check_unix_socket ${CTDB_CLAMD_SOCKET} || exit $?
        ;;
+
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
-
--- a/ctdb/config/events.d/40.vsftpd
+++ b/ctdb/config/events.d/40.vsftpd
@ -3,8 +3,8 @@

 service_name="vsftpd"
 # make sure the service is stopped first
-service_start="service vsftpd stop > /dev/null 2>&1 ; service vsftpd start"
-service_stop="service vsftpd stop"
+service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
+service_stop="service $service_name stop"
 service_reconfigure="service $service_name restart"
 service_fail_limit=2
 service_tcp_ports=21
@ -54,6 +54,7 @@ case $cmd in
 	    fi
 	fi	
 	;;
+
    status)
 	ctdb_checkstatus || exit $?
 	;;
--- a/ctdb/config/events.d/41.httpd
+++ b/ctdb/config/events.d/41.httpd
@ -35,6 +35,8 @@ loadconfig

 ctdb_start_stop_service

+is_ctdb_managed_service || exit 0
+
 case $cmd in
    startup)
 	ctdb_service_start
@ -56,6 +58,10 @@ case $cmd in
 	    exit 1
 	fi
 	;;
+
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@ -1,71 +1,69 @@
 #!/bin/sh
 # script to manage nfs in a clustered environment

-. $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig nfs
-
-[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
-[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-
-
-case $cmd in 
-     startup)
+start_nfs() {
 	/bin/mkdir -p $CTDB_BASE/state/nfs
 	/bin/mkdir -p $CTDB_BASE/state/statd/ip
 	/bin/mkdir -p $STATD_SHARED_DIRECTORY
-
-	# make sure nfs is stopped before we start it, or it may get a bind error
 	startstop_nfs stop
 	startstop_nfs start
-	;;
-
-     shutdown)
-	startstop_nfs stop
-	exit 0
-	;;
-
-     takeip)
-	ip=$2
-
-	echo $ip >> $CTDB_BASE/state/statd/restart
-
-	# having a list of what IPs we have allows statd to do the right 
-	# thing via $CTDB_BASE/statd-callout
-	touch $CTDB_BASE/state/statd/ip/$ip
-	exit 0
-	;;
-
-     releaseip)
-	iface=$1
-	ip=$2
-	maskbits=$3
-
-	echo $ip >> $CTDB_BASE/state/statd/restart
-	/bin/rm -f $CTDB_BASE/state/statd/ip/$ip
-	exit 0
-	;;
-
-     recovered)
-	# if no IPs have changed then don't need to restart statd 
-	[ -f $CTDB_BASE/state/statd/restart ] || exit 0;
+}

+reconfigure_nfs() {
 	# always restart the lockmanager so that we start with a clusterwide
 	# graceperiod when ip addresses has changed
 	[ -x $CTDB_BASE/statd-callout ] && {
 		$CTDB_BASE/statd-callout notify &
 	} >/dev/null 2>&1

-	/bin/rm -f $CTDB_BASE/state/statd/restart
+}
+
+. $CTDB_BASE/functions
+
+service_name="nfs"
+service_start="start_nfs"
+service_stop="startstop_nfs stop"
+service_reconfigure="reconfigure_nfs"
+
+loadconfig
+
+[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
+
+ctdb_start_stop_service
+
+case $cmd in 
+     startup)
+	ctdb_service_start
+	;;
+
+     shutdown)
+	ctdb_service_stop
+	;;
+
+     takeip)
+	ctdb_service_set_reconfigure
+	touch $CTDB_BASE/state/statd/ip/$2
+	;;
+
+     releaseip)
+	ctdb_service_set_reconfigure
+	/bin/rm -f $CTDB_BASE/state/statd/ip/$2
+	;;
+
+     recovered)
+	# if we have taken or released any ips we must 
+	# restart the lock manager so that we enter a clusterwide grace period
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	fi
 	;;

      monitor)
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
+	fi
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it
 	rpcinfo -u localhost 100024 1 > /dev/null || {
@ -103,6 +101,9 @@ case $cmd in
 	}
       	;;

+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
--- a/ctdb/config/events.d/61.nfstickle
+++ b/ctdb/config/events.d/61.nfstickle
@ -1,25 +1,21 @@
 #!/bin/sh
 # ctdb event script for NFS tickle acks

-PATH=/bin:/usr/bin:$PATH
-
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig nfs

-cmd="$1"
-shift
+service_name="nfs"
+
+loadconfig
+
+# Build the start/reconfigure command only after loadconfig has run: the
+# double-quoted string expands $NFS_TICKLE_SHARED_DIRECTORY right here,
+# and that variable is presumably set by the config files loadconfig
+# sources, so assigning it any earlier would bake in an empty path.
+service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
+service_reconfigure=$service_start
+
+ctdb_start_stop_service

-[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
 [ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0

 case $cmd in 
     startup)
-	mkdir -p $CTDB_BASE/state/nfstickle
-	mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-	# we rely on fast tcp wait1 recycling
-	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
-	exit 0
+	ctdb_service_start
 	;;
 	
     takeip)
@ -31,46 +27,25 @@ case $cmd in
 	# send tickle acks for all the connections the old server had
 	for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
 		[ -f $f ] && cat $f | while read dest; do
-			dip=`echo $dest | cut -d: -f1`
-			dport=`echo $dest | cut -d: -f2`
 			# send three, in case of lost packets
-			echo "Sending NFS tickle ack for $ip to $dip:$dport"
+			echo "Sending NFS tickle ack for $ip to $dest"
 			for i in `seq 1 3`; do
-				ctdb tickle $dip:$dport $ip:2049
+				ctdb tickle $dest $ip:2049
 			done
 		done
 	done
-	exit 0
-	;;
-
-     releaseip)
-	exit 0
-	;;
-
-     recovered)
-	exit 0
-	;;
-
-     shutdown)
-	exit 0
 	;;

     monitor)
-        # always create these direcotries since NFS might be enabled at runtime
-	# and we dont want to restart ctdbd
-	mkdir -p $CTDB_BASE/state/nfstickle
-	mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-
 	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
 	rm -f $mydir/*
 	# record our connections to shared storage
 	netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
 		awk '{print $4" "$5}' | 
 		while read dest src; do
-			ip=`echo $dest | cut -d: -f1`
+			ip=${dest%:*}
 			echo $src >> $mydir/$ip
 		done
-	exit 0
 	;;

 esac
--- a/ctdb/config/events.d/70.iscsi
+++ b/ctdb/config/events.d/70.iscsi
@ -5,10 +5,7 @@ service_name="iscsi"

 . $CTDB_BASE/functions

-cmd="$1"
-shift
-
-[ "$CTDB_MANAGES_ISCSI" = "yes" ] || exit 0
+ctdb_start_stop_service

 [ -z "$CTDB_START_ISCSI_SCRIPTS" ] && {
 	echo "No iscsi start script directory found"
@ -16,15 +13,6 @@ shift
 }

 case $cmd in 
-     startup)
-	;;
-
-     takeip)
-	;;
-
-     releaseip)
-	;;
-
     recovered)
 	# block the iscsi port
 	iptables -I INPUT 1 -p tcp --dport 3260 -j DROP
@ -49,8 +37,8 @@ case $cmd in
 	done

 	# remove all iptables rules
-	while `iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null` ;  do
-		true;
+	while iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null ;  do
+	    :
 	done

 	;;
@ -61,9 +49,11 @@ case $cmd in
 	;;

     monitor)
-	[ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
-		ctdb_check_tcp_ports 3260 || exit $?
-	}
+	ctdb_check_tcp_ports 3260 || exit $?
+	;;
+
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac

--- a/ctdb/config/events.d/91.lvs
+++ b/ctdb/config/events.d/91.lvs
@ -2,6 +2,7 @@
 # script to manage the lvs ip multiplexer for a single public address cluster

 . $CTDB_BASE/functions
+
 loadconfig ctdb

 [ -z "$CTDB_LVS_PUBLIC_IP" ] && exit 0
@ -12,12 +13,6 @@ loadconfig ctdb
    exit 0
 }

-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
 case $cmd in 
     startup)
 	ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -42,12 +37,6 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-     takeip)
-	;;
-
-     releaseip)
-	;;
-
     recovered|stopped)
 	# kill off any tcp connections
 	ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -89,9 +78,6 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-      monitor)
-	;;
-
 esac

 exit 0
--- a/ctdb/config/events.d/99.timeout
+++ b/ctdb/config/events.d/99.timeout
@ -9,15 +9,12 @@ loadconfig ctdb

 [ "x$CTDB_RUN_TIMEOUT_MONITOR" = "xyes" ] || exit 0

-cmd="$1"
-shift
-
 case $cmd in
-	monitor)
-		TIMEOUT=$(ctdb listvars | grep EventScriptTimeout | awk	'{print $3}')
-		echo "sleeping for $((TIMEOUT * 2)) seconds..."
-		sleep $((TIMEOUT * 2))
-		;;
+    monitor)
+	TIMEOUT=$(ctdb listvars | awk '$1 == "EventScriptTimeout" {print $3}')
+	echo "sleeping for $((TIMEOUT * 2)) seconds..."
+	sleep $((TIMEOUT * 2))
+	;;
 esac

 exit 0
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@ -1,15 +1,20 @@
 # utility functions for ctdb event scripts

-PATH=/bin:/usr/bin:$PATH
+PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH

 #######################################
 # pull in a system config file, if any
 loadconfig() {
    name="$1"

-    if [ -n "$name" ] ; then
-	loadconfig ctdb
-	loadconfig "${service_config:-${service_name}}"
+    if [ "$name" != "ctdb" ] ; then
+	loadconfig "ctdb"
+    fi
+    if [ -z "$name" ] ; then
+	foo="${service_config:-${service_name}}"
+	if [ -n "$foo" ] ; then
+	    loadconfig "$foo"
+	fi
    fi

    if [ -f /etc/sysconfig/$name ]; then
@ -236,7 +241,7 @@ ctdb_check_tcp_ports() {

      [ $all_ok -eq 1 ] || {
 	  echo "ERROR: $service_name tcp port $p is not responding"
-	  exit 1
+	  return 1
      }
  done
 }
@ -246,8 +251,7 @@ ctdb_check_tcp_ports() {
-# usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
+# usage: ctdb_check_unix_socket <socket_path>
 ######################################################
 ctdb_check_unix_socket() {
-  service_name="$1"
-  socket_path="$2"
+  socket_path="$1"
  [ -z "$socket_path" ] && return;

  # check availability of netstat first
@ -273,7 +277,7 @@ ctdb_check_unix_socket() {

  [ $all_ok -eq 1 ] || {
    echo "ERROR: $service_name socket $socket_path not found"
-    exit 1
+    return 1
  }
 }

@ -493,15 +497,13 @@ remove_ip() {

 ########################################################
 # some simple logic for counting events - per eventscript
-# usage: ctdb_counter_init <tag>
-#        ctdb_counter_incr <tag>
-#        ctdb_counter_limit <tag> <limit>
-#        e.g. <tag> = "fail-count"
-# ctdb_counter_limit succeeds when count >= <limit>
+# usage: ctdb_counter_init
+#        ctdb_counter_incr
+#        ctdb_check_counter_limit <limit>
+# ctdb_check_counter_limit succeeds when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _eventscript="${0##*/}" # basename
-    _counter_file="$ctdb_fail_dir/${service_name:-${_eventscript}}"
+    _counter_file="$ctdb_fail_dir/$service_name"
    mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
@ -669,5 +671,8 @@ ctdb_service_stop ()
 	done
 }

+# A reasonable default is the basename of the eventscript.
+service_name="${0##*/}" # basename
+
 ctdb_event="$1" ; shift
 cmd="$ctdb_event"
--- a/ctdb/config/statd-callout
+++ b/ctdb/config/statd-callout
@ -28,7 +28,7 @@ case "$1" in
  add-client)
 	# the callout does not tell us to which ip the client connected
 	# so we must add it to all the ips that we serve
-        for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
+        for f in $CTDB_BASE/state/statd/ip/*; do
+	    # with no entries the glob is left as the literal pattern; skip it
+	    # so that we never create or touch anything under a "*" name
+	    [ -e "$f" ] || continue
 	    ip=`basename $f`
 	    [ -d $STATD_SHARED_DIRECTORY/$ip ] || /bin/mkdir $STATD_SHARED_DIRECTORY/$ip
 	    touch $STATD_SHARED_DIRECTORY/$ip/$2
@ -37,7 +37,7 @@ case "$1" in
  del-client)
 	# the callout does not tell us to which ip the client connected
-	# so we must add it to all the ips that we serve
+	# so we must remove it from all the ips that we serve
-        for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
+        for f in $CTDB_BASE/state/statd/ip/*; do
+	    # with no entries the glob is left as the literal pattern; skip it,
+	    # otherwise ip becomes "*" and the rm below matches every directory
+	    [ -e "$f" ] || continue
 	    ip=`basename $f`
 	    /bin/rm -f $STATD_SHARED_DIRECTORY/$ip/$2
 	done