2025-01-10 01:18:15 +03:00 · 2009-12-01 10:53:18 +11:00 · 2009-12-01 10:53:18 +11:00 · 569001afd0
commit 569001afd0
parent e17fa0fdee ad431c3520
26 changed files with 850 additions and 800 deletions
--- a/ctdb/config/ctdb.init
+++ b/ctdb/config/ctdb.init
@ -250,7 +250,7 @@ status() {
 }


-case "$1" in
+case "$cmd" in
    start)
  	start
 	;;
--- a/ctdb/config/events.d/00.ctdb
+++ b/ctdb/config/events.d/00.ctdb
@ -10,13 +10,7 @@
 #     recovered  : called when ctdb has finished a recovery event

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-# ensure we have /bin and /usr/bin in the path
-PATH=/bin:/usr/bin:$PATH
-
-cmd="$1"
-shift
+loadconfig

 case $cmd in 
     startup)
--- a/ctdb/config/events.d/01.reclock
+++ b/ctdb/config/events.d/01.reclock
@ -2,55 +2,43 @@
 # script to check accessibility to the reclock file on a node

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-# Count the number of intervals that have passed when we have tried to
-# but failed to stat the reclock file.  after third failure the node
-# becomes unhealthy after the twentieth failure the node we shutdown
-# ctdbd
-RECLOCKCOUNT="fail-count"
+loadconfig

 case $cmd in 
-     startup)
-	ctdb_counter_init "$RECLOCKCOUNT"
+    startup)
+	ctdb_counter_init
 	;;
+    
+    monitor)
+	RECLOCKFILE=$(ctdb -Y getreclock)

-      monitor)
-	ctdb_counter_incr "$RECLOCKCOUNT"
-	ctdb_counter_limit "$RECLOCKCOUNT" 200 && {
-		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
-		df
-		sleep 1
-		ctdb shutdown
+	ctdb_counter_incr
+	(ctdb_check_counter_limit 200 >/dev/null 2>&1) || {
+	    echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
+	    df
+	    sleep 1
+	    ctdb shutdown
 	}

-	RECLOCKFILE=`ctdb -Y getreclock`
 	[ -z "$RECLOCKFILE" ] && {
-		# we are not using a reclock file
-		ctdb_counter_init "$RECLOCKCOUNT"
-		exit 0
+	    # we are not using a reclock file
+	    ctdb_counter_init
+	    exit 0
 	}

 	# try stat the reclock file as a background process
 	# so that we dont block in case the cluster filesystem is unavailable
 	(
-		stat $RECLOCKFILE && {
-			# we could stat the file, reset the counter
-			ctdb_counter_init "$RECLOCKCOUNT"
-		}
+	    stat $RECLOCKFILE && {
+		# we could stat the file, reset the counter
+		ctdb_counter_init
+	    }
 	) >/dev/null 2>/dev/null &

-
-	ctdb_counter_limit "$RECLOCKCOUNT" 3 && {
-		echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Mark node UNHEALTHY."
-		df
-		exit 1;
-	}
+	ctdb_check_counter_limit 3 quiet
+	;;
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac

--- a/ctdb/config/events.d/10.interface
+++ b/ctdb/config/events.d/10.interface
@ -6,10 +6,7 @@
 # public interface

 . $CTDB_BASE/functions
-loadconfig ctdb
-
-cmd="$1"
-shift
+loadconfig

 [ -z "$CTDB_PUBLIC_ADDRESSES" ] && {
 	CTDB_PUBLIC_ADDRESSES=$CTDB_BASE/public_addresses
@ -177,10 +174,10 @@ case $cmd in
 	    esac
 	done
 	;;
-
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0

-
-
--- a/ctdb/config/events.d/11.natgw
+++ b/ctdb/config/events.d/11.natgw
@ -6,15 +6,10 @@
 #

 . $CTDB_BASE/functions
-loadconfig ctdb
+loadconfig

 [ -z "$CTDB_NATGW_PUBLIC_IFACE" ] && exit 0

-cmd="$1"
-shift
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-
 delete_all() {
 	remove_ip $CTDB_NATGW_PUBLIC_IP $CTDB_NATGW_PUBLIC_IFACE
 	remove_ip $CTDB_NATGW_PUBLIC_IP_HOST lo
@ -28,7 +23,7 @@ delete_all() {
 }

 case $cmd in 
-     startup)
+    startup)
 	# do not respond to ARPs that are for ip addresses with scope 'host'
 	echo 3 > /proc/sys/net/ipv4/conf/all/arp_ignore
 	# do not send out arp requests from loopback addresses
@ -37,13 +32,13 @@ case $cmd in
 	ctdb setnatgwstate on
 	;;

-     recovered|updatenatgw)
+    recovered|updatenatgw)
 	MYPNN=`ctdb pnn | cut -d: -f2`
 	NATGWMASTER=`ctdb natgwlist | head -1 | sed -e "s/ .*//"`
 	NATGWIP=`ctdb natgwlist | head -1 | sed -e "s/^[^ ]* *//"`

 	CTDB_NATGW_PUBLIC_IP_HOST=`echo $CTDB_NATGW_PUBLIC_IP | sed -e "s/\/.*/\/32/"`
-	if [ "$NATGWMASTER" = "-1" ]; then
+	if [ "$NATGWMASTER" = "-1" ]; then
 		echo "There is not NATGW master node"
 		exit 1
 	fi
@ -71,7 +66,7 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-     shutdown|removenatgw)
+    shutdown|removenatgw)
 	delete_all
 	;;

--- a/ctdb/config/events.d/11.routing
+++ b/ctdb/config/events.d/11.routing
@ -13,16 +13,12 @@
 # bond1 10.3.3.0/24 10.0.0.1

 . $CTDB_BASE/functions
-loadconfig ctdb
+loadconfig

 [ -f $CTDB_BASE/static-routes ] || {
    exit 0
 }

-cmd="$1"
-shift
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
 case $cmd in 
     takeip|releaseip)
 	iface=$1
--- a/ctdb/config/events.d/20.multipathd
+++ b/ctdb/config/events.d/20.multipathd
@ -6,14 +6,11 @@
 #   CTDB_MONITOR_MPDEVICES="device1 device2 ..."
 #

-PATH=/bin:/usr/bin:$PATH
-
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig multipathd

-cmd="$1"
-shift
+service_name="multipathd"
+
+loadconfig 

 [ -z "$CTDB_MONITOR_MPDEVICES" ] && {
 	exit 0
--- a/ctdb/config/events.d/31.clamd
+++ b/ctdb/config/events.d/31.clamd
@ -2,52 +2,45 @@
 # event script to manage clamd in a cluster environment

 . $CTDB_BASE/functions
-loadconfig ctdb

 detect_init_style

 case $CTDB_INIT_STYLE in
 	redhat)
-		CTDB_SERVICE_CLAMD="clamd"
-		CTDB_CONFIG_CLAMD="clamd"
-		;;
-	suse)
-		CTDB_SERVICE_CLAMD="clamav"
-		CTDB_CONFIG_CLAMD="clamav"
-		;;
-	debian)
-		CTDB_SERVICE_CLAMD="clamav"
-		CTDB_CONFIG_CLAMD="clamav"
+		service_name="clamd"
+		service_config="clamd"
 		;;
 	*)
-		# should not happen.
-		# for now use red hat style as default
-		CTDB_SERVICE_CLAMD="clamd"
-		CTDB_CONFIG_CLAMD="clamd"
+		service_name="clamav"
+		service_config="clamav"
 		;;
 esac

-loadconfig "${CTDB_CONFIG_CLAMD}"
+service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
+service_stop="service $service_name stop"

-[ "$CTDB_MANAGES_CLAMD" = "yes" ] || exit 0
+loadconfig

-cmd="$1"
-shift
+ctdb_start_stop_service
+
+is_ctdb_managed_service || exit 0

 case $cmd in 
     startup)
-        service "${CTDB_SERVICE_CLAMD}" stop > /dev/null 2>&1
-        service "${CTDB_SERVICE_CLAMD}" start
+	ctdb_service_start
        ;;

     shutdown)
-        service "${CTDB_SERVICE_CLAMD}" stop
+        ctdb_service_stop
        ;;

     monitor)
-        ctdb_check_unix_socket "clamd" ${CTDB_CLAMD_SOCKET}
+        ctdb_check_unix_socket ${CTDB_CLAMD_SOCKET} || exit $?
        ;;
+
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
-
--- a/ctdb/config/events.d/40.vsftpd
+++ b/ctdb/config/events.d/40.vsftpd
@ -2,67 +2,61 @@
 # event strict to manage vsftpd in a cluster environment

 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig vsftpd

-[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
+service_name="vsftpd"
+# make sure the service is stopped first
+service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
+service_stop="service $service_name stop"
+service_reconfigure="service $service_name restart"
+service_fail_limit=2
+service_tcp_ports=21

-cmd="$1"
-shift
+loadconfig

-# Count the number of monitor failures.  The cluster only becomes
-# unhealthy after 2 failures.
-VSFTPD_FAILS="fail-count"
-VSFTPD_LIMIT=2
+ctdb_start_stop_service
+
+is_ctdb_managed_service || exit 0

 case $cmd in 
     startup)
-	/bin/mkdir -p $CTDB_BASE/state/vsftpd
-
-	# make sure the service is stopped first
-	service vsftpd stop > /dev/null 2>&1
-	service vsftpd start
-
-	ctdb_counter_init "$VSFTPD_FAILS"
+	ctdb_service_start
 	;;

     shutdown)
-	service vsftpd stop
+	ctdb_service_stop
 	;;

-     takeip)
-	echo "restart" > $CTDB_BASE/state/vsftpd/restart
-	;;
-
-     releaseip)
-	echo "restart" > $CTDB_BASE/state/vsftpd/restart
+     takeip|releaseip)
+	ctdb_service_set_reconfigure
 	;;

     recovered)
 	# if we have taken or released any ips we must 
 	# restart vsftpd to ensure that all tcp connections are reset
-	[ -f $CTDB_BASE/state/vsftpd/restart ] && {
-		service vsftpd stop > /dev/null 2>&1
-		service vsftpd start
-		/bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
-		ctdb_counter_init "$VSFTPD_FAILS"
-	} >/dev/null 2>&1
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	fi
 	;;

     monitor)
-	# Subshell catches the "exit 1"
-	if (ctdb_check_tcp_ports "ftp" 21) ; then
-	    ctdb_counter_init "$VSFTPD_FAILS"
-	else
-	    ctdb_counter_incr "$VSFTPD_FAILS"
-	    if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
-		echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
-		exit 1
-	    else
-		echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
-	    fi
-		
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
 	fi
+
+	if [ -n "$service_tcp_ports" ] ; then
+	    if ctdb_check_tcp_ports $service_tcp_ports ; then
+		ctdb_counter_init
+	    else
+		ctdb_counter_incr
+		ctdb_check_counter_limit
+		exit 0 # only count 1 failure per monitor event
+	    fi
+	fi	
+	;;
+
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac

--- a/ctdb/config/events.d/41.httpd
+++ b/ctdb/config/events.d/41.httpd
@ -2,67 +2,66 @@
 # event script to manage httpd in a cluster environment

 . $CTDB_BASE/functions
-loadconfig ctdb

 detect_init_style

 case $CTDB_INIT_STYLE in
-	redhat)
-		CTDB_SERVICE_HTTP="httpd"
-		CTDB_CONFIG_HTTP="http"
-		;;
-	suse)
-		CTDB_SERVICE_HTTP="apache2"
-		CTDB_CONFIG_HTTP="apache2"
-		;;
-	debian)
-		CTDB_SERVICE_HTTP="apache2"
-		CTDB_CONFIG_HTTP="apache2"
-		;;
-	*)
-		# should not happen.
-		# for now use red hat style as default
-		CTDB_SERVICE_HTTP="httpd"
-		CTDB_CONFIG_HTTP="http"
-		;;
+    redhat)
+	service_name="httpd"
+	service_config="http"
+	;;
+    suse|debian|*)
+	service_name="apache2"
+	service_config="apache2"
+	;;
 esac

-loadconfig "${CTDB_CONFIG_HTTP}"
-
-[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
-
-cmd="$1"
-shift
-
 # RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks
 # semaphores.  This is a hack to clean them up.
 cleanup_httpd_semaphore_leak() {
-    killall -q -0 "${CTDB_SERVICE_HTTP}" ||
+    killall -q -0 "$service_name" ||
    for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
 	ipcrm -s $i
    done
 }

+##########
+
+service_start="cleanup_httpd_semaphore_leak; service $service_name start"
+service_stop="service $service_name stop; killall -q -9 $service_name || true"
+service_reconfigure="service $service_name restart"
+
+loadconfig
+
+ctdb_start_stop_service
+
+is_ctdb_managed_service || exit 0
+
 case $cmd in
    startup)
-	cleanup_httpd_semaphore_leak
-	service "${CTDB_SERVICE_HTTP}" start
+	ctdb_service_start
 	;;

    shutdown)
-	service "${CTDB_SERVICE_HTTP}" stop
-	killall -q -9 "${CTDB_SERVICE_HTTP}"
+	ctdb_service_stop
 	;;

-     monitor)
-	( ctdb_check_tcp_ports "http" 80 )
-	if [ $? -ne 0 ] ; then
+    monitor)
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
+	fi
+
+	if ! ctdb_check_tcp_ports 80 ; then
 	    echo "HTTPD is not running. Trying to restart HTTPD."
-	    cleanup_httpd_semaphore_leak
-	    service "${CTDB_SERVICE_HTTP}" start
+	    ctdb_service_start
 	    exit 1
 	fi
 	;;
+
+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
--- a/ctdb/config/events.d/50.samba
+++ b/ctdb/config/events.d/50.samba
@ -1,11 +1,7 @@
 #!/bin/sh
 # ctdb event script for Samba

-PATH=/bin:/usr/bin:$PATH
-
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig samba

 detect_init_style

@ -20,11 +16,6 @@ case $CTDB_INIT_STYLE in
 		CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
 		CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
 		;;
-	redhat)
-		CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
-		CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
-		CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
-		;;
 	*)
 		# should not happen, but for now use redhat style as default:
 		CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
@ -33,11 +24,69 @@ case $CTDB_INIT_STYLE in
 		;;
 esac

-cmd="$1"
-shift
+service_name="samba"
+service_start="start_samba"
+service_stop="stop_samba"
+
+loadconfig

 [ "$CTDB_MANAGES_SAMBA" = "yes" ] || [ "$CTDB_MANAGES_WINBIND" = "yes" ] || exit 0

+start_samba() {
+	# create the state directory for samba
+	/bin/mkdir -p $CTDB_BASE/state/samba
+
+	# make sure samba is not already started
+	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
+		service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
+		service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
+		killall -0 -q smbd && {
+		    sleep 1
+		    # make absolutely sure samba is dead
+		    killall -q -9 smbd
+		}
+
+		killall -0 -q nmbd && {
+		    sleep 1
+		    # make absolutely sure samba is dead
+		    killall -q -9 nmbd
+		}
+	}
+
+	# restart the winbind service
+	check_ctdb_manages_winbind
+	[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
+		service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
+		killall -0 -q winbindd && {
+		    sleep 1
+          	    # make absolutely sure winbindd is dead
+		    killall -q -9 winbindd
+		}
+		service "$CTDB_SERVICE_WINBIND" start
+	}
+
+	# start Samba service. Start it reniced, as under very heavy load 
+	# the number of smbd processes will mean that it leaves few cycles for
+	# anything else
+	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
+		nice_service "$CTDB_SERVICE_NMB" start
+		nice_service "$CTDB_SERVICE_SMB" start
+	}
+}
+
+stop_samba() {
+	# shutdown Samba when ctdb goes down
+	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
+		service "$CTDB_SERVICE_SMB" stop
+		service "$CTDB_SERVICE_NMB" stop
+	}
+
+	# stop the winbind service
+	check_ctdb_manages_winbind
+	[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
+		service "$CTDB_SERVICE_WINBIND" stop
+	}
+}

 # set default samba cleanup period - in minutes
 [ -z "$SAMBA_CLEANUP_PERIOD" ] && {
@ -130,6 +179,14 @@ check_ctdb_manages_winbind() {
  }
 }

+list_samba_shares ()
+{
+    testparm_cat |
+    sed -n -e 's@^[[:space:]]*path[[:space:]]*=[[:space:]]@@p' |
+    sed -e 's/"//g'
+}
+
+
 ###########################
 # periodic cleanup function
 periodic_cleanup() {
@ -141,72 +198,11 @@ periodic_cleanup() {

 case $cmd in 
     startup)
-	# create the state directory for samba
-	/bin/mkdir -p $CTDB_BASE/state/samba
-
-	# make sure samba is not already started
-	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
-		service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
-		service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
-		killall -0 -q smbd && {
-		    sleep 1
-		    # make absolutely sure samba is dead
-		    killall -q -9 smbd
-		}
-
-		killall -0 -q nmbd && {
-		    sleep 1
-		    # make absolutely sure samba is dead
-		    killall -q -9 nmbd
-		}
-	}
-
-	# restart the winbind service
-	check_ctdb_manages_winbind
-	[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
-		service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
-		killall -0 -q winbindd && {
-		    sleep 1
-          	    # make absolutely sure winbindd is dead
-		    killall -q -9 winbindd
-		}
-		service "$CTDB_SERVICE_WINBIND" start
-	}
-
-	# start Samba service. Start it reniced, as under very heavy load 
-	# the number of smbd processes will mean that it leaves few cycles for
-	# anything else
-	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
-		nice_service "$CTDB_SERVICE_NMB" start
-		nice_service "$CTDB_SERVICE_SMB" start
-	}
+	ctdb_service_start
 	;;
 	
-     takeip)
-	# nothing special for Samba
-	;;
-
-     releaseip)
-	# nothing special for Samba
-	;;
-
-     recovered)
-	# nothing special for Samba
-	exit 0
-	;;
-
     shutdown)
-	# shutdown Samba when ctdb goes down
-	[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
-		service "$CTDB_SERVICE_SMB" stop
-		service "$CTDB_SERVICE_NMB" stop
-	}
-
-	# stop the winbind service
-	check_ctdb_manages_winbind
-	[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
-		service "$CTDB_SERVICE_WINBIND" stop
-	}
+	ctdb_service_stop
 	;;

     monitor)
@ -232,20 +228,20 @@ case $cmd in
 				exit 1
 			    }
 			}
-
-			smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
-			ctdb_check_directories_probe "Samba" $smb_dirs || {
+			
+			list_samba_shares |
+			ctdb_check_directories_probe || {
 			    testparm_foreground_update
-			    smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
-			    ctdb_check_directories "Samba" $smb_dirs
-			}
+			    list_samba_shares |
+			    ctdb_check_directories
+			} || exit $?
 		}

 		smb_ports="$CTDB_SAMBA_CHECK_PORTS"
 		[ -z "$smb_ports" ] && {
 			smb_ports=`testparm_cat --parameter-name="smb ports"`
 		}
-		ctdb_check_tcp_ports "Samba" $smb_ports
+		ctdb_check_tcp_ports $smb_ports || exit $?
 	}

 	# check winbind is OK
@ -255,6 +251,9 @@ case $cmd in
 	}
 	;;

+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 # ignore unknown commands
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@ -1,71 +1,69 @@
 #!/bin/sh
 # script to manage nfs in a clustered environment

-. $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig nfs
-
-[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
-[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
-
-
-case $cmd in 
-     startup)
+start_nfs() {
 	/bin/mkdir -p $CTDB_BASE/state/nfs
 	/bin/mkdir -p $CTDB_BASE/state/statd/ip
 	/bin/mkdir -p $STATD_SHARED_DIRECTORY
-
-	# make sure nfs is stopped before we start it, or it may get a bind error
 	startstop_nfs stop
 	startstop_nfs start
-	;;
-
-     shutdown)
-	startstop_nfs stop
-	exit 0
-	;;
-
-     takeip)
-	ip=$2
-
-	echo $ip >> $CTDB_BASE/state/statd/restart
-
-	# having a list of what IPs we have allows statd to do the right 
-	# thing via $CTDB_BASE/statd-callout
-	touch $CTDB_BASE/state/statd/ip/$ip
-	exit 0
-	;;
-
-     releaseip)
-	iface=$1
-	ip=$2
-	maskbits=$3
-
-	echo $ip >> $CTDB_BASE/state/statd/restart
-	/bin/rm -f $CTDB_BASE/state/statd/ip/$ip
-	exit 0
-	;;
-
-     recovered)
-	# if no IPs have changed then don't need to restart statd 
-	[ -f $CTDB_BASE/state/statd/restart ] || exit 0;
+}

+reconfigure_nfs() {
 	# always restart the lockmanager so that we start with a clusterwide
 	# graceperiod when ip addresses has changed
 	[ -x $CTDB_BASE/statd-callout ] && {
 		$CTDB_BASE/statd-callout notify &
 	} >/dev/null 2>&1

-	/bin/rm -f $CTDB_BASE/state/statd/restart
+}
+
+. $CTDB_BASE/functions
+
+service_name="nfs"
+service_start="start_nfs"
+service_stop="startstop_nfs stop"
+service_reconfigure="reconfigure_nfs"
+
+loadconfig
+
+[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
+
+ctdb_start_stop_service
+
+case $cmd in 
+     startup)
+	ctdb_service_start
+	;;
+
+     shutdown)
+	ctdb_service_stop
+	;;
+
+     takeip)
+	ctdb_service_set_reconfigure
+	touch $CTDB_BASE/state/statd/ip/$2
+	;;
+
+     releaseip)
+	ctdb_service_set_reconfigure
+	/bin/rm -f $CTDB_BASE/state/statd/ip/$2
+	;;
+
+     recovered)
+	# if we have taken or released any ips we must 
+	# restart the lock manager so that we enter a clusterwide grace period
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	fi
 	;;

      monitor)
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
+	fi
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it
 	rpcinfo -u localhost 100024 1 > /dev/null || {
@ -83,13 +81,15 @@ case $cmd in

 	# and that its directories are available
 	[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
-	    nfs_dirs=$(exportfs | grep -v '^#' | grep '^/' | awk {'print $1;'})
-	    ctdb_check_directories "nfs" $nfs_dirs
-	}
+	    exportfs | grep -v '^#' | grep '^/' |
+	    sed -e 's/[[:space:]]*[^[:space:]]*$//' |
+	    ctdb_check_directories
+	} || exit $?

 	# check that lockd responds to rpc requests
 	ctdb_check_rpc "lockd" 100021 1
-	ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY
+	echo "$STATD_SHARED_DIRECTORY" | ctdb_check_directories "statd" || \
+	    exit $?

 	# mount needs special handling since it is sometimes not started
 	# correctly on RHEL5
@ -103,6 +103,9 @@ case $cmd in
 	}
       	;;

+    status)
+	ctdb_checkstatus || exit $?
+	;;
 esac

 exit 0
--- a/ctdb/config/events.d/61.nfstickle
+++ b/ctdb/config/events.d/61.nfstickle
@ -1,25 +1,21 @@
 #!/bin/sh
 # ctdb event script for NFS tickle acks

-PATH=/bin:/usr/bin:$PATH
-
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig nfs

-cmd="$1"
-shift
+service_name="nfs"
+# config must be loaded first: service_start expands NFS_TICKLE_SHARED_DIRECTORY when assigned
+loadconfig
+service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
+service_reconfigure=$service_start
+
+ctdb_start_stop_service

-[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
 [ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0

 case $cmd in 
     startup)
-	mkdir -p $CTDB_BASE/state/nfstickle
-	mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-	# we rely on fast tcp wait1 recycling
-	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
-	exit 0
+	ctdb_service_start
 	;;
 	
     takeip)
@ -31,46 +27,25 @@ case $cmd in
 	# send tickle acks for all the connections the old server had
 	for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
 		[ -f $f ] && cat $f | while read dest; do
-			dip=`echo $dest | cut -d: -f1`
-			dport=`echo $dest | cut -d: -f2`
 			# send three, in case of lost packets
-			echo "Sending NFS tickle ack for $ip to $dip:$dport"
+			echo "Sending NFS tickle ack for $ip to $dest"
 			for i in `seq 1 3`; do
-				ctdb tickle $dip:$dport $ip:2049
+				ctdb tickle $dest $ip:2049
 			done
 		done
 	done
-	exit 0
-	;;
-
-     releaseip)
-	exit 0
-	;;
-
-     recovered)
-	exit 0
-	;;
-
-     shutdown)
-	exit 0
 	;;

     monitor)
-        # always create these direcotries since NFS might be enabled at runtime
-	# and we dont want to restart ctdbd
-	mkdir -p $CTDB_BASE/state/nfstickle
-	mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-
 	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
 	rm -f $mydir/*
 	# record our connections to shared storage
 	netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
 		awk '{print $4" "$5}' | 
 		while read dest src; do
-			ip=`echo $dest | cut -d: -f1`
+			ip=${dest%:*}
 			echo $src >> $mydir/$ip
 		done
-	exit 0
 	;;

 esac
--- a/ctdb/config/events.d/70.iscsi
+++ b/ctdb/config/events.d/70.iscsi
@ -1,16 +1,11 @@
 #!/bin/sh
 # ctdb event script for TGTD based iSCSI

-PATH=/bin:/usr/bin:$PATH
-
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig iscsi

-cmd="$1"
-shift
+service_name="iscsi"

-[ "$CTDB_MANAGES_ISCSI" = "yes" ] || exit 0
+ctdb_start_stop_service

 [ -z "$CTDB_START_ISCSI_SCRIPTS" ] && {
 	echo "No iscsi start script directory found"
@ -18,15 +13,6 @@ shift
 }

 case $cmd in 
-     startup)
-	;;
-
-     takeip)
-	;;
-
-     releaseip)
-	;;
-
     recovered)
 	# block the iscsi port
 	iptables -I INPUT 1 -p tcp --dport 3260 -j DROP
@ -51,8 +37,8 @@ case $cmd in
 	done

 	# remove all iptables rules
-	while `iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null` ;  do
-		true;
+	while iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null ;  do
+	    :
 	done

 	;;
@ -63,9 +49,11 @@ case $cmd in
 	;;

     monitor)
-	[ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
-		ctdb_check_tcp_ports "iscsi" 3260
-	}
+	ctdb_check_tcp_ports 3260 || exit $?
+	;;
+
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac

--- a/ctdb/config/events.d/91.lvs
+++ b/ctdb/config/events.d/91.lvs
@ -2,6 +2,7 @@
 # script to manage the lvs ip multiplexer for a single public address cluster

 . $CTDB_BASE/functions
+
 loadconfig ctdb

 [ -z "$CTDB_LVS_PUBLIC_IP" ] && exit 0
@ -12,12 +13,6 @@ loadconfig ctdb
    exit 0
 }

-
-cmd="$1"
-shift
-
-PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
-
 case $cmd in 
     startup)
 	ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -42,12 +37,6 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-     takeip)
-	;;
-
-     releaseip)
-	;;
-
     recovered|stopped)
 	# kill off any tcp connections
 	ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -89,9 +78,6 @@ case $cmd in
 	echo 1 > /proc/sys/net/ipv4/route/flush
 	;;

-      monitor)
-	;;
-
 esac

 exit 0
--- a/ctdb/config/events.d/99.timeout
+++ b/ctdb/config/events.d/99.timeout
@ -7,17 +7,14 @@
 . $CTDB_BASE/functions
 loadconfig ctdb

-[ "x$CTDB_RUN_TIMEOUT_MONITOR" = "xyes" ] || exit 0
-
-cmd="$1"
-shift
+[ "$CTDB_RUN_TIMEOUT_MONITOR" = "yes" ] || exit 0

 case $cmd in
-	monitor)
-		TIMEOUT=$(ctdb listvars | grep EventScriptTimeout | awk	'{print $3}')
-		echo "sleeping for $((TIMEOUT * 2)) seconds..."
-		sleep $((TIMEOUT * 2))
-		;;
+    monitor)
+	TIMEOUT=$(ctdb listvars | awk '$1 == "EventScriptTimeout" {print $3}')
+	echo "sleeping for $((TIMEOUT * 2)) seconds..."
+	sleep $((TIMEOUT * 2))
+	;;
 esac

 exit 0
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@ -1,15 +1,28 @@
 # utility functions for ctdb event scripts

+PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
+
 #######################################
 # pull in a system config file, if any
 loadconfig() {
-    name="$1"
-    if [ -f /etc/sysconfig/$name ]; then
-	. /etc/sysconfig/$name
-    elif [ -f /etc/default/$name ]; then
-	. /etc/default/$name
-    elif [ -f $CTDB_BASE/sysconfig/$name ]; then
-	. $CTDB_BASE/sysconfig/$name
+
+    if [ "$1" != "ctdb" ] ; then
+	loadconfig "ctdb"
+    fi
+
+    if [ -z "$1" ] ; then
+	foo="${service_config:-${service_name}}"
+	if [ -n "$foo" ] ; then
+	    loadconfig "$foo"
+	fi
+    fi
+
+    if [ -f /etc/sysconfig/$1 ]; then
+	. /etc/sysconfig/$1
+    elif [ -f /etc/default/$1 ]; then
+	. /etc/default/$1
+    elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
+	. $CTDB_BASE/sysconfig/$1
    fi
 }

@ -31,37 +44,28 @@ detect_init_style() {
 ######################################################
 # simulate /sbin/service on platforms that don't have it
 service() { 
-  service_name="$1"
-  op="$2"
+  _service_name="$1"
+  _op="$2"

  # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
+  [ -z "$_service_name" ] && return

  if [ -x /sbin/service ]; then
-      /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      /etc/rc.d/init.d/$service_name "$op"
+      $_nice /sbin/service "$_service_name" "$_op"
+  elif [ -x /etc/init.d/$_service_name ]; then
+      $_nice /etc/init.d/$_service_name "$_op"
+  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
+      $_nice /etc/rc.d/init.d/$_service_name "$_op"
  fi
 }

 ######################################################
 # simulate /sbin/service (niced) on platforms that don't have it
 nice_service() { 
-  service_name="$1"
-  op="$2"
-
  # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
+  [ -z "$1" ] && return

-  if [ -x /sbin/service ]; then
-      nice /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      nice /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      nice /etc/rc.d/init.d/$service_name "$op"
-  fi
+    _nice="nice" ; service "$@" ; _rc=$? ; _nice="" ; return $_rc
 }

 ######################################################
@ -110,57 +114,30 @@ ctdb_wait_tcp_ports() {
 	          (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
 	      else 
 		  echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-		  return
+		  return 127
 	      fi
 	  done
 	  [ $all_ok -eq 1 ] || sleep 1
 	  ctdb status > /dev/null 2>&1 || {
  		echo "ctdb daemon has died. Exiting tcp wait $service_name"
-		exit 1
+		return 1
 	  }
  done
  echo "Local tcp services for $service_name are up"
 }


-
-######################################################
-# wait for a set of directories
-# usage: ctdb_wait_directories SERVICE_NAME <directories...>
-######################################################
-ctdb_wait_directories() {
-  service_name="$1"
-  shift
-  wait_dirs="$*"
-  [ -z "$wait_dirs" ] && return;
-  all_ok=0
-  echo "Waiting for local directories for $service_name"
-  while [ $all_ok -eq 0 ]; do
-  	  all_ok=1
-  	  for d in $wait_dirs; do
-  	      [ -d $d ] || all_ok=0
-	  done
-	  [ $all_ok -eq 1 ] || sleep 1
-	  ctdb status > /dev/null 2>&1 || {
-  		echo "ctdb daemon has died. Exiting directory wait for $service_name"
-		exit 1
-	  }
-  done
-  echo "Local directories for $service_name are available"
-}
-
-
 ######################################################
 # check that a rpc server is registered with portmap
 # and responding to requests
 # usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
 ######################################################
 ctdb_check_rpc() {
-    service_name="$1"
+    progname="$1"
    prognum="$2"
    version="$3"
    rpcinfo -u localhost $prognum $version > /dev/null || {
-	    echo "ERROR: $service_name not responding to rpc requests"
+	    echo "ERROR: $progname not responding to rpc requests"
 	    exit 1
    }
 }
@ -171,18 +148,15 @@ ctdb_check_rpc() {
 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
 ######################################################
 ctdb_check_directories_probe() {
-  service_name="$1"
-  shift
-  for d ; do
-      case "$d" in
-	  *%*)
-	      continue
-	      ;;
-	  *)
-	      [ -d "$d" ] || return 1
-      esac
-  done
-  return 0
+    while IFS="" read -r d ; do
+	case "$d" in
+	    *%*)
+		continue
+		;;
+	    *)
+		[ -d "$d" ] || return 1
+	esac
+    done
 }

 ######################################################
@ -190,62 +164,27 @@ ctdb_check_directories_probe() {
 # usage: ctdb_check_directories SERVICE_NAME <directories...>
 ######################################################
 ctdb_check_directories() {
-  # Note: ctdb_check_directories_probe sets both $service_name and $d.
-  ctdb_check_directories_probe "$@" || {
-      echo "ERROR: $service_name directory $d not available"
-      exit 1
-  }
+    n="${1:-${service_name}}"
+    ctdb_check_directories_probe || {
+	echo "ERROR: $n directory \"$d\" not available"
+	exit 1
+    }
 }

 ######################################################
 # check a set of tcp ports
-# usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
+# usage: ctdb_check_tcp_ports <ports...>
 ######################################################
 ctdb_check_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;

-  # check availability of netcat or netstat first
-  NETCAT=""
-  NETSTAT=""
-  if [ -x /usr/bin/netstat ]; then
-      NETSTAT=/usr/bin/netstat
-  elif [ -x /bin/netstat ]; then
-      NETSTAT=/bin/netstat
-  elif [ -x /usr/bin/netcat ]; then
-      NETCAT=/usr/bin/netcat
-  elif [ -x /bin/netcat ]; then
-      NETCAT=/bin/netcat
-  elif [ -x /usr/bin/nc ]; then
-      NETCAT=/usr/bin/nc
-  elif [ -x /bin/nc ]; then
-      NETCAT=/bin/nc
-  fi
-
-  for p in $wait_ports; do
-      all_ok=1
-
-      if [ "x${NETCAT}" != "x" ]; then
-          ${NETCAT} -z 127.0.0.1 $p > /dev/null || all_ok=0
-      elif [ "x${NETSTAT}" != "x" ]; then
-          if ! ${NETSTAT} -a -n | egrep "0.0.0.0:$p .*LISTEN" > /dev/null ; then
-              if ! ${NETSTAT} -a -n | egrep ":::$p .*LISTEN" > /dev/null ; then
-                  all_ok=0
-              fi
-          fi
-      else
-          echo "ERROR: neither netcat (or nc) nor netstat found!"
-          echo "ERROR: can't monitor ${service_name} tcp port ${p}"
-          all_ok=0
-      fi
-
-      [ $all_ok -eq 1 ] || {
-	  echo "ERROR: $service_name tcp port $p is not responding"
-	  exit 1
-      }
-  done
+    for p ; do
+	if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
+            if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
+		echo "ERROR: $service_name tcp port $p is not responding"
+		return 1
+            fi
+	fi
+    done
 }

 ######################################################
@ -253,35 +192,13 @@ ctdb_check_tcp_ports() {
 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 ######################################################
 ctdb_check_unix_socket() {
-  service_name="$1"
-  socket_path="$2"
-  [ -z "$socket_path" ] && return;
+    socket_path="$1"
+    [ -z "$socket_path" ] && return

-  # check availability of netstat first
-  NETSTAT=""
-  if [ -x $(type -p netstat) ]; then
-        NETSTAT=$(type -p netstat)
-  elif [ -x /usr/bin/netstat ]; then
-      NETSTAT=/usr/bin/netstat
-  elif [ -x /bin/netstat ]; then
-      NETSTAT=/bin/netstat
-  fi
-
-  all_ok=1
-  if [ "x$NETSTAT" != "x" ]; then
-    if $NETSTAT -l -a -n | grep -qE "^unix.*LISTEN.*${socket_path}$"; then
-      all_ok=1
-    else
-      all_ok=0
+    if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
+        echo "ERROR: $service_name socket $socket_path not found"
+        return 1
    fi
-    else
-    [ -S ${socket_path} ] && all_ok=1 || all_ok=0
-  fi
-
-  [ $all_ok -eq 1 ] || {
-    echo "ERROR: $service_name socket $socket_path not found"
-    exit 1
-  }
 }

 ######################################################
@ -500,38 +417,175 @@ remove_ip() {

 ########################################################
 # some simple logic for counting events - per eventscript
-# usage: ctdb_counter_init <tag>
-#        ctdb_counter_incr <tag>
-#        ctdb_counter_limit <tag> <limit>
-#        e.g. <tag> = "fail-count"
-# ctdb_counter_limit succeeds when count >= <limit>
+# usage: ctdb_counter_init
+#        ctdb_counter_incr
+#        ctdb_check_counter_limit <limit>
+# ctdb_check_counter_limit fails (prints an error and exits 1) when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _tag="$1"
-    _eventscript="${0##*/}" # basename
-
-    _counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
+    _counter_file="$ctdb_fail_dir/$service_name"
    mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common

-    echo -n > "$_counter_file"
+    >"$_counter_file"
 }
 ctdb_counter_incr () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common

    # unary counting!
    echo -n 1 >> "$_counter_file"
 }
-ctdb_counter_limit () {
-    _ctdb_counter_common "$1"
-    _limit="$2"
+ctdb_check_counter_limit () {
+    _ctdb_counter_common
+
+    _limit="${1:-${service_fail_limit}}"
+    _quiet="$2"

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
-    [ $_size -ge $_limit ]
+    if [ "$_size" -ge "$_limit" ] ; then	# limit reached: abort the eventscript
+	echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
+	exit 1
+    elif [ "$_size" -gt 0 ] && [ -z "$_quiet" ] ; then	# below limit; warn unless $2 is set
+	echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
+    fi
 }
+########################################################
+
+ctdb_spool_dir="/var/spool/ctdb"
+ctdb_status_dir="$ctdb_spool_dir/status"
+ctdb_fail_dir="$ctdb_spool_dir/failcount"
+ctdb_active_dir="$ctdb_spool_dir/active"
+
+log_status_cat ()
+{   # $1 = node state to report, $2 = file whose contents are appended as the reason
+    echo "node is \"$1\", problem with \"${script_name}\": $(cat "$2")"
+}
+
+ctdb_checkstatus ()
+{   # Report the status recorded for $script_name: returns 0, 1 or 2 (see below).
+    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
+	log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
+	return 1	# an "unhealthy" flag file is readable
+    elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
+	log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
+	return 2	# a "banned" flag file is readable
+    else
+	return 0	# neither flag file is readable
+    fi
+}
+
+ctdb_setstatus ()
+{   # $1 = status name; $2 = file whose contents are stored for unhealthy/banned
+    d="$ctdb_status_dir/$script_name"
+    case "$1" in
+	unhealthy|banned)
+	    mkdir -p "$d"
+	    cat "$2" >"$d/$1"	# flag file is named after the status
+	    ;;
+	*)	# any other status removes both flag files
+	    for i in "banned" "unhealthy" ; do
+		rm -f "$d/$i"
+	    done
+	    ;;
+    esac
+}
+
+ctdb_service_needs_reconfigure ()
+{
+    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
+}
+
+ctdb_service_set_reconfigure ()
+{
+    d="$ctdb_status_dir/$service_name"
+    mkdir -p "$d"
+    >"$d/reconfigure"
+}
+
+ctdb_service_unset_reconfigure ()
+{
+    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+}
+
+ctdb_service_reconfigure ()
+{   # Run $service_reconfigure if set, else restart the service; then clear state.
+    if [ -n "$service_reconfigure" ] ; then
+	eval "$service_reconfigure"	# quoted: no word splitting/globbing before eval
+    else
+	service "$service_name" restart
+    fi
+    ctdb_service_unset_reconfigure
+    ctdb_counter_init
+}
+
+ctdb_compat_managed_service ()
+{
+    if [ "$1" = "yes" ] ; then
+	t="$t $2 "
+    fi
+}
+
+is_ctdb_managed_service ()
+{   # Succeeds if $service_name is listed in $CTDB_MANAGED_SERVICES or enabled via CTDB_MANAGES_*.
+    t=" $CTDB_MANAGED_SERVICES "
+
+    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
+    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
+
+    # Returns 0 if "<space>$service_name<space>" appears in $t
+    [ "${t#* ${service_name} }" != "${t}" ]
+}
+
+ctdb_start_stop_service ()
+{   # Start a managed service that is not yet active; stop an unmanaged one that still is.
+    _active="$ctdb_active_dir/$service_name"
+
+    if is_ctdb_managed_service ; then
+	if ! [ -e "$_active" ] ; then
+	    echo "Starting service $service_name"
+	    ctdb_service_start || exit $?
+	    mkdir -p "$ctdb_active_dir"
+	    touch "$_active"	# marker file: service was started by ctdb
+	    exit 0
+	fi
+    else
+	if [ -e "$_active" ] ; then
+	    echo "Stopping service $service_name"
+	    ctdb_service_stop || exit $?
+	    rm -f "$_active"
+	fi
+	exit 0	# unmanaged service: nothing further for the eventscript to do
+    fi
+}
+
+ctdb_service_start ()
+{   # Run $service_start if set, else "service <name> start"; then reset the fail counter.
+    if [ -n "$service_start" ] ; then
+	eval "$service_start"	# quoted: no word splitting/globbing before eval
+    else
+	service "$service_name" start
+    fi
+    ctdb_counter_init
+}
+
+ctdb_service_stop ()
+{   # Run $service_stop if set, else "service <name> stop".
+    if [ -n "$service_stop" ] ; then
+	eval "$service_stop"	# quoted: no word splitting/globbing before eval
+    else
+	service "$service_name" stop
+    fi
+}
+
 ########################################################
 # load a site local config file
 ########################################################
@ -546,4 +600,21 @@ ctdb_counter_limit () {
 	done
 }

+# A reasonable default is the basename of the eventscript.
+script_name="${0##*/}" # basename
+service_name="$script_name"
+service_fail_limit=1

+ctdb_event="$1" ; shift
+cmd="$ctdb_event"
+
+case "$ctdb_event" in
+    status)
+	ctdb_checkstatus
+	exit
+	;;
+    setstatus)
+	ctdb_setstatus "$@"
+	exit
+	;;
+esac
--- a/ctdb/config/statd-callout
+++ b/ctdb/config/statd-callout
@ -28,7 +28,7 @@ case "$1" in
  add-client)
 	# the callout does not tell us to which ip the client connected
 	# so we must add it to all the ips that we serve
-        for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
+        for f in $CTDB_BASE/state/statd/ip/*; do [ -e "$f" ] || continue # unmatched glob stays literal
 	    ip=`basename $f`
 	    [ -d $STATD_SHARED_DIRECTORY/$ip ] || /bin/mkdir $STATD_SHARED_DIRECTORY/$ip
 	    touch $STATD_SHARED_DIRECTORY/$ip/$2
@ -37,7 +37,7 @@ case "$1" in
  del-client)
 	# the callout does not tell us to which ip the client connected
 	# so we must add it to all the ips that we serve
-        for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
+        for f in $CTDB_BASE/state/statd/ip/*; do [ -e "$f" ] || continue # unmatched glob stays literal
 	    ip=`basename $f`
 	    /bin/rm -f $STATD_SHARED_DIRECTORY/$ip/$2
 	done
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@ -129,6 +129,7 @@ struct ctdb_tunable {
 	uint32_t vacuum_min_interval;
 	uint32_t vacuum_max_interval;
 	uint32_t max_queue_depth_drop_msg;
+	uint32_t use_status_events_for_monitoring;
 };

 /*
@ -450,9 +451,13 @@ struct ctdb_context {
 	uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
 	uint32_t *recd_ping_count;
 	TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
-	TALLOC_CTX *script_monitor_ctx; /* a context where we store results while running the monitor event */
-	TALLOC_CTX *last_monitor_ctx; 
-	TALLOC_CTX *event_script_ctx;  /* non-monitoring events */
+
+	TALLOC_CTX *monitor_event_script_ctx;
+	TALLOC_CTX *other_event_script_ctx;
+
+	struct ctdb_monitor_script_status_ctx *current_monitor_status_ctx;
+	struct ctdb_monitor_script_status_ctx *last_monitor_status_ctx;
+
 	TALLOC_CTX *banning_ctx;
 };

@ -856,6 +861,19 @@ enum ctdb_trans2_commit_error {
 	CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */
 };

+/* Different calls to event scripts; keep in sync with call_names[] in server/eventscript.c. */
+enum ctdb_eventscript_call {
+	CTDB_EVENT_STARTUP,		/* CTDB starting up: no args. */
+	CTDB_EVENT_START_RECOVERY,	/* CTDB recovery starting: no args. */
+	CTDB_EVENT_RECOVERED,		/* CTDB recovery finished: no args. */
+	CTDB_EVENT_TAKE_IP,		/* IP taken: interface, IP address, netmask bits. */
+	CTDB_EVENT_RELEASE_IP,		/* IP released: interface, IP address, netmask bits. */
+	CTDB_EVENT_STOPPED,		/* This node is stopped: no args. */
+	CTDB_EVENT_MONITOR,		/* Please check if service is healthy: no args. */
+	CTDB_EVENT_STATUS,		/* Report service status: no args. */
+	CTDB_EVENT_SHUTDOWN,		/* CTDB shutting down: no args. */
+	CTDB_EVENT_RELOAD		/* magic */
+};

 /* internal prototypes */
 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
@ -1324,13 +1342,16 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata);

 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
-int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call);
+int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
+			   const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
-			       struct timeval timeout,
 			       TALLOC_CTX *mem_ctx,
 			       void (*callback)(struct ctdb_context *, int, void *),
 			       void *private_data,
-			       const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
+			       bool from_user,
+			       enum ctdb_eventscript_call call,
+			       const char *fmt, ...) PRINTF_ATTRIBUTE(7,8);
 void ctdb_release_all_ips(struct ctdb_context *ctdb);

 void set_nonblocking(int fd);
--- a/ctdb/server/ctdb_control.c
+++ b/ctdb/server/ctdb_control.c
@ -286,7 +286,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		if (ctdb->methods != NULL) {
 			ctdb->methods->shutdown(ctdb);
 		}
-		ctdb_event_script(ctdb, "shutdown");
+		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
 		DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command. Stopping CTDB daemon.\n"));
 		exit(0);

--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@ -223,9 +223,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 	
 	if (!ctdb->done_startup) {
 		ret = ctdb_event_script_callback(ctdb, 
-						 timeval_set(ctdb->tunable.script_timeout, 0),
 						 ctdb->monitor->monitor_context, ctdb_startup_callback, 
-						 ctdb, "startup");
+						 ctdb, false,
+						 CTDB_EVENT_STARTUP, "%s", "");
 	} else {
 		int i;
 		int skip_monitoring = 0;
@ -248,9 +248,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 			return;
 		} else {
 			ret = ctdb_event_script_callback(ctdb, 
-					timeval_set(ctdb->tunable.script_timeout, 0),
 					ctdb->monitor->monitor_context, ctdb_health_callback,
-					ctdb, "monitor");
+					ctdb, false,
+					CTDB_EVENT_MONITOR, "%s", "");
 		}
 	}

--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@ -962,11 +962,11 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,

 	ctdb_disable_monitoring(ctdb);

-	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
-					 state, 
+	ret = ctdb_event_script_callback(ctdb, state,
 					 ctdb_end_recovery_callback, 
-					 state, "recovered");
+					 state, 
+					 false,
+					 CTDB_EVENT_RECOVERED, "%s", "");

 	if (ret != 0) {
 		ctdb_enable_monitoring(ctdb);
@ -1016,11 +1016,11 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,

 	ctdb_disable_monitoring(ctdb);

-	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
-					 state, 
+	ret = ctdb_event_script_callback(ctdb, state,
 					 ctdb_start_recovery_callback, 
-					 state, "startrecovery");
+					 state, false,
+					 CTDB_EVENT_START_RECOVERY,
+					 "%s", "");

 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n"));
@ -1160,7 +1160,7 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
 	if (ctdb->methods != NULL) {
 		ctdb->methods->shutdown(ctdb);
 	}
-	ctdb_event_script(ctdb, "shutdown");
+	ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
 	DEBUG(DEBUG_ERR, ("Recovery daemon ping timeout. Daemon has been shut down.\n"));
 	exit(0);
 }
@ -1230,11 +1230,10 @@ int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_contro

 	ctdb_disable_monitoring(ctdb);

-	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
-					 state, 
+	ret = ctdb_event_script_callback(ctdb, state,
 					 ctdb_stop_node_callback, 
-					 state, "stopped");
+					 state, false,
+					 CTDB_EVENT_STOPPED, "%s", "");

 	if (ret != 0) {
 		ctdb_enable_monitoring(ctdb);
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@ -3288,7 +3288,7 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
 		if (ctdb->methods != NULL) {
 			ctdb->methods->shutdown(ctdb);
 		}
-		ctdb_event_script(ctdb, "shutdown");
+		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);

 		exit(10);	
 	}
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@ -235,9 +235,10 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
 		vnn->iface));

 	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
 					 state, takeover_ip_callback, state,
-					 "takeip %s %s %u",
+					 false,
+					 CTDB_EVENT_TAKE_IP,
+					 "%s %s %u",
 					 vnn->iface, 
 					 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
 					 vnn->public_netmask_bits);
@ -391,9 +392,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
 	state->vnn   = vnn;

 	ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
 					 state, release_ip_callback, state,
-					 "releaseip %s %s %u",
+					 false,
+					 CTDB_EVENT_RELEASE_IP,
+					 "%s %s %u",
 					 vnn->iface, 
 					 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
 					 vnn->public_netmask_bits);
@ -1382,7 +1384,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
 		if (vnn->pnn == ctdb->pnn) {
 			vnn->pnn = -1;
 		}
-		ctdb_event_script(ctdb, "releaseip %s %s %u",
+		ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
 				  vnn->iface, 
 				  talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
 				  vnn->public_netmask_bits);
@ -2122,9 +2124,10 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
 			DLIST_REMOVE(ctdb->vnn, vnn);

 			ret = ctdb_event_script_callback(ctdb, 
-					 timeval_set(ctdb->tunable.script_timeout, 0),
 					 mem_ctx, delete_ip_callback, mem_ctx,
-					 "releaseip %s %s %u",
+					 false,
+					 CTDB_EVENT_RELEASE_IP,
+					 "%s %s %u",
 					 vnn->iface, 
 					 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
 					 vnn->public_netmask_bits);
--- a/ctdb/server/ctdb_tunables.c
+++ b/ctdb/server/ctdb_tunables.c
@ -63,7 +63,8 @@ static const struct {
 	{ "VacuumLimit",       5000,  offsetof(struct ctdb_tunable, vacuum_limit) },
 	{ "VacuumMinInterval",   60,  offsetof(struct ctdb_tunable, vacuum_min_interval) },
 	{ "VacuumMaxInterval",  600,  offsetof(struct ctdb_tunable, vacuum_max_interval) },
-	{ "MaxQueueDropMsg",  1000,  offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) }
+	{ "MaxQueueDropMsg",  1000,  offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
+	{ "UseStatusEvents",     0,  offsetof(struct ctdb_tunable, use_status_events_for_monitoring) }
 };

 /*
--- a/ctdb/server/eventscript.c
+++ b/ctdb/server/eventscript.c
@ -32,6 +32,19 @@ static struct {
 	const char *script_running;
 } child_state;

+static const char * const call_names[] = {	/* indexed by enum ctdb_eventscript_call */
+	"startup",
+	"startrecovery",
+	"recovered",
+	"takeip",
+	"releaseip",
+	"stopped",
+	"monitor",
+	"status",
+	"shutdown",
+	"reload"
+};
+
 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);

 /*
@ -61,11 +74,13 @@ static void sigterm(int sig)
 struct ctdb_event_script_state {
 	struct ctdb_context *ctdb;
 	pid_t child;
+	/* Warning: this can free us! */
 	void (*callback)(struct ctdb_context *, int, void *);
+	int cb_status;
 	int fd[2];
 	void *private_data;
+	enum ctdb_eventscript_call call;
 	const char *options;
-        struct timed_event *te;
 	struct timeval timeout;
 };

@ -81,28 +96,22 @@ struct ctdb_monitor_script_status {
 	char *output;
 };

-struct ctdb_monitor_status {
-	struct timeval start;
-	struct timeval finished;
-	int32_t status;
+struct ctdb_monitor_script_status_ctx {
 	struct ctdb_monitor_script_status *scripts;
-	struct ctdb_event_script_state *state;
 };

-
 /* called from ctdb_logging when we have received output on STDERR from
 * one of the eventscripts
 */
 int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
 {
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
 	struct ctdb_monitor_script_status *script;

-	if (monitoring_status == NULL) {
+	if (ctdb->current_monitor_status_ctx == NULL) {
 		return -1;
 	}

-	script = monitoring_status->scripts;
+	script = ctdb->current_monitor_status_ctx->scripts;
 	if (script == NULL) {
 		return -1;
 	}
@ -121,17 +130,13 @@ int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t
 */
 int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
 {
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-
 	DEBUG(DEBUG_INFO, ("event script init called\n"));

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " Init called when context is NULL\n"));
-		return 0;
+	if (ctdb->current_monitor_status_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when initing script\n"));
+		return -1;
 	}

-	monitoring_status->start = timeval_current();	
-
 	return 0;
 }

@ -142,41 +147,26 @@ int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
 int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
 {
 	const char *name = (const char *)indata.dptr;
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-	struct ctdb_event_script_state *state;
 	struct ctdb_monitor_script_status *script;

 	DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
+	if (ctdb->current_monitor_status_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when starting script\n"));
 		return -1;
 	}

-	script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
+	script = talloc_zero(ctdb->current_monitor_status_ctx, struct ctdb_monitor_script_status);
 	if (script == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
 		return -1;
 	}

-	script->next  = monitoring_status->scripts;
+	script->next  = ctdb->current_monitor_status_ctx->scripts;
 	script->name  = talloc_strdup(script, name);
 	CTDB_NO_MEMORY(ctdb, script->name);
 	script->start = timeval_current();
-	monitoring_status->scripts = script;
-
-	state = monitoring_status->state;
-	if (state != NULL) {
-		/* reset the timeout for the next eventscript */
-		if (!timeval_is_zero(&state->timeout)) {
-			if (state->te != NULL) {
-				talloc_free(state->te);
-				state->te = NULL;
-			}
-			state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
-		}
-
-	}
+	ctdb->current_monitor_status_ctx->scripts = script;

 	return 0;
 }
@ -187,15 +177,14 @@ int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA inda
 int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
 {
 	int32_t res = *((int32_t *)indata.dptr);
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
 	struct ctdb_monitor_script_status *script;

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
+	if (ctdb->current_monitor_status_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
 		return -1;
 	}

-	script = monitoring_status->scripts;
+	script = ctdb->current_monitor_status_ctx->scripts;
 	if (script == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
 		return -1;
@ -214,17 +203,16 @@ int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indat
 int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA indata)
 {
 	const char *name = (const char *)indata.dptr;
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
 	struct ctdb_monitor_script_status *script;

 	DEBUG(DEBUG_INFO, ("event script disabed called for script %s\n", name));

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
+	if (ctdb->current_monitor_status_ctx == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
 		return -1;
 	}

-	script = monitoring_status->scripts;
+	script = ctdb->current_monitor_status_ctx->scripts;
 	if (script == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
 		return -1;
@ -242,24 +230,19 @@ int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA i
 */
 int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
 {
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-
 	DEBUG(DEBUG_INFO, ("event script finished called\n"));

-	if (monitoring_status == NULL) {
+	if (ctdb->current_monitor_status_ctx == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
 		return -1;
 	}

-	monitoring_status->finished = timeval_current();	
-	monitoring_status->status   = MONITOR_SCRIPT_OK;
-
-	if (ctdb->last_monitor_ctx) {
-		talloc_free(ctdb->last_monitor_ctx);
-		ctdb->last_monitor_ctx = NULL;
+	if (ctdb->last_monitor_status_ctx) {
+		talloc_free(ctdb->last_monitor_status_ctx);
+		ctdb->last_monitor_status_ctx = NULL;
 	}
-	ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
-	ctdb->script_monitor_ctx = NULL;
+	ctdb->last_monitor_status_ctx = ctdb->current_monitor_status_ctx;
+	ctdb->current_monitor_status_ctx = NULL;

 	return 0;
 }
@ -303,11 +286,11 @@ static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_

 int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
 {
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->last_monitor_ctx;
+	struct ctdb_monitor_script_status_ctx *script_status = talloc_get_type(ctdb->last_monitor_status_ctx, struct ctdb_monitor_script_status_ctx);
 	struct ctdb_monitoring_wire *monitoring_scripts;

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
+	if (script_status == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " last_monitor_status_ctx is NULL when reading status\n"));
 		return -1;
 	}

@ -318,7 +301,7 @@ int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA
 	}
 	
 	monitoring_scripts->num_scripts = 0;
-	monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
+	monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, script_status->scripts);
 	if (monitoring_scripts == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
 		return -1;
@ -474,23 +457,21 @@ static struct ctdb_script_list *ctdb_get_script_list(struct ctdb_context *ctdb,


 /*
-  run the event script - varargs version
+  Actually run the event script
  this function is called and run in the context of a forked child
  which allows it to do blocking calls such as system()
 */
-static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
+static int ctdb_run_event_script(struct ctdb_context *ctdb,
+				 bool from_user,
+				 enum ctdb_eventscript_call call,
+				 const char *options)
 {
 	char *cmdstr;
 	int ret;
 	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 	struct ctdb_script_list *scripts, *current;
-	int is_monitor = 0;

-	if (!strcmp(options, "monitor")) {
-		is_monitor = 1;
-	}
-
-	if (is_monitor == 1) {
+	if (!from_user && call == CTDB_EVENT_MONITOR) {
 		/* This is running in the forked child process. At this stage
 		 * we want to switch from being a ctdb daemon into being a
 		 * client and connect to the real local daemon.
@ -510,14 +491,15 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 		/* we guarantee that only some specifically allowed event scripts are run
 		   while in recovery */
-	  const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip", "stopped" };
+		const enum ctdb_eventscript_call allowed_calls[] = {
+			CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED };
 		int i;
-		for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
-			if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
+		for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
+			if (call == allowed_calls[i]) break;
 		}
-		if (i == ARRAY_SIZE(allowed_scripts)) {
-			DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
-				 options));
+		if (i == ARRAY_SIZE(allowed_calls)) {
+			DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
+				 call_names[call]));
 			talloc_free(tmp_ctx);
 			return -1;
 		}
@ -541,10 +523,26 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 	   them
 	 */
 	for (current=scripts; current; current=current->next) {
-		/* we dont run disabled scripts, we just report they are disabled */
-		cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s", 
-				ctdb->event_script_dir,
-				current->name, options);
+		const char *str = from_user ? "CTDB_CALLED_BY_USER=1 " : "";
+	
+		/* Allow a setting where we run the actual monitor event
+		   from an external source and replace it with
+		   a "status" event that just picks up the actual
+		   status of the event asynchronously.
+		*/
+		if ((ctdb->tunable.use_status_events_for_monitoring != 0) 
+		&&  (call == CTDB_EVENT_MONITOR)
+		&&  !from_user) {
+			cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s",
+					str,
+					ctdb->event_script_dir,
+					current->name, "status");
+		} else {
+			cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s %s",
+			       	 	str,
+					ctdb->event_script_dir,
+					current->name, call_names[call], options);
+		}
 		CTDB_NO_MEMORY(ctdb, cmdstr);

 		DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
@ -552,7 +550,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 		child_state.start = timeval_current();
 		child_state.script_running = cmdstr;

-		if (is_monitor == 1) {
+		if (!from_user && call == CTDB_EVENT_MONITOR) {
 			if (ctdb_ctrl_event_script_start(ctdb, current->name) != 0) {
 				DEBUG(DEBUG_ERR,(__location__ " Failed to start event script monitoring\n"));
 				talloc_free(tmp_ctx);
@ -585,7 +583,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 			DEBUG(DEBUG_ERR,("Script %s returned status 127. Someone just deleted it?\n", cmdstr));
 		}
 
-		if (is_monitor == 1) {
+		if (!from_user && call == CTDB_EVENT_MONITOR) {
 			if (ctdb_ctrl_event_script_stop(ctdb, ret) != 0) {
 				DEBUG(DEBUG_ERR,(__location__ " Failed to stop event script monitoring\n"));
 				talloc_free(tmp_ctx);
@ -596,7 +594,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 		/* return an error if the script failed */
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
-			if (is_monitor == 1) {
+			if (!from_user && call == CTDB_EVENT_MONITOR) {
 				if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
 					DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
 					talloc_free(tmp_ctx);
@ -612,7 +610,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
 	child_state.start = timeval_current();
 	child_state.script_running = "finished";
 	
-	if (is_monitor == 1) {
+	if (!from_user && call == CTDB_EVENT_MONITOR) {
 		if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
 			DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
 			talloc_free(tmp_ctx);
@ -631,20 +629,18 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
 	struct ctdb_event_script_state *state = 
 		talloc_get_type(p, struct ctdb_event_script_state);
 	struct ctdb_context *ctdb = state->ctdb;
-	signed char rt = -1;

-	read(state->fd[0], &rt, sizeof(rt));
-
-	DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, rt));
-
-	if (state->callback) {
-		state->callback(ctdb, rt, state->private_data);
-		state->callback = NULL;
+	if (read(state->fd[0], &state->cb_status, sizeof(state->cb_status)) !=
+	    sizeof(state->cb_status)) {
+		state->cb_status = -2;
 	}

-	talloc_set_destructor(state, NULL);
-	talloc_free(state);
+	DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
+			  call_names[state->call], state->options, state->cb_status));
+
+	state->child = 0;
 	ctdb->event_script_timeouts = 0;
+	talloc_free(state);
 }

 static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
@ -667,29 +663,19 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 				      struct timeval t, void *p)
 {
 	struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
-	void *private_data = state->private_data;
 	struct ctdb_context *ctdb = state->ctdb;
-	char *options;
-	struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;

-	state->te = NULL;
+	DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u  pid : %d\n",
+			 call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));

-	DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u  pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
 	if (kill(state->child, 0) != 0) {
 		DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
-		if (state->callback) {
-			state->callback(ctdb, 0, private_data);
-			state->callback = NULL;
-		}
-		talloc_set_destructor(state, NULL);
+		state->child = 0;
 		talloc_free(state);
 		return;
 	}

-	options = talloc_strdup(ctdb, state->options);
-	CTDB_NO_MEMORY_VOID(ctdb, options);
-
-	if (!strcmp(options, "monitor")) {
+	if (state->call == CTDB_EVENT_MONITOR) {
 		/* if it is a monitor event, we allow it to "hang" a few times
 		   before we declare it a failure and ban ourself (and make
 		   ourself unhealthy)
@ -697,135 +683,180 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 		DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));

 		ctdb->event_script_timeouts++;
+
 		if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
-			if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
-				DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
-				if (state->callback) {
-					state->callback(ctdb, -ETIME, private_data);
-					state->callback = NULL;
-				}
-			} else {
-				ctdb->event_script_timeouts = 0;
-				DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
-				ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
-				if (state->callback) {
-					state->callback(ctdb, -1, private_data);
-					state->callback = NULL;
-				}
-			}
+			DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
+			state->cb_status = -ETIME;
 		} else {
-			if (state->callback) {
-			  	state->callback(ctdb, 0, private_data);
-				state->callback = NULL;
-			}
+			state->cb_status = 0;
 		}
-	} else if (!strcmp(options, "startup")) {
+	} else if (state->call == CTDB_EVENT_STARTUP) {
 		DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
-		if (state->callback) {
-			state->callback(ctdb, -1, private_data);
-			state->callback = NULL;
-		}
+		state->cb_status = -1;
 	} else {
-		/* if it is not a monitor event we ban ourself immediately */
+		/* if it is not a monitor or a startup event we ban ourself
+		   immediately
+		*/
 		DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
+
 		ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
-		if (state->callback) {
-			state->callback(ctdb, -1, private_data);
-			state->callback = NULL;
-		}
+
+		state->cb_status = -1;
 	}

-	if ((!strcmp(options, "monitor")) && (monitoring_status != NULL)) {
+	if (state->call == CTDB_EVENT_MONITOR || state->call == CTDB_EVENT_STATUS) {
 		struct ctdb_monitor_script_status *script;

-		script = monitoring_status->scripts;
+		if (ctdb->current_monitor_status_ctx == NULL) {
+			talloc_free(state);
+			return;
+		}
+
+		script = ctdb->current_monitor_status_ctx->scripts;
 		if (script != NULL) {
 			script->timedout = 1;
 		}
-		monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
-		if (ctdb->last_monitor_ctx) {
-			talloc_free(ctdb->last_monitor_ctx);
-			ctdb->last_monitor_ctx = NULL;
+
+		if (ctdb->last_monitor_status_ctx) {
+			talloc_free(ctdb->last_monitor_status_ctx);
+			ctdb->last_monitor_status_ctx = NULL;
 		}
-		ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
-		ctdb->script_monitor_ctx = NULL;
+		ctdb->last_monitor_status_ctx = talloc_steal(ctdb, ctdb->current_monitor_status_ctx);
+		ctdb->current_monitor_status_ctx = NULL;
 	}

 	talloc_free(state);
-	talloc_free(options);
 }

 /*
-  destroy a running event script
+  destroy an event script: kill it if ->child != 0.
+
+  Installed as the talloc destructor of struct ctdb_event_script_state.
+  While ->child is non-zero the child is sent SIGTERM; a failing kill()
+  is only logged.  Afterwards ->callback, when set, is invoked with
+  ->cb_status and ->private_data.  Always returns 0.
 */
 static int event_script_destructor(struct ctdb_event_script_state *state)
 {
-	DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
+	if (state->child) {
+		DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));

-	if (state->callback) {
-		state->callback(state->ctdb, 0, state->private_data);
-		state->callback = NULL;
+		if (kill(state->child, SIGTERM) != 0) {
+			DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+		}
 	}

-	if (kill(state->child, SIGTERM) != 0) {
-		DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+	/* This is allowed to free us; talloc will prevent double free anyway,
+	 * but beware if you call this outside the destructor!
+	 * The callback runs after the kill attempt and receives whatever
+	 * ->cb_status holds at this point. */
+	if (state->callback) {
+		state->callback(state->ctdb, state->cb_status, state->private_data);
 	}

 	return 0;
 }

+/* Count the words in options; words are separated by spaces or tabs. */
+static unsigned int count_words(const char *options)
+{
+	const char *cursor = options + strspn(options, " \t");
+	unsigned int total;
+
+	for (total = 0; *cursor != '\0'; total++) {
+		/* Step over one word, then over the blanks that follow it. */
+		cursor += strcspn(cursor, " \t");
+		cursor += strspn(cursor, " \t");
+	}
+	return total;
+}
+
+/*
+  Check that the argument string supplied with an eventscript call has
+  the number of words that call expects.
+
+  call:    the event about to be run
+  options: its whitespace-separated argument string
+
+  Returns true when the word count matches, false when it does not or
+  when call is not one of the events listed below.
+*/
+static bool check_options(enum ctdb_eventscript_call call, const char *options)
+{
+	switch (call) {
+	/* These all take no arguments. */
+	case CTDB_EVENT_STARTUP:
+	case CTDB_EVENT_START_RECOVERY:
+	case CTDB_EVENT_RECOVERED:
+	case CTDB_EVENT_STOPPED:
+	case CTDB_EVENT_MONITOR:
+	case CTDB_EVENT_STATUS:
+	case CTDB_EVENT_SHUTDOWN:
+	case CTDB_EVENT_RELOAD:
+		return count_words(options) == 0;
+
+	case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
+	case CTDB_EVENT_RELEASE_IP:
+		return count_words(options) == 3;
+
+	default:
+		/* The leading space keeps the text apart from the __location__
+		   prefix; the cast makes the argument match %u. */
+		DEBUG(DEBUG_ERR,(__location__ " Unknown ctdb_eventscript_call %u\n",
+				 (unsigned int)call));
+		return false;
+	}
+}
+
 /*
  run the event script in the background, calling the callback when 
  finished
+
+  callback, private_data: stored in the script state; the state's talloc
+                          destructor invokes callback with the final status
+  from_user:              when true the monitor/status context branch is
+                          never taken, whatever the call is
+  call, fmt, ap:          the event and the printf-style description of
+                          its argument string, validated by check_options()
+
+  Returns 0 after the child has been forked and the fd/timer handlers
+  registered, -1 on the failure paths visible here.
 */
 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, 
-					struct timeval timeout,
 					void (*callback)(struct ctdb_context *, int, void *),
 					void *private_data,
+					bool from_user,
+					enum ctdb_eventscript_call call,
 					const char *fmt, va_list ap)
 {
-	struct ctdb_monitor_status *monitoring_status;
+	TALLOC_CTX *mem_ctx;
 	struct ctdb_event_script_state *state;
 	int ret;

-	if (!strcmp(fmt, "monitor")) {
-		if (ctdb->script_monitor_ctx != NULL) {
-			talloc_free(ctdb->script_monitor_ctx);
-			ctdb->script_monitor_ctx = NULL;
+	if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
+		/* if this was a "monitor" or a status event, we recycle the
+		   context to start a new monitor event
+		*/
+		if (ctdb->monitor_event_script_ctx != NULL) {
+			talloc_free(ctdb->monitor_event_script_ctx);
+			ctdb->monitor_event_script_ctx = NULL;
 		}
-		monitoring_status = talloc_zero(ctdb, struct ctdb_monitor_status);
+		ctdb->monitor_event_script_ctx = talloc_new(ctdb);
+		/* Without this check a failed allocation would parent the
+		   state on the NULL context, outside the recycling above. */
+		CTDB_NO_MEMORY(ctdb, ctdb->monitor_event_script_ctx);
+		mem_ctx = ctdb->monitor_event_script_ctx;
+
+		if (ctdb->current_monitor_status_ctx != NULL) {
+			talloc_free(ctdb->current_monitor_status_ctx);
+			ctdb->current_monitor_status_ctx = NULL;
+		}
+
+		ctdb->current_monitor_status_ctx = talloc(ctdb, struct ctdb_monitor_script_status_ctx);
+		CTDB_NO_MEMORY(ctdb, ctdb->current_monitor_status_ctx);
+		ctdb->current_monitor_status_ctx->scripts = NULL;
 	} else {
-		if (ctdb->event_script_ctx == NULL) {
-			ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
+		/* any other script will first terminate any monitor event */
+		if (ctdb->monitor_event_script_ctx != NULL) {
+			talloc_free(ctdb->monitor_event_script_ctx);
+			ctdb->monitor_event_script_ctx = NULL;
 		}
-		monitoring_status = ctdb->event_script_ctx;
+		/* and then use a context common for all non-monitor events */
+		if (ctdb->other_event_script_ctx == NULL) {
+			ctdb->other_event_script_ctx = talloc_new(ctdb);
+			CTDB_NO_MEMORY(ctdb, ctdb->other_event_script_ctx);
+		}
+		mem_ctx = ctdb->other_event_script_ctx;
 	}

-	if (monitoring_status == NULL) {
-		DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
-		return -1;
-	}
-
-	state = talloc(monitoring_status, struct ctdb_event_script_state);
-	if (state == NULL) {
-		DEBUG(DEBUG_ERR,(__location__ " could not allocate state\n"));
-		return -1;
-	}
-	monitoring_status->state = state;
+	state = talloc(mem_ctx, struct ctdb_event_script_state);
+	CTDB_NO_MEMORY(ctdb, state);

 	state->ctdb = ctdb;
 	state->callback = callback;
 	state->private_data = private_data;
+	state->call = call;
 	state->options = talloc_vasprintf(state, fmt, ap);
-	state->timeout = timeout;
-	state->te = NULL;
+	state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
 	if (state->options == NULL) {
 		DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
 		talloc_free(state);
 		return -1;
 	}
+	if (!check_options(state->call, state->options)) {
+		/* Argument order follows the format: options first, then
+		   the name of the call they were rejected for. */
+		DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
+				  state->options, call_names[state->call]));
+		talloc_free(state);
+		return -1;
+	}

-	DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s\n", state->options));
+	DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
+			  call_names[state->call], state->options));
 	
 	ret = pipe(state->fd);
 	if (ret != 0) {
@ -843,28 +874,22 @ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 	}

 	if (state->child == 0) {
-		signed char rt;
+		int rt;

 		close(state->fd[0]);
 		set_close_on_exec(state->fd[1]);

-		rt = ctdb_event_script_v(ctdb, state->options);
-		while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
-			write(state->fd[1], &rt, sizeof(rt));
-			usleep(100000);
-		}
+		rt = ctdb_run_event_script(ctdb, from_user, state->call, state->options);
+		/* We must be able to write PIPEBUF bytes at least; if this
+		   somehow fails, the read above will be short. */
+		write(state->fd[1], &rt, sizeof(rt));
+		close(state->fd[1]);
 		_exit(rt);
 	}

-	talloc_set_destructor(state, event_script_destructor);
-	if (!strcmp(fmt, "monitor")) {
-		ctdb->script_monitor_ctx = monitoring_status;
-	} else {
-		ctdb->event_script_ctx  = monitoring_status;
-	}
-
 	close(state->fd[1]);
 	set_close_on_exec(state->fd[0]);
+	talloc_set_destructor(state, event_script_destructor);

 	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));

@ -872,9 +897,10 @ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
 		     ctdb_event_script_handler, state);

 	if (!timeval_is_zero(&state->timeout)) {
-		state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
+		event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
 	} else {
-		DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
+		DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n",
+				  call_names[state->call], state->options));
 	}

 	return 0;
@ -886,17 +912,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
  finished
 */
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
-			       struct timeval timeout,
 			       TALLOC_CTX *mem_ctx,
 			       void (*callback)(struct ctdb_context *, int, void *),
 			       void *private_data,
+			       bool from_user,
+			       enum ctdb_eventscript_call call,
 			       const char *fmt, ...)
 {
 	va_list ap;
 	int ret;

+	/* Variadic front end: bundle the arguments after fmt into a va_list,
+	   pass everything on to ctdb_event_script_callback_v() and return its
+	   result.  mem_ctx is not referenced anywhere in this body. */
 	va_start(ap, fmt);
-	ret = ctdb_event_script_callback_v(ctdb, timeout, callback, private_data, fmt, ap);
+	ret = ctdb_event_script_callback_v(ctdb, callback, private_data, from_user, call, fmt, ap);
 	va_end(ap);

 	return ret;
 }
@ -919,24 +946,23 @@ static void event_script_callback(struct ctdb_context *ctdb, int status, void *p
 }

 /*
-  run the event script, waiting for it to complete. Used when the caller doesn't want to 
-  continue till the event script has finished.
+  run the event script, waiting for it to complete. Used when the caller
+  doesn't want to continue till the event script has finished.
+
+  call: the event to run
+  fmt:  printf-style description of the event's argument string
+
+  Returns the non-zero result of ctdb_event_script_callback_v() when the
+  script could not be started, otherwise status.status.
 */
-int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
+int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
+			   const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 	struct callback_status status;

 	va_start(ap, fmt);
-	ret = ctdb_event_script_callback_v(ctdb, 
-			timeval_set(ctdb->tunable.script_timeout, 0),
-			event_script_callback, &status, fmt, ap);
+	ret = ctdb_event_script_callback_v(ctdb,
+			event_script_callback, &status, false, call, fmt, ap);
+	/* va_end must run on every path, including the early error return
+	   below; returning without it is undefined behaviour. */
 	va_end(ap);

 	if (ret != 0) {
 		return ret;
 	}

 	status.status = -1;
 	status.done = false;
@ -946,6 +972,11 @ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
 	return status.status;
 }

+/* Convenience form of ctdb_event_script_args() for calls with no arguments. */
+int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call)
+{
+	/* An empty format string makes GCC complain, hence "%s" fed with "". */
+	const char *no_options = "";
+
+	return ctdb_event_script_args(ctdb, call, "%s", no_options);
+}

 struct eventscript_callback_state {
 	struct ctdb_req_control *c;
@ -964,17 +995,36 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,

 	if (status != 0) {
 		DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
-		ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
-		talloc_free(state);
-		return;
 	}

-	/* the control succeeded */
-	ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
+	ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+	/* This will free the struct ctdb_event_script_state we are in! */
 	talloc_free(state);
 	return;
 }

+
+/*
+  Parse the eventscript call name at the start of p.
+
+  Leading spaces and tabs are skipped.  A name from call_names matches
+  only when it is followed by end of string, a space or a tab.  On a
+  match *call is set to the index of that name and the text right after
+  the name is returned.  Returns NULL, leaving *call untouched, when no
+  name matches.
+*/
+static const char *get_call(const char *p, enum ctdb_eventscript_call *call)
+{
+	size_t i;
+
+	/* Skip any initial whitespace. */
+	p += strspn(p, " \t");
+
+	/* See if we match any. */
+	for (i = 0; i < ARRAY_SIZE(call_names); i++) {
+		size_t len = strlen(call_names[i]);
+
+		if (strncmp(p, call_names[i], len) != 0) {
+			continue;
+		}
+		/* Accept whole words only: the name must end the string or
+		   be followed by a blank. */
+		if (p[len] == '\0' || p[len] == ' ' || p[len] == '\t') {
+			*call = (enum ctdb_eventscript_call)i;
+			return p + len;
+		}
+	}
+	return NULL;
+}
+
 /*
  A control to force running of the eventscripts from the ctdb client tool
 */
@ -984,29 +1034,33 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
 {
 	int ret;
 	struct eventscript_callback_state *state;
+	const char *options;
+	enum ctdb_eventscript_call call;

-	if (ctdb->event_script_ctx == NULL) {
-		ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
+	/* Figure out what call they want. */
+	options = get_call((const char *)indata.dptr, &call);
+	if (!options) {
+		DEBUG(DEBUG_ERR, (__location__ " Invalid forced \"%s\"\n", (const char *)indata.dptr));
+		return -1;
 	}

-	state = talloc(ctdb->event_script_ctx, struct eventscript_callback_state);
-	CTDB_NO_MEMORY(ctdb, state);
-
-	state->c = talloc_steal(state, c);
-
-	DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
-
 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 		DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
 		return -1;
 	}

+	state = talloc(ctdb->other_event_script_ctx, struct eventscript_callback_state);
+	CTDB_NO_MEMORY(ctdb, state);
+
+	state->c = talloc_steal(state, c);
+
+	DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
+
 	ctdb_disable_monitoring(ctdb);

 	ret = ctdb_event_script_callback(ctdb, 
-			 timeval_set(ctdb->tunable.script_timeout, 0),
 			 state, run_eventscripts_callback, state,
-			 "%s", (const char *)indata.dptr);
+			 true, call, "%s", options);

 	if (ret != 0) {
 		ctdb_enable_monitoring(ctdb);