1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-10 01:18:15 +03:00

Merge commit 'martins/status-test-2'

Conflicts:

	server/eventscript.c

(This used to be ctdb commit e9b3477a5b9a2eff18f727e7d59338bfb5214793)
This commit is contained in:
Ronnie Sahlberg 2009-12-01 10:53:18 +11:00
commit 569001afd0
26 changed files with 850 additions and 800 deletions

View File

@ -250,7 +250,7 @@ status() {
}
case "$1" in
case "$cmd" in
start)
start
;;

View File

@ -10,13 +10,7 @@
# recovered : called when ctdb has finished a recovery event
. $CTDB_BASE/functions
loadconfig ctdb
# ensure we have /bin and /usr/bin in the path
PATH=/bin:/usr/bin:$PATH
cmd="$1"
shift
loadconfig
case $cmd in
startup)

View File

@ -2,55 +2,43 @@
# script to check accessibility to the reclock file on a node
. $CTDB_BASE/functions
loadconfig ctdb
cmd="$1"
shift
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
# Count the number of intervals that have passed when we have tried to
# but failed to stat the reclock file. after third failure the node
# becomes unhealthy after the twentieth failure the node we shutdown
# ctdbd
RECLOCKCOUNT="fail-count"
loadconfig
case $cmd in
startup)
ctdb_counter_init "$RECLOCKCOUNT"
startup)
ctdb_counter_init
;;
monitor)
RECLOCKFILE=$(ctdb -Y getreclock)
monitor)
ctdb_counter_incr "$RECLOCKCOUNT"
ctdb_counter_limit "$RECLOCKCOUNT" 200 && {
echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
df
sleep 1
ctdb shutdown
ctdb_counter_incr
(ctdb_check_counter_limit 200 >/dev/null 2>&1) || {
echo "Reclock file $RECLOCKFILE\" can not be accessed. Shutting down."
df
sleep 1
ctdb shutdown
}
RECLOCKFILE=`ctdb -Y getreclock`
[ -z "$RECLOCKFILE" ] && {
# we are not using a reclock file
ctdb_counter_init "$RECLOCKCOUNT"
exit 0
# we are not using a reclock file
ctdb_counter_init
exit 0
}
# try stat the reclock file as a background process
# so that we dont block in case the cluster filesystem is unavailable
(
stat $RECLOCKFILE && {
# we could stat the file, reset the counter
ctdb_counter_init "$RECLOCKCOUNT"
}
stat $RECLOCKFILE && {
# we could stat the file, reset the counter
ctdb_counter_init
}
) >/dev/null 2>/dev/null &
ctdb_counter_limit "$RECLOCKCOUNT" 3 && {
echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Mark node UNHEALTHY."
df
exit 1;
}
ctdb_check_counter_limit 3 quiet
;;
status)
ctdb_checkstatus || exit $?
;;
esac

View File

@ -6,10 +6,7 @@
# public interface
. $CTDB_BASE/functions
loadconfig ctdb
cmd="$1"
shift
loadconfig
[ -z "$CTDB_PUBLIC_ADDRESSES" ] && {
CTDB_PUBLIC_ADDRESSES=$CTDB_BASE/public_addresses
@ -177,10 +174,10 @@ case $cmd in
esac
done
;;
status)
ctdb_checkstatus || exit $?
;;
esac
exit 0

View File

@ -6,15 +6,10 @@
#
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig
[ -z "$CTDB_NATGW_PUBLIC_IFACE" ] && exit 0
cmd="$1"
shift
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
delete_all() {
remove_ip $CTDB_NATGW_PUBLIC_IP $CTDB_NATGW_PUBLIC_IFACE
remove_ip $CTDB_NATGW_PUBLIC_IP_HOST lo
@ -28,7 +23,7 @@ delete_all() {
}
case $cmd in
startup)
startup)
# do not respond to ARPs that are for ip addresses with scope 'host'
echo 3 > /proc/sys/net/ipv4/conf/all/arp_ignore
# do not send out arp requests from loopback addresses
@ -37,13 +32,13 @@ case $cmd in
ctdb setnatgwstate on
;;
recovered|updatenatgw)
recovered|updatenatgw)
MYPNN=`ctdb pnn | cut -d: -f2`
NATGWMASTER=`ctdb natgwlist | head -1 | sed -e "s/ .*//"`
NATGWIP=`ctdb natgwlist | head -1 | sed -e "s/^[^ ]* *//"`
CTDB_NATGW_PUBLIC_IP_HOST=`echo $CTDB_NATGW_PUBLIC_IP | sed -e "s/\/.*/\/32/"`
if [ "$NATGWMASTER" = "-1" ]; then
if [ "$NATGWMASTER" == "-1" ]; then
echo "There is not NATGW master node"
exit 1
fi
@ -71,7 +66,7 @@ case $cmd in
echo 1 > /proc/sys/net/ipv4/route/flush
;;
shutdown|removenatgw)
shutdown|removenatgw)
delete_all
;;

View File

@ -13,16 +13,12 @@
# bond1 10.3.3.0/24 10.0.0.1
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig
[ -f $CTDB_BASE/static-routes ] || {
exit 0
}
cmd="$1"
shift
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
case $cmd in
takeip|releaseip)
iface=$1

View File

@ -6,14 +6,11 @@
# CTDB_MONITOR_MPDEVICES="device1 device2 ..."
#
PATH=/bin:/usr/bin:$PATH
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig multipathd
cmd="$1"
shift
service_name="multipathd"
loadconfig
[ -z "$CTDB_MONITOR_MPDEVICES" ] && {
exit 0

View File

@ -2,52 +2,45 @@
# event script to manage clamd in a cluster environment
. $CTDB_BASE/functions
loadconfig ctdb
detect_init_style
case $CTDB_INIT_STYLE in
redhat)
CTDB_SERVICE_CLAMD="clamd"
CTDB_CONFIG_CLAMD="clamd"
;;
suse)
CTDB_SERVICE_CLAMD="clamav"
CTDB_CONFIG_CLAMD="clamav"
;;
debian)
CTDB_SERVICE_CLAMD="clamav"
CTDB_CONFIG_CLAMD="clamav"
service_name="clamd"
service_config="clamd"
;;
*)
# should not happen.
# for now use red hat style as default
CTDB_SERVICE_CLAMD="clamd"
CTDB_CONFIG_CLAMD="clamd"
service_name="clamav"
service_config="clamav"
;;
esac
loadconfig "${CTDB_CONFIG_CLAMD}"
service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
service_stop="service $service_name stop"
[ "$CTDB_MANAGES_CLAMD" = "yes" ] || exit 0
loadconfig
cmd="$1"
shift
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
case $cmd in
startup)
service "${CTDB_SERVICE_CLAMD}" stop > /dev/null 2>&1
service "${CTDB_SERVICE_CLAMD}" start
ctdb_service_start
;;
shutdown)
service "${CTDB_SERVICE_CLAMD}" stop
ctdb_service_stop
;;
monitor)
ctdb_check_unix_socket "clamd" ${CTDB_CLAMD_SOCKET}
ctdb_check_unix_socket ${CTDB_CLAMD_SOCKET} || exit $?
;;
status)
ctdb_checkstatus || exit $?
;;
esac
exit 0

View File

@ -2,67 +2,61 @@
# event script to manage vsftpd in a cluster environment
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig vsftpd
[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
service_name="vsftpd"
# make sure the service is stopped first
service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
service_stop="service $service_name stop"
service_reconfigure="service $service_name restart"
service_fail_limit=2
service_tcp_ports=21
cmd="$1"
shift
loadconfig
# Count the number of monitor failures. The cluster only becomes
# unhealthy after 2 failures.
VSFTPD_FAILS="fail-count"
VSFTPD_LIMIT=2
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
case $cmd in
startup)
/bin/mkdir -p $CTDB_BASE/state/vsftpd
# make sure the service is stopped first
service vsftpd stop > /dev/null 2>&1
service vsftpd start
ctdb_counter_init "$VSFTPD_FAILS"
ctdb_service_start
;;
shutdown)
service vsftpd stop
ctdb_service_stop
;;
takeip)
echo "restart" > $CTDB_BASE/state/vsftpd/restart
;;
releaseip)
echo "restart" > $CTDB_BASE/state/vsftpd/restart
takeip|releaseip)
ctdb_service_set_reconfigure
;;
recovered)
# if we have taken or released any ips we must
# restart vsftpd to ensure that all tcp connections are reset
[ -f $CTDB_BASE/state/vsftpd/restart ] && {
service vsftpd stop > /dev/null 2>&1
service vsftpd start
/bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
ctdb_counter_init "$VSFTPD_FAILS"
} >/dev/null 2>&1
if ctdb_service_needs_reconfigure ; then
ctdb_service_reconfigure
fi
;;
monitor)
# Subshell catches the "exit 1"
if (ctdb_check_tcp_ports "ftp" 21) ; then
ctdb_counter_init "$VSFTPD_FAILS"
else
ctdb_counter_incr "$VSFTPD_FAILS"
if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
exit 1
else
echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
fi
if ctdb_service_needs_reconfigure ; then
ctdb_service_reconfigure
exit 0
fi
if [ -n "$service_tcp_ports" ] ; then
if ctdb_check_tcp_ports $service_tcp_ports ; then
ctdb_counter_init
else
ctdb_counter_incr
ctdb_check_counter_limit
exit 0 # only count 1 failure per monitor event
fi
fi
;;
status)
ctdb_checkstatus || exit $?
;;
esac

View File

@ -2,67 +2,66 @@
# event script to manage httpd in a cluster environment
. $CTDB_BASE/functions
loadconfig ctdb
detect_init_style
case $CTDB_INIT_STYLE in
redhat)
CTDB_SERVICE_HTTP="httpd"
CTDB_CONFIG_HTTP="http"
;;
suse)
CTDB_SERVICE_HTTP="apache2"
CTDB_CONFIG_HTTP="apache2"
;;
debian)
CTDB_SERVICE_HTTP="apache2"
CTDB_CONFIG_HTTP="apache2"
;;
*)
# should not happen.
# for now use red hat style as default
CTDB_SERVICE_HTTP="httpd"
CTDB_CONFIG_HTTP="http"
;;
redhat)
service_name="httpd"
service_config="http"
;;
suse|debian|*)
service_name="apache2"
service_config="apache2"
;;
esac
loadconfig "${CTDB_CONFIG_HTTP}"
[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
cmd="$1"
shift
# RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks
# semaphores. This is a hack to clean them up.
cleanup_httpd_semaphore_leak() {
killall -q -0 "${CTDB_SERVICE_HTTP}" ||
killall -q -0 "$service_name" ||
for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
ipcrm -s $i
done
}
##########
service_start="cleanup_httpd_semaphore_leak; service $service_name start"
service_stop="service $service_name stop; killall -q -9 $service_name || true"
service_reconfigure="service $service_name restart"
loadconfig
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
case $cmd in
startup)
cleanup_httpd_semaphore_leak
service "${CTDB_SERVICE_HTTP}" start
ctdb_service_start
;;
shutdown)
service "${CTDB_SERVICE_HTTP}" stop
killall -q -9 "${CTDB_SERVICE_HTTP}"
ctdb_service_stop
;;
monitor)
( ctdb_check_tcp_ports "http" 80 )
if [ $? -ne 0 ] ; then
monitor)
if ctdb_service_needs_reconfigure ; then
ctdb_service_reconfigure
exit 0
fi
if ! ctdb_check_tcp_ports 80 ; then
echo "HTTPD is not running. Trying to restart HTTPD."
cleanup_httpd_semaphore_leak
service "${CTDB_SERVICE_HTTP}" start
ctdb_service_start
exit 1
fi
;;
status)
ctdb_checkstatus || exit $?
;;
esac
exit 0

View File

@ -1,11 +1,7 @@
#!/bin/sh
# ctdb event script for Samba
PATH=/bin:/usr/bin:$PATH
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig samba
detect_init_style
@ -20,11 +16,6 @@ case $CTDB_INIT_STYLE in
CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
;;
redhat)
CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
;;
*)
# should not happen, but for now use redhat style as default:
CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
@ -33,11 +24,69 @@ case $CTDB_INIT_STYLE in
;;
esac
cmd="$1"
shift
service_name="samba"
service_start="start_samba"
service_stop="stop_samba"
loadconfig
[ "$CTDB_MANAGES_SAMBA" = "yes" ] || [ "$CTDB_MANAGES_WINBIND" = "yes" ] || exit 0
# Start the Samba (smbd/nmbd) and winbind services that ctdb manages.
#
# Reads: CTDB_BASE, CTDB_MANAGES_SAMBA, CTDB_MANAGES_WINBIND,
#        CTDB_SERVICE_SMB, CTDB_SERVICE_NMB, CTDB_SERVICE_WINBIND.
# Any already-running daemons are stopped (and killed if they linger)
# before the services are started again.
# Returns the status of the last service command run, or 0 when the
# final step has nothing to do.  Plain "if" blocks are used instead of
# "[ ... ] && { ... }" so that a false test never turns into a spurious
# non-zero return value.
start_samba() {
    # create the state directory for samba
    /bin/mkdir -p "$CTDB_BASE/state/samba"

    # make sure samba is not already started
    if [ "$CTDB_MANAGES_SAMBA" = "yes" ] ; then
        service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
        service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
        if killall -0 -q smbd ; then
            sleep 1
            # make absolutely sure samba is dead
            killall -q -9 smbd
        fi
        if killall -0 -q nmbd ; then
            sleep 1
            # make absolutely sure samba is dead
            killall -q -9 nmbd
        fi
    fi

    # restart the winbind service
    # NOTE(review): check_ctdb_manages_winbind is defined elsewhere in this
    # script; presumably it may set CTDB_MANAGES_WINBIND - confirm.
    check_ctdb_manages_winbind
    if [ "$CTDB_MANAGES_WINBIND" = "yes" ] ; then
        service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
        if killall -0 -q winbindd ; then
            sleep 1
            # make absolutely sure winbindd is dead
            killall -q -9 winbindd
        fi
        service "$CTDB_SERVICE_WINBIND" start
    fi

    # start Samba service. Start it reniced, as under very heavy load
    # the number of smbd processes will mean that it leaves few cycles for
    # anything else
    if [ "$CTDB_MANAGES_SAMBA" = "yes" ] ; then
        nice_service "$CTDB_SERVICE_NMB" start
        nice_service "$CTDB_SERVICE_SMB" start
    fi
}
# Stop the Samba (smbd/nmbd) and winbind services that ctdb manages.
#
# Reads: CTDB_MANAGES_SAMBA, CTDB_MANAGES_WINBIND, CTDB_SERVICE_SMB,
#        CTDB_SERVICE_NMB, CTDB_SERVICE_WINBIND.
# Returns the status of the last "service ... stop" that was run, or 0
# when winbind is not managed.  Plain "if" blocks are used so that
# "winbind is not managed" is not reported to the caller as a failure
# (ctdb_start_stop_service runs "ctdb_service_stop || exit $?").
stop_samba() {
    # shutdown Samba when ctdb goes down
    if [ "$CTDB_MANAGES_SAMBA" = "yes" ] ; then
        service "$CTDB_SERVICE_SMB" stop
        service "$CTDB_SERVICE_NMB" stop
    fi

    # stop the winbind service
    # NOTE(review): check_ctdb_manages_winbind is defined elsewhere in this
    # script; presumably it may set CTDB_MANAGES_WINBIND - confirm.
    check_ctdb_manages_winbind
    if [ "$CTDB_MANAGES_WINBIND" = "yes" ] ; then
        service "$CTDB_SERVICE_WINBIND" stop
    fi
}
# set default samba cleanup period - in minutes
[ -z "$SAMBA_CLEANUP_PERIOD" ] && {
@ -130,6 +179,14 @@ check_ctdb_manages_winbind() {
}
}
# Print the directory of every "path = ..." line produced by testparm_cat,
# one per line, with any double quotes removed.
#
# Any amount of whitespace (including none) is accepted around the "=",
# so "path=/x" is still listed and "path =   /x" does not keep leading
# blanks in the emitted directory name.
list_samba_shares ()
{
    testparm_cat |
    sed -n -e 's@^[[:space:]]*path[[:space:]]*=[[:space:]]*@@p' |
    sed -e 's/"//g'
}
###########################
# periodic cleanup function
periodic_cleanup() {
@ -141,72 +198,11 @@ periodic_cleanup() {
case $cmd in
startup)
# create the state directory for samba
/bin/mkdir -p $CTDB_BASE/state/samba
# make sure samba is not already started
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
killall -0 -q smbd && {
sleep 1
# make absolutely sure samba is dead
killall -q -9 smbd
}
killall -0 -q nmbd && {
sleep 1
# make absolutely sure samba is dead
killall -q -9 nmbd
}
}
# restart the winbind service
check_ctdb_manages_winbind
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
killall -0 -q winbindd && {
sleep 1
# make absolutely sure winbindd is dead
killall -q -9 winbindd
}
service "$CTDB_SERVICE_WINBIND" start
}
# start Samba service. Start it reniced, as under very heavy load
# the number of smbd processes will mean that it leaves few cycles for
# anything else
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
nice_service "$CTDB_SERVICE_NMB" start
nice_service "$CTDB_SERVICE_SMB" start
}
ctdb_service_start
;;
takeip)
# nothing special for Samba
;;
releaseip)
# nothing special for Samba
;;
recovered)
# nothing special for Samba
exit 0
;;
shutdown)
# shutdown Samba when ctdb goes down
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
service "$CTDB_SERVICE_SMB" stop
service "$CTDB_SERVICE_NMB" stop
}
# stop the winbind service
check_ctdb_manages_winbind
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
service "$CTDB_SERVICE_WINBIND" stop
}
ctdb_service_stop
;;
monitor)
@ -232,20 +228,20 @@ case $cmd in
exit 1
}
}
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
ctdb_check_directories_probe "Samba" $smb_dirs || {
list_samba_shares |
ctdb_check_directories_probe || {
testparm_foreground_update
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
ctdb_check_directories "Samba" $smb_dirs
}
list_samba_shares |
ctdb_check_directories
} || exit $?
}
smb_ports="$CTDB_SAMBA_CHECK_PORTS"
[ -z "$smb_ports" ] && {
smb_ports=`testparm_cat --parameter-name="smb ports"`
}
ctdb_check_tcp_ports "Samba" $smb_ports
ctdb_check_tcp_ports $smb_ports || exit $?
}
# check winbind is OK
@ -255,6 +251,9 @@ case $cmd in
}
;;
status)
ctdb_checkstatus || exit $?
;;
esac
# ignore unknown commands

View File

@ -1,71 +1,69 @@
#!/bin/sh
# script to manage nfs in a clustered environment
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig nfs
[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
cmd="$1"
shift
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
case $cmd in
startup)
start_nfs() {
/bin/mkdir -p $CTDB_BASE/state/nfs
/bin/mkdir -p $CTDB_BASE/state/statd/ip
/bin/mkdir -p $STATD_SHARED_DIRECTORY
# make sure nfs is stopped before we start it, or it may get a bind error
startstop_nfs stop
startstop_nfs start
;;
shutdown)
startstop_nfs stop
exit 0
;;
takeip)
ip=$2
echo $ip >> $CTDB_BASE/state/statd/restart
# having a list of what IPs we have allows statd to do the right
# thing via $CTDB_BASE/statd-callout
touch $CTDB_BASE/state/statd/ip/$ip
exit 0
;;
releaseip)
iface=$1
ip=$2
maskbits=$3
echo $ip >> $CTDB_BASE/state/statd/restart
/bin/rm -f $CTDB_BASE/state/statd/ip/$ip
exit 0
;;
recovered)
# if no IPs have changed then don't need to restart statd
[ -f $CTDB_BASE/state/statd/restart ] || exit 0;
}
reconfigure_nfs() {
# always restart the lockmanager so that we start with a clusterwide
# graceperiod when ip addresses has changed
[ -x $CTDB_BASE/statd-callout ] && {
$CTDB_BASE/statd-callout notify &
} >/dev/null 2>&1
/bin/rm -f $CTDB_BASE/state/statd/restart
}
. $CTDB_BASE/functions
service_name="nfs"
service_start="start_nfs"
service_stop="startstop_nfs stop"
service_reconfigure="reconfigure_nfs"
loadconfig
[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
ctdb_start_stop_service
case $cmd in
startup)
ctdb_service_start
;;
shutdown)
ctdb_service_stop
;;
takeip)
ctdb_service_set_reconfigure
touch $CTDB_BASE/state/statd/ip/$2
;;
releaseip)
ctdb_service_set_reconfigure
/bin/rm -f $CTDB_BASE/state/statd/ip/$2
;;
recovered)
# if we have taken or released any ips we must
# restart the lock manager so that we enter a clusterwide grace period
if ctdb_service_needs_reconfigure ; then
ctdb_service_reconfigure
fi
;;
monitor)
if ctdb_service_needs_reconfigure ; then
ctdb_service_reconfigure
exit 0
fi
# check that statd responds to rpc requests
# if statd is not running we try to restart it
rpcinfo -u localhost 100024 1 > /dev/null || {
@ -83,13 +81,15 @@ case $cmd in
# and that its directories are available
[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
nfs_dirs=$(exportfs | grep -v '^#' | grep '^/' | awk {'print $1;'})
ctdb_check_directories "nfs" $nfs_dirs
}
exportfs | grep -v '^#' | grep '^/' |
sed -e 's/[[:space:]]*[^[:space:]]*$//' |
ctdb_check_directories
} || exit $?
# check that lockd responds to rpc requests
ctdb_check_rpc "lockd" 100021 1
ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY
echo "$STATD_SHARED_DIRECTORY" | ctdb_check_directories "statd" || \
exit $?
# mount needs special handling since it is sometimes not started
# correctly on RHEL5
@ -103,6 +103,9 @@ case $cmd in
}
;;
status)
ctdb_checkstatus || exit $?
;;
esac
exit 0

View File

@ -1,25 +1,21 @@
#!/bin/sh
# ctdb event script for NFS tickle acks
PATH=/bin:/usr/bin:$PATH
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig nfs
cmd="$1"
shift
service_name="nfs"
service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
service_reconfigure=$service_start
loadconfig
ctdb_start_stop_service
[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
case $cmd in
startup)
mkdir -p $CTDB_BASE/state/nfstickle
mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
# we rely on fast tcp wait1 recycling
echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
exit 0
ctdb_service_start
;;
takeip)
@ -31,46 +27,25 @@ case $cmd in
# send tickle acks for all the connections the old server had
for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
[ -f $f ] && cat $f | while read dest; do
dip=`echo $dest | cut -d: -f1`
dport=`echo $dest | cut -d: -f2`
# send three, in case of lost packets
echo "Sending NFS tickle ack for $ip to $dip:$dport"
echo "Sending NFS tickle ack for $ip to $dest"
for i in `seq 1 3`; do
ctdb tickle $dip:$dport $ip:2049
ctdb tickle $dest $ip:2049
done
done
done
exit 0
;;
releaseip)
exit 0
;;
recovered)
exit 0
;;
shutdown)
exit 0
;;
monitor)
# always create these direcotries since NFS might be enabled at runtime
# and we dont want to restart ctdbd
mkdir -p $CTDB_BASE/state/nfstickle
mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
rm -f $mydir/*
# record our connections to shared storage
netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
awk '{print $4" "$5}' |
while read dest src; do
ip=`echo $dest | cut -d: -f1`
ip=${dest%:*}
echo $src >> $mydir/$ip
done
exit 0
;;
esac

View File

@ -1,16 +1,11 @@
#!/bin/sh
# ctdb event script for TGTD based iSCSI
PATH=/bin:/usr/bin:$PATH
. $CTDB_BASE/functions
loadconfig ctdb
loadconfig iscsi
cmd="$1"
shift
service_name="iscsi"
[ "$CTDB_MANAGES_ISCSI" = "yes" ] || exit 0
ctdb_start_stop_service
[ -z "$CTDB_START_ISCSI_SCRIPTS" ] && {
echo "No iscsi start script directory found"
@ -18,15 +13,6 @@ shift
}
case $cmd in
startup)
;;
takeip)
;;
releaseip)
;;
recovered)
# block the iscsi port
iptables -I INPUT 1 -p tcp --dport 3260 -j DROP
@ -51,8 +37,8 @@ case $cmd in
done
# remove all iptables rules
while `iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null` ; do
true;
while iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null ; do
:
done
;;
@ -63,9 +49,11 @@ case $cmd in
;;
monitor)
[ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
ctdb_check_tcp_ports "iscsi" 3260
}
ctdb_check_tcp_ports 3260 || exit $?
;;
status)
ctdb_checkstatus || exit $?
;;
esac

View File

@ -2,6 +2,7 @@
# script to manage the lvs ip multiplexer for a single public address cluster
. $CTDB_BASE/functions
loadconfig ctdb
[ -z "$CTDB_LVS_PUBLIC_IP" ] && exit 0
@ -12,12 +13,6 @@ loadconfig ctdb
exit 0
}
cmd="$1"
shift
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
case $cmd in
startup)
ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -42,12 +37,6 @@ case $cmd in
echo 1 > /proc/sys/net/ipv4/route/flush
;;
takeip)
;;
releaseip)
;;
recovered|stopped)
# kill off any tcp connections
ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
@ -89,9 +78,6 @@ case $cmd in
echo 1 > /proc/sys/net/ipv4/route/flush
;;
monitor)
;;
esac
exit 0

View File

@ -7,17 +7,14 @@
. $CTDB_BASE/functions
loadconfig ctdb
[ "x$CTDB_RUN_TIMEOUT_MONITOR" = "xyes" ] || exit 0
cmd="$1"
shift
[ "$CTDB_RUN_TIMEOUT_MONITOR" = "yes" ] || exit 0
case $cmd in
monitor)
TIMEOUT=$(ctdb listvars | grep EventScriptTimeout | awk '{print $3}')
echo "sleeping for $((TIMEOUT * 2)) seconds..."
sleep $((TIMEOUT * 2))
;;
monitor)
TIMEOUT=$(ctdb listvars | awk '$1 == "EventScriptTimeout" {print $3}')
echo "sleeping for $((TIMEOUT * 2)) seconds..."
sleep $((TIMEOUT * 2))
;;
esac
exit 0

View File

@ -1,15 +1,28 @@
# utility functions for ctdb event scripts
PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
#######################################
# pull in a system config file, if any
loadconfig() {
name="$1"
if [ -f /etc/sysconfig/$name ]; then
. /etc/sysconfig/$name
elif [ -f /etc/default/$name ]; then
. /etc/default/$name
elif [ -f $CTDB_BASE/sysconfig/$name ]; then
. $CTDB_BASE/sysconfig/$name
if [ "$1" != "ctdb" ] ; then
loadconfig "ctdb"
fi
if [ -z "$1" ] ; then
foo="${service_config:-${service_name}}"
if [ -n "$foo" ] ; then
loadconfig "$foo"
fi
fi
if [ -f /etc/sysconfig/$1 ]; then
. /etc/sysconfig/$1
elif [ -f /etc/default/$1 ]; then
. /etc/default/$1
elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
. $CTDB_BASE/sysconfig/$1
fi
}
@ -31,37 +44,28 @@ detect_init_style() {
######################################################
# simulate /sbin/service on platforms that don't have it
service() {
service_name="$1"
op="$2"
_service_name="$1"
_op="$2"
# do nothing, when no service was specified
test "x$service_name" = "x" && return
[ -z "$_service_name" ] && return
if [ -x /sbin/service ]; then
/sbin/service "$service_name" "$op"
elif [ -x /etc/init.d/$service_name ]; then
/etc/init.d/$service_name "$op"
elif [ -x /etc/rc.d/init.d/$service_name ]; then
/etc/rc.d/init.d/$service_name "$op"
/sbin/service "$_service_name" "$_op"
elif [ -x /etc/init.d/$_service_name ]; then
/etc/init.d/$_service_name "$_op"
elif [ -x /etc/rc.d/init.d/$_service_name ]; then
/etc/rc.d/init.d/$_service_name "$_op"
fi
}
######################################################
# simulate /sbin/service (niced) on platforms that don't have it
nice_service() {
service_name="$1"
op="$2"
# do nothing, when no service was specified
test "x$service_name" = "x" && return
[ -z "$1" ] && return
if [ -x /sbin/service ]; then
nice /sbin/service "$service_name" "$op"
elif [ -x /etc/init.d/$service_name ]; then
nice /etc/init.d/$service_name "$op"
elif [ -x /etc/rc.d/init.d/$service_name ]; then
nice /etc/rc.d/init.d/$service_name "$op"
fi
nice service "$@"
}
######################################################
@ -110,57 +114,30 @@ ctdb_wait_tcp_ports() {
(netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
else
echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
return
return 127
fi
done
[ $all_ok -eq 1 ] || sleep 1
ctdb status > /dev/null 2>&1 || {
echo "ctdb daemon has died. Exiting tcp wait $service_name"
exit 1
return 1
}
done
echo "Local tcp services for $service_name are up"
}
######################################################
# wait for a set of directories
# usage: ctdb_wait_directories SERVICE_NAME <directories...>
######################################################
ctdb_wait_directories() {
service_name="$1"
shift
wait_dirs="$*"
[ -z "$wait_dirs" ] && return;
all_ok=0
echo "Waiting for local directories for $service_name"
while [ $all_ok -eq 0 ]; do
all_ok=1
for d in $wait_dirs; do
[ -d $d ] || all_ok=0
done
[ $all_ok -eq 1 ] || sleep 1
ctdb status > /dev/null 2>&1 || {
echo "ctdb daemon has died. Exiting directory wait for $service_name"
exit 1
}
done
echo "Local directories for $service_name are available"
}
######################################################
# check that a rpc server is registered with portmap
# and responding to requests
# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
######################################################
ctdb_check_rpc() {
service_name="$1"
progname="$1"
prognum="$2"
version="$3"
rpcinfo -u localhost $prognum $version > /dev/null || {
echo "ERROR: $service_name not responding to rpc requests"
echo "ERROR: $progname not responding to rpc requests"
exit 1
}
}
@ -171,18 +148,15 @@ ctdb_check_rpc() {
# usage: ctdb_check_directories_probe   (directories are read from stdin, one per line)
######################################################
ctdb_check_directories_probe() {
service_name="$1"
shift
for d ; do
case "$d" in
*%*)
continue
;;
*)
[ -d "$d" ] || return 1
esac
done
return 0
while IFS="" read d ; do
case "$d" in
*%*)
continue
;;
*)
[ -d "$d" ] || return 1
esac
done
}
######################################################
@ -190,62 +164,27 @@ ctdb_check_directories_probe() {
# usage: ctdb_check_directories [SERVICE_NAME]   (directories are read from stdin, one per line)
######################################################
ctdb_check_directories() {
# Note: ctdb_check_directories_probe sets both $service_name and $d.
ctdb_check_directories_probe "$@" || {
echo "ERROR: $service_name directory $d not available"
exit 1
}
n="${1:-${service_name}}"
ctdb_check_directories_probe || {
echo "ERROR: $n directory \"$d\" not available"
exit 1
}
}
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
# usage: ctdb_check_tcp_ports <ports...>
######################################################
ctdb_check_tcp_ports() {
service_name="$1"
shift
wait_ports="$*"
[ -z "$wait_ports" ] && return;
# check availability of netcat or netstat first
NETCAT=""
NETSTAT=""
if [ -x /usr/bin/netstat ]; then
NETSTAT=/usr/bin/netstat
elif [ -x /bin/netstat ]; then
NETSTAT=/bin/netstat
elif [ -x /usr/bin/netcat ]; then
NETCAT=/usr/bin/netcat
elif [ -x /bin/netcat ]; then
NETCAT=/bin/netcat
elif [ -x /usr/bin/nc ]; then
NETCAT=/usr/bin/nc
elif [ -x /bin/nc ]; then
NETCAT=/bin/nc
fi
for p in $wait_ports; do
all_ok=1
if [ "x${NETCAT}" != "x" ]; then
${NETCAT} -z 127.0.0.1 $p > /dev/null || all_ok=0
elif [ "x${NETSTAT}" != "x" ]; then
if ! ${NETSTAT} -a -n | egrep "0.0.0.0:$p .*LISTEN" > /dev/null ; then
if ! ${NETSTAT} -a -n | egrep ":::$p .*LISTEN" > /dev/null ; then
all_ok=0
fi
fi
else
echo "ERROR: neither netcat (or nc) nor netstat found!"
echo "ERROR: can't monitor ${service_name} tcp port ${p}"
all_ok=0
fi
[ $all_ok -eq 1 ] || {
echo "ERROR: $service_name tcp port $p is not responding"
exit 1
}
done
for p ; do
if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
echo "ERROR: $service_name tcp port $p is not responding"
return 1
fi
fi
done
}
######################################################
@ -253,35 +192,13 @@ ctdb_check_tcp_ports() {
# usage: ctdb_check_unix_socket <socket_path>
######################################################
ctdb_check_unix_socket() {
service_name="$1"
socket_path="$2"
[ -z "$socket_path" ] && return;
socket_path="$1"
[ -z "$socket_path" ] && return
# check availability of netstat first
NETSTAT=""
if [ -x $(type -p netstat) ]; then
NETSTAT=$(type -p netstat)
elif [ -x /usr/bin/netstat ]; then
NETSTAT=/usr/bin/netstat
elif [ -x /bin/netstat ]; then
NETSTAT=/bin/netstat
fi
all_ok=1
if [ "x$NETSTAT" != "x" ]; then
if $NETSTAT -l -a -n | grep -qE "^unix.*LISTEN.*${socket_path}$"; then
all_ok=1
else
all_ok=0
if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
echo "ERROR: $service_name socket $socket_path not found"
return 1
fi
else
[ -S ${socket_path} ] && all_ok=1 || all_ok=0
fi
[ $all_ok -eq 1 ] || {
echo "ERROR: $service_name socket $socket_path not found"
exit 1
}
}
######################################################
@ -500,38 +417,175 @@ remove_ip() {
########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init <tag>
# ctdb_counter_incr <tag>
# ctdb_counter_limit <tag> <limit>
# e.g. <tag> = "fail-count"
# ctdb_counter_limit succeeds when count >= <limit>
# usage: ctdb_counter_init
# ctdb_counter_incr
# ctdb_check_counter_limit <limit>
# ctdb_check_counter_limit succeeds when count >= <limit>
########################################################
_ctdb_counter_common () {
_tag="$1"
_eventscript="${0##*/}" # basename
_counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
_counter_file="$ctdb_fail_dir/$service_name"
mkdir -p "${_counter_file%/*}" # dirname
}
ctdb_counter_init () {
_ctdb_counter_common "$1"
_ctdb_counter_common
echo -n > "$_counter_file"
>"$_counter_file"
}
ctdb_counter_incr () {
_ctdb_counter_common "$1"
_ctdb_counter_common
# unary counting!
echo -n 1 >> "$_counter_file"
}
ctdb_counter_limit () {
_ctdb_counter_common "$1"
_limit="$2"
ctdb_check_counter_limit () {
_ctdb_counter_common
_limit="${1:-${service_fail_limit}}"
_quiet="$2"
# unary counting!
_size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
[ $_size -ge $_limit ]
if [ $_size -ge $_limit ] ; then
echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
exit 1
elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
fi
}
########################################################
ctdb_spool_dir="/var/spool/ctdb"
ctdb_status_dir="$ctdb_spool_dir/status"
ctdb_fail_dir="$ctdb_spool_dir/failcount"
ctdb_active_dir="$ctdb_spool_dir/active"
# Print a one-line report of a recorded node status.
#   $1 - status word to report (callers in this file pass "unhealthy"
#        or "banned")
#   $2 - path of the file whose contents describe the problem
# Reads the global $script_name.  The path is quoted so that a file name
# containing blanks or glob characters is passed to cat unchanged.
log_status_cat ()
{
    echo "node is \"$1\", problem with \"${script_name}\": $(cat "$2")"
}
# Report the status recorded for this event script under
# $ctdb_status_dir/$script_name.
#   returns 1 (and logs) when a readable "unhealthy" file exists
#   returns 2 (and logs) when a readable "banned" file exists
#   returns 0 otherwise
# "unhealthy" is checked first, so it wins when both files exist.
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    fi

    if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    fi

    return 0
}
# Record or clear the status of this event script.
#   $1 - "unhealthy" or "banned" to record that status; any other value
#        clears both recorded statuses
#   $2 - file whose contents are copied into the status file (only used
#        when recording)
# Status files live in $ctdb_status_dir/$script_name.  Sets the global
# variables d and (when clearing) i.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        for i in "banned" "unhealthy" ; do
            rm -f "$d/$i"
        done
    fi
}
# Succeeds (returns 0) when a "reconfigure" marker exists for
# $service_name under $ctdb_status_dir, i.e. when
# ctdb_service_set_reconfigure has been called and not yet cleared.
ctdb_service_needs_reconfigure ()
{
    test -e "${ctdb_status_dir}/${service_name}/reconfigure"
}
# Flag $service_name as needing reconfiguration by creating (or
# truncating) an empty "reconfigure" marker file in
# $ctdb_status_dir/$service_name.  Sets the global variable d.
ctdb_service_set_reconfigure ()
{
    d="${ctdb_status_dir}/${service_name}"
    mkdir -p "$d"
    : >"$d/reconfigure"
}
# Remove the "reconfigure" marker of $service_name, if there is one.
# Because of "rm -f" a missing marker is not an error.
ctdb_service_unset_reconfigure ()
{
    rm -f "${ctdb_status_dir}/${service_name}/reconfigure"
}
# Reconfigure the current service, then clear its reconfigure marker and
# reset its failure counter.
#
# If $service_reconfigure is non-empty it is evaluated as a shell
# command; otherwise "service $service_name restart" is run.  The
# command string is passed to eval quoted, so it is parsed exactly once:
# embedded quoting, runs of whitespace and glob characters reach the
# command as written instead of being split and expanded beforehand.
ctdb_service_reconfigure ()
{
    if [ -n "$service_reconfigure" ] ; then
        eval "$service_reconfigure"
    else
        service "$service_name" restart
    fi
    ctdb_service_unset_reconfigure
    ctdb_counter_init
}
# Backward-compatibility helper for is_ctdb_managed_service.
#   $1 - value of a legacy CTDB_MANAGES_* setting
#   $2 - service name that setting refers to
# When $1 is exactly "yes", $2 is appended (space-delimited) to the
# global list $t.  Always returns 0.
ctdb_compat_managed_service ()
{
    case "$1" in
        yes)
            t="$t $2 "
            ;;
    esac
}
# Succeeds (returns 0) when $service_name is managed by ctdb.
#
# A service counts as managed when it is listed in the space-separated
# $CTDB_MANAGED_SERVICES, or when its legacy CTDB_MANAGES_<NAME> setting
# is "yes".  The combined list is built in the global variable $t.
# The winbind entry uses CTDB_MANAGES_WINBIND / "winbind", matching the
# variable name the Samba event script tests.
is_ctdb_managed_service ()
{
    t=" $CTDB_MANAGED_SERVICES "

    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"  "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"   "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"     "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"   "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"   "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"   "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"     "nfs"

    # Returns 0 if "<space>$service_name<space>" appears in $t
    [ "${t#* ${service_name} }" != "${t}" ]
}
# Bring the running state of $service_name in line with whether ctdb
# manages it, using the marker file $ctdb_active_dir/$service_name.
#
#  - not managed: stop the service if the marker exists (removing the
#    marker afterwards), then exit the script with status 0.
#  - managed, no marker: start the service, create the marker and exit
#    the script with status 0.
#  - managed, marker present: return 0 so the calling script carries on.
# A failing start or stop exits the script with that failure status.
ctdb_start_stop_service ()
{
    _active="$ctdb_active_dir/$service_name"

    if ! is_ctdb_managed_service ; then
        if [ -e "$_active" ] ; then
            echo "Stopping service $service_name"
            ctdb_service_stop || exit $?
            rm -f "$_active"
        fi
        exit 0
    fi

    # Managed and already marked active: nothing to do here.
    if [ -e "$_active" ] ; then
        return 0
    fi

    echo "Starting service $service_name"
    ctdb_service_start || exit $?
    mkdir -p "$ctdb_active_dir"
    touch "$_active"
    exit 0
}
# Start the current service and reset its failure counter.
#
# If $service_start is non-empty it is evaluated as a shell command;
# otherwise "service $service_name start" is run.  The command string is
# passed to eval quoted, so it is parsed exactly once: embedded quoting,
# runs of whitespace and glob characters reach the command as written.
# The return status is that of ctdb_counter_init.
ctdb_service_start ()
{
    if [ -n "$service_start" ] ; then
        eval "$service_start"
    else
        service "$service_name" start
    fi
    ctdb_counter_init
}
# Stop the current service.
#
# If $service_stop is non-empty it is evaluated as a shell command;
# otherwise "service $service_name stop" is run.  The command string is
# passed to eval quoted, so it is parsed exactly once: embedded quoting,
# runs of whitespace and glob characters reach the command as written.
# Returns the status of whichever command was run.
ctdb_service_stop ()
{
    if [ -n "$service_stop" ] ; then
        eval "$service_stop"
    else
        service "$service_name" stop
    fi
}
########################################################
# load a site local config file
########################################################
@ -546,4 +600,21 @@ ctdb_counter_limit () {
done
}
# Defaults set up for every script that sources this file.
# A reasonable default is the basename of the eventscript.
script_name="${0##*/}" # basename
service_name="$script_name"
service_fail_limit=1
# The first positional parameter is the event name.  It is shifted off
# here, so after sourcing this file "$@" holds only the event's
# arguments.  $cmd is set to the same value as $ctdb_event.
ctdb_event="$1" ; shift
cmd="$ctdb_event"
# "status" and "setstatus" are handled right here and end the script
# (exit without an argument propagates the status of the preceding
# command); all other events fall through to the sourcing script.
case "$ctdb_event" in
status)
ctdb_checkstatus
exit
;;
setstatus)
ctdb_setstatus "$@"
exit
;;
esac

View File

@ -28,7 +28,7 @@ case "$1" in
add-client)
# the callout does not tell us to which ip the client connected
# so we must add it to all the ips that we serve
for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
for f in $CTDB_BASE/state/statd/ip/*; do
ip=`basename $f`
[ -d $STATD_SHARED_DIRECTORY/$ip ] || /bin/mkdir $STATD_SHARED_DIRECTORY/$ip
touch $STATD_SHARED_DIRECTORY/$ip/$2
@ -37,7 +37,7 @@ case "$1" in
del-client)
# the callout does not tell us to which ip the client connected
# so we must add it to all the ips that we serve
for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
for f in $CTDB_BASE/state/statd/ip/*; do
ip=`basename $f`
/bin/rm -f $STATD_SHARED_DIRECTORY/$ip/$2
done

View File

@ -129,6 +129,7 @@ struct ctdb_tunable {
uint32_t vacuum_min_interval;
uint32_t vacuum_max_interval;
uint32_t max_queue_depth_drop_msg;
uint32_t use_status_events_for_monitoring;
};
/*
@ -450,9 +451,13 @@ struct ctdb_context {
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
uint32_t *recd_ping_count;
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
TALLOC_CTX *script_monitor_ctx; /* a context where we store results while running the monitor event */
TALLOC_CTX *last_monitor_ctx;
TALLOC_CTX *event_script_ctx; /* non-monitoring events */
TALLOC_CTX *monitor_event_script_ctx;
TALLOC_CTX *other_event_script_ctx;
struct ctdb_monitor_script_status_ctx *current_monitor_status_ctx;
struct ctdb_monitor_script_status_ctx *last_monitor_status_ctx;
TALLOC_CTX *banning_ctx;
};
@ -856,6 +861,19 @@ enum ctdb_trans2_commit_error {
CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */
};
/*
  different calls to event scripts.

  The enumerators rely on their implicit values (0, 1, 2, ...): they are
  used to index the call_names[] string table in the eventscript code,
  so the order here and the order of that table must be kept in sync.
*/
enum ctdb_eventscript_call {
CTDB_EVENT_STARTUP, /* CTDB starting up: no args. */
CTDB_EVENT_START_RECOVERY, /* CTDB recovery starting: no args. */
CTDB_EVENT_RECOVERED, /* CTDB recovery finished: no args. */
CTDB_EVENT_TAKE_IP, /* IP taken: interface, IP address, netmask bits. */
CTDB_EVENT_RELEASE_IP, /* IP released: interface, IP address, netmask bits. */
CTDB_EVENT_STOPPED, /* This node is stopped: no args. */
CTDB_EVENT_MONITOR, /* Please check if service is healthy: no args. */
CTDB_EVENT_STATUS, /* Report service status: no args. */
CTDB_EVENT_SHUTDOWN, /* CTDB shutting down: no args. */
CTDB_EVENT_RELOAD /* magic */
};
/* internal prototypes */
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
@ -1324,13 +1342,16 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata);
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call);
int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
int ctdb_event_script_callback(struct ctdb_context *ctdb,
struct timeval timeout,
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
bool from_user,
enum ctdb_eventscript_call call,
const char *fmt, ...) PRINTF_ATTRIBUTE(7,8);
void ctdb_release_all_ips(struct ctdb_context *ctdb);
void set_nonblocking(int fd);

View File

@ -286,7 +286,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
if (ctdb->methods != NULL) {
ctdb->methods->shutdown(ctdb);
}
ctdb_event_script(ctdb, "shutdown");
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command. Stopping CTDB daemon.\n"));
exit(0);

View File

@ -223,9 +223,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
if (!ctdb->done_startup) {
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
ctdb->monitor->monitor_context, ctdb_startup_callback,
ctdb, "startup");
ctdb, false,
CTDB_EVENT_STARTUP, "%s", "");
} else {
int i;
int skip_monitoring = 0;
@ -248,9 +248,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
return;
} else {
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
ctdb->monitor->monitor_context, ctdb_health_callback,
ctdb, "monitor");
ctdb, false,
CTDB_EVENT_MONITOR, "%s", "");
}
}

View File

@ -962,11 +962,11 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
ctdb_disable_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state,
ret = ctdb_event_script_callback(ctdb, state,
ctdb_end_recovery_callback,
state, "recovered");
state,
false,
CTDB_EVENT_RECOVERED, "%s", "");
if (ret != 0) {
ctdb_enable_monitoring(ctdb);
@ -1016,11 +1016,11 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
ctdb_disable_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state,
ret = ctdb_event_script_callback(ctdb, state,
ctdb_start_recovery_callback,
state, "startrecovery");
state, false,
CTDB_EVENT_START_RECOVERY,
"%s", "");
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n"));
@ -1160,7 +1160,7 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
if (ctdb->methods != NULL) {
ctdb->methods->shutdown(ctdb);
}
ctdb_event_script(ctdb, "shutdown");
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
DEBUG(DEBUG_ERR, ("Recovery daemon ping timeout. Daemon has been shut down.\n"));
exit(0);
}
@ -1230,11 +1230,10 @@ int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_contro
ctdb_disable_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state,
ret = ctdb_event_script_callback(ctdb, state,
ctdb_stop_node_callback,
state, "stopped");
state, false,
CTDB_EVENT_STOPPED, "%s", "");
if (ret != 0) {
ctdb_enable_monitoring(ctdb);

View File

@ -3288,7 +3288,7 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
if (ctdb->methods != NULL) {
ctdb->methods->shutdown(ctdb);
}
ctdb_event_script(ctdb, "shutdown");
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
exit(10);
}

View File

@ -235,9 +235,10 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
vnn->iface));
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state, takeover_ip_callback, state,
"takeip %s %s %u",
false,
CTDB_EVENT_TAKE_IP,
"%s %s %u",
vnn->iface,
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
vnn->public_netmask_bits);
@ -391,9 +392,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
state->vnn = vnn;
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state, release_ip_callback, state,
"releaseip %s %s %u",
false,
CTDB_EVENT_RELEASE_IP,
"%s %s %u",
vnn->iface,
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
vnn->public_netmask_bits);
@ -1382,7 +1384,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
if (vnn->pnn == ctdb->pnn) {
vnn->pnn = -1;
}
ctdb_event_script(ctdb, "releaseip %s %s %u",
ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
vnn->iface,
talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
vnn->public_netmask_bits);
@ -2122,9 +2124,10 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
DLIST_REMOVE(ctdb->vnn, vnn);
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
mem_ctx, delete_ip_callback, mem_ctx,
"releaseip %s %s %u",
false,
CTDB_EVENT_RELEASE_IP,
"%s %s %u",
vnn->iface,
talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
vnn->public_netmask_bits);

View File

@ -63,7 +63,8 @@ static const struct {
{ "VacuumLimit", 5000, offsetof(struct ctdb_tunable, vacuum_limit) },
{ "VacuumMinInterval", 60, offsetof(struct ctdb_tunable, vacuum_min_interval) },
{ "VacuumMaxInterval", 600, offsetof(struct ctdb_tunable, vacuum_max_interval) },
{ "MaxQueueDropMsg", 1000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) }
{ "MaxQueueDropMsg", 1000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
{ "UseStatusEvents", 0, offsetof(struct ctdb_tunable, use_status_events_for_monitoring) }
};
/*

View File

@ -32,6 +32,19 @@ static struct {
const char *script_running;
} child_state;
/*
  Event name passed to the event scripts for each
  enum ctdb_eventscript_call value.  The table is indexed directly by
  the enum value (call_names[call]), so the entries must stay in the
  same order as the enumerators.
*/
static const char *call_names[] = {
"startup",
"startrecovery",
"recovered",
"takeip",
"releaseip",
"stopped",
"monitor",
"status",
"shutdown",
"reload"
};
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
/*
@ -61,11 +74,13 @@ static void sigterm(int sig)
struct ctdb_event_script_state {
struct ctdb_context *ctdb;
pid_t child;
/* Warning: this can free us! */
void (*callback)(struct ctdb_context *, int, void *);
int cb_status;
int fd[2];
void *private_data;
enum ctdb_eventscript_call call;
const char *options;
struct timed_event *te;
struct timeval timeout;
};
@ -81,28 +96,22 @@ struct ctdb_monitor_script_status {
char *output;
};
struct ctdb_monitor_status {
struct timeval start;
struct timeval finished;
int32_t status;
struct ctdb_monitor_script_status_ctx {
struct ctdb_monitor_script_status *scripts;
struct ctdb_event_script_state *state;
};
/* called from ctdb_logging when we have received output on STDERR from
* one of the eventscripts
*/
int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
{
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
if (monitoring_status == NULL) {
if (ctdb->current_monitor_status_ctx == NULL) {
return -1;
}
script = monitoring_status->scripts;
script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
return -1;
}
@ -121,17 +130,13 @@ int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t
*/
int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
{
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
DEBUG(DEBUG_INFO, ("event script init called\n"));
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Init called when context is NULL\n"));
return 0;
if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when initing script\n"));
return -1;
}
monitoring_status->start = timeval_current();
return 0;
}
@ -142,41 +147,26 @@ int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
{
const char *name = (const char *)indata.dptr;
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_event_script_state *state;
struct ctdb_monitor_script_status *script;
DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when starting script\n"));
return -1;
}
script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
script = talloc_zero(ctdb->current_monitor_status_ctx, struct ctdb_monitor_script_status);
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
return -1;
}
script->next = monitoring_status->scripts;
script->next = ctdb->current_monitor_status_ctx->scripts;
script->name = talloc_strdup(script, name);
CTDB_NO_MEMORY(ctdb, script->name);
script->start = timeval_current();
monitoring_status->scripts = script;
state = monitoring_status->state;
if (state != NULL) {
/* reset the timeout for the next eventscript */
if (!timeval_is_zero(&state->timeout)) {
if (state->te != NULL) {
talloc_free(state->te);
state->te = NULL;
}
state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
}
}
ctdb->current_monitor_status_ctx->scripts = script;
return 0;
}
@ -187,15 +177,14 @@ int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA inda
int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
{
int32_t res = *((int32_t *)indata.dptr);
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
return -1;
}
script = monitoring_status->scripts;
script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
return -1;
@ -214,17 +203,16 @@ int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indat
int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA indata)
{
const char *name = (const char *)indata.dptr;
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
DEBUG(DEBUG_INFO, ("event script disabed called for script %s\n", name));
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
return -1;
}
script = monitoring_status->scripts;
script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
return -1;
@ -242,24 +230,19 @@ int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA i
*/
int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
{
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
DEBUG(DEBUG_INFO, ("event script finished called\n"));
if (monitoring_status == NULL) {
if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
return -1;
}
monitoring_status->finished = timeval_current();
monitoring_status->status = MONITOR_SCRIPT_OK;
if (ctdb->last_monitor_ctx) {
talloc_free(ctdb->last_monitor_ctx);
ctdb->last_monitor_ctx = NULL;
if (ctdb->last_monitor_status_ctx) {
talloc_free(ctdb->last_monitor_status_ctx);
ctdb->last_monitor_status_ctx = NULL;
}
ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
ctdb->script_monitor_ctx = NULL;
ctdb->last_monitor_status_ctx = ctdb->current_monitor_status_ctx;
ctdb->current_monitor_status_ctx = NULL;
return 0;
}
@ -303,11 +286,11 @@ static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_
int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->last_monitor_ctx;
struct ctdb_monitor_script_status_ctx *script_status = talloc_get_type(ctdb->last_monitor_status_ctx, struct ctdb_monitor_script_status_ctx);
struct ctdb_monitoring_wire *monitoring_scripts;
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
if (script_status == NULL) {
DEBUG(DEBUG_ERR,(__location__ " last_monitor_status_ctx is NULL when reading status\n"));
return -1;
}
@ -318,7 +301,7 @@ int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA
}
monitoring_scripts->num_scripts = 0;
monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, script_status->scripts);
if (monitoring_scripts == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
return -1;
@ -474,23 +457,21 @@ static struct ctdb_script_list *ctdb_get_script_list(struct ctdb_context *ctdb,
/*
run the event script - varargs version
Actually run the event script
this function is called and run in the context of a forked child
which allows it to do blocking calls such as system()
*/
static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
static int ctdb_run_event_script(struct ctdb_context *ctdb,
bool from_user,
enum ctdb_eventscript_call call,
const char *options)
{
char *cmdstr;
int ret;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_script_list *scripts, *current;
int is_monitor = 0;
if (!strcmp(options, "monitor")) {
is_monitor = 1;
}
if (is_monitor == 1) {
if (!from_user && call == CTDB_EVENT_MONITOR) {
/* This is running in the forked child process. At this stage
* we want to switch from being a ctdb daemon into being a
* client and connect to the real local daemon.
@ -510,14 +491,15 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip", "stopped" };
const enum ctdb_eventscript_call allowed_calls[] = {
CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED };
int i;
for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
if (call == allowed_calls[i]) break;
}
if (i == ARRAY_SIZE(allowed_scripts)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
options));
if (i == ARRAY_SIZE(allowed_calls)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
call_names[call]));
talloc_free(tmp_ctx);
return -1;
}
@ -541,10 +523,26 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
them
*/
for (current=scripts; current; current=current->next) {
/* we dont run disabled scripts, we just report they are disabled */
cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
ctdb->event_script_dir,
current->name, options);
const char *str = from_user ? "CTDB_CALLED_BY_USER=1 " : "";
/* Allow a setting where we run the actual monitor event
from an external source and replace it with
a "status" event that just picks up the actual
status of the event asynchronously.
*/
if ((ctdb->tunable.use_status_events_for_monitoring != 0)
&& (call == CTDB_EVENT_MONITOR)
&& !from_user) {
cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s",
str,
ctdb->event_script_dir,
current->name, "status");
} else {
cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s %s",
str,
ctdb->event_script_dir,
current->name, call_names[call], options);
}
CTDB_NO_MEMORY(ctdb, cmdstr);
DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
@ -552,7 +550,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
child_state.start = timeval_current();
child_state.script_running = cmdstr;
if (is_monitor == 1) {
if (!from_user && call == CTDB_EVENT_MONITOR) {
if (ctdb_ctrl_event_script_start(ctdb, current->name) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to start event script monitoring\n"));
talloc_free(tmp_ctx);
@ -585,7 +583,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
DEBUG(DEBUG_ERR,("Script %s returned status 127. Someone just deleted it?\n", cmdstr));
}
if (is_monitor == 1) {
if (!from_user && call == CTDB_EVENT_MONITOR) {
if (ctdb_ctrl_event_script_stop(ctdb, ret) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to stop event script monitoring\n"));
talloc_free(tmp_ctx);
@ -596,7 +594,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
/* return an error if the script failed */
if (ret != 0) {
DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
if (is_monitor == 1) {
if (!from_user && call == CTDB_EVENT_MONITOR) {
if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
talloc_free(tmp_ctx);
@ -612,7 +610,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
child_state.start = timeval_current();
child_state.script_running = "finished";
if (is_monitor == 1) {
if (!from_user && call == CTDB_EVENT_MONITOR) {
if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
talloc_free(tmp_ctx);
@ -631,20 +629,18 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
struct ctdb_event_script_state *state =
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
signed char rt = -1;
read(state->fd[0], &rt, sizeof(rt));
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, rt));
if (state->callback) {
state->callback(ctdb, rt, state->private_data);
state->callback = NULL;
if (read(state->fd[0], &state->cb_status, sizeof(state->cb_status)) !=
sizeof(state->cb_status)) {
state->cb_status = -2;
}
talloc_set_destructor(state, NULL);
talloc_free(state);
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
call_names[state->call], state->options, state->cb_status));
state->child = 0;
ctdb->event_script_timeouts = 0;
talloc_free(state);
}
static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
@ -667,29 +663,19 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
struct timeval t, void *p)
{
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
char *options;
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
state->te = NULL;
DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
if (kill(state->child, 0) != 0) {
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
if (state->callback) {
state->callback(ctdb, 0, private_data);
state->callback = NULL;
}
talloc_set_destructor(state, NULL);
state->child = 0;
talloc_free(state);
return;
}
options = talloc_strdup(ctdb, state->options);
CTDB_NO_MEMORY_VOID(ctdb, options);
if (!strcmp(options, "monitor")) {
if (state->call == CTDB_EVENT_MONITOR) {
/* if it is a monitor event, we allow it to "hang" a few times
before we declare it a failure and ban ourself (and make
ourself unhealthy)
@ -697,135 +683,180 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
ctdb->event_script_timeouts++;
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
if (state->callback) {
state->callback(ctdb, -ETIME, private_data);
state->callback = NULL;
}
} else {
ctdb->event_script_timeouts = 0;
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
if (state->callback) {
state->callback(ctdb, -1, private_data);
state->callback = NULL;
}
}
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
state->cb_status = -ETIME;
} else {
if (state->callback) {
state->callback(ctdb, 0, private_data);
state->callback = NULL;
}
state->cb_status = 0;
}
} else if (!strcmp(options, "startup")) {
} else if (state->call == CTDB_EVENT_STARTUP) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
if (state->callback) {
state->callback(ctdb, -1, private_data);
state->callback = NULL;
}
state->cb_status = -1;
} else {
/* if it is not a monitor event we ban ourself immediately */
/* if it is not a monitor or a startup event we ban ourself
immediately
*/
DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
if (state->callback) {
state->callback(ctdb, -1, private_data);
state->callback = NULL;
}
state->cb_status = -1;
}
if ((!strcmp(options, "monitor")) && (monitoring_status != NULL)) {
if (state->call == CTDB_EVENT_MONITOR || state->call == CTDB_EVENT_STATUS) {
struct ctdb_monitor_script_status *script;
script = monitoring_status->scripts;
if (ctdb->current_monitor_status_ctx == NULL) {
talloc_free(state);
return;
}
script = ctdb->current_monitor_status_ctx->scripts;
if (script != NULL) {
script->timedout = 1;
}
monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
if (ctdb->last_monitor_ctx) {
talloc_free(ctdb->last_monitor_ctx);
ctdb->last_monitor_ctx = NULL;
if (ctdb->last_monitor_status_ctx) {
talloc_free(ctdb->last_monitor_status_ctx);
ctdb->last_monitor_status_ctx = NULL;
}
ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
ctdb->script_monitor_ctx = NULL;
ctdb->last_monitor_status_ctx = talloc_steal(ctdb, ctdb->current_monitor_status_ctx);
ctdb->current_monitor_status_ctx = NULL;
}
talloc_free(state);
talloc_free(options);
}
/*
destroy a running event script
destroy an event script: kill it if ->child != 0.
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
if (state->callback) {
state->callback(state->ctdb, 0, state->private_data);
state->callback = NULL;
if (kill(state->child, SIGTERM) != 0) {
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
}
}
if (kill(state->child, SIGTERM) != 0) {
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
/* This is allowed to free us; talloc will prevent double free anyway,
* but beware if you call this outside the destructor! */
if (state->callback) {
state->callback(state->ctdb, state->cb_status, state->private_data);
}
return 0;
}
/*
  count the words in an option string; words are separated by runs of
  spaces and/or tabs, leading and trailing blanks are ignored
 */
static unsigned int count_words(const char *options)
{
	static const char blanks[] = " \t";
	unsigned int n = 0;
	const char *p;

	for (p = options + strspn(options, blanks); *p != '\0'; p += strspn(p, blanks)) {
		n++;
		/* step over the word itself; the loop increment then eats
		   the separator that follows it */
		p += strcspn(p, blanks);
	}

	return n;
}
/*
  sanity check the argument string that accompanies an event.

  Events that take no arguments must have an empty (blank-only) option
  string; takeip/releaseip must have exactly three words (interface,
  IP address, netmask bits).  Returns true when the word count matches,
  false otherwise (including for an unknown event value).
 */
static bool check_options(enum ctdb_eventscript_call call, const char *options)
{
	switch (call) {
	/* These all take no arguments. */
	case CTDB_EVENT_STARTUP:
	case CTDB_EVENT_START_RECOVERY:
	case CTDB_EVENT_RECOVERED:
	case CTDB_EVENT_STOPPED:
	case CTDB_EVENT_MONITOR:
	case CTDB_EVENT_STATUS:
	case CTDB_EVENT_SHUTDOWN:
	case CTDB_EVENT_RELOAD:
		return count_words(options) == 0;

	case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
	case CTDB_EVENT_RELEASE_IP:
		return count_words(options) == 3;

	default:
		/* leading space separates the message from the __location__
		   prefix, as in the other DEBUG calls; the enum is cast so
		   the argument really matches %u */
		DEBUG(DEBUG_ERR,(__location__ " Unknown ctdb_eventscript_call %u\n",
				 (unsigned int)call));
		return false;
	}
}
/*
run the event script in the background, calling the callback when
finished
*/
static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
struct timeval timeout,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
bool from_user,
enum ctdb_eventscript_call call,
const char *fmt, va_list ap)
{
struct ctdb_monitor_status *monitoring_status;
TALLOC_CTX *mem_ctx;
struct ctdb_event_script_state *state;
int ret;
if (!strcmp(fmt, "monitor")) {
if (ctdb->script_monitor_ctx != NULL) {
talloc_free(ctdb->script_monitor_ctx);
ctdb->script_monitor_ctx = NULL;
if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
/* if this was a "monitor" or a status event, we recycle the
context to start a new monitor event
*/
if (ctdb->monitor_event_script_ctx != NULL) {
talloc_free(ctdb->monitor_event_script_ctx);
ctdb->monitor_event_script_ctx = NULL;
}
monitoring_status = talloc_zero(ctdb, struct ctdb_monitor_status);
ctdb->monitor_event_script_ctx = talloc_new(ctdb);
mem_ctx = ctdb->monitor_event_script_ctx;
if (ctdb->current_monitor_status_ctx != NULL) {
talloc_free(ctdb->current_monitor_status_ctx);
ctdb->current_monitor_status_ctx = NULL;
}
ctdb->current_monitor_status_ctx = talloc(ctdb, struct ctdb_monitor_script_status_ctx);
CTDB_NO_MEMORY(ctdb, ctdb->current_monitor_status_ctx);
ctdb->current_monitor_status_ctx->scripts = NULL;
} else {
if (ctdb->event_script_ctx == NULL) {
ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
/* any other script will first terminate any monitor event */
if (ctdb->monitor_event_script_ctx != NULL) {
talloc_free(ctdb->monitor_event_script_ctx);
ctdb->monitor_event_script_ctx = NULL;
}
monitoring_status = ctdb->event_script_ctx;
/* and then use a context common for all non-monitor events */
if (ctdb->other_event_script_ctx == NULL) {
ctdb->other_event_script_ctx = talloc_new(ctdb);
}
mem_ctx = ctdb->other_event_script_ctx;
}
if (monitoring_status == NULL) {
DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
return -1;
}
state = talloc(monitoring_status, struct ctdb_event_script_state);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " could not allocate state\n"));
return -1;
}
monitoring_status->state = state;
state = talloc(mem_ctx, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->callback = callback;
state->private_data = private_data;
state->call = call;
state->options = talloc_vasprintf(state, fmt, ap);
state->timeout = timeout;
state->te = NULL;
state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
if (state->options == NULL) {
DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
talloc_free(state);
return -1;
}
if (!check_options(state->call, state->options)) {
	/* Reject an argument string that does not fit the event.  The
	   arguments follow the format string: the quoted '%s' is the
	   option string, the second %s is the event name. */
	DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
			  state->options, call_names[state->call]));
	talloc_free(state);
	return -1;
}
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s\n", state->options));
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
call_names[state->call], state->options));
ret = pipe(state->fd);
if (ret != 0) {
@ -843,28 +874,22 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
}
if (state->child == 0) {
signed char rt;
int rt;
close(state->fd[0]);
set_close_on_exec(state->fd[1]);
rt = ctdb_event_script_v(ctdb, state->options);
while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
write(state->fd[1], &rt, sizeof(rt));
usleep(100000);
}
rt = ctdb_run_event_script(ctdb, from_user, state->call, state->options);
/* We must be able to write PIPE_BUF bytes at least; if this
somehow fails, the read above will be short. */
write(state->fd[1], &rt, sizeof(rt));
close(state->fd[1]);
_exit(rt);
}
talloc_set_destructor(state, event_script_destructor);
if (!strcmp(fmt, "monitor")) {
ctdb->script_monitor_ctx = monitoring_status;
} else {
ctdb->event_script_ctx = monitoring_status;
}
close(state->fd[1]);
set_close_on_exec(state->fd[0]);
talloc_set_destructor(state, event_script_destructor);
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
@ -872,9 +897,10 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
ctdb_event_script_handler, state);
if (!timeval_is_zero(&state->timeout)) {
state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
} else {
DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n",
call_names[state->call], state->options));
}
return 0;
@ -886,17 +912,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
finished
*/
int ctdb_event_script_callback(struct ctdb_context *ctdb,
struct timeval timeout,
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
bool from_user,
enum ctdb_eventscript_call call,
const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
ret = ctdb_event_script_callback_v(ctdb, timeout, callback, private_data, fmt, ap);
ret = ctdb_event_script_callback_v(ctdb, callback, private_data, from_user, call, fmt, ap);
va_end(ap);
return ret;
@ -919,24 +946,23 @@ static void event_script_callback(struct ctdb_context *ctdb, int status, void *p
}
/*
run the event script, waiting for it to complete. Used when the caller doesn't want to
continue till the event script has finished.
run the event script, waiting for it to complete. Used when the caller
doesn't want to continue till the event script has finished.
*/
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
const char *fmt, ...)
{
va_list ap;
int ret;
struct callback_status status;
va_start(ap, fmt);
ret = ctdb_event_script_callback_v(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
event_script_callback, &status, fmt, ap);
va_end(ap);
ret = ctdb_event_script_callback_v(ctdb,
event_script_callback, &status, false, call, fmt, ap);
if (ret != 0) {
return ret;
}
va_end(ap);
status.status = -1;
status.done = false;
@ -946,6 +972,11 @@ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
return status.status;
}
/*
  Convenience form of ctdb_event_script_args() for calls that carry no
  extra options. Returns whatever ctdb_event_script_args() returns.
 */
int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call)
{
	/* An empty format string draws a GCC warning, so hand the empty
	   option string over through "%s" instead. */
	const int result = ctdb_event_script_args(ctdb, call, "%s", "");

	return result;
}
struct eventscript_callback_state {
struct ctdb_req_control *c;
@ -964,17 +995,36 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
if (status != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
talloc_free(state);
return;
}
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
/* This will free the struct ctdb_event_script_state we are in! */
talloc_free(state);
return;
}
/*
  Identify the eventscript call named at the start of p.

  Leading spaces and tabs are skipped, then each entry of call_names is
  tried in table order. An entry matches when p begins with that name and
  the name is followed by a space, a tab, or the end of the string.

  On a match, *call holds the index of the matching entry and the return
  value points just past the name. If nothing matches, NULL is returned
  and *call is left equal to ARRAY_SIZE(call_names).
 */
static const char *get_call(const char *p, enum ctdb_eventscript_call *call)
{
	unsigned int name_len;
	const char *rest;

	/* Leading blanks are not part of the call name. */
	p += strspn(p, " \t");

	*call = 0;
	while (*call < ARRAY_SIZE(call_names)) {
		name_len = strlen(call_names[*call]);
		if (strncmp(p, call_names[*call], name_len) == 0) {
			/* Only accept a whole word: the name must be
			   followed by whitespace or the terminator. */
			rest = p + name_len;
			if (*rest == '\0' || *rest == ' ' || *rest == '\t') {
				return rest;
			}
		}
		(*call)++;
	}

	return NULL;
}
/*
A control to force running of the eventscripts from the ctdb client tool
*/
@ -984,29 +1034,33 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
{
int ret;
struct eventscript_callback_state *state;
const char *options;
enum ctdb_eventscript_call call;
if (ctdb->event_script_ctx == NULL) {
ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
/* Figure out what call they want. */
options = get_call((const char *)indata.dptr, &call);
if (!options) {
DEBUG(DEBUG_ERR, (__location__ " Invalid forced \"%s\"\n", (const char *)indata.dptr));
return -1;
}
state = talloc(ctdb->event_script_ctx, struct eventscript_callback_state);
CTDB_NO_MEMORY(ctdb, state);
state->c = talloc_steal(state, c);
DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
return -1;
}
state = talloc(ctdb->other_event_script_ctx, struct eventscript_callback_state);
CTDB_NO_MEMORY(ctdb, state);
state->c = talloc_steal(state, c);
DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
ctdb_disable_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,
timeval_set(ctdb->tunable.script_timeout, 0),
state, run_eventscripts_callback, state,
"%s", (const char *)indata.dptr);
true, call, "%s", options);
if (ret != 0) {
ctdb_enable_monitoring(ctdb);