mirror of
https://github.com/samba-team/samba.git
synced 2025-01-10 01:18:15 +03:00
Merge commit 'martins/status-test-2'
Conflicts: server/eventscript.c (This used to be ctdb commit e9b3477a5b9a2eff18f727e7d59338bfb5214793)
This commit is contained in:
commit
569001afd0
@ -250,7 +250,7 @@ status() {
|
||||
}
|
||||
|
||||
|
||||
case "$1" in
|
||||
case "$cmd" in
|
||||
start)
|
||||
start
|
||||
;;
|
||||
|
@ -10,13 +10,7 @@
|
||||
# recovered : called when ctdb has finished a recovery event
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
# ensure we have /bin and /usr/bin in the path
|
||||
PATH=/bin:/usr/bin:$PATH
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
loadconfig
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
|
@ -2,55 +2,43 @@
|
||||
# script to check accessibility to the reclock file on a node
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
|
||||
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
# Count the number of intervals that have passed when we have tried to
|
||||
# but failed to stat the reclock file. After the third failure the node
|
||||
# becomes unhealthy; after the 200th failure we shut down
|
||||
# ctdbd
|
||||
RECLOCKCOUNT="fail-count"
|
||||
loadconfig
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
ctdb_counter_init "$RECLOCKCOUNT"
|
||||
startup)
|
||||
ctdb_counter_init
|
||||
;;
|
||||
|
||||
monitor)
|
||||
RECLOCKFILE=$(ctdb -Y getreclock)
|
||||
|
||||
monitor)
|
||||
ctdb_counter_incr "$RECLOCKCOUNT"
|
||||
ctdb_counter_limit "$RECLOCKCOUNT" 200 && {
|
||||
echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Shutting down."
|
||||
df
|
||||
sleep 1
|
||||
ctdb shutdown
|
||||
ctdb_counter_incr
|
||||
(ctdb_check_counter_limit 200 >/dev/null 2>&1) || {
|
||||
echo "Reclock file $RECLOCKFILE\" can not be accessed. Shutting down."
|
||||
df
|
||||
sleep 1
|
||||
ctdb shutdown
|
||||
}
|
||||
|
||||
RECLOCKFILE=`ctdb -Y getreclock`
|
||||
[ -z "$RECLOCKFILE" ] && {
|
||||
# we are not using a reclock file
|
||||
ctdb_counter_init "$RECLOCKCOUNT"
|
||||
exit 0
|
||||
# we are not using a reclock file
|
||||
ctdb_counter_init
|
||||
exit 0
|
||||
}
|
||||
|
||||
# try to stat the reclock file as a background process
|
||||
# so that we don't block in case the cluster filesystem is unavailable
|
||||
(
|
||||
stat $RECLOCKFILE && {
|
||||
# we could stat the file, reset the counter
|
||||
ctdb_counter_init "$RECLOCKCOUNT"
|
||||
}
|
||||
stat $RECLOCKFILE && {
|
||||
# we could stat the file, reset the counter
|
||||
ctdb_counter_init
|
||||
}
|
||||
) >/dev/null 2>/dev/null &
|
||||
|
||||
|
||||
ctdb_counter_limit "$RECLOCKCOUNT" 3 && {
|
||||
echo "Reclock file \"$RECLOCKFILE\" can not be accessed. Mark node UNHEALTHY."
|
||||
df
|
||||
exit 1;
|
||||
}
|
||||
ctdb_check_counter_limit 3 quiet
|
||||
;;
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
|
@ -6,10 +6,7 @@
|
||||
# public interface
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
loadconfig
|
||||
|
||||
[ -z "$CTDB_PUBLIC_ADDRESSES" ] && {
|
||||
CTDB_PUBLIC_ADDRESSES=$CTDB_BASE/public_addresses
|
||||
@ -177,10 +174,10 @@ case $cmd in
|
||||
esac
|
||||
done
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
|
||||
|
||||
|
@ -6,15 +6,10 @@
|
||||
#
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig
|
||||
|
||||
[ -z "$CTDB_NATGW_PUBLIC_IFACE" ] && exit 0
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
|
||||
delete_all() {
|
||||
remove_ip $CTDB_NATGW_PUBLIC_IP $CTDB_NATGW_PUBLIC_IFACE
|
||||
remove_ip $CTDB_NATGW_PUBLIC_IP_HOST lo
|
||||
@ -28,7 +23,7 @@ delete_all() {
|
||||
}
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
startup)
|
||||
# do not respond to ARPs that are for ip addresses with scope 'host'
|
||||
echo 3 > /proc/sys/net/ipv4/conf/all/arp_ignore
|
||||
# do not send out arp requests from loopback addresses
|
||||
@ -37,13 +32,13 @@ case $cmd in
|
||||
ctdb setnatgwstate on
|
||||
;;
|
||||
|
||||
recovered|updatenatgw)
|
||||
recovered|updatenatgw)
|
||||
MYPNN=`ctdb pnn | cut -d: -f2`
|
||||
NATGWMASTER=`ctdb natgwlist | head -1 | sed -e "s/ .*//"`
|
||||
NATGWIP=`ctdb natgwlist | head -1 | sed -e "s/^[^ ]* *//"`
|
||||
|
||||
CTDB_NATGW_PUBLIC_IP_HOST=`echo $CTDB_NATGW_PUBLIC_IP | sed -e "s/\/.*/\/32/"`
|
||||
if [ "$NATGWMASTER" = "-1" ]; then
|
||||
if [ "$NATGWMASTER" == "-1" ]; then
|
||||
echo "There is not NATGW master node"
|
||||
exit 1
|
||||
fi
|
||||
@ -71,7 +66,7 @@ case $cmd in
|
||||
echo 1 > /proc/sys/net/ipv4/route/flush
|
||||
;;
|
||||
|
||||
shutdown|removenatgw)
|
||||
shutdown|removenatgw)
|
||||
delete_all
|
||||
;;
|
||||
|
||||
|
@ -13,16 +13,12 @@
|
||||
# bond1 10.3.3.0/24 10.0.0.1
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig
|
||||
|
||||
[ -f $CTDB_BASE/static-routes ] || {
|
||||
exit 0
|
||||
}
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
case $cmd in
|
||||
takeip|releaseip)
|
||||
iface=$1
|
||||
|
@ -6,14 +6,11 @@
|
||||
# CTDB_MONITOR_MPDEVICES="device1 device2 ..."
|
||||
#
|
||||
|
||||
PATH=/bin:/usr/bin:$PATH
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig multipathd
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
service_name="multipathd"
|
||||
|
||||
loadconfig
|
||||
|
||||
[ -z "$CTDB_MONITOR_MPDEVICES" ] && {
|
||||
exit 0
|
||||
|
@ -2,52 +2,45 @@
|
||||
# event script to manage clamd in a cluster environment
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
detect_init_style
|
||||
|
||||
case $CTDB_INIT_STYLE in
|
||||
redhat)
|
||||
CTDB_SERVICE_CLAMD="clamd"
|
||||
CTDB_CONFIG_CLAMD="clamd"
|
||||
;;
|
||||
suse)
|
||||
CTDB_SERVICE_CLAMD="clamav"
|
||||
CTDB_CONFIG_CLAMD="clamav"
|
||||
;;
|
||||
debian)
|
||||
CTDB_SERVICE_CLAMD="clamav"
|
||||
CTDB_CONFIG_CLAMD="clamav"
|
||||
service_name="clamd"
|
||||
service_config="clamd"
|
||||
;;
|
||||
*)
|
||||
# should not happen.
|
||||
# for now use red hat style as default
|
||||
CTDB_SERVICE_CLAMD="clamd"
|
||||
CTDB_CONFIG_CLAMD="clamd"
|
||||
service_name="clamav"
|
||||
service_config="clamav"
|
||||
;;
|
||||
esac
|
||||
|
||||
loadconfig "${CTDB_CONFIG_CLAMD}"
|
||||
service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
|
||||
service_stop="service $service_name stop"
|
||||
|
||||
[ "$CTDB_MANAGES_CLAMD" = "yes" ] || exit 0
|
||||
loadconfig
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
ctdb_start_stop_service
|
||||
|
||||
is_ctdb_managed_service || exit 0
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
service "${CTDB_SERVICE_CLAMD}" stop > /dev/null 2>&1
|
||||
service "${CTDB_SERVICE_CLAMD}" start
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
service "${CTDB_SERVICE_CLAMD}" stop
|
||||
ctdb_service_stop
|
||||
;;
|
||||
|
||||
monitor)
|
||||
ctdb_check_unix_socket "clamd" ${CTDB_CLAMD_SOCKET}
|
||||
ctdb_check_unix_socket ${CTDB_CLAMD_SOCKET} || exit $?
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
|
@ -2,67 +2,61 @@
|
||||
# event script to manage vsftpd in a cluster environment
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig vsftpd
|
||||
|
||||
[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
|
||||
service_name="vsftpd"
|
||||
# make sure the service is stopped first
|
||||
service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start"
|
||||
service_stop="service $service_name stop"
|
||||
service_reconfigure="service $service_name restart"
|
||||
service_fail_limit=2
|
||||
service_tcp_ports=21
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
loadconfig
|
||||
|
||||
# Count the number of monitor failures. The cluster only becomes
|
||||
# unhealthy after 2 failures.
|
||||
VSFTPD_FAILS="fail-count"
|
||||
VSFTPD_LIMIT=2
|
||||
ctdb_start_stop_service
|
||||
|
||||
is_ctdb_managed_service || exit 0
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
/bin/mkdir -p $CTDB_BASE/state/vsftpd
|
||||
|
||||
# make sure the service is stopped first
|
||||
service vsftpd stop > /dev/null 2>&1
|
||||
service vsftpd start
|
||||
|
||||
ctdb_counter_init "$VSFTPD_FAILS"
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
service vsftpd stop
|
||||
ctdb_service_stop
|
||||
;;
|
||||
|
||||
takeip)
|
||||
echo "restart" > $CTDB_BASE/state/vsftpd/restart
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
echo "restart" > $CTDB_BASE/state/vsftpd/restart
|
||||
takeip|releaseip)
|
||||
ctdb_service_set_reconfigure
|
||||
;;
|
||||
|
||||
recovered)
|
||||
# if we have taken or released any ips we must
|
||||
# restart vsftpd to ensure that all tcp connections are reset
|
||||
[ -f $CTDB_BASE/state/vsftpd/restart ] && {
|
||||
service vsftpd stop > /dev/null 2>&1
|
||||
service vsftpd start
|
||||
/bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
|
||||
ctdb_counter_init "$VSFTPD_FAILS"
|
||||
} >/dev/null 2>&1
|
||||
if ctdb_service_needs_reconfigure ; then
|
||||
ctdb_service_reconfigure
|
||||
fi
|
||||
;;
|
||||
|
||||
monitor)
|
||||
# Subshell catches the "exit 1"
|
||||
if (ctdb_check_tcp_ports "ftp" 21) ; then
|
||||
ctdb_counter_init "$VSFTPD_FAILS"
|
||||
else
|
||||
ctdb_counter_incr "$VSFTPD_FAILS"
|
||||
if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
|
||||
echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
|
||||
exit 1
|
||||
else
|
||||
echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
|
||||
fi
|
||||
|
||||
if ctdb_service_needs_reconfigure ; then
|
||||
ctdb_service_reconfigure
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ -n "$service_tcp_ports" ] ; then
|
||||
if ctdb_check_tcp_ports $service_tcp_ports ; then
|
||||
ctdb_counter_init
|
||||
else
|
||||
ctdb_counter_incr
|
||||
ctdb_check_counter_limit
|
||||
exit 0 # only count 1 failure per monitor event
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
|
@ -2,67 +2,66 @@
|
||||
# event script to manage httpd in a cluster environment
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
detect_init_style
|
||||
|
||||
case $CTDB_INIT_STYLE in
|
||||
redhat)
|
||||
CTDB_SERVICE_HTTP="httpd"
|
||||
CTDB_CONFIG_HTTP="http"
|
||||
;;
|
||||
suse)
|
||||
CTDB_SERVICE_HTTP="apache2"
|
||||
CTDB_CONFIG_HTTP="apache2"
|
||||
;;
|
||||
debian)
|
||||
CTDB_SERVICE_HTTP="apache2"
|
||||
CTDB_CONFIG_HTTP="apache2"
|
||||
;;
|
||||
*)
|
||||
# should not happen.
|
||||
# for now use red hat style as default
|
||||
CTDB_SERVICE_HTTP="httpd"
|
||||
CTDB_CONFIG_HTTP="http"
|
||||
;;
|
||||
redhat)
|
||||
service_name="httpd"
|
||||
service_config="http"
|
||||
;;
|
||||
suse|debian|*)
|
||||
service_name="apache2"
|
||||
service_config="apache2"
|
||||
;;
|
||||
esac
|
||||
|
||||
loadconfig "${CTDB_CONFIG_HTTP}"
|
||||
|
||||
[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
|
||||
# RHEL5 sometimes uses a SIGKILL to terminate httpd, which then leaks
|
||||
# semaphores. This is a hack to clean them up.
|
||||
cleanup_httpd_semaphore_leak() {
|
||||
killall -q -0 "${CTDB_SERVICE_HTTP}" ||
|
||||
killall -q -0 "$service_name" ||
|
||||
for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
|
||||
ipcrm -s $i
|
||||
done
|
||||
}
|
||||
|
||||
##########
|
||||
|
||||
service_start="cleanup_httpd_semaphore_leak; service $service_name start"
|
||||
service_stop="service $service_name stop; killall -q -9 $service_name || true"
|
||||
service_reconfigure="service $service_name restart"
|
||||
|
||||
loadconfig
|
||||
|
||||
ctdb_start_stop_service
|
||||
|
||||
is_ctdb_managed_service || exit 0
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
cleanup_httpd_semaphore_leak
|
||||
service "${CTDB_SERVICE_HTTP}" start
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
service "${CTDB_SERVICE_HTTP}" stop
|
||||
killall -q -9 "${CTDB_SERVICE_HTTP}"
|
||||
ctdb_service_stop
|
||||
;;
|
||||
|
||||
monitor)
|
||||
( ctdb_check_tcp_ports "http" 80 )
|
||||
if [ $? -ne 0 ] ; then
|
||||
monitor)
|
||||
if ctdb_service_needs_reconfigure ; then
|
||||
ctdb_service_reconfigure
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if ! ctdb_check_tcp_ports 80 ; then
|
||||
echo "HTTPD is not running. Trying to restart HTTPD."
|
||||
cleanup_httpd_semaphore_leak
|
||||
service "${CTDB_SERVICE_HTTP}" start
|
||||
ctdb_service_start
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
@ -1,11 +1,7 @@
|
||||
#!/bin/sh
|
||||
# ctdb event script for Samba
|
||||
|
||||
PATH=/bin:/usr/bin:$PATH
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig samba
|
||||
|
||||
detect_init_style
|
||||
|
||||
@ -20,11 +16,6 @@ case $CTDB_INIT_STYLE in
|
||||
CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
|
||||
CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
|
||||
;;
|
||||
redhat)
|
||||
CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
|
||||
CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-""}
|
||||
CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind}
|
||||
;;
|
||||
*)
|
||||
# should not happen, but for now use redhat style as default:
|
||||
CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb}
|
||||
@ -33,11 +24,69 @@ case $CTDB_INIT_STYLE in
|
||||
;;
|
||||
esac
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
service_name="samba"
|
||||
service_start="start_samba"
|
||||
service_stop="stop_samba"
|
||||
|
||||
loadconfig
|
||||
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] || [ "$CTDB_MANAGES_WINBIND" = "yes" ] || exit 0
|
||||
|
||||
start_samba() {
|
||||
# create the state directory for samba
|
||||
/bin/mkdir -p $CTDB_BASE/state/samba
|
||||
|
||||
# make sure samba is not already started
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
|
||||
service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
|
||||
killall -0 -q smbd && {
|
||||
sleep 1
|
||||
# make absolutely sure samba is dead
|
||||
killall -q -9 smbd
|
||||
}
|
||||
|
||||
killall -0 -q nmbd && {
|
||||
sleep 1
|
||||
# make absolutely sure samba is dead
|
||||
killall -q -9 nmbd
|
||||
}
|
||||
}
|
||||
|
||||
# restart the winbind service
|
||||
check_ctdb_manages_winbind
|
||||
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
|
||||
killall -0 -q winbindd && {
|
||||
sleep 1
|
||||
# make absolutely sure winbindd is dead
|
||||
killall -q -9 winbindd
|
||||
}
|
||||
service "$CTDB_SERVICE_WINBIND" start
|
||||
}
|
||||
|
||||
# start Samba service. Start it reniced, as under very heavy load
|
||||
# the number of smbd processes will mean that it leaves few cycles for
|
||||
# anything else
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
nice_service "$CTDB_SERVICE_NMB" start
|
||||
nice_service "$CTDB_SERVICE_SMB" start
|
||||
}
|
||||
}
|
||||
|
||||
stop_samba() {
|
||||
# shutdown Samba when ctdb goes down
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_SMB" stop
|
||||
service "$CTDB_SERVICE_NMB" stop
|
||||
}
|
||||
|
||||
# stop the winbind service
|
||||
check_ctdb_manages_winbind
|
||||
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_WINBIND" stop
|
||||
}
|
||||
}
|
||||
|
||||
# set default samba cleanup period - in minutes
|
||||
[ -z "$SAMBA_CLEANUP_PERIOD" ] && {
|
||||
@ -130,6 +179,14 @@ check_ctdb_manages_winbind() {
|
||||
}
|
||||
}
|
||||
|
||||
list_samba_shares ()
|
||||
{
|
||||
testparm_cat |
|
||||
sed -n -e 's@^[[:space:]]*path[[:space:]]*=[[:space:]]@@p' |
|
||||
sed -e 's/"//g'
|
||||
}
|
||||
|
||||
|
||||
###########################
|
||||
# periodic cleanup function
|
||||
periodic_cleanup() {
|
||||
@ -141,72 +198,11 @@ periodic_cleanup() {
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
# create the state directory for samba
|
||||
/bin/mkdir -p $CTDB_BASE/state/samba
|
||||
|
||||
# make sure samba is not already started
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1
|
||||
service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1
|
||||
killall -0 -q smbd && {
|
||||
sleep 1
|
||||
# make absolutely sure samba is dead
|
||||
killall -q -9 smbd
|
||||
}
|
||||
|
||||
killall -0 -q nmbd && {
|
||||
sleep 1
|
||||
# make absolutely sure samba is dead
|
||||
killall -q -9 nmbd
|
||||
}
|
||||
}
|
||||
|
||||
# restart the winbind service
|
||||
check_ctdb_manages_winbind
|
||||
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1
|
||||
killall -0 -q winbindd && {
|
||||
sleep 1
|
||||
# make absolutely sure winbindd is dead
|
||||
killall -q -9 winbindd
|
||||
}
|
||||
service "$CTDB_SERVICE_WINBIND" start
|
||||
}
|
||||
|
||||
# start Samba service. Start it reniced, as under very heavy load
|
||||
# the number of smbd processes will mean that it leaves few cycles for
|
||||
# anything else
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
nice_service "$CTDB_SERVICE_NMB" start
|
||||
nice_service "$CTDB_SERVICE_SMB" start
|
||||
}
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
takeip)
|
||||
# nothing special for Samba
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
# nothing special for Samba
|
||||
;;
|
||||
|
||||
recovered)
|
||||
# nothing special for Samba
|
||||
exit 0
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
# shutdown Samba when ctdb goes down
|
||||
[ "$CTDB_MANAGES_SAMBA" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_SMB" stop
|
||||
service "$CTDB_SERVICE_NMB" stop
|
||||
}
|
||||
|
||||
# stop the winbind service
|
||||
check_ctdb_manages_winbind
|
||||
[ "$CTDB_MANAGES_WINBIND" = "yes" ] && {
|
||||
service "$CTDB_SERVICE_WINBIND" stop
|
||||
}
|
||||
ctdb_service_stop
|
||||
;;
|
||||
|
||||
monitor)
|
||||
@ -232,20 +228,20 @@ case $cmd in
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
|
||||
ctdb_check_directories_probe "Samba" $smb_dirs || {
|
||||
|
||||
list_samba_shares |
|
||||
ctdb_check_directories_probe || {
|
||||
testparm_foreground_update
|
||||
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
|
||||
ctdb_check_directories "Samba" $smb_dirs
|
||||
}
|
||||
list_samba_shares |
|
||||
ctdb_check_directories
|
||||
} || exit $?
|
||||
}
|
||||
|
||||
smb_ports="$CTDB_SAMBA_CHECK_PORTS"
|
||||
[ -z "$smb_ports" ] && {
|
||||
smb_ports=`testparm_cat --parameter-name="smb ports"`
|
||||
}
|
||||
ctdb_check_tcp_ports "Samba" $smb_ports
|
||||
ctdb_check_tcp_ports $smb_ports || exit $?
|
||||
}
|
||||
|
||||
# check winbind is OK
|
||||
@ -255,6 +251,9 @@ case $cmd in
|
||||
}
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# ignore unknown commands
|
||||
|
@ -1,71 +1,69 @@
|
||||
#!/bin/sh
|
||||
# script to manage nfs in a clustered environment
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig nfs
|
||||
|
||||
[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
|
||||
[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
|
||||
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
start_nfs() {
|
||||
/bin/mkdir -p $CTDB_BASE/state/nfs
|
||||
/bin/mkdir -p $CTDB_BASE/state/statd/ip
|
||||
/bin/mkdir -p $STATD_SHARED_DIRECTORY
|
||||
|
||||
# make sure nfs is stopped before we start it, or it may get a bind error
|
||||
startstop_nfs stop
|
||||
startstop_nfs start
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
startstop_nfs stop
|
||||
exit 0
|
||||
;;
|
||||
|
||||
takeip)
|
||||
ip=$2
|
||||
|
||||
echo $ip >> $CTDB_BASE/state/statd/restart
|
||||
|
||||
# having a list of what IPs we have allows statd to do the right
|
||||
# thing via $CTDB_BASE/statd-callout
|
||||
touch $CTDB_BASE/state/statd/ip/$ip
|
||||
exit 0
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
iface=$1
|
||||
ip=$2
|
||||
maskbits=$3
|
||||
|
||||
echo $ip >> $CTDB_BASE/state/statd/restart
|
||||
/bin/rm -f $CTDB_BASE/state/statd/ip/$ip
|
||||
exit 0
|
||||
;;
|
||||
|
||||
recovered)
|
||||
# if no IPs have changed then don't need to restart statd
|
||||
[ -f $CTDB_BASE/state/statd/restart ] || exit 0;
|
||||
}
|
||||
|
||||
reconfigure_nfs() {
|
||||
# always restart the lockmanager so that we start with a clusterwide
|
||||
# grace period when IP addresses have changed
|
||||
[ -x $CTDB_BASE/statd-callout ] && {
|
||||
$CTDB_BASE/statd-callout notify &
|
||||
} >/dev/null 2>&1
|
||||
|
||||
/bin/rm -f $CTDB_BASE/state/statd/restart
|
||||
}
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
|
||||
service_name="nfs"
|
||||
service_start="start_nfs"
|
||||
service_stop="startstop_nfs stop"
|
||||
service_reconfigure="reconfigure_nfs"
|
||||
|
||||
loadconfig
|
||||
|
||||
[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
|
||||
|
||||
ctdb_start_stop_service
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
ctdb_service_stop
|
||||
;;
|
||||
|
||||
takeip)
|
||||
ctdb_service_set_reconfigure
|
||||
touch $CTDB_BASE/state/statd/ip/$2
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
ctdb_service_set_reconfigure
|
||||
/bin/rm -f $CTDB_BASE/state/statd/ip/$2
|
||||
;;
|
||||
|
||||
recovered)
|
||||
# if we have taken or released any ips we must
|
||||
# restart the lock manager so that we enter a clusterwide grace period
|
||||
if ctdb_service_needs_reconfigure ; then
|
||||
ctdb_service_reconfigure
|
||||
fi
|
||||
;;
|
||||
|
||||
monitor)
|
||||
if ctdb_service_needs_reconfigure ; then
|
||||
ctdb_service_reconfigure
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# check that statd responds to rpc requests
|
||||
# if statd is not running we try to restart it
|
||||
rpcinfo -u localhost 100024 1 > /dev/null || {
|
||||
@ -83,13 +81,15 @@ case $cmd in
|
||||
|
||||
# and that its directories are available
|
||||
[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
|
||||
nfs_dirs=$(exportfs | grep -v '^#' | grep '^/' | awk {'print $1;'})
|
||||
ctdb_check_directories "nfs" $nfs_dirs
|
||||
}
|
||||
exportfs | grep -v '^#' | grep '^/' |
|
||||
sed -e 's/[[:space:]]*[^[:space:]]*$//' |
|
||||
ctdb_check_directories
|
||||
} || exit $?
|
||||
|
||||
# check that lockd responds to rpc requests
|
||||
ctdb_check_rpc "lockd" 100021 1
|
||||
ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY
|
||||
echo "$STATD_SHARED_DIRECTORY" | ctdb_check_directories "statd" || \
|
||||
exit $?
|
||||
|
||||
# mount needs special handling since it is sometimes not started
|
||||
# correctly on RHEL5
|
||||
@ -103,6 +103,9 @@ case $cmd in
|
||||
}
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
@ -1,25 +1,21 @@
|
||||
#!/bin/sh
|
||||
# ctdb event script for NFS tickle acks
|
||||
|
||||
PATH=/bin:/usr/bin:$PATH
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig nfs
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
service_name="nfs"
|
||||
service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
|
||||
service_reconfigure=$service_start
|
||||
|
||||
loadconfig
|
||||
|
||||
ctdb_start_stop_service
|
||||
|
||||
[ "$CTDB_MANAGES_NFS" = "yes" ] || exit 0
|
||||
[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
mkdir -p $CTDB_BASE/state/nfstickle
|
||||
mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
|
||||
# we rely on fast tcp wait1 recycling
|
||||
echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
|
||||
exit 0
|
||||
ctdb_service_start
|
||||
;;
|
||||
|
||||
takeip)
|
||||
@ -31,46 +27,25 @@ case $cmd in
|
||||
# send tickle acks for all the connections the old server had
|
||||
for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
|
||||
[ -f $f ] && cat $f | while read dest; do
|
||||
dip=`echo $dest | cut -d: -f1`
|
||||
dport=`echo $dest | cut -d: -f2`
|
||||
# send three, in case of lost packets
|
||||
echo "Sending NFS tickle ack for $ip to $dip:$dport"
|
||||
echo "Sending NFS tickle ack for $ip to $dest"
|
||||
for i in `seq 1 3`; do
|
||||
ctdb tickle $dip:$dport $ip:2049
|
||||
ctdb tickle $dest $ip:2049
|
||||
done
|
||||
done
|
||||
done
|
||||
exit 0
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
exit 0
|
||||
;;
|
||||
|
||||
recovered)
|
||||
exit 0
|
||||
;;
|
||||
|
||||
shutdown)
|
||||
exit 0
|
||||
;;
|
||||
|
||||
monitor)
|
||||
# always create these directories since NFS might be enabled at runtime
|
||||
# and we don't want to restart ctdbd
|
||||
mkdir -p $CTDB_BASE/state/nfstickle
|
||||
mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`
|
||||
|
||||
mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
|
||||
rm -f $mydir/*
|
||||
# record our connections to shared storage
|
||||
netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
|
||||
awk '{print $4" "$5}' |
|
||||
while read dest src; do
|
||||
ip=`echo $dest | cut -d: -f1`
|
||||
ip=${dest%:*}
|
||||
echo $src >> $mydir/$ip
|
||||
done
|
||||
exit 0
|
||||
;;
|
||||
|
||||
esac
|
||||
|
@ -1,16 +1,11 @@
|
||||
#!/bin/sh
|
||||
# ctdb event script for TGTD based iSCSI
|
||||
|
||||
PATH=/bin:/usr/bin:$PATH
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
loadconfig iscsi
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
service_name="iscsi"
|
||||
|
||||
[ "$CTDB_MANAGES_ISCSI" = "yes" ] || exit 0
|
||||
ctdb_start_stop_service
|
||||
|
||||
[ -z "$CTDB_START_ISCSI_SCRIPTS" ] && {
|
||||
echo "No iscsi start script directory found"
|
||||
@ -18,15 +13,6 @@ shift
|
||||
}
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
;;
|
||||
|
||||
takeip)
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
;;
|
||||
|
||||
recovered)
|
||||
# block the iscsi port
|
||||
iptables -I INPUT 1 -p tcp --dport 3260 -j DROP
|
||||
@ -51,8 +37,8 @@ case $cmd in
|
||||
done
|
||||
|
||||
# remove all iptables rules
|
||||
while `iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null` ; do
|
||||
true;
|
||||
while iptables -D INPUT -p tcp --dport 3260 -j DROP 2>/dev/null >/dev/null ; do
|
||||
:
|
||||
done
|
||||
|
||||
;;
|
||||
@ -63,9 +49,11 @@ case $cmd in
|
||||
;;
|
||||
|
||||
monitor)
|
||||
[ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
|
||||
ctdb_check_tcp_ports "iscsi" 3260
|
||||
}
|
||||
ctdb_check_tcp_ports 3260 || exit $?
|
||||
;;
|
||||
|
||||
status)
|
||||
ctdb_checkstatus || exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
# script to manage the lvs ip multiplexer for a single public address cluster
|
||||
|
||||
. $CTDB_BASE/functions
|
||||
|
||||
loadconfig ctdb
|
||||
|
||||
[ -z "$CTDB_LVS_PUBLIC_IP" ] && exit 0
|
||||
@ -12,12 +13,6 @@ loadconfig ctdb
|
||||
exit 0
|
||||
}
|
||||
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
|
||||
PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
case $cmd in
|
||||
startup)
|
||||
ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
|
||||
@ -42,12 +37,6 @@ case $cmd in
|
||||
echo 1 > /proc/sys/net/ipv4/route/flush
|
||||
;;
|
||||
|
||||
takeip)
|
||||
;;
|
||||
|
||||
releaseip)
|
||||
;;
|
||||
|
||||
recovered|stopped)
|
||||
# kill off any tcp connections
|
||||
ipvsadm -D -t $CTDB_LVS_PUBLIC_IP:0
|
||||
@ -89,9 +78,6 @@ case $cmd in
|
||||
echo 1 > /proc/sys/net/ipv4/route/flush
|
||||
;;
|
||||
|
||||
monitor)
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
@ -7,17 +7,14 @@
|
||||
. $CTDB_BASE/functions
|
||||
loadconfig ctdb
|
||||
|
||||
[ "x$CTDB_RUN_TIMEOUT_MONITOR" = "xyes" ] || exit 0
|
||||
|
||||
cmd="$1"
|
||||
shift
|
||||
[ "$CTDB_RUN_TIMEOUT_MONITOR" = "yes" ] || exit 0
|
||||
|
||||
case $cmd in
|
||||
monitor)
|
||||
TIMEOUT=$(ctdb listvars | grep EventScriptTimeout | awk '{print $3}')
|
||||
echo "sleeping for $((TIMEOUT * 2)) seconds..."
|
||||
sleep $((TIMEOUT * 2))
|
||||
;;
|
||||
monitor)
|
||||
TIMEOUT=$(ctdb listvars | awk '$1 == "EventScriptTimeout" {print $3}')
|
||||
echo "sleeping for $((TIMEOUT * 2)) seconds..."
|
||||
sleep $((TIMEOUT * 2))
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
@ -1,15 +1,28 @@
|
||||
# utility functions for ctdb event scripts
|
||||
|
||||
PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
|
||||
|
||||
#######################################
|
||||
# pull in a system config file, if any
|
||||
loadconfig() {
|
||||
name="$1"
|
||||
if [ -f /etc/sysconfig/$name ]; then
|
||||
. /etc/sysconfig/$name
|
||||
elif [ -f /etc/default/$name ]; then
|
||||
. /etc/default/$name
|
||||
elif [ -f $CTDB_BASE/sysconfig/$name ]; then
|
||||
. $CTDB_BASE/sysconfig/$name
|
||||
|
||||
if [ "$1" != "ctdb" ] ; then
|
||||
loadconfig "ctdb"
|
||||
fi
|
||||
|
||||
if [ -z "$1" ] ; then
|
||||
foo="${service_config:-${service_name}}"
|
||||
if [ -n "$foo" ] ; then
|
||||
loadconfig "$foo"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -f /etc/sysconfig/$1 ]; then
|
||||
. /etc/sysconfig/$1
|
||||
elif [ -f /etc/default/$1 ]; then
|
||||
. /etc/default/$1
|
||||
elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
|
||||
. $CTDB_BASE/sysconfig/$1
|
||||
fi
|
||||
}
|
||||
|
||||
@ -31,37 +44,28 @@ detect_init_style() {
|
||||
######################################################
|
||||
# simulate /sbin/service on platforms that don't have it
|
||||
service() {
|
||||
service_name="$1"
|
||||
op="$2"
|
||||
_service_name="$1"
|
||||
_op="$2"
|
||||
|
||||
# do nothing, when no service was specified
|
||||
test "x$service_name" = "x" && return
|
||||
[ -z "$_service_name" ] && return
|
||||
|
||||
if [ -x /sbin/service ]; then
|
||||
/sbin/service "$service_name" "$op"
|
||||
elif [ -x /etc/init.d/$service_name ]; then
|
||||
/etc/init.d/$service_name "$op"
|
||||
elif [ -x /etc/rc.d/init.d/$service_name ]; then
|
||||
/etc/rc.d/init.d/$service_name "$op"
|
||||
/sbin/service "$_service_name" "$_op"
|
||||
elif [ -x /etc/init.d/$_service_name ]; then
|
||||
/etc/init.d/$_service_name "$_op"
|
||||
elif [ -x /etc/rc.d/init.d/$_service_name ]; then
|
||||
/etc/rc.d/init.d/$_service_name "$_op"
|
||||
fi
|
||||
}
|
||||
|
||||
######################################################
|
||||
# simulate /sbin/service (niced) on platforms that don't have it
|
||||
nice_service() {
|
||||
service_name="$1"
|
||||
op="$2"
|
||||
|
||||
# do nothing, when no service was specified
|
||||
test "x$service_name" = "x" && return
|
||||
[ -z "$1" ] && return
|
||||
|
||||
if [ -x /sbin/service ]; then
|
||||
nice /sbin/service "$service_name" "$op"
|
||||
elif [ -x /etc/init.d/$service_name ]; then
|
||||
nice /etc/init.d/$service_name "$op"
|
||||
elif [ -x /etc/rc.d/init.d/$service_name ]; then
|
||||
nice /etc/rc.d/init.d/$service_name "$op"
|
||||
fi
|
||||
nice service "$@"
|
||||
}
|
||||
|
||||
######################################################
|
||||
@ -110,57 +114,30 @@ ctdb_wait_tcp_ports() {
|
||||
(netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
|
||||
else
|
||||
echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
|
||||
return
|
||||
return 127
|
||||
fi
|
||||
done
|
||||
[ $all_ok -eq 1 ] || sleep 1
|
||||
ctdb status > /dev/null 2>&1 || {
|
||||
echo "ctdb daemon has died. Exiting tcp wait $service_name"
|
||||
exit 1
|
||||
return 1
|
||||
}
|
||||
done
|
||||
echo "Local tcp services for $service_name are up"
|
||||
}
|
||||
|
||||
|
||||
|
||||
######################################################
|
||||
# wait for a set of directories
|
||||
# usage: ctdb_wait_directories SERVICE_NAME <directories...>
|
||||
######################################################
|
||||
ctdb_wait_directories() {
|
||||
service_name="$1"
|
||||
shift
|
||||
wait_dirs="$*"
|
||||
[ -z "$wait_dirs" ] && return;
|
||||
all_ok=0
|
||||
echo "Waiting for local directories for $service_name"
|
||||
while [ $all_ok -eq 0 ]; do
|
||||
all_ok=1
|
||||
for d in $wait_dirs; do
|
||||
[ -d $d ] || all_ok=0
|
||||
done
|
||||
[ $all_ok -eq 1 ] || sleep 1
|
||||
ctdb status > /dev/null 2>&1 || {
|
||||
echo "ctdb daemon has died. Exiting directory wait for $service_name"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
echo "Local directories for $service_name are available"
|
||||
}
|
||||
|
||||
|
||||
######################################################
|
||||
# check that a rpc server is registered with portmap
|
||||
# and responding to requests
|
||||
# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
|
||||
######################################################
|
||||
ctdb_check_rpc() {
|
||||
service_name="$1"
|
||||
progname="$1"
|
||||
prognum="$2"
|
||||
version="$3"
|
||||
rpcinfo -u localhost $prognum $version > /dev/null || {
|
||||
echo "ERROR: $service_name not responding to rpc requests"
|
||||
echo "ERROR: $progname not responding to rpc requests"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
@ -171,18 +148,15 @@ ctdb_check_rpc() {
|
||||
# usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
|
||||
######################################################
|
||||
ctdb_check_directories_probe() {
|
||||
service_name="$1"
|
||||
shift
|
||||
for d ; do
|
||||
case "$d" in
|
||||
*%*)
|
||||
continue
|
||||
;;
|
||||
*)
|
||||
[ -d "$d" ] || return 1
|
||||
esac
|
||||
done
|
||||
return 0
|
||||
while IFS="" read d ; do
|
||||
case "$d" in
|
||||
*%*)
|
||||
continue
|
||||
;;
|
||||
*)
|
||||
[ -d "$d" ] || return 1
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
######################################################
|
||||
@ -190,62 +164,27 @@ ctdb_check_directories_probe() {
|
||||
# usage: ctdb_check_directories SERVICE_NAME <directories...>
|
||||
######################################################
|
||||
ctdb_check_directories() {
|
||||
# Note: ctdb_check_directories_probe sets both $service_name and $d.
|
||||
ctdb_check_directories_probe "$@" || {
|
||||
echo "ERROR: $service_name directory $d not available"
|
||||
exit 1
|
||||
}
|
||||
n="${1:-${service_name}}"
|
||||
ctdb_check_directories_probe || {
|
||||
echo "ERROR: $n directory \"$d\" not available"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
######################################################
|
||||
# check a set of tcp ports
|
||||
# usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
|
||||
# usage: ctdb_check_tcp_ports <ports...>
|
||||
######################################################
|
||||
ctdb_check_tcp_ports() {
|
||||
service_name="$1"
|
||||
shift
|
||||
wait_ports="$*"
|
||||
[ -z "$wait_ports" ] && return;
|
||||
|
||||
# check availability of netcat or netstat first
|
||||
NETCAT=""
|
||||
NETSTAT=""
|
||||
if [ -x /usr/bin/netstat ]; then
|
||||
NETSTAT=/usr/bin/netstat
|
||||
elif [ -x /bin/netstat ]; then
|
||||
NETSTAT=/bin/netstat
|
||||
elif [ -x /usr/bin/netcat ]; then
|
||||
NETCAT=/usr/bin/netcat
|
||||
elif [ -x /bin/netcat ]; then
|
||||
NETCAT=/bin/netcat
|
||||
elif [ -x /usr/bin/nc ]; then
|
||||
NETCAT=/usr/bin/nc
|
||||
elif [ -x /bin/nc ]; then
|
||||
NETCAT=/bin/nc
|
||||
fi
|
||||
|
||||
for p in $wait_ports; do
|
||||
all_ok=1
|
||||
|
||||
if [ "x${NETCAT}" != "x" ]; then
|
||||
${NETCAT} -z 127.0.0.1 $p > /dev/null || all_ok=0
|
||||
elif [ "x${NETSTAT}" != "x" ]; then
|
||||
if ! ${NETSTAT} -a -n | egrep "0.0.0.0:$p .*LISTEN" > /dev/null ; then
|
||||
if ! ${NETSTAT} -a -n | egrep ":::$p .*LISTEN" > /dev/null ; then
|
||||
all_ok=0
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "ERROR: neither netcat (or nc) nor netstat found!"
|
||||
echo "ERROR: can't monitor ${service_name} tcp port ${p}"
|
||||
all_ok=0
|
||||
fi
|
||||
|
||||
[ $all_ok -eq 1 ] || {
|
||||
echo "ERROR: $service_name tcp port $p is not responding"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
for p ; do
|
||||
if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
|
||||
if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
|
||||
echo "ERROR: $service_name tcp port $p is not responding"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
######################################################
|
||||
@ -253,35 +192,13 @@ ctdb_check_tcp_ports() {
|
||||
# usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
|
||||
######################################################
|
||||
ctdb_check_unix_socket() {
|
||||
service_name="$1"
|
||||
socket_path="$2"
|
||||
[ -z "$socket_path" ] && return;
|
||||
socket_path="$1"
|
||||
[ -z "$socket_path" ] && return
|
||||
|
||||
# check availability of netstat first
|
||||
NETSTAT=""
|
||||
if [ -x $(type -p netstat) ]; then
|
||||
NETSTAT=$(type -p netstat)
|
||||
elif [ -x /usr/bin/netstat ]; then
|
||||
NETSTAT=/usr/bin/netstat
|
||||
elif [ -x /bin/netstat ]; then
|
||||
NETSTAT=/bin/netstat
|
||||
fi
|
||||
|
||||
all_ok=1
|
||||
if [ "x$NETSTAT" != "x" ]; then
|
||||
if $NETSTAT -l -a -n | grep -qE "^unix.*LISTEN.*${socket_path}$"; then
|
||||
all_ok=1
|
||||
else
|
||||
all_ok=0
|
||||
if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
|
||||
echo "ERROR: $service_name socket $socket_path not found"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
[ -S ${socket_path} ] && all_ok=1 || all_ok=0
|
||||
fi
|
||||
|
||||
[ $all_ok -eq 1 ] || {
|
||||
echo "ERROR: $service_name socket $socket_path not found"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
######################################################
|
||||
@ -500,38 +417,175 @@ remove_ip() {
|
||||
|
||||
########################################################
|
||||
# some simple logic for counting events - per eventscript
|
||||
# usage: ctdb_counter_init <tag>
|
||||
# ctdb_counter_incr <tag>
|
||||
# ctdb_counter_limit <tag> <limit>
|
||||
# e.g. <tag> = "fail-count"
|
||||
# ctdb_counter_limit succeeds when count >= <limit>
|
||||
# usage: ctdb_counter_init
|
||||
# ctdb_counter_incr
|
||||
# ctdb_check_counter_limit <limit>
|
||||
# ctdb_check_counter_limit succeeds when count >= <limit>
|
||||
########################################################
|
||||
_ctdb_counter_common () {
|
||||
_tag="$1"
|
||||
_eventscript="${0##*/}" # basename
|
||||
|
||||
_counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
|
||||
_counter_file="$ctdb_fail_dir/$service_name"
|
||||
mkdir -p "${_counter_file%/*}" # dirname
|
||||
}
|
||||
ctdb_counter_init () {
|
||||
_ctdb_counter_common "$1"
|
||||
_ctdb_counter_common
|
||||
|
||||
echo -n > "$_counter_file"
|
||||
>"$_counter_file"
|
||||
}
|
||||
ctdb_counter_incr () {
|
||||
_ctdb_counter_common "$1"
|
||||
_ctdb_counter_common
|
||||
|
||||
# unary counting!
|
||||
echo -n 1 >> "$_counter_file"
|
||||
}
|
||||
ctdb_counter_limit () {
|
||||
_ctdb_counter_common "$1"
|
||||
_limit="$2"
|
||||
ctdb_check_counter_limit () {
|
||||
_ctdb_counter_common
|
||||
|
||||
_limit="${1:-${service_fail_limit}}"
|
||||
_quiet="$2"
|
||||
|
||||
# unary counting!
|
||||
_size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
|
||||
[ $_size -ge $_limit ]
|
||||
if [ $_size -ge $_limit ] ; then
|
||||
echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
|
||||
exit 1
|
||||
elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
|
||||
echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
|
||||
fi
|
||||
}
|
||||
########################################################
|
||||
|
||||
ctdb_spool_dir="/var/spool/ctdb"
|
||||
ctdb_status_dir="$ctdb_spool_dir/status"
|
||||
ctdb_fail_dir="$ctdb_spool_dir/failcount"
|
||||
ctdb_active_dir="$ctdb_spool_dir/active"
|
||||
|
||||
log_status_cat ()
|
||||
{
|
||||
echo "node is \"$1\", problem with \"${script_name}\": $(cat $2)"
|
||||
}
|
||||
|
||||
ctdb_checkstatus ()
|
||||
{
|
||||
if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
|
||||
log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
|
||||
return 1
|
||||
elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
|
||||
log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
|
||||
return 2
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
ctdb_setstatus ()
|
||||
{
|
||||
d="$ctdb_status_dir/$script_name"
|
||||
case "$1" in
|
||||
unhealthy|banned)
|
||||
mkdir -p "$d"
|
||||
cat "$2" >"$d/$1"
|
||||
;;
|
||||
*)
|
||||
for i in "banned" "unhealthy" ; do
|
||||
rm -f "$d/$i"
|
||||
done
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
ctdb_service_needs_reconfigure ()
|
||||
{
|
||||
[ -e "$ctdb_status_dir/$service_name/reconfigure" ]
|
||||
}
|
||||
|
||||
ctdb_service_set_reconfigure ()
|
||||
{
|
||||
d="$ctdb_status_dir/$service_name"
|
||||
mkdir -p "$d"
|
||||
>"$d/reconfigure"
|
||||
}
|
||||
|
||||
ctdb_service_unset_reconfigure ()
|
||||
{
|
||||
rm -f "$ctdb_status_dir/$service_name/reconfigure"
|
||||
}
|
||||
|
||||
ctdb_service_reconfigure ()
|
||||
{
|
||||
if [ -n "$service_reconfigure" ] ; then
|
||||
eval $service_reconfigure
|
||||
else
|
||||
service "$service_name" restart
|
||||
fi
|
||||
ctdb_service_unset_reconfigure
|
||||
ctdb_counter_init
|
||||
}
|
||||
|
||||
ctdb_compat_managed_service ()
|
||||
{
|
||||
if [ "$1" = "yes" ] ; then
|
||||
t="$t $2 "
|
||||
fi
|
||||
}
|
||||
|
||||
is_ctdb_managed_service ()
|
||||
{
|
||||
t=" $CTDB_MANAGED_SERVICES "
|
||||
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
|
||||
ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
|
||||
|
||||
# Returns 0 if "<space>$service_name<space>" appears in $t
|
||||
[ "${t#* ${service_name} }" != "${t}" ]
|
||||
}
|
||||
|
||||
ctdb_start_stop_service ()
|
||||
{
|
||||
_active="$ctdb_active_dir/$service_name"
|
||||
|
||||
if is_ctdb_managed_service ; then
|
||||
if ! [ -e "$_active" ] ; then
|
||||
echo "Starting service $service_name"
|
||||
ctdb_service_start || exit $?
|
||||
mkdir -p "$ctdb_active_dir"
|
||||
touch "$_active"
|
||||
exit 0
|
||||
fi
|
||||
elif ! is_ctdb_managed_service ; then
|
||||
if [ -e "$_active" ] ; then
|
||||
echo "Stopping service $service_name"
|
||||
ctdb_service_stop || exit $?
|
||||
rm -f "$_active"
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
}
|
||||
|
||||
ctdb_service_start ()
|
||||
{
|
||||
if [ -n "$service_start" ] ; then
|
||||
eval $service_start
|
||||
else
|
||||
service "$service_name" start
|
||||
fi
|
||||
ctdb_counter_init
|
||||
}
|
||||
|
||||
ctdb_service_stop ()
|
||||
{
|
||||
if [ -n "$service_stop" ] ; then
|
||||
eval $service_stop
|
||||
else
|
||||
service "$service_name" stop
|
||||
fi
|
||||
}
|
||||
|
||||
########################################################
|
||||
# load a site local config file
|
||||
########################################################
|
||||
@ -546,4 +600,21 @@ ctdb_counter_limit () {
|
||||
done
|
||||
}
|
||||
|
||||
# A reasonable default is the basename of the eventscript.
|
||||
script_name="${0##*/}" # basename
|
||||
service_name="$script_name"
|
||||
service_fail_limit=1
|
||||
|
||||
ctdb_event="$1" ; shift
|
||||
cmd="$ctdb_event"
|
||||
|
||||
case "$ctdb_event" in
|
||||
status)
|
||||
ctdb_checkstatus
|
||||
exit
|
||||
;;
|
||||
setstatus)
|
||||
ctdb_setstatus "$@"
|
||||
exit
|
||||
;;
|
||||
esac
|
||||
|
@ -28,7 +28,7 @@ case "$1" in
|
||||
add-client)
|
||||
# the callout does not tell us to which ip the client connected
|
||||
# so we must add it to all the ips that we serve
|
||||
for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
|
||||
for f in $CTDB_BASE/state/statd/ip/*; do
|
||||
ip=`basename $f`
|
||||
[ -d $STATD_SHARED_DIRECTORY/$ip ] || /bin/mkdir $STATD_SHARED_DIRECTORY/$ip
|
||||
touch $STATD_SHARED_DIRECTORY/$ip/$2
|
||||
@ -37,7 +37,7 @@ case "$1" in
|
||||
del-client)
|
||||
# the callout does not tell us to which ip the client connected
|
||||
# so we must add it to all the ips that we serve
|
||||
for f in `/bin/ls $CTDB_BASE/state/statd/ip/*`; do
|
||||
for f in $CTDB_BASE/state/statd/ip/*; do
|
||||
ip=`basename $f`
|
||||
/bin/rm -f $STATD_SHARED_DIRECTORY/$ip/$2
|
||||
done
|
||||
|
@ -129,6 +129,7 @@ struct ctdb_tunable {
|
||||
uint32_t vacuum_min_interval;
|
||||
uint32_t vacuum_max_interval;
|
||||
uint32_t max_queue_depth_drop_msg;
|
||||
uint32_t use_status_events_for_monitoring;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -450,9 +451,13 @@ struct ctdb_context {
|
||||
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
|
||||
uint32_t *recd_ping_count;
|
||||
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
|
||||
TALLOC_CTX *script_monitor_ctx; /* a context where we store results while running the monitor event */
|
||||
TALLOC_CTX *last_monitor_ctx;
|
||||
TALLOC_CTX *event_script_ctx; /* non-monitoring events */
|
||||
|
||||
TALLOC_CTX *monitor_event_script_ctx;
|
||||
TALLOC_CTX *other_event_script_ctx;
|
||||
|
||||
struct ctdb_monitor_script_status_ctx *current_monitor_status_ctx;
|
||||
struct ctdb_monitor_script_status_ctx *last_monitor_status_ctx;
|
||||
|
||||
TALLOC_CTX *banning_ctx;
|
||||
};
|
||||
|
||||
@ -856,6 +861,19 @@ enum ctdb_trans2_commit_error {
|
||||
CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */
|
||||
};
|
||||
|
||||
/* different calls to event scripts. */
|
||||
enum ctdb_eventscript_call {
|
||||
CTDB_EVENT_STARTUP, /* CTDB starting up: no args. */
|
||||
CTDB_EVENT_START_RECOVERY, /* CTDB recovery starting: no args. */
|
||||
CTDB_EVENT_RECOVERED, /* CTDB recovery finished: no args. */
|
||||
CTDB_EVENT_TAKE_IP, /* IP taken: interface, IP address, netmask bits. */
|
||||
CTDB_EVENT_RELEASE_IP, /* IP released: interface, IP address, netmask bits. */
|
||||
CTDB_EVENT_STOPPED, /* This node is stopped: no args. */
|
||||
CTDB_EVENT_MONITOR, /* Please check if service is healthy: no args. */
|
||||
CTDB_EVENT_STATUS, /* Report service status: no args. */
|
||||
CTDB_EVENT_SHUTDOWN, /* CTDB shutting down: no args. */
|
||||
CTDB_EVENT_RELOAD /* magic */
|
||||
};
|
||||
|
||||
/* internal prototypes */
|
||||
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
|
||||
@ -1324,13 +1342,16 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
|
||||
int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
|
||||
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
|
||||
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
|
||||
int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call);
|
||||
int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
|
||||
const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
|
||||
int ctdb_event_script_callback(struct ctdb_context *ctdb,
|
||||
struct timeval timeout,
|
||||
TALLOC_CTX *mem_ctx,
|
||||
void (*callback)(struct ctdb_context *, int, void *),
|
||||
void *private_data,
|
||||
const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
|
||||
bool from_user,
|
||||
enum ctdb_eventscript_call call,
|
||||
const char *fmt, ...) PRINTF_ATTRIBUTE(7,8);
|
||||
void ctdb_release_all_ips(struct ctdb_context *ctdb);
|
||||
|
||||
void set_nonblocking(int fd);
|
||||
|
@ -286,7 +286,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
if (ctdb->methods != NULL) {
|
||||
ctdb->methods->shutdown(ctdb);
|
||||
}
|
||||
ctdb_event_script(ctdb, "shutdown");
|
||||
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
|
||||
DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command. Stopping CTDB daemon.\n"));
|
||||
exit(0);
|
||||
|
||||
|
@ -223,9 +223,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
|
||||
|
||||
if (!ctdb->done_startup) {
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
ctdb->monitor->monitor_context, ctdb_startup_callback,
|
||||
ctdb, "startup");
|
||||
ctdb, false,
|
||||
CTDB_EVENT_STARTUP, "%s", "");
|
||||
} else {
|
||||
int i;
|
||||
int skip_monitoring = 0;
|
||||
@ -248,9 +248,9 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
|
||||
return;
|
||||
} else {
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
ctdb->monitor->monitor_context, ctdb_health_callback,
|
||||
ctdb, "monitor");
|
||||
ctdb, false,
|
||||
CTDB_EVENT_MONITOR, "%s", "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -962,11 +962,11 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
|
||||
|
||||
ctdb_disable_monitoring(ctdb);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state,
|
||||
ret = ctdb_event_script_callback(ctdb, state,
|
||||
ctdb_end_recovery_callback,
|
||||
state, "recovered");
|
||||
state,
|
||||
false,
|
||||
CTDB_EVENT_RECOVERED, "%s", "");
|
||||
|
||||
if (ret != 0) {
|
||||
ctdb_enable_monitoring(ctdb);
|
||||
@ -1016,11 +1016,11 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
|
||||
|
||||
ctdb_disable_monitoring(ctdb);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state,
|
||||
ret = ctdb_event_script_callback(ctdb, state,
|
||||
ctdb_start_recovery_callback,
|
||||
state, "startrecovery");
|
||||
state, false,
|
||||
CTDB_EVENT_START_RECOVERY,
|
||||
"%s", "");
|
||||
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n"));
|
||||
@ -1160,7 +1160,7 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
|
||||
if (ctdb->methods != NULL) {
|
||||
ctdb->methods->shutdown(ctdb);
|
||||
}
|
||||
ctdb_event_script(ctdb, "shutdown");
|
||||
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
|
||||
DEBUG(DEBUG_ERR, ("Recovery daemon ping timeout. Daemon has been shut down.\n"));
|
||||
exit(0);
|
||||
}
|
||||
@ -1230,11 +1230,10 @@ int32_t ctdb_control_stop_node(struct ctdb_context *ctdb, struct ctdb_req_contro
|
||||
|
||||
ctdb_disable_monitoring(ctdb);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state,
|
||||
ret = ctdb_event_script_callback(ctdb, state,
|
||||
ctdb_stop_node_callback,
|
||||
state, "stopped");
|
||||
state, false,
|
||||
CTDB_EVENT_STOPPED, "%s", "");
|
||||
|
||||
if (ret != 0) {
|
||||
ctdb_enable_monitoring(ctdb);
|
||||
|
@ -3288,7 +3288,7 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
||||
if (ctdb->methods != NULL) {
|
||||
ctdb->methods->shutdown(ctdb);
|
||||
}
|
||||
ctdb_event_script(ctdb, "shutdown");
|
||||
ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
|
||||
|
||||
exit(10);
|
||||
}
|
||||
|
@ -235,9 +235,10 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
|
||||
vnn->iface));
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state, takeover_ip_callback, state,
|
||||
"takeip %s %s %u",
|
||||
false,
|
||||
CTDB_EVENT_TAKE_IP,
|
||||
"%s %s %u",
|
||||
vnn->iface,
|
||||
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
|
||||
vnn->public_netmask_bits);
|
||||
@ -391,9 +392,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
|
||||
state->vnn = vnn;
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state, release_ip_callback, state,
|
||||
"releaseip %s %s %u",
|
||||
false,
|
||||
CTDB_EVENT_RELEASE_IP,
|
||||
"%s %s %u",
|
||||
vnn->iface,
|
||||
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
|
||||
vnn->public_netmask_bits);
|
||||
@ -1382,7 +1384,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
|
||||
if (vnn->pnn == ctdb->pnn) {
|
||||
vnn->pnn = -1;
|
||||
}
|
||||
ctdb_event_script(ctdb, "releaseip %s %s %u",
|
||||
ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
|
||||
vnn->iface,
|
||||
talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
|
||||
vnn->public_netmask_bits);
|
||||
@ -2122,9 +2124,10 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
|
||||
DLIST_REMOVE(ctdb->vnn, vnn);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
mem_ctx, delete_ip_callback, mem_ctx,
|
||||
"releaseip %s %s %u",
|
||||
false,
|
||||
CTDB_EVENT_RELEASE_IP,
|
||||
"%s %s %u",
|
||||
vnn->iface,
|
||||
talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
|
||||
vnn->public_netmask_bits);
|
||||
|
@ -63,7 +63,8 @@ static const struct {
|
||||
{ "VacuumLimit", 5000, offsetof(struct ctdb_tunable, vacuum_limit) },
|
||||
{ "VacuumMinInterval", 60, offsetof(struct ctdb_tunable, vacuum_min_interval) },
|
||||
{ "VacuumMaxInterval", 600, offsetof(struct ctdb_tunable, vacuum_max_interval) },
|
||||
{ "MaxQueueDropMsg", 1000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) }
|
||||
{ "MaxQueueDropMsg", 1000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
|
||||
{ "UseStatusEvents", 0, offsetof(struct ctdb_tunable, use_status_events_for_monitoring) }
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -32,6 +32,19 @@ static struct {
|
||||
const char *script_running;
|
||||
} child_state;
|
||||
|
||||
static const char *call_names[] = {
|
||||
"startup",
|
||||
"startrecovery",
|
||||
"recovered",
|
||||
"takeip",
|
||||
"releaseip",
|
||||
"stopped",
|
||||
"monitor",
|
||||
"status",
|
||||
"shutdown",
|
||||
"reload"
|
||||
};
|
||||
|
||||
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p);
|
||||
|
||||
/*
|
||||
@ -61,11 +74,13 @@ static void sigterm(int sig)
|
||||
struct ctdb_event_script_state {
|
||||
struct ctdb_context *ctdb;
|
||||
pid_t child;
|
||||
/* Warning: this can free us! */
|
||||
void (*callback)(struct ctdb_context *, int, void *);
|
||||
int cb_status;
|
||||
int fd[2];
|
||||
void *private_data;
|
||||
enum ctdb_eventscript_call call;
|
||||
const char *options;
|
||||
struct timed_event *te;
|
||||
struct timeval timeout;
|
||||
};
|
||||
|
||||
@ -81,28 +96,22 @@ struct ctdb_monitor_script_status {
|
||||
char *output;
|
||||
};
|
||||
|
||||
struct ctdb_monitor_status {
|
||||
struct timeval start;
|
||||
struct timeval finished;
|
||||
int32_t status;
|
||||
struct ctdb_monitor_script_status_ctx {
|
||||
struct ctdb_monitor_script_status *scripts;
|
||||
struct ctdb_event_script_state *state;
|
||||
};
|
||||
|
||||
|
||||
/* called from ctdb_logging when we have received output on STDERR from
|
||||
* one of the eventscripts
|
||||
*/
|
||||
int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
|
||||
{
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
struct ctdb_monitor_script_status *script;
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
script = monitoring_status->scripts;
|
||||
script = ctdb->current_monitor_status_ctx->scripts;
|
||||
if (script == NULL) {
|
||||
return -1;
|
||||
}
|
||||
@ -121,17 +130,13 @@ int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t
|
||||
*/
|
||||
int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
|
||||
{
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
|
||||
DEBUG(DEBUG_INFO, ("event script init called\n"));
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Init called when context is NULL\n"));
|
||||
return 0;
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when initing script\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
monitoring_status->start = timeval_current();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -142,41 +147,26 @@ int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
|
||||
int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
{
|
||||
const char *name = (const char *)indata.dptr;
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
struct ctdb_event_script_state *state;
|
||||
struct ctdb_monitor_script_status *script;
|
||||
|
||||
DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when starting script\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
|
||||
script = talloc_zero(ctdb->current_monitor_status_ctx, struct ctdb_monitor_script_status);
|
||||
if (script == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
|
||||
return -1;
|
||||
}
|
||||
|
||||
script->next = monitoring_status->scripts;
|
||||
script->next = ctdb->current_monitor_status_ctx->scripts;
|
||||
script->name = talloc_strdup(script, name);
|
||||
CTDB_NO_MEMORY(ctdb, script->name);
|
||||
script->start = timeval_current();
|
||||
monitoring_status->scripts = script;
|
||||
|
||||
state = monitoring_status->state;
|
||||
if (state != NULL) {
|
||||
/* reset the timeout for the next eventscript */
|
||||
if (!timeval_is_zero(&state->timeout)) {
|
||||
if (state->te != NULL) {
|
||||
talloc_free(state->te);
|
||||
state->te = NULL;
|
||||
}
|
||||
state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
|
||||
}
|
||||
|
||||
}
|
||||
ctdb->current_monitor_status_ctx->scripts = script;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -187,15 +177,14 @@ int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA inda
|
||||
int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
{
|
||||
int32_t res = *((int32_t *)indata.dptr);
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
struct ctdb_monitor_script_status *script;
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
script = monitoring_status->scripts;
|
||||
script = ctdb->current_monitor_status_ctx->scripts;
|
||||
if (script == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
|
||||
return -1;
|
||||
@ -214,17 +203,16 @@ int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indat
|
||||
int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
{
|
||||
const char *name = (const char *)indata.dptr;
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
struct ctdb_monitor_script_status *script;
|
||||
|
||||
DEBUG(DEBUG_INFO, ("event script disabed called for script %s\n", name));
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
script = monitoring_status->scripts;
|
||||
script = ctdb->current_monitor_status_ctx->scripts;
|
||||
if (script == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
|
||||
return -1;
|
||||
@ -242,24 +230,19 @@ int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA i
|
||||
*/
|
||||
int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
|
||||
{
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
|
||||
DEBUG(DEBUG_INFO, ("event script finished called\n"));
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
monitoring_status->finished = timeval_current();
|
||||
monitoring_status->status = MONITOR_SCRIPT_OK;
|
||||
|
||||
if (ctdb->last_monitor_ctx) {
|
||||
talloc_free(ctdb->last_monitor_ctx);
|
||||
ctdb->last_monitor_ctx = NULL;
|
||||
if (ctdb->last_monitor_status_ctx) {
|
||||
talloc_free(ctdb->last_monitor_status_ctx);
|
||||
ctdb->last_monitor_status_ctx = NULL;
|
||||
}
|
||||
ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
|
||||
ctdb->script_monitor_ctx = NULL;
|
||||
ctdb->last_monitor_status_ctx = ctdb->current_monitor_status_ctx;
|
||||
ctdb->current_monitor_status_ctx = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -303,11 +286,11 @@ static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_
|
||||
|
||||
int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
|
||||
{
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->last_monitor_ctx;
|
||||
struct ctdb_monitor_script_status_ctx *script_status = talloc_get_type(ctdb->last_monitor_status_ctx, struct ctdb_monitor_script_status_ctx);
|
||||
struct ctdb_monitoring_wire *monitoring_scripts;
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
|
||||
if (script_status == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " last_monitor_status_ctx is NULL when reading status\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -318,7 +301,7 @@ int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA
|
||||
}
|
||||
|
||||
monitoring_scripts->num_scripts = 0;
|
||||
monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
|
||||
monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, script_status->scripts);
|
||||
if (monitoring_scripts == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
|
||||
return -1;
|
||||
@ -474,23 +457,21 @@ static struct ctdb_script_list *ctdb_get_script_list(struct ctdb_context *ctdb,
|
||||
|
||||
|
||||
/*
|
||||
run the event script - varargs version
|
||||
Actually run the event script
|
||||
this function is called and run in the context of a forked child
|
||||
which allows it to do blocking calls such as system()
|
||||
*/
|
||||
static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
static int ctdb_run_event_script(struct ctdb_context *ctdb,
|
||||
bool from_user,
|
||||
enum ctdb_eventscript_call call,
|
||||
const char *options)
|
||||
{
|
||||
char *cmdstr;
|
||||
int ret;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
struct ctdb_script_list *scripts, *current;
|
||||
int is_monitor = 0;
|
||||
|
||||
if (!strcmp(options, "monitor")) {
|
||||
is_monitor = 1;
|
||||
}
|
||||
|
||||
if (is_monitor == 1) {
|
||||
if (!from_user && call == CTDB_EVENT_MONITOR) {
|
||||
/* This is running in the forked child process. At this stage
|
||||
* we want to switch from being a ctdb daemon into being a
|
||||
* client and connect to the real local daemon.
|
||||
@ -510,14 +491,15 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
||||
/* we guarantee that only some specifically allowed event scripts are run
|
||||
while in recovery */
|
||||
const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip", "stopped" };
|
||||
const enum ctdb_eventscript_call allowed_calls[] = {
|
||||
CTDB_EVENT_START_RECOVERY, CTDB_EVENT_SHUTDOWN, CTDB_EVENT_RELEASE_IP, CTDB_EVENT_STOPPED };
|
||||
int i;
|
||||
for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
|
||||
if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
|
||||
for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
|
||||
if (call == allowed_calls[i]) break;
|
||||
}
|
||||
if (i == ARRAY_SIZE(allowed_scripts)) {
|
||||
DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
|
||||
options));
|
||||
if (i == ARRAY_SIZE(allowed_calls)) {
|
||||
DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
|
||||
call_names[call]));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
@ -541,10 +523,26 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
them
|
||||
*/
|
||||
for (current=scripts; current; current=current->next) {
|
||||
/* we dont run disabled scripts, we just report they are disabled */
|
||||
cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
|
||||
ctdb->event_script_dir,
|
||||
current->name, options);
|
||||
const char *str = from_user ? "CTDB_CALLED_BY_USER=1 " : "";
|
||||
|
||||
/* Allow a setting where we run the actual monitor event
|
||||
from an external source and replace it with
|
||||
a "status" event that just picks up the actual
|
||||
status of the event asynchronously.
|
||||
*/
|
||||
if ((ctdb->tunable.use_status_events_for_monitoring != 0)
|
||||
&& (call == CTDB_EVENT_MONITOR)
|
||||
&& !from_user) {
|
||||
cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s",
|
||||
str,
|
||||
ctdb->event_script_dir,
|
||||
current->name, "status");
|
||||
} else {
|
||||
cmdstr = talloc_asprintf(tmp_ctx, "%s%s/%s %s %s",
|
||||
str,
|
||||
ctdb->event_script_dir,
|
||||
current->name, call_names[call], options);
|
||||
}
|
||||
CTDB_NO_MEMORY(ctdb, cmdstr);
|
||||
|
||||
DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
|
||||
@ -552,7 +550,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
child_state.start = timeval_current();
|
||||
child_state.script_running = cmdstr;
|
||||
|
||||
if (is_monitor == 1) {
|
||||
if (!from_user && call == CTDB_EVENT_MONITOR) {
|
||||
if (ctdb_ctrl_event_script_start(ctdb, current->name) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to start event script monitoring\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
@ -585,7 +583,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
DEBUG(DEBUG_ERR,("Script %s returned status 127. Someone just deleted it?\n", cmdstr));
|
||||
}
|
||||
|
||||
if (is_monitor == 1) {
|
||||
if (!from_user && call == CTDB_EVENT_MONITOR) {
|
||||
if (ctdb_ctrl_event_script_stop(ctdb, ret) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to stop event script monitoring\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
@ -596,7 +594,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
/* return an error if the script failed */
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
|
||||
if (is_monitor == 1) {
|
||||
if (!from_user && call == CTDB_EVENT_MONITOR) {
|
||||
if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
@ -612,7 +610,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
|
||||
child_state.start = timeval_current();
|
||||
child_state.script_running = "finished";
|
||||
|
||||
if (is_monitor == 1) {
|
||||
if (!from_user && call == CTDB_EVENT_MONITOR) {
|
||||
if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
@ -631,20 +629,18 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
|
||||
struct ctdb_event_script_state *state =
|
||||
talloc_get_type(p, struct ctdb_event_script_state);
|
||||
struct ctdb_context *ctdb = state->ctdb;
|
||||
signed char rt = -1;
|
||||
|
||||
read(state->fd[0], &rt, sizeof(rt));
|
||||
|
||||
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, rt));
|
||||
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, rt, state->private_data);
|
||||
state->callback = NULL;
|
||||
if (read(state->fd[0], &state->cb_status, sizeof(state->cb_status)) !=
|
||||
sizeof(state->cb_status)) {
|
||||
state->cb_status = -2;
|
||||
}
|
||||
|
||||
talloc_set_destructor(state, NULL);
|
||||
talloc_free(state);
|
||||
DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n",
|
||||
call_names[state->call], state->options, state->cb_status));
|
||||
|
||||
state->child = 0;
|
||||
ctdb->event_script_timeouts = 0;
|
||||
talloc_free(state);
|
||||
}
|
||||
|
||||
static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
|
||||
@ -667,29 +663,19 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
||||
struct timeval t, void *p)
|
||||
{
|
||||
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
|
||||
void *private_data = state->private_data;
|
||||
struct ctdb_context *ctdb = state->ctdb;
|
||||
char *options;
|
||||
struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
|
||||
|
||||
state->te = NULL;
|
||||
DEBUG(DEBUG_ERR,("Event script timed out : %s %s count : %u pid : %d\n",
|
||||
call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
|
||||
|
||||
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
|
||||
if (kill(state->child, 0) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, 0, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
talloc_set_destructor(state, NULL);
|
||||
state->child = 0;
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
options = talloc_strdup(ctdb, state->options);
|
||||
CTDB_NO_MEMORY_VOID(ctdb, options);
|
||||
|
||||
if (!strcmp(options, "monitor")) {
|
||||
if (state->call == CTDB_EVENT_MONITOR) {
|
||||
/* if it is a monitor event, we allow it to "hang" a few times
|
||||
before we declare it a failure and ban ourself (and make
|
||||
ourself unhealthy)
|
||||
@ -697,135 +683,180 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
|
||||
|
||||
ctdb->event_script_timeouts++;
|
||||
|
||||
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
|
||||
if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, -ETIME, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
} else {
|
||||
ctdb->event_script_timeouts = 0;
|
||||
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
|
||||
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, -1, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
}
|
||||
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
|
||||
state->cb_status = -ETIME;
|
||||
} else {
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, 0, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
state->cb_status = 0;
|
||||
}
|
||||
} else if (!strcmp(options, "startup")) {
|
||||
} else if (state->call == CTDB_EVENT_STARTUP) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, -1, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
state->cb_status = -1;
|
||||
} else {
|
||||
/* if it is not a monitor event we ban ourself immediately */
|
||||
/* if it is not a monitor or a startup event we ban ourself
|
||||
immediately
|
||||
*/
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
|
||||
|
||||
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
|
||||
if (state->callback) {
|
||||
state->callback(ctdb, -1, private_data);
|
||||
state->callback = NULL;
|
||||
}
|
||||
|
||||
state->cb_status = -1;
|
||||
}
|
||||
|
||||
if ((!strcmp(options, "monitor")) && (monitoring_status != NULL)) {
|
||||
if (state->call == CTDB_EVENT_MONITOR || state->call == CTDB_EVENT_STATUS) {
|
||||
struct ctdb_monitor_script_status *script;
|
||||
|
||||
script = monitoring_status->scripts;
|
||||
if (ctdb->current_monitor_status_ctx == NULL) {
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
script = ctdb->current_monitor_status_ctx->scripts;
|
||||
if (script != NULL) {
|
||||
script->timedout = 1;
|
||||
}
|
||||
monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
|
||||
if (ctdb->last_monitor_ctx) {
|
||||
talloc_free(ctdb->last_monitor_ctx);
|
||||
ctdb->last_monitor_ctx = NULL;
|
||||
|
||||
if (ctdb->last_monitor_status_ctx) {
|
||||
talloc_free(ctdb->last_monitor_status_ctx);
|
||||
ctdb->last_monitor_status_ctx = NULL;
|
||||
}
|
||||
ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
|
||||
ctdb->script_monitor_ctx = NULL;
|
||||
ctdb->last_monitor_status_ctx = talloc_steal(ctdb, ctdb->current_monitor_status_ctx);
|
||||
ctdb->current_monitor_status_ctx = NULL;
|
||||
}
|
||||
|
||||
talloc_free(state);
|
||||
talloc_free(options);
|
||||
}
|
||||
|
||||
/*
|
||||
destroy a running event script
|
||||
destroy an event script: kill it if ->child != 0.
|
||||
*/
|
||||
static int event_script_destructor(struct ctdb_event_script_state *state)
|
||||
{
|
||||
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
||||
if (state->child) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
|
||||
|
||||
if (state->callback) {
|
||||
state->callback(state->ctdb, 0, state->private_data);
|
||||
state->callback = NULL;
|
||||
if (kill(state->child, SIGTERM) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
|
||||
}
|
||||
}
|
||||
|
||||
if (kill(state->child, SIGTERM) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
|
||||
/* This is allowed to free us; talloc will prevent double free anyway,
|
||||
* but beware if you call this outside the destructor! */
|
||||
if (state->callback) {
|
||||
state->callback(state->ctdb, state->cb_status, state->private_data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  count the whitespace-separated words in an eventscript option string

  Words are separated by runs of spaces and/or tabs; leading and
  trailing separators are ignored.  Returns 0 for a NULL, empty or
  all-blank string.
 */
static unsigned int count_words(const char *options)
{
	static const char separators[] = " \t";
	unsigned int words = 0;

	/* Treat a missing option string like an empty one instead of
	   dereferencing NULL. */
	if (options == NULL) {
		return 0;
	}

	/* Skip leading separators so they are not counted as a word. */
	options += strspn(options, separators);
	while (*options != '\0') {
		words++;
		/* Step over the word itself, then over whatever separators
		   follow it; we end up on the next word or on the NUL. */
		options += strcspn(options, separators);
		options += strspn(options, separators);
	}
	return words;
}
|
||||
|
||||
static bool check_options(enum ctdb_eventscript_call call, const char *options)
|
||||
{
|
||||
switch (call) {
|
||||
/* These all take no arguments. */
|
||||
case CTDB_EVENT_STARTUP:
|
||||
case CTDB_EVENT_START_RECOVERY:
|
||||
case CTDB_EVENT_RECOVERED:
|
||||
case CTDB_EVENT_STOPPED:
|
||||
case CTDB_EVENT_MONITOR:
|
||||
case CTDB_EVENT_STATUS:
|
||||
case CTDB_EVENT_SHUTDOWN:
|
||||
case CTDB_EVENT_RELOAD:
|
||||
return count_words(options) == 0;
|
||||
|
||||
case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
|
||||
case CTDB_EVENT_RELEASE_IP:
|
||||
return count_words(options) == 3;
|
||||
|
||||
default:
|
||||
DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_eventscript_call %u\n", call));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
run the event script in the background, calling the callback when
|
||||
finished
|
||||
*/
|
||||
static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
struct timeval timeout,
|
||||
void (*callback)(struct ctdb_context *, int, void *),
|
||||
void *private_data,
|
||||
bool from_user,
|
||||
enum ctdb_eventscript_call call,
|
||||
const char *fmt, va_list ap)
|
||||
{
|
||||
struct ctdb_monitor_status *monitoring_status;
|
||||
TALLOC_CTX *mem_ctx;
|
||||
struct ctdb_event_script_state *state;
|
||||
int ret;
|
||||
|
||||
if (!strcmp(fmt, "monitor")) {
|
||||
if (ctdb->script_monitor_ctx != NULL) {
|
||||
talloc_free(ctdb->script_monitor_ctx);
|
||||
ctdb->script_monitor_ctx = NULL;
|
||||
if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) {
|
||||
/* if this was a "monitor" or a status event, we recycle the
|
||||
context to start a new monitor event
|
||||
*/
|
||||
if (ctdb->monitor_event_script_ctx != NULL) {
|
||||
talloc_free(ctdb->monitor_event_script_ctx);
|
||||
ctdb->monitor_event_script_ctx = NULL;
|
||||
}
|
||||
monitoring_status = talloc_zero(ctdb, struct ctdb_monitor_status);
|
||||
ctdb->monitor_event_script_ctx = talloc_new(ctdb);
|
||||
mem_ctx = ctdb->monitor_event_script_ctx;
|
||||
|
||||
if (ctdb->current_monitor_status_ctx != NULL) {
|
||||
talloc_free(ctdb->current_monitor_status_ctx);
|
||||
ctdb->current_monitor_status_ctx = NULL;
|
||||
}
|
||||
|
||||
ctdb->current_monitor_status_ctx = talloc(ctdb, struct ctdb_monitor_script_status_ctx);
|
||||
CTDB_NO_MEMORY(ctdb, ctdb->current_monitor_status_ctx);
|
||||
ctdb->current_monitor_status_ctx->scripts = NULL;
|
||||
} else {
|
||||
if (ctdb->event_script_ctx == NULL) {
|
||||
ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
|
||||
/* any other script will first terminate any monitor event */
|
||||
if (ctdb->monitor_event_script_ctx != NULL) {
|
||||
talloc_free(ctdb->monitor_event_script_ctx);
|
||||
ctdb->monitor_event_script_ctx = NULL;
|
||||
}
|
||||
monitoring_status = ctdb->event_script_ctx;
|
||||
/* and then use a context common for all non-monitor events */
|
||||
if (ctdb->other_event_script_ctx == NULL) {
|
||||
ctdb->other_event_script_ctx = talloc_new(ctdb);
|
||||
}
|
||||
mem_ctx = ctdb->other_event_script_ctx;
|
||||
}
|
||||
|
||||
if (monitoring_status == NULL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
state = talloc(monitoring_status, struct ctdb_event_script_state);
|
||||
if (state == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " could not allocate state\n"));
|
||||
return -1;
|
||||
}
|
||||
monitoring_status->state = state;
|
||||
state = talloc(mem_ctx, struct ctdb_event_script_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->ctdb = ctdb;
|
||||
state->callback = callback;
|
||||
state->private_data = private_data;
|
||||
state->call = call;
|
||||
state->options = talloc_vasprintf(state, fmt, ap);
|
||||
state->timeout = timeout;
|
||||
state->te = NULL;
|
||||
state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
|
||||
if (state->options == NULL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
}
|
||||
if (!check_options(state->call, state->options)) {
|
||||
DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
|
||||
call_names[state->call], state->options));
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s\n", state->options));
|
||||
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
|
||||
call_names[state->call], state->options));
|
||||
|
||||
ret = pipe(state->fd);
|
||||
if (ret != 0) {
|
||||
@ -843,28 +874,22 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
}
|
||||
|
||||
if (state->child == 0) {
|
||||
signed char rt;
|
||||
int rt;
|
||||
|
||||
close(state->fd[0]);
|
||||
set_close_on_exec(state->fd[1]);
|
||||
|
||||
rt = ctdb_event_script_v(ctdb, state->options);
|
||||
while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
|
||||
write(state->fd[1], &rt, sizeof(rt));
|
||||
usleep(100000);
|
||||
}
|
||||
rt = ctdb_run_event_script(ctdb, from_user, state->call, state->options);
|
||||
/* We must be able to write PIPEBUF bytes at least; if this
|
||||
somehow fails, the read above will be short. */
|
||||
write(state->fd[1], &rt, sizeof(rt));
|
||||
close(state->fd[1]);
|
||||
_exit(rt);
|
||||
}
|
||||
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
if (!strcmp(fmt, "monitor")) {
|
||||
ctdb->script_monitor_ctx = monitoring_status;
|
||||
} else {
|
||||
ctdb->event_script_ctx = monitoring_status;
|
||||
}
|
||||
|
||||
close(state->fd[1]);
|
||||
set_close_on_exec(state->fd[0]);
|
||||
talloc_set_destructor(state, event_script_destructor);
|
||||
|
||||
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
|
||||
|
||||
@ -872,9 +897,10 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
ctdb_event_script_handler, state);
|
||||
|
||||
if (!timeval_is_zero(&state->timeout)) {
|
||||
state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
|
||||
event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
|
||||
} else {
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
|
||||
DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n",
|
||||
call_names[state->call], state->options));
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -886,17 +912,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
|
||||
finished
|
||||
*/
|
||||
int ctdb_event_script_callback(struct ctdb_context *ctdb,
|
||||
struct timeval timeout,
|
||||
TALLOC_CTX *mem_ctx,
|
||||
void (*callback)(struct ctdb_context *, int, void *),
|
||||
void *private_data,
|
||||
bool from_user,
|
||||
enum ctdb_eventscript_call call,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int ret;
|
||||
|
||||
va_start(ap, fmt);
|
||||
ret = ctdb_event_script_callback_v(ctdb, timeout, callback, private_data, fmt, ap);
|
||||
ret = ctdb_event_script_callback_v(ctdb, callback, private_data, from_user, call, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
return ret;
|
||||
@ -919,24 +946,23 @@ static void event_script_callback(struct ctdb_context *ctdb, int status, void *p
|
||||
}
|
||||
|
||||
/*
|
||||
run the event script, waiting for it to complete. Used when the caller doesn't want to
|
||||
continue till the event script has finished.
|
||||
run the event script, waiting for it to complete. Used when the caller
|
||||
doesn't want to continue till the event script has finished.
|
||||
*/
|
||||
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
|
||||
int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call call,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int ret;
|
||||
struct callback_status status;
|
||||
|
||||
va_start(ap, fmt);
|
||||
ret = ctdb_event_script_callback_v(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
event_script_callback, &status, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
ret = ctdb_event_script_callback_v(ctdb,
|
||||
event_script_callback, &status, false, call, fmt, ap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
va_end(ap);
|
||||
|
||||
status.status = -1;
|
||||
status.done = false;
|
||||
@ -946,6 +972,11 @@ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
|
||||
return status.status;
|
||||
}
|
||||
|
||||
/*
  run the event scripts for `call` with no additional arguments

  Forwards to ctdb_event_script_args() with an empty option string
  and returns that function's result unchanged.
 */
int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_eventscript_call call)
|
||||
{
|
||||
/* GCC complains about empty format string, so use %s and "". */
|
||||
return ctdb_event_script_args(ctdb, call, "%s", "");
|
||||
}
|
||||
|
||||
struct eventscript_callback_state {
|
||||
struct ctdb_req_control *c;
|
||||
@ -964,17 +995,36 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
|
||||
|
||||
if (status != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
/* the control succeeded */
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
||||
/* This will free the struct ctdb_event_script_state we are in! */
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* Returns rest of string, or NULL if no match. */
|
||||
static const char *get_call(const char *p, enum ctdb_eventscript_call *call)
|
||||
{
|
||||
unsigned int len;
|
||||
|
||||
/* Skip any initial whitespace. */
|
||||
p += strspn(p, " \t");
|
||||
|
||||
/* See if we match any. */
|
||||
for (*call = 0; *call < ARRAY_SIZE(call_names); (*call)++) {
|
||||
len = strlen(call_names[*call]);
|
||||
if (strncmp(p, call_names[*call], len) == 0) {
|
||||
/* If end of string or whitespace, we're done. */
|
||||
if (strcspn(p + len, " \t") == 0) {
|
||||
return p + len;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
A control to force running of the eventscripts from the ctdb client tool
|
||||
*/
|
||||
@ -984,29 +1034,33 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
|
||||
{
|
||||
int ret;
|
||||
struct eventscript_callback_state *state;
|
||||
const char *options;
|
||||
enum ctdb_eventscript_call call;
|
||||
|
||||
if (ctdb->event_script_ctx == NULL) {
|
||||
ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
|
||||
/* Figure out what call they want. */
|
||||
options = get_call((const char *)indata.dptr, &call);
|
||||
if (!options) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Invalid forced \"%s\"\n", (const char *)indata.dptr));
|
||||
return -1;
|
||||
}
|
||||
|
||||
state = talloc(ctdb->event_script_ctx, struct eventscript_callback_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->c = talloc_steal(state, c);
|
||||
|
||||
DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
|
||||
|
||||
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
|
||||
return -1;
|
||||
}
|
||||
|
||||
state = talloc(ctdb->other_event_script_ctx, struct eventscript_callback_state);
|
||||
CTDB_NO_MEMORY(ctdb, state);
|
||||
|
||||
state->c = talloc_steal(state, c);
|
||||
|
||||
DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
|
||||
|
||||
ctdb_disable_monitoring(ctdb);
|
||||
|
||||
ret = ctdb_event_script_callback(ctdb,
|
||||
timeval_set(ctdb->tunable.script_timeout, 0),
|
||||
state, run_eventscripts_callback, state,
|
||||
"%s", (const char *)indata.dptr);
|
||||
true, call, "%s", options);
|
||||
|
||||
if (ret != 0) {
|
||||
ctdb_enable_monitoring(ctdb);
|
||||
|
Loading…
Reference in New Issue
Block a user