1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00
samba-mirror/ctdb/config/functions

1484 lines
38 KiB
Plaintext
Raw Normal View History

# Hey Emacs, this is a -*- shell-script -*- !!!
# utility functions for ctdb event scripts
PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
[ -z "$CTDB_VARDIR" ] && {
if [ -d "/var/lib/ctdb" ] ; then
export CTDB_VARDIR="/var/lib/ctdb"
else
export CTDB_VARDIR="/var/ctdb"
fi
}
[ -z "$CTDB_ETCDIR" ] && {
export CTDB_ETCDIR="/etc"
}
#######################################
# pull in a system config file, if any
_loadconfig() {
if [ -z "$1" ] ; then
foo="${service_config:-${service_name}}"
if [ -n "$foo" ] ; then
loadconfig "$foo"
fi
elif [ "$1" != "ctdb" ] ; then
loadconfig "ctdb"
fi
if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
. $CTDB_ETCDIR/sysconfig/$1
elif [ -f $CTDB_ETCDIR/default/$1 ]; then
. $CTDB_ETCDIR/default/$1
elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
. $CTDB_BASE/sysconfig/$1
fi
}
loadconfig () {
_loadconfig "$@"
}
##############################################################
# CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
# configuration file.
debug ()
{
if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
# If there are arguments then echo them. Otherwise expect to
# use stdin, which allows us to pass lots of debug using a
# here document.
if [ -n "$1" ] ; then
echo "DEBUG: $*"
elif ! tty -s ; then
sed -e 's@^@DEBUG: @'
fi
fi
}
die ()
{
_msg="$1"
_rc="${2:-1}"
echo "$_msg"
exit $_rc
}
# Log given message or stdin to either syslog or a CTDB log file
# $1 is the tag passed to logger if syslog is in use.
script_log ()
{
_tag="$1" ; shift
_using_syslog=false
if [ "$CTDB_SYSLOG" = "yes" -o -z "$CTDB_LOGFILE" ] ; then
_using_syslog=true
fi
case "$CTDB_OPTIONS" in
*--syslog*) _using_syslog=true ;;
esac
if $_using_syslog ; then
logger -t "ctdbd: ${_tag}" $*
else
{
if [ -n "$*" ] ; then
echo "$*"
else
cat
fi
} >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
fi
}
# When things are run in the background in an eventscript then logging
# output might get lost. This is the "solution". :-)
background_with_logging ()
{
(
"$@" 2>&1 </dev/null |
script_log "${script_name}&"
)&
return 0
}
##############################################################
# check number of args for different events
ctdb_check_args ()
{
case "$1" in
takeip|releaseip)
if [ $# != 4 ]; then
echo "ERROR: must supply interface, IP and maskbits"
exit 1
fi
;;
updateip)
if [ $# != 5 ]; then
echo "ERROR: must supply old interface, new interface, IP and maskbits"
exit 1
fi
;;
esac
}
##############################################################
# determine on what type of system (init style) we are running
detect_init_style() {
# only do detection if not already set:
test "x$CTDB_INIT_STYLE" != "x" && return
if [ -x /sbin/startproc ]; then
CTDB_INIT_STYLE="suse"
elif [ -x /sbin/start-stop-daemon ]; then
CTDB_INIT_STYLE="debian"
else
CTDB_INIT_STYLE="redhat"
fi
}
######################################################
# simulate /sbin/service on platforms that don't have it
# _service() makes it easier to hook the service() function for
# testing.
_service ()
{
_service_name="$1"
_op="$2"
# do nothing, when no service was specified
[ -z "$_service_name" ] && return
if [ -x /sbin/service ]; then
$_nice /sbin/service "$_service_name" "$_op"
elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
$_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
$_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
fi
}
service()
{
_nice=""
_service "$@"
}
######################################################
# simulate /sbin/service (niced) on platforms that don't have it
nice_service()
{
_nice="nice"
_service "$@"
}
######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
# 1st arg is relative path under /proc/, 2nd arg is value to set
set_proc ()
{
echo "$2" >"/proc/$1"
}
######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
# 1st arg is relative path under /proc/
get_proc ()
{
cat "/proc/$1"
}
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
######################################################
# Check that an RPC service is healthy -
# this includes allowing a certain number of failures
# before marking the NFS service unhealthy.
#
# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
#
# each triple is a set of 3 arguments: an operator, a
# fail count limit and an action string.
#
# For example:
#
# nfs_check_rpc_service "lockd" \
# -ge 15 "verbose restart unhealthy" \
# -eq 10 "restart:bs"
#
# says that if lockd is down for 15 iterations then do
# a verbose restart of lockd and mark the node unhealthy.
# Before this, after 10 iterations of failure, the
# service is restarted silently in the background.
# Order is important: the number of failures need to be
# specified in reverse order because processing stops
# after the first condition that is true.
######################################################
nfs_check_rpc_service ()
{
_prog_name="$1" ; shift
_v=""
case "$1" in
-*) : ;;
*) _v="$1" ; shift ;;
esac
_version=${_v:-1}
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
_rpc_prog="$_prog_name"
_restart=""
_opts=""
case "$_prog_name" in
knfsd)
_rpc_prog=nfs
_version=${_v:-3}
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
_restart="echo 'Trying to restart NFS service'"
_restart="${_restart}; startstop_nfs restart"
;;
ganesha)
_rpc_prog=nfs
_version=${_v:-3}
_restart="echo 'Trying to restart Ganesha NFS service'"
_restart="${_restart}; startstop_ganesha restart"
;;
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
mountd)
_opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
;;
rquotad)
_opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
;;
lockd)
_rpc_prog=nlockmgr
_version=${_v:-4}
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
_restart="echo 'Trying to restart lock manager service'"
_restart="${_restart}; startstop_nfslock restart"
;;
statd)
_rpc_prog=status
_opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
_opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
_opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
;;
*)
echo "Internal error: unknown RPC program \"$_prog_name\"."
exit 1
esac
_service_name="nfs_${_prog_name}"
if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
ctdb_counter_init "$_service_name"
return 0
fi
ctdb_counter_incr "$_service_name"
while [ -n "$3" ] ; do
ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
for _action in $3 ; do
case "$_action" in
verbose)
echo "$ctdb_check_rpc_out"
;;
restart|restart:*)
# No explicit command specified, construct rpc command.
if [ -z "$_restart" ] ; then
_p="rpc.${_prog_name}"
_restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
_restart="${_restart}; killall -q -9 $_p"
_restart="${_restart}; $_p $_opts"
fi
# Process restart flags...
_flags="${_action#restart:}"
# There may not have been a colon...
[ "$_flags" != "$_action" ] || _flags=""
# q=quiet - everything to /dev/null
if [ "${_flags#*q}" != "$_flags" ] ; then
_restart="{ ${_restart} ; } >/dev/null 2>&1"
fi
# s=stealthy - last command to /dev/null
if [ "${_flags#*s}" != "$_flags" ] ; then
_restart="${_restart} >/dev/null 2>&1"
fi
# b=background - the whole thing, easy and reliable
if [ "${_flags#*b}" != "$_flags" ] ; then
_restart="{ ${_restart} ; } &"
fi
# Do it!
eval "${_restart}"
;;
unhealthy)
exit 1
;;
*)
echo "Internal error: unknown action \"$_action\"."
exit 1
esac
done
# Only process the first action group.
break
}
shift 3
done
}
######################################################
# check that a rpc server is registered with portmap
# and responding to requests
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
# usage: ctdb_check_rpc SERVICE_NAME VERSION
######################################################
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
ctdb_check_rpc ()
{
progname="$1"
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
version="$2"
Eventscripts: clean up 60.nfs monitor event. This adds a helper function called nfs_check_rpc_service() and uses it to make the monitor event much more readable. An example of usage is as follows: nfs_check_rpc_service "mountd" \ -ge 10 "verbose restart:b unhealthy" \ -eq 5 "restart:b" The first argument to nfs_check_rpc_service() is the name of the RPC service to be checked. The RPC service corresponding to this command is checked for availability using the rpcinfo command. If the service is available then the function succeeds and subsequent arguments are ignored. If the rpcinfo check fails then a failure counter for that particular RPC service is incremented and subsequent arguments are processed in groups of 3: 1. An integer comparison operator supported by test. 2. An integer failure limit. 3. An action string. The value of the failure counter is checked using (1) and (2) above. The first check that succeeds has its action string processed - note that this explains the somewhat curious reverse ordering of checks. It the example above: * If the counter is >= 10 then a verbose message is printed describing the failure, the service is restarted in the background and the node is marked as unhealthy (via an "exit 1" from the function). * If the counter is == 5 then the service us restarted in the background. For more action options please see the code. This also changes the ctdb_check_rpc() function so that it no longer takes a program number to check. It now just takes a real RPC program name that rpcinfo can resolve via /etc/rpc. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 9b66057964756a6245bafb436eb6106fb6a2866e)
2010-12-17 08:25:04 +03:00
if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
ctdb_check_rpc_out="ERROR: $progname failed RPC check:
$ctdb_check_rpc_out"
echo "$ctdb_check_rpc_out"
return 1
fi
}
######################################################
# check a set of directories is available
# return 1 on a missing directory
# usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
######################################################
ctdb_check_directories_probe() {
while IFS="" read d ; do
case "$d" in
*%*)
continue
;;
*)
[ -d "${d}/." ] || return 1
esac
done
}
######################################################
# check a set of directories is available
# usage: ctdb_check_directories SERVICE_NAME <directories...>
######################################################
ctdb_check_directories() {
n="${1:-${service_name}}"
ctdb_check_directories_probe || {
echo "ERROR: $n directory \"$d\" not available"
exit 1
}
}
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
######################################################
# This flag file is created when a service is initially started. It
# is deleted the first time TCP port checks for that service succeed.
# Until then ctdb_check_tcp_ports() prints a more subtle "error"
# message if a port check fails.
_ctdb_check_tcp_common ()
{
_ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
}
ctdb_check_tcp_init ()
{
_ctdb_check_tcp_common
mkdir -p "${_ctdb_service_started_file%/*}" # dirname
touch "$_ctdb_service_started_file"
}
ctdb_check_tcp_ports()
{
if [ -z "$1" ] ; then
echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
exit 1
fi
# Set default value for CTDB_TCP_PORT_CHECKS if unset.
# If any of these defaults are unsupported then this variable can
# be overridden in /etc/sysconfig/ctdb or via a file in
# /etc/ctdb/rc.local.d/.
: ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
for _c in $CTDB_TCP_PORT_CHECKERS ; do
ctdb_check_tcp_ports_$_c "$@"
case "$?" in
0)
_ctdb_check_tcp_common
rm -f "$_ctdb_service_started_file"
return 0
;;
1)
_ctdb_check_tcp_common
if [ ! -f "$_ctdb_service_started_file" ] ; then
echo "ERROR: $service_name tcp port $_p is not responding"
debug <<EOF
$ctdb_check_tcp_ports_debug
EOF
else
echo "INFO: $service_name tcp port $_p is not responding"
fi
return 1
;;
127)
debug <<EOF
ctdb_check_ports - checker $_c not implemented
output from checker was:
$ctdb_check_tcp_ports_debug
EOF
;;
*)
esac
done
echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
return 127
}
ctdb_check_tcp_ports_netstat ()
{
_cmd='netstat -l -t -n'
_ns=$($_cmd 2>&1)
if [ $? -eq 127 ] ; then
# netstat probably not installed - unlikely?
ctdb_check_tcp_ports_debug="$_ns"
return 127
fi
for _p ; do # process each function argument (port)
for _a in '0\.0\.0\.0' '::' ; do
_pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
if echo "$_ns" | grep -E -q "$_pat" ; then
# We matched the port, so process next port
continue 2
fi
done
# We didn't match the port, so flag an error.
ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_ns"
return 1
done
return 0
}
ctdb_check_tcp_ports_nmap ()
{
# nmap wants a comma-separated list of ports
_ports=""
for _p ; do
_ports="${_ports}${_ports:+,}${_p}"
done
_cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
_nmap_out=$($_cmd 2>&1)
if [ $? -eq 127 ] ; then
# nmap probably not installed
ctdb_check_tcp_ports_debug="$_nmap_out"
return 127
fi
# get the port-related output
_port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
for _p ; do
# looking for something like this:
# 445/open/tcp//microsoft-ds///
# possibly followed by a comma
_t="$_p/open/tcp//"
case "$_port_info" in
# The info we're after must be either at the beginning of
# the string or it must follow a space.
$_t*|*\ $_t*) : ;;
*)
# Nope, flag an error...
ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_nmap_out"
return 1
esac
done
return 0
}
# Use the new "ctdb checktcpport" command to check the port.
# This is very cheap.
ctdb_check_tcp_ports_ctdb ()
{
for _p ; do # process each function argument (port)
_cmd="ctdb checktcpport $_p"
_out=$($_cmd 2>&1)
_ret=$?
case "$_ret" in
0)
ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
return 1
;;
98)
# Couldn't bind, something already listening, next port...
continue
;;
*)
ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
$_out"
# assume not implemented
return 127
esac
done
return 0
}
######################################################
# check a unix socket
# usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
######################################################
ctdb_check_unix_socket() {
socket_path="$1"
[ -z "$socket_path" ] && return
if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
echo "ERROR: $service_name socket $socket_path not found"
return 1
fi
}
######################################################
# check a command returns zero status
# usage: ctdb_check_command SERVICE_NAME <command>
######################################################
ctdb_check_command() {
service_name="$1"
wait_cmd="$2"
[ -z "$wait_cmd" ] && return;
$wait_cmd > /dev/null 2>&1 || {
echo "ERROR: $service_name - $wait_cmd returned error"
exit 1
}
}
################################################
# kill off any TCP connections with the given IP
################################################
kill_tcp_connections() {
_IP="$1"
_failed=0
_killcount=0
connfile="$CTDB_VARDIR/state/connections.$_IP"
mkdir -p "${connfile%/*}" # dirname
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
while read dest src; do
srcip=`echo $src | sed -e "s/:[^:]*$//"`
srcport=`echo $src | sed -e "s/^.*://"`
destip=`echo $dest | sed -e "s/:[^:]*$//"`
destport=`echo $dest | sed -e "s/^.*://"`
echo "Killing TCP connection $srcip:$srcport $destip:$destport"
ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
case $destport in
# we only do one-way killtcp for CIFS
139|445) : ;;
# for all others we do 2-way
*)
ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
;;
esac
_killcount=`expr $_killcount + 1`
done < $connfile
rm -f $connfile
[ $_failed = 0 ] || {
echo "Failed to send killtcp control"
return;
}
[ $_killcount -gt 0 ] || {
return;
}
_count=0
while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
sleep 1
_count=`expr $_count + 1`
[ $_count -gt 3 ] && {
echo "Timed out killing tcp connections for IP $_IP"
return;
}
done
echo "killed $_killcount TCP connections to released IP $_IP"
}
##################################################################
# kill off the local end for any TCP connections with the given IP
##################################################################
kill_tcp_connections_local_only() {
_IP="$1"
_failed=0
_killcount=0
connfile="$CTDB_VARDIR/state/connections.$_IP"
mkdir -p "${connfile%/*}" # dirname
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
while read dest src; do
srcip=`echo $src | sed -e "s/:[^:]*$//"`
srcport=`echo $src | sed -e "s/^.*://"`
destip=`echo $dest | sed -e "s/:[^:]*$//"`
destport=`echo $dest | sed -e "s/^.*://"`
echo "Killing TCP connection $srcip:$srcport $destip:$destport"
ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
_killcount=`expr $_killcount + 1`
done < $connfile
rm -f $connfile
[ $_failed = 0 ] || {
echo "Failed to send killtcp control"
return;
}
[ $_killcount -gt 0 ] || {
return;
}
_count=0
while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
sleep 1
_count=`expr $_count + 1`
[ $_count -gt 3 ] && {
echo "Timed out killing tcp connections for IP $_IP"
return;
}
done
echo "killed $_killcount TCP connections to released IP $_IP"
}
##################################################################
# tickle any TCP connections with the given IP
##################################################################
tickle_tcp_connections() {
_IP="$1"
_failed=0
_killcount=0
connfile="$CTDB_VARDIR/state/connections.$_IP"
mkdir -p "${connfile%/*}" # dirname
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
while read dest src; do
srcip=`echo $src | sed -e "s/:[^:]*$//"`
srcport=`echo $src | sed -e "s/^.*://"`
destip=`echo $dest | sed -e "s/:[^:]*$//"`
destport=`echo $dest | sed -e "s/^.*://"`
echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
done < $connfile
rm -f $connfile
[ $_failed = 0 ] || {
echo "Failed to send tickle control"
return;
}
}
########################################################
# start/stop the Ganesha nfs service
########################################################
startstop_ganesha()
{
_service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
case "$1" in
start)
service "$_service_name" start
;;
stop)
service "$_service_name" stop
;;
restart)
service "$_service_name" restart
;;
esac
}
########################################################
# start/stop the nfs service on different platforms
########################################################
startstop_nfs() {
PLATFORM="unknown"
[ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
PLATFORM="sles"
}
[ -x $CTDB_ETCDIR/init.d/nfslock ] && {
PLATFORM="rhel"
}
case $PLATFORM in
sles)
case $1 in
start)
service nfsserver start
;;
stop)
service nfsserver stop > /dev/null 2>&1
;;
restart)
set_proc "fs/nfsd/threads" 0
service nfsserver stop > /dev/null 2>&1
pkill -9 nfsd
service nfsserver start
;;
esac
;;
rhel)
case $1 in
start)
service nfslock start
service nfs start
;;
stop)
service nfs stop
service nfslock stop
;;
restart)
set_proc "fs/nfsd/threads" 0
service nfs stop > /dev/null 2>&1
service nfslock stop > /dev/null 2>&1
pkill -9 nfsd
service nfslock start
service nfs start
;;
esac
;;
*)
echo "Unknown platform. NFS is not supported with ctdb"
exit 1
;;
esac
}
########################################################
# start/stop the nfs lockmanager service on different platforms
########################################################
startstop_nfslock() {
PLATFORM="unknown"
[ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
PLATFORM="sles"
}
[ -x $CTDB_ETCDIR/init.d/nfslock ] && {
PLATFORM="rhel"
}
case $PLATFORM in
sles)
# for sles there is no service for lockmanager
# so we instead just shutdown/restart nfs
case $1 in
start)
service nfsserver start
;;
stop)
service nfsserver stop > /dev/null 2>&1
;;
restart)
service nfsserver stop
service nfsserver start
;;
esac
;;
rhel)
case $1 in
start)
service nfslock start
;;
stop)
service nfslock stop > /dev/null 2>&1
;;
restart)
service nfslock stop
service nfslock start
;;
esac
;;
*)
echo "Unknown platform. NFS locking is not supported with ctdb"
exit 1
;;
esac
}
add_ip_to_iface()
{
_iface=$1
_ip=$2
_maskbits=$3
_lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
mkdir -p "${_lockfile%/*}" # dirname
[ -f "$_lockfile" ] || touch "$_lockfile"
(
# Note: use of return/exit/die() below only gets us out of the
# sub-shell, which is actually what we want. That is, the
# function should just return non-zero.
flock --timeout 30 0 || \
die "add_ip_to_iface: unable to get lock for ${_iface}"
# Ensure interface is up
ip link set "$_iface" up || \
die "Failed to bringup interface $_iface"
ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
die "Failed to add $_ip/$_maskbits on dev $_iface"
) <"$_lockfile"
# Do nothing here - return above only gets us out of the subshell
# and doing anything here will affect the return code.
}
delete_ip_from_iface()
{
_iface=$1
_ip=$2
_maskbits=$3
_lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
mkdir -p "${_lockfile%/*}" # dirname
[ -f "$_lockfile" ] || touch "$_lockfile"
(
# Note: use of return/exit/die() below only gets us out of the
# sub-shell, which is actually what we want. That is, the
# function should just return non-zero.
flock --timeout 30 0 || \
die "delete_ip_from_iface: unable to get lock for ${_iface}"
_im="$_ip/$_maskbits" # shorthand for readability
# "ip addr del" will delete all secondary IPs if this is the
# primary. To work around this _very_ annoying behaviour we
# have to keep a record of the secondaries and re-add them
# afterwards. Yuck!
_secondaries=""
if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
_secondaries=$(ip addr list dev "$_iface" secondary | \
awk '$1 == "inet" { print $2 }')
fi
local _rc=0
ip addr del "$_im" dev "$_iface" || {
echo "Failed to del $_ip on dev $_iface"
_rc=1
}
if [ -n "$_secondaries" ] ; then
for _i in $_secondaries; do
if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
echo "Kept secondary $_i on dev $_iface"
else
echo "Re-adding secondary address $_i to dev $_iface"
ip addr add $_i brd + dev $_iface || {
echo "Failed to re-add address $_i to dev $_iface"
_rc=1
}
fi
done
fi
return $_rc
) <"$_lockfile"
# Do nothing here - return above only gets us out of the subshell
# and doing anything here will affect the return code.
}
# If the given IP is hosted then print 2 items: maskbits and iface
ip_maskbits_iface ()
{
_addr="$1"
ip addr show to "${_addr}/32" 2>/dev/null | \
awk '$1 == "inet" { print gensub(".*/", "", 1, $2), $NF }'
}
drop_ip ()
{
_addr="${1%/*}" # Remove optional maskbits
_log_tag="$2"
set -- $(ip_maskbits_iface $_addr)
if [ -n "$1" ] ; then
_maskbits="$1"
_iface="$2"
if [ -n "$_log_tag" ] ; then
script_log "$_log_tag" \
"Removing public address $_addr/$_maskbits from device $_iface"
fi
ip addr del $_addr/$_maskbits dev $_iface >/dev/null 2>&1
fi
}
drop_all_public_ips ()
{
_log_tag="$1"
while read _ip _x ; do
drop_ip "$_ip" "$_log_tag"
done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
}
########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init
# ctdb_counter_incr
# ctdb_check_counter_limit <limit>
# ctdb_check_counter_limit fails when count >= <limit>
########################################################
_ctdb_counter_common () {
_service_name="${1:-${service_name}}"
_counter_file="$ctdb_fail_dir/$_service_name"
mkdir -p "${_counter_file%/*}" # dirname
}
ctdb_counter_init () {
_ctdb_counter_common "$1"
>"$_counter_file"
}
ctdb_counter_incr () {
_ctdb_counter_common "$1"
# unary counting!
echo -n 1 >> "$_counter_file"
}
ctdb_check_counter_limit () {
_ctdb_counter_common
_limit="${1:-${service_fail_limit}}"
_quiet="$2"
# unary counting!
_size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
if [ $_size -ge $_limit ] ; then
echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
exit 1
elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
fi
}
ctdb_check_counter () {
_msg="${1:-error}" # "error" - anything else is silent on fail
_op="${2:--ge}" # an integer operator supported by test
_limit="${3:-${service_fail_limit}}"
shift 3
_ctdb_counter_common "$1"
# unary counting!
_size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
if [ $_size $_op $_limit ] ; then
if [ "$_msg" = "error" ] ; then
echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
exit 1
else
return 1
fi
fi
}
########################################################
ctdb_status_dir="$CTDB_VARDIR/status"
ctdb_fail_dir="$CTDB_VARDIR/failcount"
ctdb_setup_service_state_dir ()
{
service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
mkdir -p "$service_state_dir" || {
echo "Error creating state dir \"$service_state_dir\""
exit 1
}
}
########################################################
# Managed status history, for auto-start/stop
ctdb_managed_dir="$CTDB_VARDIR/managed_history"
_ctdb_managed_common ()
{
_service_name="${1:-${service_name}}"
_ctdb_managed_file="$ctdb_managed_dir/$_service_name"
}
ctdb_service_managed ()
{
_ctdb_managed_common "$@"
mkdir -p "$ctdb_managed_dir"
touch "$_ctdb_managed_file"
}
ctdb_service_unmanaged ()
{
_ctdb_managed_common "$@"
rm -f "$_ctdb_managed_file"
}
is_ctdb_previously_managed_service ()
{
_ctdb_managed_common "$@"
[ -f "$_ctdb_managed_file" ]
}
########################################################
# Check and set status
log_status_cat ()
{
echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
}
ctdb_checkstatus ()
{
if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
return 1
elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
return 2
else
return 0
fi
}
ctdb_setstatus ()
{
d="$ctdb_status_dir/$script_name"
case "$1" in
unhealthy|banned)
mkdir -p "$d"
cat "$2" >"$d/$1"
;;
*)
for i in "banned" "unhealthy" ; do
rm -f "$d/$i"
done
;;
esac
}
##################################################################
# Reconfigure a service on demand
_ctdb_service_reconfigure_common ()
{
_d="$ctdb_status_dir/${1:-${service_name}}"
mkdir -p "$_d"
_ctdb_service_reconfigure_flag="$_d/reconfigure"
}
ctdb_service_needs_reconfigure ()
{
_ctdb_service_reconfigure_common "$@"
[ -e "$_ctdb_service_reconfigure_flag" ]
}
ctdb_service_set_reconfigure ()
{
_ctdb_service_reconfigure_common "$@"
>"$_ctdb_service_reconfigure_flag"
}
ctdb_service_unset_reconfigure ()
{
_ctdb_service_reconfigure_common "$@"
rm -f "$_ctdb_service_reconfigure_flag"
}
ctdb_service_reconfigure ()
{
echo "Reconfiguring service \"$@\"..."
ctdb_service_unset_reconfigure "$@"
service_reconfigure "$@" || return $?
ctdb_counter_init "$@"
}
# Default service_reconfigure() function does nothing.
service_reconfigure ()
{
:
}
ctdb_reconfigure_try_lock ()
{
_ctdb_service_reconfigure_common "$@"
_lock="${_d}/reconfigure_lock"
mkdir -p "${_lock%/*}" # dirname
touch "$_lock"
(
flock 0
# This is overkill but will work if we need to extend this to
# allow certain events to run multiple times in parallel
# (e.g. takeip) and write multiple PIDs to the file.
read _locker_event
if [ -n "$_locker_event" ] ; then
while read _pid ; do
if [ -n "$_pid" -a "$_pid" != $$ ] && \
kill -0 "$_pid" 2>/dev/null ; then
exit 1
fi
done
fi
printf "%s\n%s\n" "$event_name" $$ >"$_lock"
exit 0
) <"$_lock"
}
ctdb_replay_monitor_status ()
{
echo "Replaying previous status for this script due to reconfigure..."
# Leading colon (':') is missing in some versions...
_out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
# Output looks like this:
# :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
# This is the cheapest way of getting fields in the middle.
set -- $(IFS=":" ; echo $_out)
_code="$3"
_status="$4"
# The error output field can include colons so we'll try to
# preserve them. The weak checking at the beginning tries to make
# this work for both broken (no leading ':') and fixed output.
_out="${_out%:}"
_err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
case "$_status" in
OK) : ;; # Do nothing special.
TIMEDOUT)
# Recast this as an error, since we can't exit with the
# correct negative number.
_code=1
_err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
;;
DISABLED)
# Recast this as an OK, since we can't exit with the
# correct negative number.
_code=0
_err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
;;
*) : ;; # Must be ERROR, do nothing special.
esac
echo "$_err_out"
exit $_code
}
ctdb_service_check_reconfigure ()
{
[ -n "$1" ] || set -- "$service_name"
# We only care about some events in this function. For others we
# return now.
case "$event_name" in
monitor|ipreallocated|reconfigure) : ;;
*) return 0 ;;
esac
if ctdb_reconfigure_try_lock "$@" ; then
# No events covered by this function are running, so proceed
# with gay abandon.
case "$event_name" in
reconfigure)
(ctdb_service_reconfigure "$@")
exit $?
;;
ipreallocated)
if ctdb_service_needs_reconfigure "$@" ; then
ctdb_service_reconfigure "$@"
fi
;;
monitor)
if ctdb_service_needs_reconfigure "$@" ; then
ctdb_service_reconfigure "$@"
# Given that the reconfigure might not have
# resulted in the service being stable yet, we
# replay the previous status since that's the best
# information we have.
ctdb_replay_monitor_status
fi
;;
esac
else
# Somebody else is running an event we don't want to collide
# with. We proceed with caution.
case "$event_name" in
reconfigure)
# Tell whoever called us to retry.
exit 2
;;
ipreallocated)
# Defer any scheduled reconfigure and just run the
# rest of the ipreallocated event, as per the
# eventscript. There's an assumption here that the
# event doesn't depend on any scheduled reconfigure.
# This is true in the current code.
return 0
;;
monitor)
# There is most likely a reconfigure in progress so
# the service is possibly unstable. As above, we
# defer any scheduled reconfigured. We also replay
# the previous monitor status since that's the best
# information we have.
ctdb_replay_monitor_status
;;
esac
fi
}
##################################################################
# Does CTDB manage this service? - and associated auto-start/stop
ctdb_compat_managed_service ()
{
if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
fi
}
is_ctdb_managed_service ()
{
_service_name="${1:-${service_name}}"
# $t is used just for readability and to allow better accurate
# matching via leading/trailing spaces
t=" $CTDB_MANAGED_SERVICES "
# Return 0 if "<space>$_service_name<space>" appears in $t
if [ "${t#* ${_service_name} }" != "${t}" ] ; then
return 0
fi
# If above didn't match then update $CTDB_MANAGED_SERVICES for
# backward compatibility and try again.
ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "apache2"
ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
t=" $CTDB_MANAGED_SERVICES "
# Return 0 if "<space>$_service_name<space>" appears in $t
[ "${t#* ${_service_name} }" != "${t}" ]
}
ctdb_start_stop_service ()
{
_service_name="${1:-${service_name}}"
# Allow service-start/service-stop pseudo-events to start/stop
# services when we're not auto-starting/stopping and we're not
# monitoring.
case "$event_name" in
service-start)
if is_ctdb_managed_service "$_service_name" ; then
die 'service-start event not permitted when service is managed'
fi
if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
fi
ctdb_service_start "$_service_name"
exit $?
;;
service-stop)
if is_ctdb_managed_service "$_service_name" ; then
die 'service-stop event not permitted when service is managed'
fi
if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
fi
ctdb_service_stop "$_service_name"
exit $?
;;
esac
# Do nothing unless configured to...
[ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
[ "$event_name" = "monitor" ] || return 0
if is_ctdb_managed_service "$_service_name" ; then
if ! is_ctdb_previously_managed_service "$_service_name" ; then
echo "Starting service \"$_service_name\" - now managed"
background_with_logging ctdb_service_start "$_service_name"
exit $?
fi
else
if is_ctdb_previously_managed_service "$_service_name" ; then
echo "Stopping service \"$_service_name\" - no longer managed"
background_with_logging ctdb_service_stop "$_service_name"
exit $?
fi
fi
}
ctdb_service_start ()
{
# The service is marked managed if we've ever tried to start it.
ctdb_service_managed "$@"
# Here we only want $1. If no argument is passed then
# service_start needs to know.
service_start "$@" || return $?
ctdb_counter_init "$@"
ctdb_check_tcp_init
}
ctdb_service_stop ()
{
ctdb_service_unmanaged "$@"
service_stop "$@"
}
# Default service_start() and service_stop() functions.
# These may be overridden in an eventscript. When overriding, the
# following convention must be followed. If these functions are
# called with no arguments then they may use internal logic to
# determine whether the service is managed and, therefore, whether
# they should take any action. However, if the service name is
# specified as an argument then an attempt must be made to start or
# stop the service. This is because the auto-start/stop code calls
# them with the service name as an argument.
service_start ()
{
service "${1:-${service_name}}" start
}
service_stop ()
{
service "${1:-${service_name}}" stop
}
##################################################################
ctdb_standard_event_handler ()
{
case "$1" in
status)
ctdb_checkstatus
exit
;;
setstatus)
shift
ctdb_setstatus "$@"
exit
;;
esac
}
# iptables doesn't like being re-entered, so flock-wrap it.
iptables()
{
flock -w 30 $CTDB_VARDIR/iptables-ctdb.flock /sbin/iptables "$@"
}
########################################################
# tickle handling
########################################################
update_tickles ()
{
_port="$1"
tickledir="$CTDB_VARDIR/state/tickles"
mkdir -p "$tickledir"
# Who am I?
_pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
# What public IPs do I hold?
_ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
# IPs as a regexp choice
_ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
# Record connections to our public IPs in a temporary file
_my_connections="${tickledir}/${_port}.connections"
rm -f "$_my_connections"
netstat -tn |
awk -v destpat="^${_ipschoice}:${_port}\$" \
'$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
sort >"$_my_connections"
# Record our current tickles in a temporary file
_my_tickles="${tickledir}/${_port}.tickles"
rm -f "$_my_tickles"
for _i in $_ips ; do
ctdb -Y gettickles $_i $_port |
awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
done |
sort >"$_my_tickles"
# Add tickles for connections that we haven't already got tickles for
comm -23 "$_my_connections" "$_my_tickles" |
while read _src _dst ; do
ctdb addtickle $_src $_dst
done
# Remove tickles for connections that are no longer there
comm -13 "$_my_connections" "$_my_tickles" |
while read _src _dst ; do
ctdb deltickle $_src $_dst
done
rm -f "$_my_connections" "$_my_tickles"
}
########################################################
# load a site local config file
########################################################
[ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
. "$CTDB_RC_LOCAL"
}
[ -x $CTDB_BASE/rc.local ] && {
. $CTDB_BASE/rc.local
}
[ -d $CTDB_BASE/rc.local.d ] && {
for i in $CTDB_BASE/rc.local.d/* ; do
[ -x "$i" ] && . "$i"
done
}
script_name="${0##*/}" # basename
service_name="$script_name" # default is just the script name
service_fail_limit=1
event_name="$1"