1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-03 01:18:10 +03:00
samba-mirror/ctdb/config/functions
Martin Schwenke 578dfa5765 ctdb-scripts: Avoid flapping NFS services at startup
If an NFS service check is set to, say, unhealthy_after=2 then it will
always switch from the (default startup) unhealthy state to healthy,
even if there is a fatal problem.  If all services/scripts appear OK
then the node will become healthy.  When the counter hits the limit it
will return to unhealthy.  This is misleading.

Instead, never use the counter at startup, until the service becomes
healthy.  This stops services flapping unhealthy-healthy-unhealthy.

A side-effect is that a service that starts in a broken state will
never be restarted to try to fix the problem.  This makes sense.  The
counting and restarting really exist to deal with problems that might
occur under load.  The first monitor events occur before public IPs
are hosted, so there can be no load.  If a service doesn't start
reliably the first time then the admin probably wants to know about
it.

nfs_iterate_test() is updated to run an initial monitor event to mark
the services as healthy.  This initialises the counter so it can be
used for the important part of the test.  Passing the -i option avoids
running the extra monitor event, so the first iteration will be the
initial monitor event.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
2024-08-20 22:50:34 +00:00

1258 lines
28 KiB
Bash
Executable File

# Hey Emacs, this is a -*- shell-script -*- !!!
# utility functions for ctdb event scripts
# CTDB_BASE must be provided by whatever sources this file.  Refuse to
# continue without it: paths used later in this file, such as
# ${CTDB_BASE}/script.options and ${CTDB_BASE}/rc.local, are built
# from it.
if [ -z "$CTDB_BASE" ]; then
echo 'CTDB_BASE unset in CTDB functions file'
exit 1
fi
export CTDB_BASE
# CTDB_VARDIR is used elsewhere
# shellcheck disable=SC2034
CTDB_VARDIR="/usr/local/var/lib/ctdb"
# Command used to run the ctdb tool.  A value already present in the
# environment takes precedence over the default path.
CTDB="${CTDB:-/usr/local/bin/ctdb}"
# Only (and always) override these variables in test code
# Directory holding state/cache files written by the functions below
if [ -z "$CTDB_SCRIPT_VARDIR" ]; then
CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/scripts"
fi
# Root of the system configuration tree (sysconfig/, default/,
# os-release, init.d/ are looked up beneath it)
if [ -z "$CTDB_SYS_ETCDIR" ]; then
CTDB_SYS_ETCDIR="/etc"
fi
# Directory containing helper programs (ctdb-config, ctdb_killtcp)
if [ -z "$CTDB_HELPER_BINDIR" ]; then
CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb"
fi
#######################################
# Pull in a system config file, if any.
#
# Each argument is a config file name.  Names are tried in the order
# given and, for each name, ${CTDB_SYS_ETCDIR}/sysconfig/<name> is
# preferred over ${CTDB_SYS_ETCDIR}/default/<name>.  Only the first
# file found is sourced.  Nothing happens if no candidate exists.
load_system_config()
{
	for _i in "$@"; do
		for _lsc_dir in sysconfig default; do
			if [ -f "${CTDB_SYS_ETCDIR}/${_lsc_dir}/${_i}" ]; then
				. "${CTDB_SYS_ETCDIR}/${_lsc_dir}/${_i}"
				return
			fi
		done
	done
}
# load_script_options [ component script ]
#
# Source ${CTDB_BASE}/script.options, if readable, followed by the
# options file specific to a script, if readable.
#
# With 2 arguments, script is an event script name and its options
# file is looked up under ${CTDB_BASE}/events/legacy; component is
# currently ignored.  With no arguments the options file name is
# derived from $0 with any ".script" suffix removed.  Any other number
# of arguments is a usage error.
load_script_options()
{
	case $# in
	2) _script="$2" ;;
	0) _script="" ;;
	*) die "usage: load_script_options [ component script ]" ;;
	esac

	# Options shared by all scripts
	_options="${CTDB_BASE}/script.options"
	[ ! -r "$_options" ] || . "$_options"

	# Options for one particular script
	if [ -z "$_script" ]; then
		_s="${0%.script}"
	else
		_s="${CTDB_BASE}/events/legacy/${_script}"
	fi
	_options="${_s}.options"
	[ ! -r "$_options" ] || . "$_options"
}
##############################################################
# Print a message on stderr and exit.
# $1 is the message, $2 is the optional exit code (default 1)
die()
{
	echo "$1" >&2
	exit "${2:-1}"
}
# Log given message or stdin to either syslog or a CTDB log file
# $1 is the tag passed to logger if syslog is in use.
#
# The destination is chosen by $CTDB_LOGGING:
#   "file:"        - stderr
#   "file:<path>"  - append to <path>
#   ""             - append to the default CTDB log file
#   anything else  - hand over to logger(1)
# For the file/stderr cases the remaining arguments are the message;
# if they are empty then the message is read from stdin.
script_log()
{
	_tag="$1"
	shift

	case "$CTDB_LOGGING" in
	file:)
		if [ -z "$*" ]; then
			cat >&2
		else
			echo "$*" >&2
		fi
		;;
	file:* | "")
		_file="${CTDB_LOGGING#file:}"
		if [ -z "$_file" ]; then
			_file="/usr/local/var/log/log.ctdb"
		fi

		if [ -z "$*" ]; then
			cat >>"$_file"
		else
			echo "$*" >>"$_file"
		fi
		;;
	*)
		# Handle all syslog:* variants here too.  There's no tool to do
		# the lossy things, so just use logger.
		logger -t "ctdbd: ${_tag}" "$@"
		;;
	esac
}
# When things are run in the background in an eventscript then logging
# output might get lost.  This is the "solution".  :-)
#
# Runs the given command in the background with stdin from /dev/null
# and with both stdout and stderr piped to script_log(), tagged with
# "${script_name}&".  Always returns 0 without waiting.
background_with_logging()
{
	{
		"$@" </dev/null 2>&1 | script_log "${script_name}&"
	} &

	return 0
}
##############################################################
# check number of args for different events
#
# Called with the event name followed by the event's arguments.
# Prints an error and exits with status 1 if an IP event was not
# given the expected number of arguments.  Other events are not
# checked.
ctdb_check_args()
{
	case "$1" in
	takeip | releaseip)
		[ $# != 4 ] || return 0
		echo "ERROR: must supply interface, IP and maskbits"
		exit 1
		;;
	updateip)
		[ $# != 5 ] || return 0
		echo "ERROR: must supply old interface, new interface, IP and maskbits"
		exit 1
		;;
	esac
}
##############################################################
# determine on what type of system (init style) we are running
#
# Sets $CTDB_INIT_STYLE from the cache file
# ${CTDB_SCRIPT_VARDIR}/init-style.  If the cache file does not exist
# it is created first: either from a pre-set $CTDB_INIT_STYLE or by
# inspecting ${CTDB_SYS_ETCDIR}/os-release.
detect_init_style()
{
	_init_style_file="${CTDB_SCRIPT_VARDIR}/init-style"

	if [ ! -f "$_init_style_file" ] && [ -n "$CTDB_INIT_STYLE" ]; then
		# Explicitly configured, so just remember the setting
		echo "$CTDB_INIT_STYLE" >"$_init_style_file"
		return
	fi

	if [ ! -f "$_init_style_file" ]; then
		# Subshell to contain variables in os-release file
		(
			_os_release="${CTDB_SYS_ETCDIR}/os-release"
			if [ ! -f "$_os_release" ]; then
				echo "WARNING: unknown distribution ${ID}" >&2
				echo "unknown"
				exit
			fi

			. "$_os_release"
			case "$ID" in
			centos | fedora | rhel) echo "redhat" ;;
			debian | ubuntu) echo "debian" ;;
			sles | suse) echo "suse" ;;
			*)
				# Unknown ID: fall back to the family it claims
				# to resemble, otherwise use the ID itself
				case "$ID_LIKE" in
				*centos* | *rhel*) echo "redhat" ;;
				*) echo "$ID" ;;
				esac
				;;
			esac
		) >"$_init_style_file"
	fi

	read -r CTDB_INIT_STYLE <"$_init_style_file"
}
######################################################
# simulate /sbin/service on platforms that don't have it
# _service() makes it easier to hook the service() function for
# testing.
#
# $1 is the service name, $2 is the operation (e.g. start, stop).
# The first available mechanism is used: /sbin/service,
# /usr/sbin/service, /bin/systemctl, then an init script below
# ${CTDB_SYS_ETCDIR}.  If $_nice is set then it is used as a prefix
# for the command that is run.
_service()
{
	_service_name="$1"
	_op="$2"

	# do nothing, when no service was specified
	if [ -z "$_service_name" ]; then
		return
	fi

	_service_initd="${CTDB_SYS_ETCDIR}/init.d/${_service_name}"
	_service_rc_initd="${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}"

	# $_nice is deliberately unquoted: when empty it must vanish
	if [ -x /sbin/service ]; then
		$_nice /sbin/service "$_service_name" "$_op"
	elif [ -x /usr/sbin/service ]; then
		$_nice /usr/sbin/service "$_service_name" "$_op"
	elif [ -x /bin/systemctl ]; then
		# systemctl takes the operation before the unit name
		$_nice /bin/systemctl "$_op" "$_service_name"
	elif [ -x "$_service_initd" ]; then
		$_nice "$_service_initd" "$_op"
	elif [ -x "$_service_rc_initd" ]; then
		$_nice "$_service_rc_initd" "$_op"
	fi
}
# Run a service operation at normal priority: clears $_nice and
# passes all arguments (service name, operation) on to _service().
service()
{
_nice=""
_service "$@"
}
######################################################
# simulate /sbin/service (niced) on platforms that don't have it
# Same as service() except that $_nice is set to "nice", which
# _service() uses as a prefix for the command it runs.
nice_service()
{
_nice="nice"
_service "$@"
}
######################################################
# Cached retrieval of PNN from local node.  This never changes so why
# open a client connection to the server each time this is needed?
#
# Prints the PNN on stdout.  The value is only cached once "ctdb pnn"
# has succeeded and printed something.  A failure is therefore retried
# on the next call instead of leaving an empty cache file behind that
# would be served forever.
# Returns non-zero, printing nothing, if the PNN can not be determined.
ctdb_get_pnn()
{
	_pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn"

	# -s rather than -f: an empty file is not a usable cache entry
	if [ -s "$_pnn_file" ]; then
		cat "$_pnn_file"
		return
	fi

	_pnn_out=$($CTDB pnn) || return $?
	[ -n "$_pnn_out" ] || return 1

	# Caching is best effort, the value is printed regardless
	echo "$_pnn_out" >"$_pnn_file"
	echo "$_pnn_out"
}
# Cached retrieval of private IP address from local node.  This never
# changes.
#
# Prints the address on stdout.  The address is the 3rd '|'-separated
# field on the 2nd line of "ctdb -X nodestatus" output.  It is only
# cached once a non-empty value has been obtained, so a failed query
# is retried on the next call rather than caching an empty answer.
# Returns non-zero, printing nothing, if no address can be determined.
ctdb_get_ip_address()
{
	_ip_addr_file="${CTDB_SCRIPT_VARDIR}/my-ip-address"

	# -s rather than -f: an empty file is not a usable cache entry
	if [ -s "$_ip_addr_file" ]; then
		cat "$_ip_addr_file"
		return
	fi

	_ip_addr_out=$($CTDB -X nodestatus |
		awk -F '|' 'NR == 2 { print $3 }')
	[ -n "$_ip_addr_out" ] || return 1

	# Caching is best effort, the value is printed regardless
	echo "$_ip_addr_out" >"$_ip_addr_file"
	echo "$_ip_addr_out"
}
# Cache of public IP addresses assigned to this node.  This function
# exists mainly so statd_callout does not need to talk to ctdbd, so
# can be run as non-root, but it may be used in other places.  This
# must be updated/refreshed on failover.  This is done in
# 10.interface, but doing it in "ipreallocated" isn't enough because
# clients may connect as soon as "takeip" completes.  Also, the VNN in
# the daemon is only updated after the "releaseip" event completes, so
# "ctdb -X ip" can't be relied on there.  Hence, complex updates
# involving locking for "takeip" & "releaseip".  A future
# restructuring of the failover model will obsolete all of these
# moving parts.
CTDB_MY_PUBLIC_IPS_CACHE="${CTDB_SCRIPT_VARDIR}/my-public-ip-addresses"

# update_my_public_ip_addresses <event> [ <ip> ]
#
# Update the cache for the given event:
#   takeip <ip>     - append <ip>
#   releaseip <ip>  - remove lines exactly matching <ip>
#   ipreallocated   - rebuild from "ctdb -X ip" for the local PNN
# The new contents are built in a temporary file and moved into place
# while holding a lock file.  Any other event is rejected with an
# error on stderr and status 1, leaving the cache untouched.
update_my_public_ip_addresses()
{
	_event="$1"

	# Reject unsupported events up front.  Otherwise no temporary
	# file would be written and the final mv would fail with a
	# confusing "cannot stat" error.
	case "$_event" in
	takeip | releaseip | ipreallocated) : ;;
	*)
		echo "update_my_public_ip_addresses: unsupported event \"${_event}\"" >&2
		return 1
		;;
	esac

	_f="$CTDB_MY_PUBLIC_IPS_CACHE"
	_lock="${_f}.lock"

	# In private CTDB state directory - no $$ security issue
	_new="${_f}.new.$$"
	{
		flock --timeout 10 9 ||
			die "update_my_public_ip_addresses: timeout"

		case "$_event" in
		takeip)
			_ip="$2"
			# Redirect of stderr guards against initial
			# missing file
			cat "$_f" 2>/dev/null >"$_new"
			echo "$_ip" >>"$_new"
			;;
		releaseip)
			_ip="$2"
			# Redirect of stderr guards against initial
			# missing file, which shouldn't happen in
			# releaseip...
			grep -Fvx "$_ip" "$_f" 2>/dev/null >"$_new"
			;;
		ipreallocated)
			_pnn=$(ctdb_get_pnn)
			$CTDB -X ip |
				awk -F'|' -v pnn="$_pnn" \
					'$3 == pnn {print $2}' >"$_new"
			;;
		esac

		mv "$_new" "$_f"
	} 9>"$_lock"
}
# Cached retrieval of database options for use by event scripts.
#
# If the variables are already set then they should not be overwritten
# - this should only happen during event script testing.
#
# Sets CTDB_DBDIR, CTDB_DBDIR_PERSISTENT and CTDB_DBDIR_STATE by
# sourcing ${CTDB_SCRIPT_VARDIR}/db_options.cache, generating that
# file first if it does not exist.
#
# The cache is generated in a temporary file and only moved into
# place when every option has been translated.  ctdb_translate_option()
# exits on failure, so the translation runs in a subshell: this lets
# the temporary file be removed before exiting with the same status,
# instead of leaving a partial cache that later runs would trust.
ctdb_get_db_options()
{
	_db_opts_file="${CTDB_SCRIPT_VARDIR}/db_options.cache"

	if [ ! -f "$_db_opts_file" ]; then
		# In private CTDB state directory - no $$ security issue
		_db_opts_new="${_db_opts_file}.new.$$"
		(
			ctdb_translate_option "database" \
				"volatile database directory" \
				"CTDB_DBDIR"
			ctdb_translate_option "database" \
				"persistent database directory" \
				"CTDB_DBDIR_PERSISTENT"
			ctdb_translate_option "database" \
				"state database directory" \
				"CTDB_DBDIR_STATE"
		) >"$_db_opts_new" || {
			_db_opts_rc=$?
			rm -f "$_db_opts_new"
			exit "$_db_opts_rc"
		}
		mv "$_db_opts_new" "$_db_opts_file"
	fi

	. "$_db_opts_file"
}
# ctdb_translate_option <section> <option> <variable>
#
# Look up a configuration option using the ctdb-config helper and
# print it as a shell assignment of the form: <variable>="<value>"
# Exits with the helper's status if the lookup fails.
ctdb_translate_option()
{
	_section="$1"
	_opt="$2"
	_variable="$3"

	# ctdb-config already prints an error if something goes wrong
	if _t=$("${CTDB_HELPER_BINDIR}/ctdb-config" get "$_section" "$_opt"); then
		echo "${_variable}=\"${_t}\""
	else
		exit $?
	fi
}
######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
# 1st arg is relative path under /proc/, 2nd arg is value to set
# The exit status is that of the write, so it is non-zero if the
# /proc/ entry can not be opened for writing.
set_proc()
{
echo "$2" >"/proc/$1"
}
# Like set_proc() but silently does nothing, successfully, when the
# entry under /proc/ is not writable (e.g. it does not exist).
# 1st arg is relative path under /proc/, 2nd arg is value to set
set_proc_maybe()
{
	[ ! -w "/proc/$1" ] || set_proc "$1" "$2"
}
######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
# 1st arg is relative path under /proc/
# The contents are printed on stdout; the exit status is that of cat.
get_proc()
{
cat "/proc/$1"
}
######################################################
# Print kernel stack traces for processes with a given name
# $1 is the program name, as understood by pidof
# $2 is the maximum number of traces to print (default 1)
# Processes whose stack can not be read, or is empty, are skipped
# and do not count towards the maximum.
program_stack_traces()
{
	_prog="$1"
	_max="${2:-1}"

	_count=1
	for _pid in $(pidof "$_prog"); do
		if [ "$_count" -gt "$_max" ]; then
			break
		fi

		# Do this first to avoid racing with process exit
		_stack=$(get_proc "${_pid}/stack" 2>/dev/null)
		[ -n "$_stack" ] || continue

		echo "Stack trace for ${_prog}[${_pid}]:"
		echo "$_stack"
		_count=$((_count + 1))
	done
}
######################################################
# Ensure $service_name is set
# Exits via die() with an internal error message if it is empty.
assert_service_name()
{
	# service_name is set by the event script
	# shellcheck disable=SC2154
	if [ -z "$service_name" ]; then
		die "INTERNAL ERROR: \$service_name not set"
	fi
}
######################################################
# check a set of directories is available
# return 1 on a missing directory
# directories are read from stdin, one per line
# Entries containing a '%' are skipped without being checked.
# On failure $d is left set to the missing directory.
######################################################
ctdb_check_directories_probe()
{
	while IFS="" read -r d; do
		case "$d" in
		*%*) continue ;;
		esac

		if [ ! -d "${d}/." ]; then
			return 1
		fi
	done
}
######################################################
# check a set of directories is available
# directories are read from stdin
# Prints an error naming the first missing directory and exits with
# status 1 if the probe fails.
######################################################
ctdb_check_directories()
{
	if ! ctdb_check_directories_probe; then
		# $d is left behind by the probe
		echo "ERROR: $service_name directory \"$d\" not available"
		exit 1
	fi
}
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
######################################################
# Check whether something is listening on all of the given TCP ports
# using the "ctdb checktcpport" command.
#
# Status 98 from the command means that something is listening,
# status 0 means that nothing is.  Returns 0 if all ports are being
# listened on, 1 after reporting the first port that is not, or the
# command's status after reporting an unexpected failure.  Exits with
# status 1 if no ports are given.
ctdb_check_tcp_ports()
{
	if [ -z "$1" ]; then
		echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
		exit 1
	fi

	for _p in "$@"; do
		_cmd="$CTDB checktcpport $_p"
		_out=$($_cmd 2>&1)
		_ret=$?

		# Couldn't bind, something already listening, next port
		if [ "$_ret" = "98" ]; then
			continue
		fi

		if [ "$_ret" = "0" ]; then
			echo "$service_name not listening on TCP port $_p"
			return 1
		fi

		echo "unexpected error (${_ret}) running \"${_cmd}\""
		[ -z "$_out" ] || echo "$_out"
		return $_ret
	done

	# All ports listening
	return 0
}
######################################################
# check a unix socket
# usage: ctdb_check_unix_socket SOCKPATH
# Returns 1, after printing an error, if no socket path is given or
# if ss does not list a listening Unix domain socket at SOCKPATH.
######################################################
ctdb_check_unix_socket()
{
	_sockpath="$1"

	if [ -z "$_sockpath" ]; then
		echo "ERROR: ctdb_check_unix_socket() requires socket path"
		return 1
	fi

	# Anything after the header line is a matching socket
	_out=$(ss -l -x "src ${_sockpath}" | tail -n +2)
	[ -z "$_out" ] || return 0

	echo "ERROR: ${service_name} not listening on ${_sockpath}"
	return 1
}
################################################
# kill off any TCP connections with the given IP
#
# $1 is the interface passed to the ctdb_killtcp helper
# $2 is the IP address whose connections are killed
# $3, if "oneway", requests that each connection is only listed to
#    the helper in one direction
#
# A summary of how many connections were killed is printed, along
# with any connections that remain afterwards.
################################################
kill_tcp_connections()
{
_iface="$1"
_ip="$2"
_oneway=false
if [ "$3" = "oneway" ]; then
_oneway=true
fi
# Everything below runs on the receiving end of a pipeline, reading
# one connection per line as 2 fields: _dst and _src
get_tcp_connections_for_ip "$_ip" | {
_killcount=0
_connections=""
# A literal newline, used to separate entries in the list
_nl="
"
while read -r _dst _src; do
# The port is whatever follows the last ':' in _dst
_destport="${_dst##*:}"
__oneway=$_oneway
case $_destport in
# we only do one-way killtcp for CIFS
139 | 445) __oneway=true ;;
esac
_connections="${_connections}${_nl}${_src} ${_dst}"
# Unless one-way, also list the connection in the other direction
if ! $__oneway; then
_connections="${_connections}${_nl}${_dst} ${_src}"
fi
_killcount=$((_killcount + 1))
done
# Nothing to do if there are no connections
if [ $_killcount -eq 0 ]; then
return
fi
# A killtcp-specific debug level takes precedence over the general one
if [ -n "$CTDB_KILLTCP_DEBUGLEVEL" ]; then
_debuglevel="$CTDB_KILLTCP_DEBUGLEVEL"
else
_debuglevel="$CTDB_DEBUGLEVEL"
fi
echo "$_connections" |
CTDB_DEBUGLEVEL="$_debuglevel" \
"${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || {
echo "Failed to kill TCP connections"
return
}
# Look again to find out what, if anything, survived
_connections=$(get_tcp_connections_for_ip "$_ip")
if [ -z "$_connections" ]; then
_remaining=0
else
_remaining=$(echo "$_connections" | wc -l)
fi
_actually_killed=$((_killcount - _remaining))
_t="${_actually_killed}/${_killcount}"
echo "Killed ${_t} TCP connections to released IP $_ip"
if [ -n "$_connections" ]; then
echo "Remaining connections:"
echo "$_connections" | sed -e 's|^| |'
fi
}
}
##################################################################
# kill off the local end for any TCP connections with the given IP
#
# Calls kill_tcp_connections() with the given arguments followed by
# "oneway".  NOTE(review): "oneway" is only recognised as the 3rd
# argument there, so this is presumably called with exactly 2
# arguments (interface, IP) - confirm against callers.
##################################################################
kill_tcp_connections_local_only()
{
kill_tcp_connections "$@" "oneway"
}
##################################################################
# tickle any TCP connections with the given IP
#
# $1 is the IP address.  Each established connection for that address
# is logged and passed to "ctdb tickle" on stdin, once per direction.
# Nothing is logged or run when there are no connections.
#
# The tool is run via $CTDB, like everywhere else in this file, so
# that an overridden or stubbed command is honoured.
##################################################################
tickle_tcp_connections()
{
	_ip="$1"

	# Get connections, both directions
	_conns=$(get_tcp_connections_for_ip "$_ip" |
		awk '{ print $1, $2 ; print $2, $1 }')

	# Without this guard an empty line would be logged as a bogus
	# connection and fed to the tickle command
	if [ -z "$_conns" ]; then
		return 0
	fi

	echo "$_conns" | awk '{ print "Tickle TCP connection", $1, $2 }'
	echo "$_conns" | $CTDB tickle
}
# Print established TCP connections that have the given IP as their
# source address according to ss.
# $1 is the IP address
# The first line of ss output is skipped and, for each remaining
# line, the 3rd and 4th columns are printed separated by a space.
get_tcp_connections_for_ip()
{
_ip="$1"
ss -tn state established "src [$_ip]" | awk 'NR > 1 {print $3, $4}'
}
########################################################
# add_ip_to_iface <iface> <ip> <maskbits>
#
# Bring the interface up and add the address to it.  Exits via die()
# if the interface can not be brought up.  Returns 1, after printing
# an error, if the address can not be added.
add_ip_to_iface()
{
_iface=$1
_ip=$2
_maskbits=$3
# Ensure interface is up
ip link set "$_iface" up ||
die "Failed to bringup interface $_iface"
# Only need to define broadcast for IPv4
# An address containing ':' is treated as IPv6
case "$_ip" in
*:*) _bcast="" ;;
*) _bcast="brd +" ;;
esac
# Intentionally unquoted multi-word value here
# shellcheck disable=SC2086
ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
echo "Failed to add $_ip/$_maskbits on dev $_iface"
return 1
}
# Wait 5 seconds for IPv6 addresses to stop being tentative...
# (empty $_bcast means IPv6; 10 polls of 0.5 seconds each)
if [ -z "$_bcast" ]; then
for _x in $(seq 1 10); do
ip addr show to "${_ip}/128" | grep -q "tentative" || break
sleep 0.5
done
# If the address was a duplicate then it won't be on the
# interface so flag an error.
_t=$(ip addr show to "${_ip}/128")
case "$_t" in
"")
echo "Failed to add $_ip/$_maskbits on dev $_iface"
return 1
;;
*tentative* | *dadfailed*)
# Still not usable, so remove the address again
echo "Failed to add $_ip/$_maskbits on dev $_iface"
ip addr del "$_ip/$_maskbits" dev "$_iface"
return 1
;;
esac
fi
}
# delete_ip_from_iface <iface> <ip> <maskbits>
#
# Remove the address from the interface.  Returns 1, after printing
# an error, if the address can not be removed.
delete_ip_from_iface()
{
	_iface=$1
	_ip=$2
	_maskbits=$3

	# This could be set globally for all interfaces but it is probably
	# better to avoid surprises, so limit it the interfaces where CTDB
	# has public IP addresses.  There isn't anywhere else convenient
	# to do this so just set it each time.  This is much cheaper than
	# remembering and re-adding secondaries.
	set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1

	if ! ip addr del "$_ip/$_maskbits" dev "$_iface"; then
		echo "Failed to del $_ip on dev $_iface"
		return 1
	fi
}
# If the given IP is hosted then print 2 items: maskbits and iface
# $1 is the IP address; one containing ':' is looked up as a /128,
# anything else as a /32.  Any "@..." suffix on the interface name is
# dropped.  Nothing is printed if the address is not hosted.
ip_maskbits_iface()
{
	_addr="$1"

	_bits=32
	case "$_addr" in
	*:*) _bits=128 ;;
	esac

	# The awk program text is kept exactly as it was
	ip addr show to "${_addr}/${_bits}" 2>/dev/null |
		awk 'NR == 1 { iface = $2; sub(":$", "", iface) ;
sub("@.*", "", iface) }
$1 ~ /inet/ { mask = $2; sub(".*/", "", mask);
print mask, iface }'
}
# Remove a public address from whichever interface hosts it
# $1 is the IP address, optionally followed by /maskbits
# Does nothing if the address is not currently hosted.
drop_ip()
{
	_addr="${1%/*}" # Remove optional maskbits

	# Intentional word splitting here
	# shellcheck disable=SC2046
	set -- $(ip_maskbits_iface "$_addr")
	if [ -z "$1" ]; then
		return 0
	fi

	_maskbits="$1"
	_iface="$2"
	echo "Removing public address $_addr/$_maskbits from device $_iface"
	delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1
}
# Succeed if ${CTDB_BASE}/public_addresses exists as a regular file
have_public_addresses()
{
	test -f "${CTDB_BASE}/public_addresses"
}
# This sets $public_ifaces as a side-effect.
# The result is the sorted, unique, newline-separated union of the
# interfaces listed in the public_addresses file and those reported
# by "ctdb -X ifaces".  $ctdb_ifaces is also set, to the latter.
get_public_ifaces()
{
	# Get all the interfaces listed in the public_addresses file
	public_ifaces=$(sed -e '/^#.*/d' \
		-e 's/^[^\t ]*[\t ]*//' \
		-e 's/,/ /g' \
		-e 's/[\t ]*$//' "${CTDB_BASE}/public_addresses")

	# Get the interfaces for which CTDB has public IPs configured.
	# That is, for all but the 1st line, get the 1st field.
	ctdb_ifaces=$($CTDB -X ifaces | sed -e '1d' -e 's@^|@@' -e 's@|.*@@')

	# Add $ctdb_ifaces and make $public_ifaces unique
	# Use word splitting to put one interface on each line
	# shellcheck disable=SC2086
	public_ifaces=$(printf '%s\n' $public_ifaces $ctdb_ifaces | sort -u)
}
# Call drop_ip() for the 1st field of every line in the
# public_addresses file, except for lines that start with '#'.
drop_all_public_ips()
{
	# _x is intentionally ignored
	# shellcheck disable=SC2034
	while read -r _ip _x; do
		# Stripping a leading '#' only changes commented entries
		[ "${_ip#\#}" = "$_ip" ] || continue

		drop_ip "$_ip"
	done <"${CTDB_BASE}/public_addresses"
}
# Write 1 to the IPv4 and IPv6 route flush entries under
# /proc/sys/net, where they are writable.
flush_route_cache()
{
	for _flush_family in ipv4 ipv6; do
		set_proc_maybe "sys/net/${_flush_family}/route/flush" 1
	done
}
########################################################
# Interface monitoring
# If the interface is a virtual one (e.g. VLAN) then get the
# underlying interface
# $1 is the interface name, the result is printed on stdout
interface_get_real()
{
	# If the name is of a VLAN (i.e. contains an '@') then strip
	# everything up to and including the last '@', otherwise print
	# the whole interface name
	echo "${1##*@}"
}
# Check whether an interface is operational
# $1 is the interface name
# Returns 0 if the interface is considered operational.  Otherwise
# an error is printed and 1 is returned.
interface_monitor()
{
_iface="$1"
_iface_info=$(ip -br link show "$_iface" 2>&1) || {
echo "ERROR: Monitored interface ${_iface} does not exist"
return 1
}
# If the interface is a virtual one (e.g. VLAN) then get the
# underlying interface.
# The name is taken from the 1st space-separated word of the output
_realiface=$(interface_get_real "${_iface_info%% *}")
# Being able to read net/bonding/<iface> under /proc/ is what
# identifies a bond
if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null); then
# This is a bond: various monitoring strategies
echo "$_bi" | grep -q 'Currently Active Slave: None' && {
echo "ERROR: No active slaves for bond device ${_realiface}"
return 1
}
echo "$_bi" | grep -q '^MII Status: up' || {
echo "ERROR: public network interface ${_realiface} is down"
return 1
}
echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && {
# This works around a bug in the driver where the
# overall bond status can be up but none of the actual
# physical interfaces have a link.
# The 1st "MII Status:" line is skipped, so only the remaining
# ones are examined
echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || {
echo "ERROR: No active slaves for 802.ad bond device ${_realiface}"
return 1
}
}
return 0
else
# Not a bond
case "$_iface" in
lo*)
# loopback is always working
return 0
;;
ib*)
# we don't know how to test ib links
return 0
;;
*)
ethtool "$_iface" | grep -q 'Link detected: yes' || {
# On some systems, this is not successful when a
# cable is plugged but the interface has not been
# brought up previously. Bring the interface up
# and try again...
ip link set "$_iface" up
ethtool "$_iface" | grep -q 'Link detected: yes' || {
echo "ERROR: No link on the public network interface ${_iface}"
return 1
}
}
return 0
;;
esac
fi
}
########################################################
# Simple counters
#
# Common setup for the ctdb_counter_* functions: sets $_counter_file
# to the counter's file in $script_state_dir.  Takes at most 1
# argument, the counter name.  If an argument is given, even an empty
# one, the file is "<name>.failcount", otherwise it is "failcount".
_ctdb_counter_common()
{
	if [ $# -gt 1 ]; then
		die "usage: _ctdb_counter_common [name]"
	fi

	case $# in
	1) _counter_name="${1}.failcount" ;;
	*) _counter_name="failcount" ;;
	esac

	[ -n "$script_state_dir" ] ||
		die "ctdb_counter_* functions need ctdb_setup_state_dir()"

	_counter_file="${script_state_dir}/${_counter_name}"
}
# Reset a counter to zero by truncating (or creating) its file
# $1 is the optional counter name
# Some code passes an argument
# shellcheck disable=SC2120
ctdb_counter_init()
{
_ctdb_counter_common "$1"
: >"$_counter_file"
}
# Add 1 to a counter
# $1 is the optional counter name
ctdb_counter_incr()
{
_ctdb_counter_common "$1"
# unary counting using newlines!
# Each increment appends a single newline to the counter file
echo >>"$_counter_file"
}
# Print the current value of a counter
# $1 is the optional counter name
# The value is the size of the counter file in bytes, or 0 if the
# file can not be read.
ctdb_counter_get()
{
	_ctdb_counter_common "$1"

	# unary counting!
	if ! _val=$(wc -c 2>/dev/null <"$_counter_file"); then
		_val=0
	fi

	# Strip leading spaces from output of wc (on freebsd)
	# shellcheck disable=SC2086
	echo $_val
}
# Succeed if the file for a counter exists
# $1 is the optional counter name
ctdb_counter_exists()
{
	_ctdb_counter_common "$1"

	test -e "$_counter_file"
}
#
# Fail counter/threshold combination to control warnings and node unhealthy
#

# Check whether a threshold can be used
# $1 is the threshold, $2 is the name of the check (for the warning)
# Returns 0 if the threshold consists only of digits.  Returns 1 for
# an empty threshold, silently, and for anything else after printing
# a warning.
_failcount_validate_threshold()
{
	# Empty is a failure that doesn't need a warning
	if [ -z "$1" ]; then
		return 1
	fi

	if ! echo "$1" | grep -qx '[0-9]*'; then
		echo "WARNING: ${1} is an invalid threshold in \"${2}\" check"
		return 1
	fi

	return 0
}
# Common setup for the failcount_* functions
# $1 is the name of the thing being counted
# Sets $_counter to that name with each '/' replaced by "_SLASH_"
# and each space replaced by '_'.
_failcount_common()
{
_thing="$1"
_counter=$(echo "$_thing" | sed -e 's@/@_SLASH_@g' -e 's@ @_@g')
}
# Initialise the fail counter for a thing
# $1 is the name of the thing being counted
failcount_init()
{
_thing="$1"
_failcount_common "$_thing"
ctdb_counter_init "$_counter"
}
# Reset the fail counter for a thing that is no longer failing
# $1 is the name of the thing being counted
# A notice is printed, and the counter reinitialised, only if the
# counter is currently non-zero.
failcount_reset()
{
	_thing="$1"

	_failcount_common "$_thing"

	_failcount=$(ctdb_counter_get "$_counter")
	[ "$_failcount" -ne 0 ] || return 0

	printf 'NOTICE: %s: no longer failing\n' "$_thing"
	ctdb_counter_init "$_counter"
}
# failcount_incr <thing> <thresholds> <output>
#
# Record another failure of <thing> and act on the resulting count.
#
# <thresholds> is one of:
#   "<warn>:<unhealthy>" - either part may be empty
#   "<warn>"             - warning threshold only
#   ""                   - warn from the 1st failure
#
# If the unhealthy threshold is valid and has been reached then an
# error is printed and the script exits with status 1.  Otherwise, if
# the warning threshold is valid and has been reached then a warning
# is printed.  <output> is only printed when the count is exactly
# equal to the threshold that was reached.
#
# An empty or invalid warning threshold means there is nothing to
# compare against, so no threshold warning is printed.  Previously
# this fell through and printed a bogus "threshold 0" warning.
failcount_incr()
{
	_thing="$1"
	_thresholds="$2"
	_output="$3"

	_failcount_common "$_thing"

	ctdb_counter_incr "$_counter"
	_failcount=$(ctdb_counter_get "$_counter")

	case "$_thresholds" in
	*:*)
		_warn_threshold="${_thresholds%:*}"
		_unhealthy_threshold="${_thresholds#*:}"
		;;
	"")
		_warn_threshold=1
		_unhealthy_threshold=""
		;;
	*)
		_warn_threshold="$_thresholds"
		_unhealthy_threshold=""
		;;
	esac

	if _failcount_validate_threshold "$_unhealthy_threshold" "$_thing"; then
		if [ "$_failcount" -ge "$_unhealthy_threshold" ]; then
			printf 'ERROR: %s: fail count %d >= threshold %d\n' \
				"$_thing" \
				"$_failcount" \
				"$_unhealthy_threshold"
			# Only print output when exceeding the
			# unhealthy threshold
			if [ "$_failcount" -eq "$_unhealthy_threshold" ] &&
				[ -n "$_output" ]; then
				echo "$_output"
			fi
			exit 1
		fi
	fi

	# No usable warning threshold: any complaint about an invalid
	# value has already been printed by the validation
	if ! _failcount_validate_threshold "$_warn_threshold" "$_thing"; then
		return 0
	fi

	if [ "$_failcount" -lt "$_warn_threshold" ]; then
		return 0
	fi

	printf 'WARNING: %s: fail count %d >= threshold %d\n' \
		"$_thing" \
		"$_failcount" \
		"$_warn_threshold"
	if [ "$_failcount" -eq "$_warn_threshold" ] && [ -n "$_output" ]; then
		# Only print output when exceeding the warning threshold
		echo "$_output"
	fi
}
########################################################
# ctdb_setup_state_dir <type> <name>
# Sets $script_state_dir to ${CTDB_SCRIPT_VARDIR}/<type>/<name> and
# creates that directory.  Exits via die() on a usage error or if the
# directory can not be created.
ctdb_setup_state_dir()
{
	if [ $# -ne 2 ]; then
		die "usage: ctdb_setup_state_dir <type> <name>"
	fi

	_type="$1"
	_name="$2"

	script_state_dir="${CTDB_SCRIPT_VARDIR}/${_type}/${_name}"

	if ! mkdir -p "$script_state_dir"; then
		die "Error creating script state dir \"${script_state_dir}\""
	fi
}
##################################################################
# Reconfigure a service on demand
#
# Common setup for the ctdb_service_*_reconfigure functions: sets
# $_ctdb_service_reconfigure_flag to the flag file in
# $script_state_dir.  Exits via die() if $script_state_dir is not set.
_ctdb_service_reconfigure_common()
{
	[ -n "$script_state_dir" ] ||
		die "ctdb_service_*_reconfigure() needs ctdb_setup_state_dir()"

	_ctdb_service_reconfigure_flag="${script_state_dir}/need_reconfigure"
}
# Succeed if the reconfigure flag file exists for the service
ctdb_service_needs_reconfigure()
{
_ctdb_service_reconfigure_common
[ -e "$_ctdb_service_reconfigure_flag" ]
}
# Flag that the service needs to be reconfigured, by creating (or
# truncating) the flag file
ctdb_service_set_reconfigure()
{
_ctdb_service_reconfigure_common
: >"$_ctdb_service_reconfigure_flag"
}
# Clear the reconfigure flag, by removing the flag file.  A missing
# flag file is not an error (rm -f).
ctdb_service_unset_reconfigure()
{
_ctdb_service_reconfigure_common
rm -f "$_ctdb_service_reconfigure_flag"
}
# Reconfigure the service named by $service_name
# Clears the reconfigure flag and then runs service_reconfigure().
# If that fails then its status is returned, otherwise the default
# counter is reinitialised.
ctdb_service_reconfigure()
{
	echo "Reconfiguring service \"${service_name}\"..."
	ctdb_service_unset_reconfigure

	if service_reconfigure; then
		# Intentionally have this use $service_name as default
		# shellcheck disable=SC2119
		ctdb_counter_init
	else
		# $? here is still the status of service_reconfigure
		return $?
	fi
}
# Default service_reconfigure() function does nothing.
# It is called by ctdb_service_reconfigure() and always succeeds.
service_reconfigure()
{
:
}
# Default service_start() and service_stop() functions.
# These may be overridden in an eventscript.
# Starts the service named by $service_name using service()
service_start()
{
service "$service_name" start
}
# Default implementation: stops the service named by $service_name
# using service()
service_stop()
{
service "$service_name" stop
}
##################################################################
# This exists only for backward compatibility with 3rd party scripts
# that call it
# It does nothing and always succeeds.
ctdb_standard_event_handler()
{
:
}
# iptables_wrapper <family> <args...>
#
# Run ip6tables if <family> is "inet6", otherwise iptables, with the
# remaining arguments.  The command is run under a lock, waiting for
# up to 30 seconds to obtain it.
iptables_wrapper()
{
	_family="$1"
	shift

	case "$_family" in
	inet6) _iptables_cmd="ip6tables" ;;
	*) _iptables_cmd="iptables" ;;
	esac

	# iptables doesn't like being re-entered, so flock-wrap it.
	flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@"
}
# AIX (and perhaps others?) doesn't have mktemp
# type is commonly supported and more portable than which(1)
#
# Minimal replacement, only defined when no mktemp is available:
#   mktemp [-d]
# Creates a file, or a directory with -d, below ${TMPDIR:-/tmp} that
# is only accessible by the owner and prints its name.  The name ends
# in a number derived from random data.
#
# Creation is exclusive: mkdir fails if the name exists and the file
# is created with noclobber set, so a pre-existing file is never
# truncated or reused.  Returns 1, printing nothing, if the file or
# directory can not be created.
# shellcheck disable=SC2039
if ! type mktemp >/dev/null 2>&1; then
	mktemp()
	{
		_dir=false
		if [ "$1" = "-d" ]; then
			_dir=true
			shift
		fi
		_d="${TMPDIR:-/tmp}"
		_hex10=$(dd if=/dev/urandom count=20 2>/dev/null |
			cksum |
			awk '{print $1}')
		_t="${_d}/tmp.${_hex10}"
		# Subshell so that umask and noclobber do not leak out
		(
			umask 077
			if $_dir; then
				mkdir "$_t"
			else
				set -C
				: >"$_t"
			fi
		) || return 1
		echo "$_t"
	}
fi
######################################################################
# NFS callout handling
#
# nfs_callout_init <state_dir>
#
# Choose and export the NFS callout, export related settings and set
# the nfs_callout_cache* variables used by nfs_callout_register() and
# nfs_callout().
nfs_callout_init()
{
	_state_dir="$1"

	# Default callout, unless one is already configured
	: "${CTDB_NFS_CALLOUT:=${CTDB_BASE}/nfs-linux-kernel-callout}"
	# Always export, for statd callout
	export CTDB_NFS_CALLOUT

	# If the callout wants to use this then it must create it
	export CTDB_NFS_CALLOUT_STATE_DIR="${_state_dir}/callout-state"

	# Export, if set, for use by clustered NFS callouts
	[ -z "$CTDB_NFS_STATE_FS_TYPE" ] || export CTDB_NFS_STATE_FS_TYPE
	[ -z "$CTDB_NFS_STATE_MNT" ] || export CTDB_NFS_STATE_MNT
	[ -z "$CTDB_NFS_EXPORTS_FILE" ] || export CTDB_NFS_EXPORTS_FILE

	nfs_callout_cache="${_state_dir}/nfs_callout_cache"
	nfs_callout_cache_callout="${nfs_callout_cache}/CTDB_NFS_CALLOUT"
	nfs_callout_cache_ops="${nfs_callout_cache}/ops"
}
# Ask the NFS callout which operations it implements and cache the
# answer as one empty file per operation in $nfs_callout_cache_ops.
# The callout is run with the single argument "register" and prints
# one operation per line.  If it prints nothing then a file called
# ALL is created instead.  The name of the callout is also recorded,
# in $nfs_callout_cache_callout.
#
# The cache variables are set by nfs_callout_init().  They are checked
# first because an empty $nfs_callout_cache_ops would turn the cache
# cleanup below into "rm -f /*".
nfs_callout_register()
{
	if [ -z "$nfs_callout_cache_ops" ] ||
		[ -z "$nfs_callout_cache_callout" ]; then
		die "nfs_callout_register() needs nfs_callout_init()"
	fi

	mkdir -p "$nfs_callout_cache_ops"
	# :? is a 2nd line of defence for the destructive glob
	rm -f "${nfs_callout_cache_ops:?}"/*

	echo "$CTDB_NFS_CALLOUT" >"$nfs_callout_cache_callout"

	_t=$("$CTDB_NFS_CALLOUT" "register")
	if [ -n "$_t" ]; then
		echo "$_t" |
			while IFS="" read -r _op; do
				touch "${nfs_callout_cache_ops}/${_op}"
			done
	else
		touch "${nfs_callout_cache_ops}/ALL"
	fi
}
# nfs_callout <operation> [ <args...> ]
#
# Run the NFS callout with the given arguments, but only if it has
# registered the operation (or registered for ALL operations).
# Unregistered operations are silently skipped, successfully.
nfs_callout()
{
	# Re-run registration if $CTDB_NFS_CALLOUT has changed
	_prev=""
	[ ! -r "$nfs_callout_cache_callout" ] ||
		read -r _prev <"$nfs_callout_cache_callout"
	[ "$CTDB_NFS_CALLOUT" = "$_prev" ] || nfs_callout_register

	# Skip the operation unless it is registered...
	if [ ! -e "${nfs_callout_cache_ops}/${1}" ] &&
		[ ! -e "${nfs_callout_cache_ops}/ALL" ]; then
		return 0
	fi

	"$CTDB_NFS_CALLOUT" "$@"
}
########################################################
# tickle handling
########################################################
# update_tickles <port>
#
# Synchronise the tickles registered with CTDB for the given port
# with the established connections to public IPs held by this node:
# tickles are added for connections that lack one and deleted for
# connections that no longer exist.
update_tickles()
{
_port="$1"
tickledir="${CTDB_SCRIPT_VARDIR}/tickles"
mkdir -p "$tickledir"
# What public IPs do I hold?
_pnn=$(ctdb_get_pnn)
_ips=$($CTDB -X ip | awk -F'|' -v pnn="$_pnn" '$3 == pnn {print $2}')
# IPs and port as ss filters
# The IP filters are joined with " || "
_ip_filter=""
for _ip in $_ips; do
_ip_filter="${_ip_filter}${_ip_filter:+ || }src [${_ip}]"
done
_port_filter="sport == :${_port}"
# Record connections to our public IPs in a temporary file.
# This temporary file is in CTDB's private state directory and
# $$ is used to avoid a very rare race involving CTDB's script
# debugging. No security issue, nothing to see here...
_my_connections="${tickledir}/${_port}.connections.$$"
# Parentheses are needed around the filters for precedence but
# the parentheses can't be empty!
#
# Recent versions of ss print square brackets around IPv6
# addresses. While it is desirable to update CTDB's address
# parsing and printing code, something needs to be done here
# for backward compatibility, so just delete the brackets.
ss -tn state established \
"${_ip_filter:+( ${_ip_filter} )}" \
"${_port_filter:+( ${_port_filter} )}" |
awk 'NR > 1 {print $4, $3}' |
tr -d '][' |
sort >"$_my_connections"
# Record our current tickles in a temporary file
_my_tickles="${tickledir}/${_port}.tickles.$$"
for _i in $_ips; do
$CTDB -X gettickles "$_i" "$_port" |
awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
done |
sort >"$_my_tickles"
# Add tickles for connections that we haven't already got tickles for
# (comm -23 prints lines that are only in the 1st file)
comm -23 "$_my_connections" "$_my_tickles" |
$CTDB addtickle
# Remove tickles for connections that are no longer there
# (comm -13 prints lines that are only in the 2nd file)
comm -13 "$_my_connections" "$_my_tickles" |
$CTDB deltickle
rm -f "$_my_connections" "$_my_tickles"
# Remove stale files from killed scripts
# Files can't have spaces in name, more portable than -print0/-0
# shellcheck disable=SC2038
(cd "$tickledir" && find . -type f -mmin +10 | xargs -r rm)
}
########################################################
# load a site local config file
#
# ${CTDB_BASE}/rc.local is sourced if it is executable, followed by
# each executable file in ${CTDB_BASE}/rc.local.d.
########################################################
if [ -x "${CTDB_BASE}/rc.local" ]; then
	. "${CTDB_BASE}/rc.local"
fi

if [ -d "${CTDB_BASE}/rc.local.d" ]; then
	for i in "${CTDB_BASE}/rc.local.d"/*; do
		if [ -x "$i" ]; then
			. "$i"
		fi
	done
fi

script_name="${0##*/}" # basename