1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-10 01:18:15 +03:00
samba-mirror/ctdb/config/ctdb.init
Martin Schwenke a3ee4a900f Initscript - add backup of corrupt non-persistent databases
Corrupt non-persistent databases never get analysed because ctdbd
zeroes them at startup.

Modify the initscript so that corrupt non-persistent databases are
moved aside to a backup.  If the number of backups for a particular
database exceeds $CTDB_MAX_CORRUPT_DB_BACKUPS (default 10) then the
oldest excess backups are garbage collected.

Abstracts from and cleans up the code for checking persistent
databases.

Logging of related messages is done to syslog or a log file as
specified.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 00cd75595685dae829758abf1a4cb644af7ed50e)
2012-03-28 15:02:07 +11:00

432 lines
9.5 KiB
Bash
Executable File

#!/bin/sh
#
##############################
# ctdb: Starts the clustered tdb daemon
#
# chkconfig: - 90 01
#
# description: Starts and stops the clustered tdb daemon
# pidfile: /var/run/ctdbd/ctdbd.pid
#
### BEGIN INIT INFO
# Provides: ctdb
# Required-Start: $network
# Required-Stop: $network
# Default-Stop:
# Default-Start: 3 5
# Short-Description: start and stop ctdb service
# Description: initscript for the ctdb service
### END INIT INFO
# Source function library.
if [ -f /etc/init.d/functions ] ; then
    . /etc/init.d/functions
elif [ -f /etc/rc.d/init.d/functions ] ; then
    . /etc/rc.d/init.d/functions
fi

# When rc.status is present, source it and reset its status tracking.
# (rc_reset is not defined in this file; presumably rc.status provides it,
# along with the rc_status used by the "suse" branches further down.)
if [ -f /etc/rc.status ] ; then
    . /etc/rc.status
    rc_reset
    LC_ALL=en_US.UTF-8
fi

# Avoid using root's TMPDIR
unset TMPDIR

# Directory holding the CTDB helper scripts; the caller may preset it.
if [ -z "$CTDB_BASE" ] ; then
    export CTDB_BASE="/etc/ctdb"
fi

# Quoted so that a CTDB_BASE containing whitespace or glob characters still
# names exactly one file.
. "$CTDB_BASE/functions"

# loadconfig and detect_init_style are not defined in this file; presumably
# they come from the functions file sourced just above.
loadconfig network
loadconfig ctdb

# check networking is up (for redhat)
[ "$NETWORKING" = "no" ] && exit 0

detect_init_style
export CTDB_INIT_STYLE

# Path of the ctdbd binary; $CTDBD overrides the default.
ctdbd=${CTDBD:-/usr/sbin/ctdbd}

# init_style drives the case statements in start/stop/status.  Running under
# valgrind is requested explicitly rather than detected.
if [ "$CTDB_VALGRIND" = "yes" ]; then
    init_style="valgrind"
else
    init_style="$CTDB_INIT_STYLE"
fi
# Append ctdbd command-line options to CTDB_OPTIONS, one per CTDB_*
# configuration variable that is set.
#
# Values are wrapped in single quotes because start() passes CTDB_OPTIONS
# through "eval"; embedded single quotes are escaped so that a value can
# neither break the command line nor smuggle extra words into it.
#
# Globals: reads the CTDB_* settings listed below, appends to CTDB_OPTIONS.
# Outputs: a warning on stdout when no recovery lock is configured.
build_ctdb_options () {

    # maybe_set OPTION VALUE [REQUIRED]
    #
    # Without REQUIRED: append OPTION with VALUE, unless VALUE is empty.
    # With REQUIRED:    append the bare OPTION, but only if VALUE equals
    #                   REQUIRED (used for value-less flags).
    maybe_set () {
        # Nothing configured - don't set anything.
        [ -n "$2" ] || return 0

        if [ -n "$3" ] ; then
            # Flag option: only emitted when the setting matches exactly.
            [ "$3" = "$2" ] || return 0
            _opt="$1"
        else
            # Long options take "=value", short ones a separate word.
            case "$1" in
                --*) _sep="=" ;;
                *)   _sep=" " ;;
            esac
            # Turn each ' into '\'' so the value survives the later eval.
            _val=$(printf '%s\n' "$2" | sed "s/'/'\\\\''/g")
            _opt="${1}${_sep}'${_val}'"
        fi

        CTDB_OPTIONS="${CTDB_OPTIONS}${CTDB_OPTIONS:+ }${_opt}"
    }

    [ -z "$CTDB_RECOVERY_LOCK" ] && {
        echo "No recovery lock specified. Starting CTDB without split brain prevention"
    }
    maybe_set "--reclock"                "$CTDB_RECOVERY_LOCK"

    # build up CTDB_OPTIONS variable from optional parameters
    maybe_set "--logfile"                "$CTDB_LOGFILE"
    maybe_set "--nlist"                  "$CTDB_NODES"
    maybe_set "--socket"                 "$CTDB_SOCKET"
    maybe_set "--public-addresses"       "$CTDB_PUBLIC_ADDRESSES"
    maybe_set "--public-interface"       "$CTDB_PUBLIC_INTERFACE"
    maybe_set "--dbdir"                  "$CTDB_DBDIR"
    maybe_set "--dbdir-persistent"       "$CTDB_DBDIR_PERSISTENT"
    maybe_set "--event-script-dir"       "$CTDB_EVENT_SCRIPT_DIR"
    maybe_set "--transport"              "$CTDB_TRANSPORT"
    maybe_set "-d"                       "$CTDB_DEBUGLEVEL"
    maybe_set "--notification-script"    "$CTDB_NOTIFY_SCRIPT"
    maybe_set "--start-as-disabled"      "$CTDB_START_AS_DISABLED" "yes"
    maybe_set "--start-as-stopped"       "$CTDB_START_AS_STOPPED" "yes"
    maybe_set "--no-recmaster"           "$CTDB_CAPABILITY_RECMASTER" "no"
    maybe_set "--no-lmaster"             "$CTDB_CAPABILITY_LMASTER" "no"
    maybe_set "--lvs --single-public-ip" "$CTDB_LVS_PUBLIC_IP"
    maybe_set "--script-log-level"       "$CTDB_SCRIPT_LOG_LEVEL"
    maybe_set "--log-ringbuf-size"       "$CTDB_LOG_RINGBUF_SIZE"
    maybe_set "--syslog"                 "$CTDB_SYSLOG" "yes"
    maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
}
# Log a message either to syslog or to the CTDB log file.
#
# The message is taken from the arguments when there are any, otherwise
# from stdin.  Syslog is chosen when CTDB_SYSLOG is "yes" or CTDB_OPTIONS
# already contains "--syslog"; otherwise a timestamp and the message are
# appended to $CTDB_LOGFILE (default /var/log/log.ctdb).
do_log ()
{
    if [ "$CTDB_SYSLOG" = "yes" ] ||
       [ "${CTDB_OPTIONS#*--syslog}" != "$CTDB_OPTIONS" ] ; then
        # With no arguments logger reads the message from stdin.
        logger -t "ctdb.init" "$@"
        return $?
    fi

    _l="${CTDB_LOGFILE:-/var/log/log.ctdb}"
    {
        date
        if [ -z "$*" ] ; then
            cat
        else
            echo "$*"
        fi
    } >>"$_l"
}
# Decide how TDB files will be checked and record the choice in
# use_tdb_tool_check: "true" means "tdbtool check", "false" means tdbdump.
#
# Returns 1 (after logging a warning) when no checker is installed.  When
# only tdbdump can be used a warning is logged and the status of that
# do_log call is returned.
select_tdb_checker ()
{
    # Find the best TDB consistency check available.
    use_tdb_tool_check=false

    if [ -x /usr/bin/tdbtool ] ; then
        # Best case: tdbtool advertises a "check" subcommand in its help.
        if echo "help" | /usr/bin/tdbtool | grep -q check ; then
            use_tdb_tool_check=true
            return 0
        fi
        if [ -x /usr/bin/tdbdump ] ; then
            do_log <<EOF
WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
Using 'tdbdump' for database checks.
Consider updating 'tdbtool' for better checks!
EOF
            return $?
        fi
    elif [ -x /usr/bin/tdbdump ] ; then
        do_log <<EOF
WARNING: 'tdbtool' is not available.
Using 'tdbdump' to check the databases.
Consider installing a recent 'tdbtool' for better checks!
EOF
        return $?
    fi

    # Neither a usable tdbtool nor tdbdump was found.
    do_log <<EOF
WARNING: Cannot check databases since neither
'tdbdump' nor 'tdbtool check' is available.
Consider installing tdbtool or at least tdbdump!
EOF
    return 1
}
# Check a single TDB file for consistency.
#
# $1 - path of the database file.
# Uses "tdbtool check" when use_tdb_tool_check is true, tdbdump otherwise.
# Returns 0 if the database looks intact; non-zero otherwise (1 in the
# tdbtool case, tdbdump's own exit status in the tdbdump case).
check_tdb ()
{
    _db="$1"

    if ! $use_tdb_tool_check ; then
        # A database that cannot be dumped is considered corrupt.
        tdbdump "$_db" >/dev/null 2>&1
        return $?
    fi

    # tdbtool always exits with 0 :-(  - so look at what it prints instead.
    tdbtool "$_db" check 2>/dev/null |
        grep -q "Database integrity is OK" && return 0
    return 1
}
# Check every persistent database before ctdbd is started.
#
# The directory is $CTDB_DBDIR_PERSISTENT, defaulting to "persistent" below
# $CTDB_DBDIR (itself defaulting to /var/ctdb); it is created if missing.
#
# Returns 1, after logging, at the first corrupt database; 0 otherwise.
# The check is skipped when CTDB_MAX_PERSISTENT_CHECK_ERRORS is non-zero:
# that value is handed to ctdbd as --max-persistent-check-errors, so
# presumably ctdbd does its own, more tolerant check in that case.
check_persistent_databases ()
{
    _dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_DBDIR:-/var/ctdb}/persistent}"
    mkdir -p "$_dir" 2>/dev/null

    [ "${CTDB_MAX_PERSISTENT_CHECK_ERRORS:-0}" = "0" ] || return 0

    # Glob directly instead of parsing "ls" output so that paths containing
    # whitespace reach check_tdb in one piece.
    for _db in "$_dir"/*.tdb.*[0-9] ; do
        # An unmatched pattern stays literal; skip it (and non-files).
        [ -f "$_db" ] || continue
        check_tdb "$_db" || {
            do_log "Persistent database $_db is corrupted! CTDB will not start."
            return 1
        }
    done
    return 0
}
# Move corrupt non-persistent databases aside so they can be analysed later.
#
# Every database in $CTDB_DBDIR (default /var/ctdb; created if missing) is
# checked with check_tdb.  A corrupt one is renamed to
# <db>.<timestamp>.corrupt and a warning is logged.  At most
# $CTDB_MAX_CORRUPT_DB_BACKUPS (default 10) backups are kept per database;
# the ones "ls -t" sorts last (oldest modification time) are removed.
check_non_persistent_databases ()
{
    _dir="${CTDB_DBDIR:-/var/ctdb}"
    mkdir -p "$_dir" 2>/dev/null

    # Glob directly instead of parsing "ls" output so that paths containing
    # whitespace are handled as one word.
    for _db in "$_dir"/*.tdb.*[0-9] ; do
        # An unmatched pattern stays literal; skip it (and non-files).
        [ -f "$_db" ] || continue
        check_tdb "$_db" && continue

        _backup="${_db}.$(date +'%Y%m%d.%H%M%S.%N').corrupt"
        do_log <<EOF
WARNING: database ${_db} is corrupted.
Moving to backup ${_backup} for later analysis.
EOF
        mv "$_db" "$_backup"

        # Now remove excess backups.  Reading line by line (rather than
        # via xargs) keeps names with embedded blanks intact.
        ls -td "${_db}."*".corrupt" 2>/dev/null |
        tail -n +$((${CTDB_MAX_CORRUPT_DB_BACKUPS:-10} + 1)) |
        while IFS= read -r _old ; do
            rm -f -- "$_old"
        done
    done
    return 0
}
# Apply tunables from the configuration to the running daemon.
#
# Every shell variable named CTDB_SET_<Name> results in
# "ctdb setvar <Name> <value>".
#
# Returns 0 if every tunable was applied, 1 otherwise; on failure RETVAL is
# also set to 1 so that the caller's exit status reflects it.  (Previously
# the loop ran in a pipeline subshell, so RETVAL=1 was lost and failures
# went unnoticed.)
set_ctdb_variables () {
    _status=0

    # Take only the variable NAMES from "set": some shells quote the values
    # in that listing, so each value is read from the variable itself.
    _names=$(set | sed -n 's/^CTDB_SET_\([A-Za-z0-9_][A-Za-z0-9_]*\)=.*/\1/p')

    # set any tunables from the config file
    for _varname in $_names ; do
        # Safe to eval: _varname consists of identifier characters only.
        eval "_value=\$CTDB_SET_${_varname}"
        ctdb setvar "$_varname" "$_value" || {
            RETVAL=1
            _status=1
        }
    done

    return $_status
}
# Make $? equal to the given status.
#
# $1 - exit status to hand back.
# Callers use this immediately before rc_status so that a previously
# recorded RETVAL becomes the "last command" status again.
set_retval ()
{
    return $1
}
# Poll "ctdb ping" once per second until the daemon answers.
#
# $1 - maximum number of one-second waits (default 10).
# Returns 0 as soon as a ping succeeds, 1 once the limit is reached.
wait_until_ready ()
{
    _timeout="${1:-10}"
    _count=0

    until ctdb ping >/dev/null 2>&1 ; do
        # Give up once the allowed number of waits is used up.
        [ $_count -ge $_timeout ] && return 1
        sleep 1
        _count=$(($_count + 1))
    done
}
# Path of the ctdbd binary; $CTDBD overrides the default.  (The same
# assignment is also made near the top of the script.)
ctdbd="${CTDBD:-/usr/sbin/ctdbd}"
# Remove every configured public address from its interface (best effort).
#
# Reads $CTDB_PUBLIC_ADDRESSES, one "ADDRESS [INTERFACE]" entry per line.
# Entries without an interface fall back to $CTDB_PUBLIC_INTERFACE; if that
# is empty as well the entry is skipped, since "ip addr del" needs a device.
# Failures of "ip" are ignored: the address may simply not be assigned.
# Always returns 0.
drop_all_public_ips() {
    [ -n "$CTDB_PUBLIC_ADDRESSES" ] || return 0
    # Nothing to drop (and nothing to complain about) without the file.
    [ -r "$CTDB_PUBLIC_ADDRESSES" ] || return 0

    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while read -r _ip _iface _rest || [ -n "$_ip" ] ; do
        case "$_ip" in
            # Blank lines, and lines that look like comments
            # (NOTE(review): confirm the file format allows '#' comments).
            ""|"#"*) continue ;;
        esac
        _iface="${_iface:-$CTDB_PUBLIC_INTERFACE}"
        [ -n "$_iface" ] || continue
        ip addr del "$_ip" dev "$_iface" >/dev/null 2>&1
    done <"$CTDB_PUBLIC_ADDRESSES"

    return 0
}
# Start ctdbd using the mechanism that matches $init_style.
#
# Returns 0 if a daemon already answers "ctdb ping", or if ctdbd was
# started and became ready.  Returns the status of
# check_persistent_databases if that check fails (ctdbd is then not
# started).  Otherwise returns the recorded RETVAL, which is 1 when the
# daemon did not become ready in time.
start() {
echo -n $"Starting ctdbd service: "
# Nothing to do if a daemon is already responding.
ctdb ping >/dev/null 2>&1 && {
echo $"CTDB is already running"
return 0
}
# Fills CTDB_OPTIONS, which is passed to ctdbd below.
build_ctdb_options
# make sure we drop any ips that might still be held if previous
# instance of ctdb got killed with -9 or similar
drop_all_public_ips
# Database checks only run when a checking tool was found.  A corrupt
# persistent database aborts the start; corrupt non-persistent ones are
# just moved aside.
if select_tdb_checker ; then
check_persistent_databases || return $?
check_non_persistent_databases
fi
# Core files are enabled unless explicitly suppressed.
if [ "$CTDB_SUPPRESS_COREFILE" = "yes" ]; then
ulimit -c 0
else
ulimit -c unlimited
fi
# CTDB_OPTIONS contains single-quoted values (see build_ctdb_options), so
# each launch command goes through eval to re-parse it into arguments.
case $init_style in
valgrind)
eval valgrind -q --log-file=/var/log/ctdb_valgrind \
$ctdbd --valgrinding "$CTDB_OPTIONS"
RETVAL=$?
echo
;;
suse)
eval startproc $ctdbd "$CTDB_OPTIONS"
RETVAL=$?
;;
redhat)
eval $ctdbd "$CTDB_OPTIONS"
RETVAL=$?
# RETVAL ends up 1 if ctdbd failed to start or if the subsys lock file
# could not be created.
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1
;;
debian)
eval start-stop-daemon --start --quiet --background \
--exec $ctdbd -- "$CTDB_OPTIONS"
RETVAL=$?
;;
esac
# Launching succeeded: wait (default timeout of wait_until_ready) for the
# daemon to answer pings, then apply the configured tunables.  A daemon
# that never becomes ready is killed and the start counts as failed.
if [ $RETVAL -eq 0 ] ; then
if wait_until_ready ; then
set_ctdb_variables
else
RETVAL=1
pkill -9 -f $ctdbd >/dev/null 2>&1
fi
fi
# Print the distribution-specific status marker.
case $init_style in
suse)
# set_retval must directly precede rc_status: it makes $? equal to
# RETVAL, which is presumably what rc_status -v reports.
set_retval $RETVAL
rc_status -v
;;
redhat)
[ $RETVAL -eq 0 ] && success || failure
echo
;;
esac
return $RETVAL
}
# Shut down ctdbd, escalating to SIGKILL if it does not exit.
#
# Returns 0 if no ctdbd process was found; otherwise returns the exit
# status of "ctdb shutdown", even if the daemon later had to be killed.
stop() {
echo -n $"Shutting down ctdbd service: "
# pkill -0 sends no signal; it only tests whether a process whose command
# line matches $ctdbd exists.
pkill -0 -f $ctdbd || {
echo -n " Warning: ctdbd not running ! "
case $init_style in
suse)
rc_status -v
;;
redhat)
echo ""
;;
esac
return 0
}
# Ask the daemon to shut down cleanly and remember how that went.
ctdb shutdown >/dev/null 2>&1
RETVAL=$?
# Wait for the process to disappear, checking once per second.  After
# more than 30 checks, kill ctdbd and any processes matching the
# events.d path on every further pass.
count=0
while pkill -0 -f $ctdbd ; do
sleep 1
count=$(($count + 1))
[ $count -gt 30 ] && {
echo -n $"killing ctdbd "
pkill -9 -f $ctdbd
pkill -9 -f $CTDB_BASE/events.d/
}
done
# make sure all ips are dropped, pkill -9 might leave them hanging around
drop_all_public_ips
case $init_style in
suse)
# re-set the return code to the recorded RETVAL in order
# to print the correct status message
set_retval $RETVAL
rc_status -v
;;
redhat)
[ $RETVAL -eq 0 ] && success || failure
# The subsys lock file is only removed after a clean shutdown.
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
echo ""
;;
esac
return $RETVAL
}
# Stop the daemon, then start it again.
#
# The outcome of stop is ignored; the return status is that of start.
restart ()
{
    stop
    start
    return $?
}
# Report whether ctdbd is running.
#
# Running:     prints the output of "ctdb status" and returns its status.
# Not running: prints a notice and returns the failed ping's exit status,
#              except for init style "redhat", where the result is 2 if
#              the subsys lock file still exists and 3 otherwise.
status() {
    echo -n $"Checking for ctdbd service: "

    if ctdb ping >/dev/null 2>&1 ; then
        echo ""
        ctdb status
        return $?
    else
        # In the else branch $? still holds the status of the failed ping.
        RETVAL=$?
    fi

    echo -n " ctdbd not running. "
    if [ "$init_style" = "suse" ] ; then
        # set_retval must directly precede rc_status so that $? is RETVAL.
        set_retval $RETVAL
        rc_status -v
    elif [ "$init_style" = "redhat" ] ; then
        if [ -f /var/lock/subsys/ctdb ] ; then
            echo $"ctdb dead but subsys locked"
            RETVAL=2
        else
            echo $"ctdb is stopped"
            RETVAL=3
        fi
    fi
    return $RETVAL
}
# Optional local hook: if $CTDB_BASE/rc.ctdb exists it is run first, with
# the requested action as its argument.  Its exit status is not used.
if [ -f "$CTDB_BASE/rc.ctdb" ] ; then
    "$CTDB_BASE/rc.ctdb" $1
fi

# Dispatch on the requested action; the script exits with the status of
# whichever branch ran.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart|reload|force-reload)
        restart
        ;;
    status)
        status
        ;;
    condrestart|try-restart)
        # Restart only if currently running; never report a failure.
        if ctdb status > /dev/null ; then
            restart || :
        fi
        ;;
    cron)
        # used from cron to auto-restart ctdb
        if ! ctdb status > /dev/null ; then
            restart
        fi
        ;;
    *)
        echo $"Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
        exit 1
        ;;
esac

exit $?