#!/bin/sh

# Print debugging information about a hung event script.
#
# Arguments:
#   $1 - PID of the hung script
#   $2 - name of the event that was being run
#
# This script only works on Linux.  Please modify (and submit patches)
# for other operating systems.

# If CTDB_BASE is not already set, default it to the physical
# directory (symlinks resolved by cd -P) that contains this script.
[ -n "$CTDB_BASE" ] || \
    CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; echo "$PWD")

. "${CTDB_BASE}/functions"

# Provided by the sourced functions file.
load_script_options

# Testing hook: when CTDB_DEBUG_HUNG_SCRIPT_LOGFILE is set, append all
# output (stdout and stderr) to a ".part" file next to it.  The file
# is renamed to its final name once the report is complete.
if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
    tmp="${CTDB_DEBUG_HUNG_SCRIPT_LOGFILE}.part"
    exec >>"$tmp" 2>&1
fi

(
    # No use running several of these in parallel if, say, "releaseip"
    # event hangs for multiple IPs.  In that case the output would be
    # interleaved in the log and would just be confusing.
    #
    # File descriptor 9 is the lock file opened by the redirection at
    # the end of this subshell.  Give up if the lock can not be
    # obtained within 2 seconds.
    flock --wait 2 9 || exit 1

    echo "===== Start of hung script debug for PID=\"$1\", event=\"$2\" ====="

    echo "pstree -p -a ${1}:"
    out=$(pstree -p -a "$1")
    # printf rather than echo: the process arguments shown by pstree
    # are arbitrary data and must not be subject to echo's escape
    # handling.
    printf '%s\n' "$out"

    # Check for processes matching a regular expression and print
    # stack traces.  This could help confirm that certain processes
    # are stuck in certain places such as the cluster filesystem.  The
    # regexp must separate items with "|" and must not contain
    # parentheses.  The default pattern can be replaced for testing.
    default_pat='exportfs|rpcinfo'
    pat="${CTDB_DEBUG_HUNG_SCRIPT_STACKPAT:-${default_pat}}"

    # For each pstree line of the form "...-<name>,<pid>..." where
    # <name> matches the pattern, sed emits "<pid> <name>".  read -r
    # keeps any backslashes in that output intact.
    printf '%s\n' "$out" |
        sed -r -n "s@.*-(.*(${pat}).*),([0-9]*).*@\\3 \\1@p" |
        while read -r pid name ; do
            trace=$(cat "/proc/${pid}/stack" 2>/dev/null)
            # No!  Checking the exit code afterwards is actually clearer...
            # shellcheck disable=SC2181
            if [ $? -eq 0 ] ; then
                echo "---- Stack trace of interesting process ${pid}[${name}] ----"
                printf '%s\n' "$trace"
            fi
        done

    if [ "$2" != "init" ] ; then
        echo "---- ctdb scriptstatus ${2}: ----"
        # CTDB is left unquoted as in the original; it is not set in
        # this file - presumably it comes from the sourced functions
        # file and may carry arguments.  TODO confirm.
        $CTDB scriptstatus "$2"
    fi

    echo "===== End of hung script debug for PID=\"$1\", event=\"$2\" ====="

    # Testing hook: the report is complete, so move the partial log
    # (see the assignment of $tmp before this subshell) into place.
    if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
        mv "$tmp" "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE"
    fi

) 9>"${CTDB_SCRIPT_VARDIR}/debug-hung-script.lock"