#!/bin/sh

# Print debugging information about a hung script.
#
# Arguments:
#   $1 - PID of the hung script; its process tree is shown via pstree
#   $2 - name of the event that was running; unless this is "init" the
#        output of "$CTDB scriptstatus" for the event is also shown
#
# Environment:
#   CTDB_BASE - directory containing the "functions" file; defaults to
#               the directory containing this script
#   CTDB_DEBUG_HUNG_SCRIPT_LOGFILE - testing hook, see below
#   CTDB_DEBUG_HUNG_SCRIPT_STACKPAT - overrides the default pattern
#               used to select processes whose stacks are printed
#
# This script only works on Linux.  Please modify (and submit patches)
# for other operating systems.

# Default CTDB_BASE to the physical directory containing this script.
# The steps are chained with && so that, if the directory cannot be
# entered, CTDB_BASE is left empty instead of silently being set to
# the caller's working directory.
[ -n "$CTDB_BASE" ] ||
    CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")

. "${CTDB_BASE}/functions"

# loadconfig is not defined in this file; presumably it comes from the
# "functions" file sourced above.
loadconfig

# Testing hook: when a log file is configured, append all output
# (stdout and stderr) to "<logfile>.part".  The partial file is renamed
# to its final name at the end of the script, once output is complete.
if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
    tmp="${CTDB_DEBUG_HUNG_SCRIPT_LOGFILE}.part"
    exec >>"$tmp" 2>&1
fi
# Produce the report inside a subshell that holds a lock on file
# descriptor 9.  The redirection at the end of the subshell opens that
# descriptor on the lock file.
(
    # No use running several of these in parallel if, say, "releaseip"
    # event hangs for multiple IPs.  In that case the output would be
    # interleaved in the log and would just be confusing.  Give up if
    # the lock cannot be obtained within 2 seconds.
    flock --wait 2 9 || exit 1

    echo "===== Start of hung script debug for PID=\"$1\", event=\"$2\" ====="

    echo "pstree -p -a ${1}:"
    out=$(pstree -p -a "$1")
    echo "$out"

    # Check for processes matching a regular expression and print
    # stack traces.  This could help confirm that certain processes
    # are stuck in certain places such as the cluster filesystem.  The
    # regexp must separate items with "|" and must not contain
    # parentheses.  The default pattern can be replaced for testing.
    default_pat='exportfs|rpcinfo'
    pat="${CTDB_DEBUG_HUNG_SCRIPT_STACKPAT:-${default_pat}}"

    # The sed expression rewrites each pstree line containing a match
    # as "<pid> <name>", where <name> is the text captured before
    # ",<pid>".  read -r keeps any backslashes in that text intact.
    echo "$out" |
        sed -r -n "s@.*-(.*(${pat}).*),([0-9]*).*@\3 \1@p" |
        while read -r pid name ; do
            trace=$(cat "/proc/${pid}/stack" 2>/dev/null)
            # No!  Checking the exit code afterwards is actually clearer...
            # shellcheck disable=SC2181
            if [ $? -eq 0 ] ; then
                echo "---- Stack trace of interesting process ${pid}[${name}] ----"
                echo "$trace"
            fi
        done

    if [ "$2" != "init" ] ; then
        echo "---- ctdb scriptstatus ${2}: ----"
        # NOTE(review): $CTDB is not set in this file and is left
        # unquoted as in the original; presumably it is provided by the
        # sourced "functions" file and may carry arguments -- confirm.
        $CTDB scriptstatus "$2"
    fi

    echo "===== End of hung script debug for PID=\"$1\", event=\"$2\" ====="

    # Testing hook: the output is now complete, so move the partial
    # log file to its final name.
    if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then
        mv "$tmp" "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE"
    fi

) 9>"${CTDB_SCRIPT_VARDIR}/debug-hung-script.lock"