#!/bin/sh

# This script parses /proc/locks and finds the processes that are holding
# locks on CTDB databases.  For all those processes the script dumps a
# stack trace.
#
# This script can be used only if Samba is configured to use fcntl locks
# rather than mutex locks.

# Locate the CTDB base directory unless the caller already provided one.
# The default is the physical (symlink-resolved) directory containing this
# script.
#  - "&&" (not ";") so that a failed cd leaves CTDB_BASE empty instead of
#    silently reporting the caller's working directory.
#  - CDPATH is cleared for the cd so that a relative directory can neither
#    be redirected elsewhere nor cause cd to print into the captured value.
[ -n "$CTDB_BASE" ] ||
	CTDB_BASE=$(d=$(dirname -- "$0") && CDPATH= cd -P -- "$d" && echo "$PWD")

# Pull in the shared helper library that lives in the base directory.
. "${CTDB_BASE}/functions"

# type is at least mentioned in POSIX and more is portable than which(1)
# shellcheck disable=SC2039
if ! type gstack >/dev/null 2>& 1 ; then
gstack ( )
{
_pid = " $1 "
gdb -batch --quiet -nx " /proc/ ${ _pid } /exe " " $_pid " \
-ex "thread apply all bt" 2>/dev/null |
grep '^\(#\|Thread \)'
}
fi
2018-04-20 12:15:26 +10:00
# Load/cache database options from configuration file
# (helper from the sourced functions file; presumably what provides the
# CTDB_DBDIR and CTDB_DBDIR_PERSISTENT values used below - TODO confirm)
ctdb_get_db_options

# Produce the lock report.  The report runs in a subshell that takes a
# non-blocking flock on fd 9 (the debug_locks.lock file) and gives up
# immediately if the lock is already held.  Everything the subshell writes
# to stdout is piped to script_log with the tag "ctdbd-lock".
(
	flock -n 9 || exit 1

	echo "===== Start of debug locks PID=$$ ====="

	# Create sed expression to convert inodes to names.
	# Filenames don't contain dashes and we want basenames
	# shellcheck disable=SC2035
	sed_cmd=$(cd "$CTDB_DBDIR" &&
			stat -c "s#[0-9a-f]*:[0-9a-f]*:%i #%n #" *.tdb.* 2>/dev/null ;
		cd "$CTDB_DBDIR_PERSISTENT" &&
			stat -c "s#[0-9a-f]*:[0-9a-f]*:%i #%n #" *.tdb.* 2>/dev/null)

	# Parse /proc/locks and extract following information
	#    pid process_name tdb_name offsets [W]
	#
	# Lock type columns are compared field by field so the result does
	# not depend on how many spaces the kernel uses to pad them.  A
	# second field of "->" marks a blocked (waiting) request; its
	# columns are shifted by one and it is tagged with a trailing "W".
	out=$(awk '
		{ off = ($2 == "->") ? 1 : 0 }
		$(2 + off) == "POSIX" && $(3 + off) == "ADVISORY" && $(4 + off) == "WRITE" {
			if (off) {
				print $6, $7, $8, $9, "W"
			} else {
				print $5, $6, $7, $8
			}
		}' /proc/locks |
		while read -r pid rest; do
			pname=$(readlink "/proc/${pid}/exe")
			printf '%s\n' "$pid $pname $rest"
		done | sed -e "$sed_cmd" | grep '\.tdb')

	if [ -n "$out" ]; then
		# Log information about locks
		echo "$out"

		# Find the processes (other than ctdbd) that hold a lock on
		# any database that some process is waiting for.
		#
		# The database name is located by counting fields from the
		# end of the line, because the process name in the middle
		# may be empty or contain spaces (e.g. "... (deleted)").
		# Names are compared exactly rather than as regular
		# expressions.
		pids=$(printf '%s\n' "$out" | awk '
			NF < 4 { next }
			$NF == "W" { waited[$(NF - 3)] = 1; next }
			{ line[NR] = $0; db[NR] = $(NF - 2); pid[NR] = $1 }
			END {
				for (n in line) {
					if ((db[n] in waited) && line[n] !~ /ctdbd/) {
						print pid[n]
					}
				}
			}' | sort -u)

		# For each such process, log stack trace
		for pid in $pids; do
			echo "----- Stack trace for PID=$pid -----"

			# The command name in /proc/PID/stat is enclosed in
			# parentheses and may itself contain spaces or ")",
			# so the state is taken as the first word after the
			# last ") ".  If the file cannot be read the process
			# has gone away and there is nothing to trace.
			if ! read -r stat_line 2>/dev/null <"/proc/${pid}/stat"; then
				echo "----- Process has exited, no stack trace available"
				continue
			fi
			stat_rest="${stat_line##*) }"
			state="${stat_rest%% *}"

			if [ "$state" = "D" ]; then
				# Don't run gstack on a process in D state since
				# gstack will hang until the process exits D state.
				# Although it is possible for a process to transition
				# to D state after this check, it is unlikely because
				# if a process is stuck in D state then it is probably
				# the reason why this script was called.  Note that a
				# kernel stack almost certainly won't help diagnose a
				# deadlock... but it will probably give us someone to
				# blame!
				echo "----- Process in D state, printing kernel stack only"
				cat "/proc/${pid}/stack"
			else
				gstack "$pid"
			fi
		done
	fi

	echo "===== End of debug locks PID=$$ ====="
) 9>"${CTDB_SCRIPT_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock"

exit 0