1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-22 22:04:08 +03:00
samba-mirror/ctdb/config/debug_locks.sh

219 lines
5.3 KiB
Bash
Raw Normal View History

#!/bin/sh
# This script attempts to find processes holding locks on a particular
# CTDB database and dumps a stack trace for each such process.
#
# There are 2 cases:
#
# * Samba is configured to use fcntl locks
#
# In this case /proc/locks is parsed to find potential lock holders
#
# * Samba is configured to use POSIX robust mutexes
#
# In this case the helper program tdb_mutex_check is used to find
# potential lock holders.
#
# This helper program uses a private glibc struct field, so is
# neither portable nor supported. If this field is not available
# then the helper is not built. Unexpected changes in internal
# glibc structures may cause unexpected results, including crashes.
# Bug reports for this helper program are not accepted without an
# accompanying patch.
# Locate the CTDB configuration directory.  If CTDB_BASE is not already
# set, fall back to the physical directory containing this script.  The
# steps are chained with && so that a failing dirname or cd leaves
# CTDB_BASE empty instead of silently substituting the caller's current
# directory (which would cause an unrelated "functions" file to be
# sourced below).
[ -n "$CTDB_BASE" ] || \
	CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")

# Common CTDB shell helpers.  NOTE(review): die, get_proc, script_log and
# ctdb_get_db_options are not defined in this file and are presumably
# provided here - confirm.
. "${CTDB_BASE}/functions"

if [ $# -ne 4 ] ; then
	die "usage: $0 <pid> { DB | RECORD } <tdb_path> { FCNTL | MUTEX }"
fi

# PID of the process that is waiting for the lock
lock_helper_pid="$1"
# lock_scope is unused for now
# shellcheck disable=SC2034
lock_scope="$2"
# Path of the TDB file being debugged
tdb_path="$3"
# Locking mechanism in use: FCNTL or MUTEX
lock_type="$4"
# type is at least mentioned in POSIX and is more portable than which(1)
# shellcheck disable=SC2039
if ! type gstack >/dev/null 2>&1 ; then
	# Fallback used when no gstack command is available: ask gdb for
	# a backtrace of every thread in the given process and keep only
	# the thread header lines and the stack frame lines.
	#
	# $1 - PID of the process to trace
	gstack ()
	{
		_pid="$1"

		# Two -e patterns are used instead of the \| alternation
		# operator because \| is a GNU extension to basic regular
		# expressions and is not understood by every grep.
		gdb -batch --quiet -nx "/proc/${_pid}/exe" "$_pid" \
		    -ex "thread apply all bt" 2>/dev/null |
			grep -e '^#' -e '^Thread '
	}
fi
# Load/cache database options from configuration file.
# NOTE(review): ctdb_get_db_options is not defined in this file; it is
# presumably provided by the sourced "functions" file - confirm.
ctdb_get_db_options
# Print a stack trace for a single process, preceded by a banner line.
#
# $1 - PID of the process to trace
#
# A process whose ps(1) state starts with "D" gets its kernel stack
# printed via get_proc; any other process is traced with gstack.
dump_stack ()
{
	_pid="$1"

	echo "----- Stack trace for PID=${_pid} -----"

	case "$(ps -p "$_pid" -o state= | cut -c 1)" in
	D)
		# gstack must not be run on a process in D state: it
		# would hang until the process leaves that state.  The
		# process could still enter D state after this check,
		# but that is unlikely - a process stuck in D state is
		# probably the reason this script was called in the
		# first place.  A kernel stack almost certainly won't
		# help diagnose a deadlock... but it will probably give
		# us someone to blame!
		echo "----- Process in D state, printing kernel stack only"
		get_proc "${_pid}/stack"
		;;
	*)
		gstack "$_pid"
		;;
	esac
}
# Dump a stack trace for every distinct PID in a list.
#
# $1 - whitespace-separated list of PIDs; duplicates, repeated blanks
#      and embedded newlines are all tolerated
dump_stacks ()
{
	# Deliberately unquoted: word splitting hands printf one PID per
	# argument, so sort sees exactly one PID per line
	# shellcheck disable=SC2086
	_pids=$(printf '%s\n' $1 | sort -u)

	for _p in $_pids ; do
		dump_stack "$_p"
	done
}
# Print an identifier for $tdb_path in the MAJOR:MINOR:INODE form
# (hexadecimal device numbers, decimal inode) that is matched against
# the file column of /proc/locks.
#
# Reads:   tdb_path
# Outputs: the identifier on stdout
# Calls die if $tdb_path can not be examined with stat.
get_tdb_file_id ()
{
	if ! _device_inode=$(stat -c "%d:%i" "$tdb_path" 2>/dev/null) ; then
		die "Unable to stat \"${tdb_path}\""
	fi

	_device="${_device_inode%%:*}"
	_inode="${_device_inode#*:}"

	# Decode the combined device number.  Linux uses a 12-bit major
	# and a 20-bit minor: the major lives in bits 8-19, the low 8
	# bits of the minor in bits 0-7 and the remaining 12 bits of the
	# minor in bits 20-31.  Treating the value as a plain 8:8 pair
	# gives a wrong major (and truncated minor) for any device whose
	# minor number is greater than 255.
	_device_major=$(((_device >> 8) & 0xfff))
	_device_minor=$(((_device & 0xff) | ((_device >> 12) & 0xfff00)))

	printf '%02x:%02x:%u\n' "$_device_major" "$_device_minor" "$_inode"
}
# Use /proc/locks to report who is waiting for, and who is holding,
# POSIX advisory write locks on $tdb_path.
#
# Reads:   tdb_path, lock_helper_pid
# Updates: lock_holder_pids - PIDs of the lock holders found are appended
# Outputs: an optional "Waiter:" section and an optional "Lock holders:"
#          section on stdout
debug_via_proc_locks ()
{
	# File ID in the form used by the relevant column of /proc/locks
	_file_id=$(get_tdb_file_id)
	_tdb=$(basename "$tdb_path")

	# Blocked requests are flagged with "->" in the 2nd column, which
	# shifts every later column along by one.  Report the one(s)
	# belonging to the waiting process.
	_comm=$(ps -p "$lock_helper_pid" -o comm=)
	_waiter=$(get_proc "locks" |
		awk -v pid="$lock_helper_pid" \
		    -v file_id="$_file_id" \
		    -v file="$_tdb" \
		    -v comm="$_comm" \
		    '{
			if ($2 == "->" && $3 == "POSIX" &&
			    $4 == "ADVISORY" && $5 == "WRITE" &&
			    $6 == pid && $7 == file_id) {
				print $6, comm, file, $8, $9
			}
		    }')
	if [ -n "$_waiter" ] ; then
		echo "Waiter:"
		echo "$_waiter"
	fi

	# Now look for other processes holding locks on $tdb_path.  Each
	# output line is:
	#     pid process_name tdb_name offsets
	_holders=$(get_proc "locks" |
		awk -v pid="$lock_helper_pid" \
		    -v file_id="$_file_id" \
		    -v file="$_tdb" \
		    '{
			if ($2 == "POSIX" && $3 == "ADVISORY" &&
			    $4 == "WRITE" &&
			    $5 != pid && $6 == file_id) {
				print $5, file, $7, $8
			}
		    }' |
		while read -r _pid _rest ; do
			echo "$_pid $(ps -p "$_pid" -o comm=) $_rest"
		done)

	if [ -n "$_holders" ] ; then
		# Log information about locks
		echo "Lock holders:"
		echo "$_holders"

		# Remember the holders so their stacks can be dumped
		_pids=$(echo "$_holders" | awk '{ print $1 }')
		lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
	fi
}
# Use the tdb_mutex_check helper to report processes holding mutexes
# in $tdb_path.
#
# Reads:   CTDB_HELPER_BINDIR, tdb_path, lock_helper_pid
# Updates: lock_holder_pids - PIDs of the holders found (other than
#          $lock_helper_pid) are appended
# Outputs: a "Lock holders:" section on stdout or, when the helper
#          reports no holders, any of its 'trylock failed' lines
debug_via_tdb_mutex ()
{
	_helper="${CTDB_HELPER_BINDIR}/tdb_mutex_check"

	# Mutex helper not available - not supported?  Leave quietly
	# rather than provoking a "not found" error.
	[ -x "$_helper" ] || return 0

	# Helper should always succeed
	_t=$("$_helper" "$tdb_path") || return 0

	# Turn each "[WHAT] pid=PID" line into "PID WHAT"
	_holders=$(echo "$_t" | sed -n -e 's#^\[\(.*\)\] pid=\(.*\)#\2 \1#p')
	if [ -z "$_holders" ] ; then
		case "$_t" in
		"")
			:
			;;
		*)
			echo "$_t" | grep -F 'trylock failed'
			;;
		esac
		return
	fi

	# Get process names, append $tdb_path
	_tdb=$(basename "$tdb_path")
	_holders=$(echo "$_holders" |
		while read -r _pid _rest ; do
			echo "${_pid} $(ps -p "$_pid" -o comm=) ${_tdb} ${_rest}"
		done)

	# Log information about locks
	echo "Lock holders:"
	echo "$_holders"

	# Get PIDs of processes that are holding locks, leaving out the
	# waiting process itself
	_pids=$(echo "$_holders" |
		awk -v pid="$lock_helper_pid" '$1 != pid {print $1}')
	lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
}
# Main body.  It runs in a subshell with file descriptor 9 open on a
# lock file; a non-blocking flock on that descriptor means that if
# another instance already holds the lock this one gives up straight
# away without producing any output.  Everything the subshell prints is
# piped to script_log with the tag "ctdbd-lock".
(
	if ! flock -n 9 ; then
		exit 1
	fi

	echo "===== Start of debug locks PID=$$ ====="

	# Collected by the debug_via_* functions
	lock_holder_pids=""

	# /proc/locks is always consulted; the mutex helper only when
	# mutexes are in use
	debug_via_proc_locks
	case "$lock_type" in
	MUTEX)
		debug_via_tdb_mutex
		;;
	esac

	dump_stacks "$lock_holder_pids"

	echo "===== End of debug locks PID=$$ ====="
) 9>"${CTDB_SCRIPT_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock"

# Always report success, whatever happened above
exit 0