From 67b22b6e94784b4ac324881a25993f9cecd2946d Mon Sep 17 00:00:00 2001
From: Martin Schwenke
Date: Mon, 22 Jul 2013 15:08:32 +1000
Subject: [PATCH] scripts: Run scriptstatus for hung event

The timeout information printed by ctdbd is less than useful because
it refers to the cumulative time taken by the eventscripts run so far.
Adding scriptstatus output indicates where time was actually spent.

Since there is now quite a bit of output, serialise the calls to this
script using flock.

Signed-off-by: Martin Schwenke
Pair-programmed-with: Amitay Isaacs

(This used to be ctdb commit 1b016b2dfc5d7d3f2a42ce4dfe569608e90eb714)
---
 ctdb/config/debug-hung-script.sh | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 ctdb/config/debug-hung-script.sh

diff --git a/ctdb/config/debug-hung-script.sh b/ctdb/config/debug-hung-script.sh
old mode 100644
new mode 100755
index dcf68ba89e5..32dbd5f863a
--- a/ctdb/config/debug-hung-script.sh
+++ b/ctdb/config/debug-hung-script.sh
@@ -1,4 +1,24 @@
 #!/bin/sh
 
-echo "Pstree output for the hung script:"
-pstree -p -a $1
+# Print debug information about a hung event script.
+# $1 is the PID passed to pstree, $2 is the event passed to
+# "ctdb scriptstatus".
+
+(
+    # No use running several of these in parallel if, say, "releaseip"
+    # event hangs for multiple IPs.  In that case the output would be
+    # interleaved in the log and would just be confusing.  So take a
+    # lock on fd 9, giving up if it is not obtained within 2 seconds.
+    flock --wait 2 9 || exit 1
+
+    echo "===== Start of hung script debug for PID=\"$1\", event=\"$2\" ====="
+
+    echo "pstree -p -a ${1}:"
+    pstree -p -a "$1"
+
+    echo "ctdb scriptstatus ${2}:"
+    ctdb scriptstatus "$2"
+
+    echo "===== End of hung script debug for PID=\"$1\", event=\"$2\" ====="
+
+) 9>"${CTDB_VARDIR}/debug-hung-script.lock"