DEBUG: wdt: better detect apparently locked up threads and warn about them
In order to help users detect when threads are behaving abnormally, let's try to emit a warning when one is no longer making any progress. This will allow faulty situations to be caught more accurately, instead of occasionally triggering just after the long task. It will also let users know that there is something wrong with their configuration, and let them inspect the call trace to figure out whether they're using excessively long rules or Lua, for example (the usual warnings about lua-load vs lua-load-per-thread are still reported). The warning will only be emitted for threads not yet marked as stuck, so as not to interfere with panic dumps and to avoid sending a warning just before a panic. However, a tainted flag (TAINTED_WARN_BLOCKED_TRAFFIC, 0x2000) is set when this happens. (cherry picked from commit 148eb5875fb7e6c46c0a9eac486dcb7b3bca931d) Signed-off-by: Willy Tarreau <w@1wt.eu>
This commit is contained in:
parent
80ea59459c
commit
a44922fb10
@ -327,6 +327,7 @@ enum tainted_flags {
|
||||
TAINTED_LUA_STUCK = 0x00000400, /* stuck in a Lua context */
|
||||
TAINTED_LUA_STUCK_SHARED = 0x00000800, /* stuck in a shared Lua context */
|
||||
TAINTED_MEM_TRIMMING_STUCK = 0x00001000, /* stuck while trimming memory */
|
||||
TAINTED_WARN_BLOCKED_TRAFFIC = 0x00002000, /* emitted a warning about blocked traffic */
|
||||
};
|
||||
|
||||
/* this is a bit field made of TAINTED_*, and is declared in haproxy.c */
|
||||
|
@ -675,7 +675,7 @@ void ha_stuck_warning(int thr)
|
||||
struct buffer buf;
|
||||
ullong n, p;
|
||||
|
||||
if (get_tainted() & TAINTED_PANIC) {
|
||||
if (mark_tainted(TAINTED_WARN_BLOCKED_TRAFFIC) & TAINTED_PANIC) {
|
||||
/* a panic dump is already in progress, let's not disturb it,
|
||||
* we'll be called via signal DEBUGSIG. By returning we may be
|
||||
* able to leave a current signal handler (e.g. WDT) so that
|
||||
|
12
src/wdt.c
12
src/wdt.c
@ -12,6 +12,7 @@
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <haproxy/activity.h>
|
||||
#include <haproxy/api.h>
|
||||
#include <haproxy/clock.h>
|
||||
#include <haproxy/debug.h>
|
||||
@ -38,6 +39,7 @@
|
||||
*/
|
||||
static struct {
|
||||
timer_t timer;
|
||||
uint prev_ctxsw;
|
||||
} per_thread_wd_ctx[MAX_THREADS];
|
||||
|
||||
/* Setup (or ping) the watchdog timer for thread <thr>. Returns non-zero on
|
||||
@ -106,10 +108,18 @@ void wdt_handler(int sig, siginfo_t *si, void *arg)
|
||||
* scheduler is still alive by setting the TH_FL_STUCK flag
|
||||
* that the scheduler clears when switching to the next task.
|
||||
* If it's already set, then it's our second call with no
|
||||
* progress and the thread is dead.
|
||||
* progress and the thread is dead. However, if we figure
|
||||
* that the scheduler made no progress since last time, we'll
|
||||
* at least emit a warning.
|
||||
*/
|
||||
if (!(_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_STUCK)) {
|
||||
uint prev_ctxsw;
|
||||
|
||||
_HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
|
||||
prev_ctxsw = HA_ATOMIC_LOAD(&per_thread_wd_ctx[tid].prev_ctxsw);
|
||||
if (HA_ATOMIC_LOAD(&activity[thr].ctxsw) == prev_ctxsw)
|
||||
ha_stuck_warning(thr);
|
||||
HA_ATOMIC_STORE(&activity[thr].ctxsw, prev_ctxsw);
|
||||
goto update_and_leave;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user