MEDIUM: time: measure the time stolen by other threads

The purpose is to detect if threads or processes are competing for the
same CPU. This can happen when threads are incorrectly bound, or after a
reload if the previous process still has an important activity. With
threads this situation is problematic because a preempted thread holding
a lock will block other ones waiting for this lock to be released.

A first attempt consisted in measuring the cumulated lost time more
precisely but the system's scheduler is smart enough to try to limit the
thread preemption rate by mostly context switching during poll()'s blank
periods, so most of the time lost is not seen. In essence this is good
because it means a thread is not preempted with a lock held, and even
regarding the rendez-vous point it cannot prevent the other ones from
making progress. But still it happens tens to hundreds of times per
second that a thread might be preempted, so it's still possible to detect
that the situation is happening, thus it's interesting to measure and
report its frequency.

Each time we enter the poller, we check the CPU time spent working and
see if we've lost time doing something else. To limit false positives,
we're only interested in losses of 500 microseconds or more (i.e. half
a clock tick on a 1 kHz system). If so, it indicates that some time was
stolen by another thread or process. Note that we purposely store some
sub-millisecond counters so that under heavy traffic with a 1 kHz clock,
it's still possible to measure something without being subject to the
risk of rounding errors (i.e. if exactly 1 ms is stolen it's possible
that the time difference could often be slightly lower).

This counter of lost CPU time slots time is reported in "show activity"
in numbers of milliseconds of CPU lost per second, per 15s, and total
over the process' life. By definition, the per-second counter cannot
report values larger than 1000 per thread per second and the 15s one
will be limited to 15000/s in the worst case, but it's possible that
peak values exceed such thresholds after long pauses.
This commit is contained in:
Willy Tarreau 2018-10-17 19:01:24 +02:00
parent 5ceeb15002
commit ed72d82827
4 changed files with 51 additions and 0 deletions

View File

@ -63,6 +63,8 @@ extern THREAD_LOCAL struct timeval date; /* the real current date */
extern struct timeval start_date; /* the process's start date */
extern THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
extern THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
extern THREAD_LOCAL uint64_t prev_cpu_time; /* previous per thread CPU time */
extern THREAD_LOCAL uint64_t prev_mono_time; /* previous system wide monotonic time */
/**** exported functions *************************************************/
@ -83,6 +85,11 @@ REGPRM2 int tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2);
*/
REGPRM2 int tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2);
/* Updates the current thread's statistics about stolen CPU time. The unit for
* <stolen> is half-milliseconds.
*/
REGPRM1 void report_stolen_time(uint64_t stolen);
/**** general purpose functions and macros *******************************/
@ -574,6 +581,26 @@ static inline void measure_idle()
*/
static inline void tv_entering_poll()
{
uint64_t new_mono_time;
uint64_t new_cpu_time;
int64_t stolen;
new_cpu_time = now_cpu_time();
new_mono_time = now_mono_time();
if (prev_cpu_time && prev_mono_time) {
new_cpu_time -= prev_cpu_time;
new_mono_time -= prev_mono_time;
stolen = new_mono_time - new_cpu_time;
if (stolen >= 500000) {
stolen /= 500000;
/* more than half a millisecond difference might
* indicate an undesired preemption.
*/
report_stolen_time(stolen);
}
}
gettimeofday(&before_poll, NULL);
}
@ -586,6 +613,8 @@ static inline void tv_leaving_poll(int timeout, int interrupted)
{
tv_update_date(timeout, interrupted);
measure_idle();
prev_cpu_time = now_cpu_time();
prev_mono_time = now_mono_time();
}
#endif /* _COMMON_TIME_H */

View File

@ -195,6 +195,9 @@ struct activity {
unsigned int stream; // calls to process_stream()
unsigned int empty_rq; // calls to process_runnable_tasks() with nothing for the thread
unsigned int long_rq; // process_runnable_tasks() left with tasks in the run queue
unsigned int cpust_total; // sum of half-ms stolen per thread
struct freq_ctr cpust_1s; // avg amount of half-ms stolen over last second
struct freq_ctr_period cpust_15s; // avg amount of half-ms stolen over last 15s
char __pad[0]; // unused except to check remaining room
char __end[0] __attribute__((aligned(64))); // align size to 64.
};

View File

@ -972,6 +972,9 @@ static int cli_io_handler_show_activity(struct appctx *appctx)
chunk_appendf(&trash, "\nstream:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].stream);
chunk_appendf(&trash, "\nempty_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].empty_rq);
chunk_appendf(&trash, "\nlong_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].long_rq);
chunk_appendf(&trash, "\ncpust_tot:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].cpust_total/2);
chunk_appendf(&trash, "\ncpust_1s:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", read_freq_ctr(&activity[thr].cpust_1s)/2);
chunk_appendf(&trash, "\ncpust_15s:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", read_freq_ctr_period(&activity[thr].cpust_15s, 15000)/2);
chunk_appendf(&trash, "\n");

View File

@ -10,12 +10,16 @@
*
*/
#include <stdint.h>
#include <unistd.h>
#include <sys/time.h>
#include <common/config.h>
#include <common/standard.h>
#include <common/time.h>
#include <common/hathreads.h>
#include <types/global.h>
#include <proto/freq_ctr.h>
THREAD_LOCAL unsigned int ms_left_scaled; /* milliseconds left for current second (0..2^32-1) */
THREAD_LOCAL unsigned int now_ms; /* internal date in milliseconds (may wrap) */
@ -27,6 +31,8 @@ THREAD_LOCAL struct timeval date; /* the real current date */
struct timeval start_date; /* the process's start date */
THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
THREAD_LOCAL uint64_t prev_cpu_time = 0; /* previous per thread CPU time */
THREAD_LOCAL uint64_t prev_mono_time = 0; /* previous system wide monotonic time */
static THREAD_LOCAL struct timeval tv_offset; /* per-thread time ofsset relative to global time */
volatile unsigned long long global_now; /* common date between all threads (32:32) */
@ -258,6 +264,16 @@ REGPRM2 void tv_update_date(int max_wait, int interrupted)
return;
}
/* Updates the current thread's statistics about stolen CPU time. The unit for
* <stolen> is half-milliseconds.
*/
REGPRM1 void report_stolen_time(uint64_t stolen)
{
activity[tid].cpust_total += stolen;
update_freq_ctr(&activity[tid].cpust_1s, stolen);
update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
}
/*
* Local variables:
* c-indent-level: 8