MEDIUM: tasks/activity: combine the called function with the caller
Now instead of getting aggregate stats per called function, we have them per function AND per call place. The "byaddr" sort considers the function pointer first, then the call count, so that dominant callers of a given callee are instantly spotted. This allows to get sorted outputs like this: Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup process_stream 9891131 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup process_stream 9823934 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup other 101893 4.626s 45.40us 14.84s 145.7us quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup accept_queue_process 286 1.571ms 5.494us 72.55ms 253.7us <- listener_accept@src/listener.c:1099 tasklet_wakeup process_resolvers 176 157.8us 896.0ns 7.835ms 44.52us <- wake_expired_tasks@src/task.c:429 task_drop_running qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup task_run_applet 24 33.79ms 1.408ms 5.838ms 243.3us <- sc_applet_create@src/stconn.c:489 appctx_wakeup accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup ... and calls sorted by address like this: Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg task_run_applet 23 32.73ms 1.423ms 5.837ms 253.8us <- sc_applet_create@src/stconn.c:489 appctx_wakeup task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup accept_queue_process 285 1.566ms 5.495us 72.49ms 254.3us <- listener_accept@src/listener.c:1099 tasklet_wakeup accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup process_resolvers 159 145.9us 917.0ns 7.823ms 49.20us <- wake_expired_tasks@src/task.c:429 task_drop_running srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup process_stream 9891130 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup process_stream 9823933 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup other 101892 4.626s 45.40us 14.84s 145.7us It already becomes visible that some tasks have different very costs depending where they're called (e.g. process_stream). The method used to wake them up is also shown. Applets are handled specially and shown as appctx_wakeup.
This commit is contained in:
parent
41e701e2c1
commit
3d4cdb198c
@ -126,6 +126,7 @@ struct activity {
|
||||
*/
|
||||
struct sched_activity {
|
||||
const void *func;
|
||||
const struct ha_caller *caller;
|
||||
uint64_t calls;
|
||||
uint64_t cpu_time;
|
||||
uint64_t lat_time;
|
||||
|
@ -31,7 +31,7 @@ extern struct sched_activity sched_activity[SCHED_ACT_HASH_BUCKETS];
|
||||
|
||||
void report_stolen_time(uint64_t stolen);
|
||||
void activity_count_runtime(uint32_t run_time);
|
||||
struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func);
|
||||
struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller);
|
||||
|
||||
#ifdef USE_MEMORY_PROFILING
|
||||
struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth);
|
||||
|
@ -445,6 +445,7 @@ static int cli_parse_set_profiling(char **args, char *payload, struct appctx *ap
|
||||
HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
|
||||
HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
|
||||
HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
|
||||
HA_ATOMIC_STORE(&sched_activity[i].caller, NULL);
|
||||
}
|
||||
}
|
||||
else if (strcmp(args[3], "auto") == 0) {
|
||||
@ -482,6 +483,7 @@ static int cmp_sched_activity_calls(const void *a, const void *b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* sort by address first, then by call count */
|
||||
static int cmp_sched_activity_addr(const void *a, const void *b)
|
||||
{
|
||||
const struct sched_activity *l = (const struct sched_activity *)a;
|
||||
@ -491,6 +493,10 @@ static int cmp_sched_activity_addr(const void *a, const void *b)
|
||||
return -1;
|
||||
else if (l->func < r->func)
|
||||
return 1;
|
||||
else if (l->calls > r->calls)
|
||||
return -1;
|
||||
else if (l->calls < r->calls)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
@ -524,23 +530,25 @@ static int cmp_memprof_addr(const void *a, const void *b)
|
||||
}
|
||||
#endif // USE_MEMORY_PROFILING
|
||||
|
||||
/* Computes the index of function pointer <func> for use with sched_activity[]
|
||||
* or any other similar array passed in <array>, and returns a pointer to the
|
||||
* entry after having atomically assigned it to this function pointer. Note
|
||||
* that in case of collision, the first entry is returned instead ("other").
|
||||
/* Computes the index of function pointer <func> and caller <caller> for use
|
||||
* with sched_activity[] or any other similar array passed in <array>, and
|
||||
* returns a pointer to the entry after having atomically assigned it to this
|
||||
* function pointer and caller combination. Note that in case of collision,
|
||||
* the first entry is returned instead ("other").
|
||||
*/
|
||||
struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func)
|
||||
struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller)
|
||||
{
|
||||
uint32_t hash = ptr_hash(func, SCHED_ACT_HASH_BITS);
|
||||
uint32_t hash = ptr2_hash(func, caller, SCHED_ACT_HASH_BITS);
|
||||
struct sched_activity *ret;
|
||||
const void *old = NULL;
|
||||
|
||||
ret = &array[hash];
|
||||
|
||||
if (likely(ret->func == func))
|
||||
if (likely(ret->func == func && ret->caller == caller))
|
||||
return ret;
|
||||
|
||||
if (HA_ATOMIC_CAS(&ret->func, &old, func))
|
||||
if (HA_ATOMIC_CAS(&ret->func, &old, func) &&
|
||||
HA_ATOMIC_CAS(&ret->caller, &old, caller))
|
||||
return ret;
|
||||
|
||||
return array;
|
||||
@ -572,6 +580,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
|
||||
#endif
|
||||
struct stconn *sc = appctx_sc(appctx);
|
||||
struct buffer *name_buffer = get_trash_chunk();
|
||||
const struct ha_caller *caller;
|
||||
const char *str;
|
||||
int max_lines;
|
||||
int i, max;
|
||||
@ -626,6 +635,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
|
||||
for (i = ctx->linenum; i < max_lines && tmp_activity[i].calls; i++) {
|
||||
ctx->linenum = i;
|
||||
chunk_reset(name_buffer);
|
||||
caller = HA_ATOMIC_LOAD(&tmp_activity[i].caller);
|
||||
|
||||
if (!tmp_activity[i].func)
|
||||
chunk_printf(name_buffer, "other");
|
||||
@ -643,7 +653,14 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
|
||||
print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
|
||||
print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
|
||||
print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
|
||||
print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
|
||||
print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "");
|
||||
|
||||
if (caller && caller->what <= WAKEUP_TYPE_APPCTX_WAKEUP)
|
||||
chunk_appendf(&trash, " <- %s@%s:%d %s",
|
||||
caller->func, caller->file, caller->line,
|
||||
task_wakeup_type_str(caller->what));
|
||||
|
||||
b_putchr(&trash, '\n');
|
||||
|
||||
if (applet_putchk(appctx, &trash) == -1) {
|
||||
/* failed, try again */
|
||||
@ -834,7 +851,7 @@ static int cli_io_handler_show_tasks(struct appctx *appctx)
|
||||
rqnode = eb32_first(&ha_thread_ctx[thr].rqueue_shared);
|
||||
while (rqnode) {
|
||||
t = eb32_entry(rqnode, struct task, rq);
|
||||
entry = sched_activity_entry(tmp_activity, t->process);
|
||||
entry = sched_activity_entry(tmp_activity, t->process, NULL);
|
||||
if (t->wake_date) {
|
||||
lat = now_ns - t->wake_date;
|
||||
if ((int64_t)lat > 0)
|
||||
@ -851,7 +868,7 @@ static int cli_io_handler_show_tasks(struct appctx *appctx)
|
||||
rqnode = eb32_first(&ha_thread_ctx[thr].rqueue);
|
||||
while (rqnode) {
|
||||
t = eb32_entry(rqnode, struct task, rq);
|
||||
entry = sched_activity_entry(tmp_activity, t->process);
|
||||
entry = sched_activity_entry(tmp_activity, t->process, NULL);
|
||||
if (t->wake_date) {
|
||||
lat = now_ns - t->wake_date;
|
||||
if ((int64_t)lat > 0)
|
||||
@ -864,7 +881,7 @@ static int cli_io_handler_show_tasks(struct appctx *appctx)
|
||||
/* shared tasklet list */
|
||||
list_for_each_entry(tl, mt_list_to_list(&ha_thread_ctx[thr].shared_tasklet_list), list) {
|
||||
t = (const struct task *)tl;
|
||||
entry = sched_activity_entry(tmp_activity, t->process);
|
||||
entry = sched_activity_entry(tmp_activity, t->process, NULL);
|
||||
if (!TASK_IS_TASKLET(t) && t->wake_date) {
|
||||
lat = now_ns - t->wake_date;
|
||||
if ((int64_t)lat > 0)
|
||||
@ -877,7 +894,7 @@ static int cli_io_handler_show_tasks(struct appctx *appctx)
|
||||
for (queue = 0; queue < TL_CLASSES; queue++) {
|
||||
list_for_each_entry(tl, &ha_thread_ctx[thr].tasklets[queue], list) {
|
||||
t = (const struct task *)tl;
|
||||
entry = sched_activity_entry(tmp_activity, t->process);
|
||||
entry = sched_activity_entry(tmp_activity, t->process, NULL);
|
||||
if (!TASK_IS_TASKLET(t) && t->wake_date) {
|
||||
lat = now_ns - t->wake_date;
|
||||
if ((int64_t)lat > 0)
|
||||
|
@ -567,7 +567,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
||||
|
||||
t->wake_date = 0;
|
||||
th_ctx->sched_call_date = now_ns;
|
||||
profile_entry = sched_activity_entry(sched_activity, t->process);
|
||||
profile_entry = sched_activity_entry(sched_activity, t->process, t->caller);
|
||||
th_ctx->sched_profile_entry = profile_entry;
|
||||
HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
|
||||
HA_ATOMIC_INC(&profile_entry->calls);
|
||||
|
Loading…
x
Reference in New Issue
Block a user