a955d7eac1
The timerlat tracer aims to help the preemptive kernel developers to found souces of wakeup latencies of real-time threads. Like cyclictest, the tracer sets a periodic timer that wakes up a thread. The thread then computes a *wakeup latency* value as the difference between the *current time* and the *absolute time* that the timer was set to expire. The main goal of timerlat is tracing in such a way to help kernel developers. Usage Write the ASCII text "timerlat" into the current_tracer file of the tracing system (generally mounted at /sys/kernel/tracing). For example: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo timerlat > current_tracer It is possible to follow the trace by reading the trace trace file: [root@f32 tracing]# cat trace # tracer: timerlat # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # || / # |||| ACTIVATION # TASK-PID CPU# |||| TIMESTAMP ID CONTEXT LATENCY # | | | |||| | | | | <idle>-0 [000] d.h1 54.029328: #1 context irq timer_latency 932 ns <...>-867 [000] .... 54.029339: #1 context thread timer_latency 11700 ns <idle>-0 [001] dNh1 54.029346: #1 context irq timer_latency 2833 ns <...>-868 [001] .... 54.029353: #1 context thread timer_latency 9820 ns <idle>-0 [000] d.h1 54.030328: #2 context irq timer_latency 769 ns <...>-867 [000] .... 54.030330: #2 context thread timer_latency 3070 ns <idle>-0 [001] d.h1 54.030344: #2 context irq timer_latency 935 ns <...>-868 [001] .... 54.030347: #2 context thread timer_latency 4351 ns The tracer creates a per-cpu kernel thread with real-time priority that prints two lines at every activation. The first is the *timer latency* observed at the *hardirq* context before the activation of the thread. The second is the *timer latency* observed by the thread, which is the same level that cyclictest reports. The ACTIVATION ID field serves to relate the *irq* execution to its respective *thread* execution. The irq/thread splitting is important to clarify at which context the unexpected high value is coming from. The *irq* context can be delayed by hardware related actions, such as SMIs, NMIs, IRQs or by a thread masking interrupts. Once the timer happens, the delay can also be influenced by blocking caused by threads. For example, by postponing the scheduler execution via preempt_disable(), by the scheduler execution, or by masking interrupts. Threads can also be delayed by the interference from other threads and IRQs. The timerlat can also take advantage of the osnoise: traceevents. For example: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo timerlat > current_tracer [root@f32 tracing]# echo osnoise > set_event [root@f32 tracing]# echo 25 > osnoise/stop_tracing_total_us [root@f32 tracing]# tail -10 trace cc1-87882 [005] d..h... 548.771078: #402268 context irq timer_latency 1585 ns cc1-87882 [005] dNLh1.. 548.771082: irq_noise: local_timer:236 start 548.771077442 duration 4597 ns cc1-87882 [005] dNLh2.. 548.771083: irq_noise: reschedule:253 start 548.771083017 duration 56 ns cc1-87882 [005] dNLh2.. 548.771086: irq_noise: call_function_single:251 start 548.771083811 duration 2048 ns cc1-87882 [005] dNLh2.. 548.771088: irq_noise: call_function_single:251 start 548.771086814 duration 1495 ns cc1-87882 [005] dNLh2.. 548.771091: irq_noise: call_function_single:251 start 548.771089194 duration 1558 ns cc1-87882 [005] dNLh2.. 548.771094: irq_noise: call_function_single:251 start 548.771091719 duration 1932 ns cc1-87882 [005] dNLh2.. 548.771096: irq_noise: call_function_single:251 start 548.771094696 duration 1050 ns cc1-87882 [005] d...3.. 548.771101: thread_noise: cc1:87882 start 548.771078243 duration 10909 ns timerlat/5-1035 [005] ....... 548.771103: #402268 context thread timer_latency 25960 ns For further information see: Documentation/trace/timerlat-tracer.rst Link: https://lkml.kernel.org/r/71f18efc013e1194bcaea1e54db957de2b19ba62.1624372313.git.bristot@redhat.com Cc: Phil Auld <pauld@redhat.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Kate Carcia <kcarcia@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Alexandre Chartre <alexandre.chartre@oracle.com> Cc: Clark Willaims <williams@redhat.com> Cc: John Kacur <jkacur@redhat.com> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: x86@kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
404 lines
9.6 KiB
C
404 lines
9.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* This file defines the trace event structures that go into the ring
|
|
* buffer directly. They are created via macros so that changes for them
|
|
* appear in the format file. Using macros will automate this process.
|
|
*
|
|
* The macro used to create a ftrace data structure is:
|
|
*
|
|
* FTRACE_ENTRY( name, struct_name, id, structure, print )
|
|
*
|
|
* @name: the name used the event name, as well as the name of
|
|
* the directory that holds the format file.
|
|
*
|
|
* @struct_name: the name of the structure that is created.
|
|
*
|
|
* @id: The event identifier that is used to detect what event
|
|
* this is from the ring buffer.
|
|
*
|
|
* @structure: the structure layout
|
|
*
|
|
* - __field( type, item )
|
|
* This is equivalent to declaring
|
|
* type item;
|
|
* in the structure.
|
|
* - __array( type, item, size )
|
|
* This is equivalent to declaring
|
|
* type item[size];
|
|
* in the structure.
|
|
*
|
|
* * for structures within structures, the format of the internal
|
|
* structure is laid out. This allows the internal structure
|
|
* to be deciphered for the format file. Although these macros
|
|
* may become out of sync with the internal structure, they
|
|
* will create a compile error if it happens. Since the
|
|
* internal structures are just tracing helpers, this is not
|
|
* an issue.
|
|
*
|
|
* When an internal structure is used, it should use:
|
|
*
|
|
* __field_struct( type, item )
|
|
*
|
|
* instead of __field. This will prevent it from being shown in
|
|
* the output file. The fields in the structure should use.
|
|
*
|
|
* __field_desc( type, container, item )
|
|
* __array_desc( type, container, item, len )
|
|
*
|
|
* type, item and len are the same as __field and __array, but
|
|
* container is added. This is the name of the item in
|
|
* __field_struct that this is describing.
|
|
*
|
|
*
|
|
* @print: the print format shown to users in the format file.
|
|
*/
|
|
|
|
/*
|
|
* Function trace entry - function address and parent function address:
|
|
*/
|
|
FTRACE_ENTRY_REG(function, ftrace_entry,
|
|
|
|
TRACE_FN,
|
|
|
|
F_STRUCT(
|
|
__field_fn( unsigned long, ip )
|
|
__field_fn( unsigned long, parent_ip )
|
|
),
|
|
|
|
F_printk(" %ps <-- %ps",
|
|
(void *)__entry->ip, (void *)__entry->parent_ip),
|
|
|
|
perf_ftrace_event_register
|
|
);
|
|
|
|
/* Function call entry */
|
|
FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
|
|
|
|
TRACE_GRAPH_ENT,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct ftrace_graph_ent, graph_ent )
|
|
__field_packed( unsigned long, graph_ent, func )
|
|
__field_packed( int, graph_ent, depth )
|
|
),
|
|
|
|
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
|
|
);
|
|
|
|
/* Function return entry */
|
|
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
|
|
|
|
TRACE_GRAPH_RET,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct ftrace_graph_ret, ret )
|
|
__field_packed( unsigned long, ret, func )
|
|
__field_packed( int, ret, depth )
|
|
__field_packed( unsigned int, ret, overrun )
|
|
__field_packed( unsigned long long, ret, calltime)
|
|
__field_packed( unsigned long long, ret, rettime )
|
|
),
|
|
|
|
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
|
|
(void *)__entry->func, __entry->depth,
|
|
__entry->calltime, __entry->rettime,
|
|
__entry->depth)
|
|
);
|
|
|
|
/*
|
|
* Context switch trace entry - which task (and prio) we switched from/to:
|
|
*
|
|
* This is used for both wakeup and context switches. We only want
|
|
* to create one structure, but we need two outputs for it.
|
|
*/
|
|
#define FTRACE_CTX_FIELDS \
|
|
__field( unsigned int, prev_pid ) \
|
|
__field( unsigned int, next_pid ) \
|
|
__field( unsigned int, next_cpu ) \
|
|
__field( unsigned char, prev_prio ) \
|
|
__field( unsigned char, prev_state ) \
|
|
__field( unsigned char, next_prio ) \
|
|
__field( unsigned char, next_state )
|
|
|
|
FTRACE_ENTRY(context_switch, ctx_switch_entry,
|
|
|
|
TRACE_CTX,
|
|
|
|
F_STRUCT(
|
|
FTRACE_CTX_FIELDS
|
|
),
|
|
|
|
F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
|
|
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
|
|
__entry->next_pid, __entry->next_prio, __entry->next_state,
|
|
__entry->next_cpu)
|
|
);
|
|
|
|
/*
|
|
* FTRACE_ENTRY_DUP only creates the format file, it will not
|
|
* create another structure.
|
|
*/
|
|
FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
|
|
|
|
TRACE_WAKE,
|
|
|
|
F_STRUCT(
|
|
FTRACE_CTX_FIELDS
|
|
),
|
|
|
|
F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
|
|
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
|
|
__entry->next_pid, __entry->next_prio, __entry->next_state,
|
|
__entry->next_cpu)
|
|
);
|
|
|
|
/*
|
|
* Stack-trace entry:
|
|
*/
|
|
|
|
#define FTRACE_STACK_ENTRIES 8
|
|
|
|
FTRACE_ENTRY(kernel_stack, stack_entry,
|
|
|
|
TRACE_STACK,
|
|
|
|
F_STRUCT(
|
|
__field( int, size )
|
|
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
|
|
),
|
|
|
|
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n",
|
|
(void *)__entry->caller[0], (void *)__entry->caller[1],
|
|
(void *)__entry->caller[2], (void *)__entry->caller[3],
|
|
(void *)__entry->caller[4], (void *)__entry->caller[5],
|
|
(void *)__entry->caller[6], (void *)__entry->caller[7])
|
|
);
|
|
|
|
FTRACE_ENTRY(user_stack, userstack_entry,
|
|
|
|
TRACE_USER_STACK,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, tgid )
|
|
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
|
|
),
|
|
|
|
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n",
|
|
(void *)__entry->caller[0], (void *)__entry->caller[1],
|
|
(void *)__entry->caller[2], (void *)__entry->caller[3],
|
|
(void *)__entry->caller[4], (void *)__entry->caller[5],
|
|
(void *)__entry->caller[6], (void *)__entry->caller[7])
|
|
);
|
|
|
|
/*
|
|
* trace_printk entry:
|
|
*/
|
|
FTRACE_ENTRY(bprint, bprint_entry,
|
|
|
|
TRACE_BPRINT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( const char *, fmt )
|
|
__dynamic_array( u32, buf )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->fmt)
|
|
);
|
|
|
|
FTRACE_ENTRY_REG(print, print_entry,
|
|
|
|
TRACE_PRINT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__dynamic_array( char, buf )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->buf),
|
|
|
|
ftrace_event_register
|
|
);
|
|
|
|
FTRACE_ENTRY(raw_data, raw_data_entry,
|
|
|
|
TRACE_RAW_DATA,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, id )
|
|
__dynamic_array( char, buf )
|
|
),
|
|
|
|
F_printk("id:%04x %08x",
|
|
__entry->id, (int)__entry->buf[0])
|
|
);
|
|
|
|
FTRACE_ENTRY(bputs, bputs_entry,
|
|
|
|
TRACE_BPUTS,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( const char *, str )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->str)
|
|
);
|
|
|
|
FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
|
|
|
|
TRACE_MMIO_RW,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct mmiotrace_rw, rw )
|
|
__field_desc( resource_size_t, rw, phys )
|
|
__field_desc( unsigned long, rw, value )
|
|
__field_desc( unsigned long, rw, pc )
|
|
__field_desc( int, rw, map_id )
|
|
__field_desc( unsigned char, rw, opcode )
|
|
__field_desc( unsigned char, rw, width )
|
|
),
|
|
|
|
F_printk("%lx %lx %lx %d %x %x",
|
|
(unsigned long)__entry->phys, __entry->value, __entry->pc,
|
|
__entry->map_id, __entry->opcode, __entry->width)
|
|
);
|
|
|
|
FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
|
|
|
|
TRACE_MMIO_MAP,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct mmiotrace_map, map )
|
|
__field_desc( resource_size_t, map, phys )
|
|
__field_desc( unsigned long, map, virt )
|
|
__field_desc( unsigned long, map, len )
|
|
__field_desc( int, map, map_id )
|
|
__field_desc( unsigned char, map, opcode )
|
|
),
|
|
|
|
F_printk("%lx %lx %lx %d %x",
|
|
(unsigned long)__entry->phys, __entry->virt, __entry->len,
|
|
__entry->map_id, __entry->opcode)
|
|
);
|
|
|
|
|
|
#define TRACE_FUNC_SIZE 30
|
|
#define TRACE_FILE_SIZE 20
|
|
|
|
FTRACE_ENTRY(branch, trace_branch,
|
|
|
|
TRACE_BRANCH,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, line )
|
|
__array( char, func, TRACE_FUNC_SIZE+1 )
|
|
__array( char, file, TRACE_FILE_SIZE+1 )
|
|
__field( char, correct )
|
|
__field( char, constant )
|
|
),
|
|
|
|
F_printk("%u:%s:%s (%u)%s",
|
|
__entry->line,
|
|
__entry->func, __entry->file, __entry->correct,
|
|
__entry->constant ? " CONSTANT" : "")
|
|
);
|
|
|
|
|
|
FTRACE_ENTRY(hwlat, hwlat_entry,
|
|
|
|
TRACE_HWLAT,
|
|
|
|
F_STRUCT(
|
|
__field( u64, duration )
|
|
__field( u64, outer_duration )
|
|
__field( u64, nmi_total_ts )
|
|
__field_struct( struct timespec64, timestamp )
|
|
__field_desc( s64, timestamp, tv_sec )
|
|
__field_desc( long, timestamp, tv_nsec )
|
|
__field( unsigned int, nmi_count )
|
|
__field( unsigned int, seqnum )
|
|
__field( unsigned int, count )
|
|
),
|
|
|
|
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tcount:%d\tnmi-ts:%llu\tnmi-count:%u\n",
|
|
__entry->seqnum,
|
|
__entry->tv_sec,
|
|
__entry->tv_nsec,
|
|
__entry->duration,
|
|
__entry->outer_duration,
|
|
__entry->count,
|
|
__entry->nmi_total_ts,
|
|
__entry->nmi_count)
|
|
);
|
|
|
|
#define FUNC_REPEATS_GET_DELTA_TS(entry) \
|
|
(((u64)(entry)->top_delta_ts << 32) | (entry)->bottom_delta_ts) \
|
|
|
|
FTRACE_ENTRY(func_repeats, func_repeats_entry,
|
|
|
|
TRACE_FUNC_REPEATS,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( unsigned long, parent_ip )
|
|
__field( u16 , count )
|
|
__field( u16 , top_delta_ts )
|
|
__field( u32 , bottom_delta_ts )
|
|
),
|
|
|
|
F_printk(" %ps <-%ps\t(repeats:%u delta: -%llu)",
|
|
(void *)__entry->ip,
|
|
(void *)__entry->parent_ip,
|
|
__entry->count,
|
|
FUNC_REPEATS_GET_DELTA_TS(__entry))
|
|
);
|
|
|
|
FTRACE_ENTRY(osnoise, osnoise_entry,
|
|
|
|
TRACE_OSNOISE,
|
|
|
|
F_STRUCT(
|
|
__field( u64, noise )
|
|
__field( u64, runtime )
|
|
__field( u64, max_sample )
|
|
__field( unsigned int, hw_count )
|
|
__field( unsigned int, nmi_count )
|
|
__field( unsigned int, irq_count )
|
|
__field( unsigned int, softirq_count )
|
|
__field( unsigned int, thread_count )
|
|
),
|
|
|
|
F_printk("noise:%llu\tmax_sample:%llu\thw:%u\tnmi:%u\tirq:%u\tsoftirq:%u\tthread:%u\n",
|
|
__entry->noise,
|
|
__entry->max_sample,
|
|
__entry->hw_count,
|
|
__entry->nmi_count,
|
|
__entry->irq_count,
|
|
__entry->softirq_count,
|
|
__entry->thread_count)
|
|
);
|
|
|
|
FTRACE_ENTRY(timerlat, timerlat_entry,
|
|
|
|
TRACE_TIMERLAT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, seqnum )
|
|
__field( int, context )
|
|
__field( u64, timer_latency )
|
|
),
|
|
|
|
F_printk("seq:%u\tcontext:%d\ttimer_latency:%llu\n",
|
|
__entry->seqnum,
|
|
__entry->context,
|
|
__entry->timer_latency)
|
|
);
|