2018-08-16 18:23:53 +03:00
// SPDX-License-Identifier: GPL-2.0
2008-12-24 07:24:12 +03:00
/*
* trace_output . c
*
* Copyright ( C ) 2008 Red Hat Inc , Steven Rostedt < srostedt @ redhat . com >
*
*/
# include <linux/module.h>
# include <linux/mutex.h>
# include <linux/ftrace.h>
2021-09-14 17:42:40 +03:00
# include <linux/kprobes.h>
2017-02-01 18:36:40 +03:00
# include <linux/sched/clock.h>
2017-02-08 20:51:29 +03:00
# include <linux/sched/mm.h>
2008-12-24 07:24:12 +03:00
# include "trace_output.h"
/* must be a power of 2 */
# define EVENT_HASHSIZE 128
2013-03-11 11:14:03 +04:00
DECLARE_RWSEM ( trace_event_sem ) ;
2009-05-26 22:25:22 +04:00
2008-12-24 07:24:12 +03:00
static struct hlist_head event_hash [ EVENT_HASHSIZE ] __read_mostly ;
2020-07-03 05:06:10 +03:00
static int next_event_type = __TRACE_LAST_TYPE ;
2008-12-24 07:24:12 +03:00
2013-03-09 06:02:34 +04:00
enum print_line_t trace_print_bputs_msg_only ( struct trace_iterator * iter )
{
struct trace_seq * s = & iter - > seq ;
struct trace_entry * entry = iter - > ent ;
struct bputs_entry * field ;
trace_assign_type ( field , entry ) ;
2014-11-12 18:29:54 +03:00
trace_seq_puts ( s , field - > str ) ;
2013-03-09 06:02:34 +04:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2013-03-09 06:02:34 +04:00
}
2009-03-19 19:20:38 +03:00
enum print_line_t trace_print_bprintk_msg_only ( struct trace_iterator * iter )
{
struct trace_seq * s = & iter - > seq ;
struct trace_entry * entry = iter - > ent ;
struct bprint_entry * field ;
trace_assign_type ( field , entry ) ;
2014-11-12 18:29:54 +03:00
trace_seq_bprintf ( s , field - > fmt , field - > buf ) ;
2009-03-19 19:20:38 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2009-03-19 19:20:38 +03:00
}
enum print_line_t trace_print_printk_msg_only ( struct trace_iterator * iter )
{
struct trace_seq * s = & iter - > seq ;
struct trace_entry * entry = iter - > ent ;
struct print_entry * field ;
trace_assign_type ( field , entry ) ;
2014-11-12 18:29:54 +03:00
trace_seq_puts ( s , field - > buf ) ;
2009-03-19 19:20:38 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2009-03-19 19:20:38 +03:00
}
2009-05-26 22:25:22 +04:00
const char *
2015-05-05 01:12:44 +03:00
trace_print_flags_seq ( struct trace_seq * p , const char * delim ,
unsigned long flags ,
const struct trace_print_flags * flag_array )
2009-05-26 22:25:22 +04:00
{
unsigned long mask ;
const char * str ;
2014-06-26 17:42:41 +04:00
const char * ret = trace_seq_buffer_ptr ( p ) ;
2012-02-19 15:16:07 +04:00
int i , first = 1 ;
2009-05-26 22:25:22 +04:00
for ( i = 0 ; flag_array [ i ] . name & & flags ; i + + ) {
mask = flag_array [ i ] . mask ;
if ( ( flags & mask ) ! = mask )
continue ;
str = flag_array [ i ] . name ;
flags & = ~ mask ;
2012-02-19 15:16:07 +04:00
if ( ! first & & delim )
2009-05-26 22:25:22 +04:00
trace_seq_puts ( p , delim ) ;
2012-02-19 15:16:07 +04:00
else
first = 0 ;
2009-05-26 22:25:22 +04:00
trace_seq_puts ( p , str ) ;
}
/* check for left over flags */
if ( flags ) {
2012-02-21 05:37:32 +04:00
if ( ! first & & delim )
2009-05-26 22:25:22 +04:00
trace_seq_puts ( p , delim ) ;
trace_seq_printf ( p , " 0x%lx " , flags ) ;
}
trace_seq_putc ( p , 0 ) ;
2009-06-03 17:52:03 +04:00
return ret ;
2009-05-26 22:25:22 +04:00
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL ( trace_print_flags_seq ) ;
2009-05-26 22:25:22 +04:00
2009-05-21 03:21:47 +04:00
const char *
2015-05-05 01:12:44 +03:00
trace_print_symbols_seq ( struct trace_seq * p , unsigned long val ,
const struct trace_print_flags * symbol_array )
2009-05-21 03:21:47 +04:00
{
int i ;
2014-06-26 17:42:41 +04:00
const char * ret = trace_seq_buffer_ptr ( p ) ;
2009-05-21 03:21:47 +04:00
for ( i = 0 ; symbol_array [ i ] . name ; i + + ) {
if ( val ! = symbol_array [ i ] . mask )
continue ;
trace_seq_puts ( p , symbol_array [ i ] . name ) ;
break ;
}
2014-06-26 17:42:41 +04:00
if ( ret = = ( const char * ) ( trace_seq_buffer_ptr ( p ) ) )
2009-05-21 03:21:47 +04:00
trace_seq_printf ( p , " 0x%lx " , val ) ;
2014-11-24 03:34:19 +03:00
2009-05-21 03:21:47 +04:00
trace_seq_putc ( p , 0 ) ;
2009-06-03 17:52:03 +04:00
return ret ;
2009-05-21 03:21:47 +04:00
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL ( trace_print_symbols_seq ) ;
2009-05-21 03:21:47 +04:00
2011-04-19 05:35:28 +04:00
# if BITS_PER_LONG == 32
2017-02-23 02:39:47 +03:00
const char *
trace_print_flags_seq_u64 ( struct trace_seq * p , const char * delim ,
unsigned long long flags ,
const struct trace_print_flags_u64 * flag_array )
{
unsigned long long mask ;
const char * str ;
const char * ret = trace_seq_buffer_ptr ( p ) ;
int i , first = 1 ;
for ( i = 0 ; flag_array [ i ] . name & & flags ; i + + ) {
mask = flag_array [ i ] . mask ;
if ( ( flags & mask ) ! = mask )
continue ;
str = flag_array [ i ] . name ;
flags & = ~ mask ;
if ( ! first & & delim )
trace_seq_puts ( p , delim ) ;
else
first = 0 ;
trace_seq_puts ( p , str ) ;
}
/* check for left over flags */
if ( flags ) {
if ( ! first & & delim )
trace_seq_puts ( p , delim ) ;
trace_seq_printf ( p , " 0x%llx " , flags ) ;
}
trace_seq_putc ( p , 0 ) ;
return ret ;
}
EXPORT_SYMBOL ( trace_print_flags_seq_u64 ) ;
2011-04-19 05:35:28 +04:00
const char *
2015-05-05 01:12:44 +03:00
trace_print_symbols_seq_u64 ( struct trace_seq * p , unsigned long long val ,
2011-04-19 05:35:28 +04:00
const struct trace_print_flags_u64 * symbol_array )
{
int i ;
2014-06-26 17:42:41 +04:00
const char * ret = trace_seq_buffer_ptr ( p ) ;
2011-04-19 05:35:28 +04:00
for ( i = 0 ; symbol_array [ i ] . name ; i + + ) {
if ( val ! = symbol_array [ i ] . mask )
continue ;
trace_seq_puts ( p , symbol_array [ i ] . name ) ;
break ;
}
2014-06-26 17:42:41 +04:00
if ( ret = = ( const char * ) ( trace_seq_buffer_ptr ( p ) ) )
2011-04-19 05:35:28 +04:00
trace_seq_printf ( p , " 0x%llx " , val ) ;
trace_seq_putc ( p , 0 ) ;
return ret ;
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL ( trace_print_symbols_seq_u64 ) ;
2011-04-19 05:35:28 +04:00
# endif
tracing: Add __bitmask() macro to trace events to cpumasks and other bitmasks
Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.
The following macros were added for use with the TRACE_EVENT() macro:
__bitmask()
__assign_bitmask()
__get_bitmask()
To test this, I added this to the sched_migrate_task event, which
looked like this:
TRACE_EVENT(sched_migrate_task,
TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),
TP_ARGS(p, dest_cpu, cpus),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, orig_cpu )
__field( int, dest_cpu )
__bitmask( cpumask, num_possible_cpus() )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
),
TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
__entry->comm, __entry->pid, __entry->prio,
__entry->orig_cpu, __entry->dest_cpu,
__get_bitmask(cpumask))
);
With the output of:
ksmtuned-3613 [003] d..2 485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
migration/1-13 [001] d..5 485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
awk-3615 [002] d.H5 485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
migration/2-18 [002] d..5 485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f
Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home
Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2014-05-06 21:10:24 +04:00
const char *
2015-05-05 01:12:44 +03:00
trace_print_bitmask_seq ( struct trace_seq * p , void * bitmask_ptr ,
unsigned int bitmask_size )
tracing: Add __bitmask() macro to trace events to cpumasks and other bitmasks
Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.
The following macros were added for use with the TRACE_EVENT() macro:
__bitmask()
__assign_bitmask()
__get_bitmask()
To test this, I added this to the sched_migrate_task event, which
looked like this:
TRACE_EVENT(sched_migrate_task,
TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),
TP_ARGS(p, dest_cpu, cpus),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, orig_cpu )
__field( int, dest_cpu )
__bitmask( cpumask, num_possible_cpus() )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
),
TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
__entry->comm, __entry->pid, __entry->prio,
__entry->orig_cpu, __entry->dest_cpu,
__get_bitmask(cpumask))
);
With the output of:
ksmtuned-3613 [003] d..2 485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
migration/1-13 [001] d..5 485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
awk-3615 [002] d.H5 485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
migration/2-18 [002] d..5 485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f
Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home
Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2014-05-06 21:10:24 +04:00
{
2014-06-26 17:42:41 +04:00
const char * ret = trace_seq_buffer_ptr ( p ) ;
tracing: Add __bitmask() macro to trace events to cpumasks and other bitmasks
Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.
The following macros were added for use with the TRACE_EVENT() macro:
__bitmask()
__assign_bitmask()
__get_bitmask()
To test this, I added this to the sched_migrate_task event, which
looked like this:
TRACE_EVENT(sched_migrate_task,
TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),
TP_ARGS(p, dest_cpu, cpus),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, orig_cpu )
__field( int, dest_cpu )
__bitmask( cpumask, num_possible_cpus() )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
),
TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
__entry->comm, __entry->pid, __entry->prio,
__entry->orig_cpu, __entry->dest_cpu,
__get_bitmask(cpumask))
);
With the output of:
ksmtuned-3613 [003] d..2 485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
migration/1-13 [001] d..5 485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
awk-3615 [002] d.H5 485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
migration/2-18 [002] d..5 485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f
Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home
Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2014-05-06 21:10:24 +04:00
trace_seq_bitmask ( p , bitmask_ptr , bitmask_size * 8 ) ;
trace_seq_putc ( p , 0 ) ;
return ret ;
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL_GPL ( trace_print_bitmask_seq ) ;
tracing: Add __bitmask() macro to trace events to cpumasks and other bitmasks
Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.
The following macros were added for use with the TRACE_EVENT() macro:
__bitmask()
__assign_bitmask()
__get_bitmask()
To test this, I added this to the sched_migrate_task event, which
looked like this:
TRACE_EVENT(sched_migrate_task,
TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),
TP_ARGS(p, dest_cpu, cpus),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, orig_cpu )
__field( int, dest_cpu )
__bitmask( cpumask, num_possible_cpus() )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
),
TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
__entry->comm, __entry->pid, __entry->prio,
__entry->orig_cpu, __entry->dest_cpu,
__get_bitmask(cpumask))
);
With the output of:
ksmtuned-3613 [003] d..2 485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
migration/1-13 [001] d..5 485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
awk-3615 [002] d.H5 485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
migration/2-18 [002] d..5 485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f
Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home
Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2014-05-06 21:10:24 +04:00
2017-02-02 19:09:54 +03:00
/**
* trace_print_hex_seq - print buffer as hex sequence
* @ p : trace seq struct to write to
* @ buf : The buffer to print
* @ buf_len : Length of @ buf in bytes
* @ concatenate : Print @ buf as single hex string or with spacing
*
* Prints the passed buffer as a hex sequence either as a whole ,
* single hex string if @ concatenate is true or with spacing after
* each byte in case @ concatenate is false .
*/
2010-04-01 15:40:58 +04:00
const char *
2017-01-25 04:28:16 +03:00
trace_print_hex_seq ( struct trace_seq * p , const unsigned char * buf , int buf_len ,
2017-02-02 19:09:54 +03:00
bool concatenate )
2010-04-01 15:40:58 +04:00
{
int i ;
2014-06-26 17:42:41 +04:00
const char * ret = trace_seq_buffer_ptr ( p ) ;
2019-08-06 18:15:43 +03:00
const char * fmt = concatenate ? " %*phN " : " %*ph " ;
2010-04-01 15:40:58 +04:00
2019-08-06 18:15:43 +03:00
for ( i = 0 ; i < buf_len ; i + = 16 )
trace_seq_printf ( p , fmt , min ( buf_len - i , 16 ) , & buf [ i ] ) ;
2010-04-01 15:40:58 +04:00
trace_seq_putc ( p , 0 ) ;
return ret ;
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL ( trace_print_hex_seq ) ;
2010-04-01 15:40:58 +04:00
2015-01-28 15:48:53 +03:00
const char *
2015-05-05 01:12:44 +03:00
trace_print_array_seq ( struct trace_seq * p , const void * buf , int count ,
size_t el_size )
2015-01-28 15:48:53 +03:00
{
const char * ret = trace_seq_buffer_ptr ( p ) ;
const char * prefix = " " ;
void * ptr = ( void * ) buf ;
2015-04-29 18:18:46 +03:00
size_t buf_len = count * el_size ;
2015-01-28 15:48:53 +03:00
trace_seq_putc ( p , ' { ' ) ;
while ( ptr < buf + buf_len ) {
switch ( el_size ) {
case 1 :
trace_seq_printf ( p , " %s0x%x " , prefix ,
* ( u8 * ) ptr ) ;
break ;
case 2 :
trace_seq_printf ( p , " %s0x%x " , prefix ,
* ( u16 * ) ptr ) ;
break ;
case 4 :
trace_seq_printf ( p , " %s0x%x " , prefix ,
* ( u32 * ) ptr ) ;
break ;
case 8 :
trace_seq_printf ( p , " %s0x%llx " , prefix ,
* ( u64 * ) ptr ) ;
break ;
default :
trace_seq_printf ( p , " BAD SIZE:%zu 0x%x " , el_size ,
* ( u8 * ) ptr ) ;
el_size = 1 ;
}
prefix = " , " ;
ptr + = el_size ;
}
trace_seq_putc ( p , ' } ' ) ;
trace_seq_putc ( p , 0 ) ;
return ret ;
}
2015-05-05 01:12:44 +03:00
EXPORT_SYMBOL ( trace_print_array_seq ) ;
2015-01-28 15:48:53 +03:00
2019-11-07 15:45:38 +03:00
const char *
trace_print_hex_dump_seq ( struct trace_seq * p , const char * prefix_str ,
int prefix_type , int rowsize , int groupsize ,
const void * buf , size_t len , bool ascii )
{
const char * ret = trace_seq_buffer_ptr ( p ) ;
trace_seq_putc ( p , ' \n ' ) ;
trace_seq_hex_dump ( p , prefix_str , prefix_type ,
rowsize , groupsize , buf , len , ascii ) ;
trace_seq_putc ( p , 0 ) ;
return ret ;
}
EXPORT_SYMBOL ( trace_print_hex_dump_seq ) ;
2015-05-05 21:18:11 +03:00
int trace_raw_output_prep ( struct trace_iterator * iter ,
struct trace_event * trace_event )
2013-02-21 06:32:38 +04:00
{
2015-05-05 18:45:27 +03:00
struct trace_event_call * event ;
2013-02-21 06:32:38 +04:00
struct trace_seq * s = & iter - > seq ;
struct trace_seq * p = & iter - > tmp_seq ;
struct trace_entry * entry ;
2015-05-05 18:45:27 +03:00
event = container_of ( trace_event , struct trace_event_call , event ) ;
2013-02-21 06:32:38 +04:00
entry = iter - > ent ;
if ( entry - > type ! = event - > event . type ) {
WARN_ON_ONCE ( 1 ) ;
return TRACE_TYPE_UNHANDLED ;
}
trace_seq_init ( p ) ;
2015-05-13 21:20:14 +03:00
trace_seq_printf ( s , " %s: " , trace_event_name ( event ) ) ;
2014-11-12 18:29:54 +03:00
2014-11-14 19:42:06 +03:00
return trace_handle_return ( s ) ;
2013-02-21 06:32:38 +04:00
}
2015-05-05 21:18:11 +03:00
EXPORT_SYMBOL ( trace_raw_output_prep ) ;
2013-02-21 06:32:38 +04:00
2020-10-15 17:55:07 +03:00
void trace_event_printf ( struct trace_iterator * iter , const char * fmt , . . . )
{
va_list ap ;
va_start ( ap , fmt ) ;
tracing: Add a verifier to check string pointers for trace events
It is a common mistake for someone writing a trace event to save a pointer
to a string in the TP_fast_assign() and then display that string pointer
in the TP_printk() with %s. The problem is that those two events may happen
a long time apart, where the source of the string may no longer exist.
The proper way to handle displaying any string that is not guaranteed to be
in the kernel core rodata section, is to copy it into the ring buffer via
the __string(), __assign_str() and __get_str() helper macros.
Add a check at run time while displaying the TP_printk() of events to make
sure that every %s referenced is safe to dereference, and if it is not,
trigger a warning and only show the address of the pointer, and the
dereferenced string if it can be safely retrieved with a
strncpy_from_kernel_nofault() call.
In order to not have to copy the parsing of vsnprintf() formats, or even
exporting its code, the verifier relies on vsnprintf() being able to
modify the va_list that is passed to it, and it remains modified after it
is called. This is the case for some architectures like x86_64, but other
architectures like x86_32 pass the va_list to vsnprintf() as a value not a
reference, and the verifier can not use it to parse the non string
arguments. Thus, at boot up, it is checked if vsnprintf() modifies the
passed in va_list or not, and a static branch will disable the verifier if
it's not compatible.
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2021-02-26 06:00:57 +03:00
trace_check_vprintf ( iter , trace_event_format ( iter , fmt ) , ap ) ;
2020-10-15 17:55:07 +03:00
va_end ( ap ) ;
}
EXPORT_SYMBOL ( trace_event_printf ) ;
2015-05-05 21:18:11 +03:00
static int trace_output_raw ( struct trace_iterator * iter , char * name ,
char * fmt , va_list ap )
2012-08-10 03:16:14 +04:00
{
struct trace_seq * s = & iter - > seq ;
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " %s: " , name ) ;
2020-10-15 17:55:07 +03:00
trace_seq_vprintf ( s , trace_event_format ( iter , fmt ) , ap ) ;
2012-08-10 03:16:14 +04:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2012-08-10 03:16:14 +04:00
}
2015-05-05 21:18:11 +03:00
int trace_output_call ( struct trace_iterator * iter , char * name , char * fmt , . . . )
2012-08-10 03:16:14 +04:00
{
va_list ap ;
int ret ;
va_start ( ap , fmt ) ;
2015-05-05 21:18:11 +03:00
ret = trace_output_raw ( iter , name , fmt , ap ) ;
2012-08-10 03:16:14 +04:00
va_end ( ap ) ;
return ret ;
}
2015-05-05 21:18:11 +03:00
EXPORT_SYMBOL_GPL ( trace_output_call ) ;
2012-08-10 03:16:14 +04:00
2021-09-14 17:42:40 +03:00
static inline const char * kretprobed ( const char * name , unsigned long addr )
2008-12-24 07:24:12 +03:00
{
2021-09-14 17:42:40 +03:00
if ( is_kretprobe_trampoline ( addr ) )
2008-12-24 07:24:12 +03:00
return " [unknown/kretprobe'd] " ;
return name ;
}
2020-11-06 05:32:46 +03:00
void
trace_seq_print_sym ( struct trace_seq * s , unsigned long address , bool offset )
2008-12-24 07:24:12 +03:00
{
2017-06-23 00:04:55 +03:00
# ifdef CONFIG_KALLSYMS
2018-10-30 01:35:42 +03:00
char str [ KSYM_SYMBOL_LEN ] ;
2008-12-24 07:24:12 +03:00
const char * name ;
2018-10-30 01:35:40 +03:00
if ( offset )
sprint_symbol ( str , address ) ;
else
kallsyms_lookup ( address , NULL , NULL , NULL , str ) ;
2021-09-14 17:42:40 +03:00
name = kretprobed ( str , address ) ;
2008-12-24 07:24:12 +03:00
2017-06-23 00:04:55 +03:00
if ( name & & strlen ( name ) ) {
2018-10-30 01:35:42 +03:00
trace_seq_puts ( s , name ) ;
2017-06-23 00:04:55 +03:00
return ;
}
2008-12-24 07:24:12 +03:00
# endif
2018-10-30 01:35:42 +03:00
trace_seq_printf ( s , " 0x%08lx " , address ) ;
2008-12-24 07:24:12 +03:00
}
# ifndef CONFIG_64BIT
# define IP_FMT "%08lx"
# else
# define IP_FMT "%016lx"
# endif
2015-09-28 17:16:12 +03:00
static int seq_print_user_ip ( struct trace_seq * s , struct mm_struct * mm ,
unsigned long ip , unsigned long sym_flags )
2008-12-24 07:24:12 +03:00
{
struct file * file = NULL ;
unsigned long vmstart = 0 ;
int ret = 1 ;
2009-11-25 18:10:14 +03:00
if ( s - > full )
return 0 ;
2008-12-24 07:24:12 +03:00
if ( mm ) {
const struct vm_area_struct * vma ;
2020-06-09 07:33:25 +03:00
mmap_read_lock ( mm ) ;
2008-12-24 07:24:12 +03:00
vma = find_vma ( mm , ip ) ;
if ( vma ) {
file = vma - > vm_file ;
vmstart = vma - > vm_start ;
}
if ( file ) {
ret = trace_seq_path ( s , & file - > f_path ) ;
if ( ret )
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " [+0x%lx] " ,
ip - vmstart ) ;
2008-12-24 07:24:12 +03:00
}
2020-06-09 07:33:25 +03:00
mmap_read_unlock ( mm ) ;
2008-12-24 07:24:12 +03:00
}
if ( ret & & ( ( sym_flags & TRACE_ITER_SYM_ADDR ) | | ! file ) )
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " < " IP_FMT " > " , ip ) ;
return ! trace_seq_has_overflowed ( s ) ;
2008-12-24 07:24:12 +03:00
}
int
seq_print_ip_sym ( struct trace_seq * s , unsigned long ip , unsigned long sym_flags )
{
2014-11-12 18:29:54 +03:00
if ( ! ip ) {
trace_seq_putc ( s , ' 0 ' ) ;
goto out ;
}
2008-12-24 07:24:12 +03:00
2020-11-06 05:32:46 +03:00
trace_seq_print_sym ( s , ip , sym_flags & TRACE_ITER_SYM_OFFSET ) ;
2008-12-24 07:24:12 +03:00
if ( sym_flags & TRACE_ITER_SYM_ADDR )
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " < " IP_FMT " > " , ip ) ;
out :
return ! trace_seq_has_overflowed ( s ) ;
2008-12-24 07:24:12 +03:00
}
2009-09-11 22:24:13 +04:00
/**
* trace_print_lat_fmt - print the irq , preempt and lockdep fields
* @ s : trace seq struct to write to
* @ entry : The trace entry field from the ring buffer
*
* Prints the generic fields of irqs off , in hard or softirq , preempt
2011-03-09 18:41:56 +03:00
* count .
2009-09-11 22:24:13 +04:00
*/
int trace_print_lat_fmt ( struct trace_seq * s , struct trace_entry * entry )
2009-02-03 01:29:21 +03:00
{
2010-12-04 03:13:26 +03:00
char hardsoft_irq ;
char need_resched ;
char irqs_off ;
int hardirq ;
int softirq ;
2021-12-13 13:08:53 +03:00
int bh_off ;
2016-03-18 18:28:04 +03:00
int nmi ;
2009-02-03 01:29:21 +03:00
2016-03-18 18:28:04 +03:00
nmi = entry - > flags & TRACE_FLAG_NMI ;
2009-02-03 01:29:21 +03:00
hardirq = entry - > flags & TRACE_FLAG_HARDIRQ ;
softirq = entry - > flags & TRACE_FLAG_SOFTIRQ ;
2021-12-13 13:08:53 +03:00
bh_off = entry - > flags & TRACE_FLAG_BH_OFF ;
2009-02-04 01:20:41 +03:00
2010-12-04 03:13:26 +03:00
irqs_off =
2021-12-13 13:08:53 +03:00
( entry - > flags & TRACE_FLAG_IRQS_OFF & & bh_off ) ? ' D ' :
2010-12-04 03:13:26 +03:00
( entry - > flags & TRACE_FLAG_IRQS_OFF ) ? ' d ' :
2021-12-13 13:08:53 +03:00
bh_off ? ' b ' :
2010-12-04 03:13:26 +03:00
( entry - > flags & TRACE_FLAG_IRQS_NOSUPPORT ) ? ' X ' :
' . ' ;
2013-10-04 19:28:26 +04:00
switch ( entry - > flags & ( TRACE_FLAG_NEED_RESCHED |
TRACE_FLAG_PREEMPT_RESCHED ) ) {
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED :
need_resched = ' N ' ;
break ;
case TRACE_FLAG_NEED_RESCHED :
need_resched = ' n ' ;
break ;
case TRACE_FLAG_PREEMPT_RESCHED :
need_resched = ' p ' ;
break ;
default :
need_resched = ' . ' ;
break ;
}
2010-12-04 03:13:26 +03:00
hardsoft_irq =
2016-03-18 18:28:04 +03:00
( nmi & & hardirq ) ? ' Z ' :
nmi ? ' z ' :
2010-12-04 03:13:26 +03:00
( hardirq & & softirq ) ? ' H ' :
2016-03-18 18:28:04 +03:00
hardirq ? ' h ' :
softirq ? ' s ' :
' . ' ;
2010-12-04 03:13:26 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " %c%c%c " ,
irqs_off , need_resched , hardsoft_irq ) ;
2009-02-03 01:29:21 +03:00
2021-08-10 16:26:25 +03:00
if ( entry - > preempt_count & 0xf )
trace_seq_printf ( s , " %x " , entry - > preempt_count & 0xf ) ;
else
trace_seq_putc ( s , ' . ' ) ;
if ( entry - > preempt_count & 0xf0 )
trace_seq_printf ( s , " %x " , entry - > preempt_count > > 4 ) ;
2009-09-11 21:55:35 +04:00
else
2014-11-12 18:29:54 +03:00
trace_seq_putc ( s , ' . ' ) ;
2009-09-27 15:02:07 +04:00
2014-11-12 18:29:54 +03:00
return ! trace_seq_has_overflowed ( s ) ;
2009-02-03 01:29:21 +03:00
}
2009-09-11 22:24:13 +04:00
static int
lat_print_generic ( struct trace_seq * s , struct trace_entry * entry , int cpu )
{
char comm [ TASK_COMM_LEN ] ;
trace_find_cmdline ( entry - > pid , comm ) ;
2020-09-04 11:23:31 +03:00
trace_seq_printf ( s , " %8.8s-%-7d %3d " ,
2014-11-12 18:29:54 +03:00
comm , entry - > pid , cpu ) ;
2009-09-11 22:24:13 +04:00
return trace_print_lat_fmt ( s , entry ) ;
}
2014-11-24 03:34:19 +03:00
# undef MARK
# define MARK(v, s) {.val = v, .sym = s}
/* trace overhead mark */
static const struct trace_mark {
unsigned long long val ; /* unit: nsec */
char sym ;
} mark [ ] = {
MARK ( 1000000000ULL , ' $ ' ) , /* 1 sec */
2015-07-11 17:51:40 +03:00
MARK ( 100000000ULL , ' @ ' ) , /* 100 msec */
MARK ( 10000000ULL , ' * ' ) , /* 10 msec */
2014-11-24 03:34:19 +03:00
MARK ( 1000000ULL , ' # ' ) , /* 1000 usecs */
MARK ( 100000ULL , ' ! ' ) , /* 100 usecs */
MARK ( 10000ULL , ' + ' ) , /* 10 usecs */
} ;
# undef MARK
char trace_find_mark ( unsigned long long d )
{
int i ;
int size = ARRAY_SIZE ( mark ) ;
for ( i = 0 ; i < size ; i + + ) {
2015-07-11 17:51:40 +03:00
if ( d > mark [ i ] . val )
2014-11-24 03:34:19 +03:00
break ;
}
return ( i = = size ) ? ' ' : mark [ i ] . sym ;
}
2009-02-03 01:29:21 +03:00
2009-02-04 01:20:41 +03:00
static int
2012-11-14 00:18:22 +04:00
lat_print_timestamp ( struct trace_iterator * iter , u64 next_ts )
2009-02-03 01:29:21 +03:00
{
2015-09-30 16:42:05 +03:00
struct trace_array * tr = iter - > tr ;
unsigned long verbose = tr - > trace_flags & TRACE_ITER_VERBOSE ;
2012-11-14 00:18:22 +04:00
unsigned long in_ns = iter - > iter_flags & TRACE_FILE_TIME_IN_NS ;
2020-01-10 02:53:48 +03:00
unsigned long long abs_ts = iter - > ts - iter - > array_buffer - > time_start ;
2012-11-14 00:18:22 +04:00
unsigned long long rel_ts = next_ts - iter - > ts ;
struct trace_seq * s = & iter - > seq ;
if ( in_ns ) {
abs_ts = ns2usecs ( abs_ts ) ;
rel_ts = ns2usecs ( rel_ts ) ;
}
if ( verbose & & in_ns ) {
unsigned long abs_usec = do_div ( abs_ts , USEC_PER_MSEC ) ;
unsigned long abs_msec = ( unsigned long ) abs_ts ;
unsigned long rel_usec = do_div ( rel_ts , USEC_PER_MSEC ) ;
unsigned long rel_msec = ( unsigned long ) rel_ts ;
2014-11-12 18:29:54 +03:00
trace_seq_printf (
s , " [%08llx] %ld.%03ldms (+%ld.%03ldms): " ,
ns2usecs ( iter - > ts ) ,
abs_msec , abs_usec ,
rel_msec , rel_usec ) ;
2012-11-14 00:18:22 +04:00
} else if ( verbose & & ! in_ns ) {
2014-11-12 18:29:54 +03:00
trace_seq_printf (
s , " [%016llx] %lld (+%lld): " ,
iter - > ts , abs_ts , rel_ts ) ;
2012-11-14 00:18:22 +04:00
} else if ( ! verbose & & in_ns ) {
2014-11-12 18:29:54 +03:00
trace_seq_printf (
s , " %4lldus%c: " ,
abs_ts ,
2014-11-24 03:34:19 +03:00
trace_find_mark ( rel_ts * NSEC_PER_USEC ) ) ;
2014-11-12 18:29:54 +03:00
2012-11-14 00:18:22 +04:00
} else { /* !verbose && !in_ns */
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " %4lld: " , abs_ts ) ;
2012-11-14 00:18:22 +04:00
}
2014-11-12 18:29:54 +03:00
return ! trace_seq_has_overflowed ( s ) ;
2009-02-03 01:29:21 +03:00
}
2021-04-15 21:18:49 +03:00
static void trace_print_time ( struct trace_seq * s , struct trace_iterator * iter ,
unsigned long long ts )
{
unsigned long secs , usec_rem ;
unsigned long long t ;
if ( iter - > iter_flags & TRACE_FILE_TIME_IN_NS ) {
t = ns2usecs ( ts ) ;
usec_rem = do_div ( t , USEC_PER_SEC ) ;
secs = ( unsigned long ) t ;
trace_seq_printf ( s , " %5lu.%06lu " , secs , usec_rem ) ;
} else
trace_seq_printf ( s , " %12llu " , ts ) ;
}
2009-02-03 01:29:21 +03:00
int trace_print_context ( struct trace_iterator * iter )
{
2015-09-30 16:42:05 +03:00
struct trace_array * tr = iter - > tr ;
2009-02-03 01:29:21 +03:00
struct trace_seq * s = & iter - > seq ;
struct trace_entry * entry = iter - > ent ;
2009-03-17 02:20:15 +03:00
char comm [ TASK_COMM_LEN ] ;
trace_find_cmdline ( entry - > pid , comm ) ;
2009-02-03 01:29:21 +03:00
2020-09-04 11:23:31 +03:00
trace_seq_printf ( s , " %16s-%-7d " , comm , entry - > pid ) ;
tracing: Add irq, preempt-count and need resched info to default trace output
People keep asking how to get the preempt count, irq, and need resched info
and we keep telling them to enable the latency format. Some developers think
that traces without this info is completely useless, and for a lot of tasks
it is useless.
The first option was to enable the latency trace as the default format, but
the header for the latency format is pretty useless for most tracers and
it also does the timestamp in straight microseconds from the time the trace
started. This is sometimes more difficult to read as the default trace is
seconds from the start of boot up.
Latency format:
# tracer: nop
#
# nop latency trace v1.1.5 on 3.2.0-rc1-test+
# --------------------------------------------------------------------
# latency: 0 us, #159771/64234230, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
# -----------------
# | task: -0 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / delay
# cmd pid ||||| time | caller
# \ / ||||| \ | /
migratio-6 0...2 41778231us+: rcu_note_context_switch <-__schedule
migratio-6 0...2 41778233us : trace_rcu_utilization <-rcu_note_context_switch
migratio-6 0...2 41778235us+: rcu_sched_qs <-rcu_note_context_switch
migratio-6 0d..2 41778236us+: rcu_preempt_qs <-rcu_note_context_switch
migratio-6 0...2 41778238us : trace_rcu_utilization <-rcu_note_context_switch
migratio-6 0...2 41778239us+: debug_lockdep_rcu_enabled <-__schedule
default format:
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
migration/0-6 [000] 50.025810: rcu_note_context_switch <-__schedule
migration/0-6 [000] 50.025812: trace_rcu_utilization <-rcu_note_context_switch
migration/0-6 [000] 50.025813: rcu_sched_qs <-rcu_note_context_switch
migration/0-6 [000] 50.025815: rcu_preempt_qs <-rcu_note_context_switch
migration/0-6 [000] 50.025817: trace_rcu_utilization <-rcu_note_context_switch
migration/0-6 [000] 50.025818: debug_lockdep_rcu_enabled <-__schedule
migration/0-6 [000] 50.025820: debug_lockdep_rcu_enabled <-__schedule
The latency format header has latency information that is pretty meaningless
for most tracers. Although some of the header is useful, and we can add that
later to the default format as well.
What is really useful with the latency format is the irqs-off, need-resched
hard/softirq context and the preempt count.
This commit adds the option irq-info which is on by default that adds this
information:
# tracer: nop
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
<idle>-0 [000] d..2 49.309305: cpuidle_get_driver <-cpuidle_idle_call
<idle>-0 [000] d..2 49.309307: mwait_idle <-cpu_idle
<idle>-0 [000] d..2 49.309309: need_resched <-mwait_idle
<idle>-0 [000] d..2 49.309310: test_ti_thread_flag <-need_resched
<idle>-0 [000] d..2 49.309312: trace_power_start.constprop.13 <-mwait_idle
<idle>-0 [000] d..2 49.309313: trace_cpu_idle <-mwait_idle
<idle>-0 [000] d..2 49.309315: need_resched <-mwait_idle
If a user wants the old format, they can disable the 'irq-info' option:
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
<idle>-0 [000] 49.309305: cpuidle_get_driver <-cpuidle_idle_call
<idle>-0 [000] 49.309307: mwait_idle <-cpu_idle
<idle>-0 [000] 49.309309: need_resched <-mwait_idle
<idle>-0 [000] 49.309310: test_ti_thread_flag <-need_resched
<idle>-0 [000] 49.309312: trace_power_start.constprop.13 <-mwait_idle
<idle>-0 [000] 49.309313: trace_cpu_idle <-mwait_idle
<idle>-0 [000] 49.309315: need_resched <-mwait_idle
Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2011-11-17 18:34:33 +04:00
2017-06-26 08:38:43 +03:00
if ( tr - > trace_flags & TRACE_ITER_RECORD_TGID ) {
unsigned int tgid = trace_find_tgid ( entry - > pid ) ;
if ( ! tgid )
2020-09-04 11:23:31 +03:00
trace_seq_printf ( s , " (-------) " ) ;
2017-06-26 08:38:43 +03:00
else
2020-09-04 11:23:31 +03:00
trace_seq_printf ( s , " (%7d) " , tgid ) ;
2017-06-26 08:38:43 +03:00
}
2018-06-26 03:08:22 +03:00
trace_seq_printf ( s , " [%03d] " , iter - > cpu ) ;
2015-09-30 16:42:05 +03:00
if ( tr - > trace_flags & TRACE_ITER_IRQ_INFO )
2014-11-12 18:29:54 +03:00
trace_print_lat_fmt ( s , entry ) ;
tracing: Add irq, preempt-count and need resched info to default trace output
People keep asking how to get the preempt count, irq, and need resched info
and we keep telling them to enable the latency format. Some developers think
that traces without this info is completely useless, and for a lot of tasks
it is useless.
The first option was to enable the latency trace as the default format, but
the header for the latency format is pretty useless for most tracers and
it also does the timestamp in straight microseconds from the time the trace
started. This is sometimes more difficult to read as the default trace is
seconds from the start of boot up.
Latency format:
# tracer: nop
#
# nop latency trace v1.1.5 on 3.2.0-rc1-test+
# --------------------------------------------------------------------
# latency: 0 us, #159771/64234230, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
# -----------------
# | task: -0 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / delay
# cmd pid ||||| time | caller
# \ / ||||| \ | /
migratio-6 0...2 41778231us+: rcu_note_context_switch <-__schedule
migratio-6 0...2 41778233us : trace_rcu_utilization <-rcu_note_context_switch
migratio-6 0...2 41778235us+: rcu_sched_qs <-rcu_note_context_switch
migratio-6 0d..2 41778236us+: rcu_preempt_qs <-rcu_note_context_switch
migratio-6 0...2 41778238us : trace_rcu_utilization <-rcu_note_context_switch
migratio-6 0...2 41778239us+: debug_lockdep_rcu_enabled <-__schedule
default format:
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
migration/0-6 [000] 50.025810: rcu_note_context_switch <-__schedule
migration/0-6 [000] 50.025812: trace_rcu_utilization <-rcu_note_context_switch
migration/0-6 [000] 50.025813: rcu_sched_qs <-rcu_note_context_switch
migration/0-6 [000] 50.025815: rcu_preempt_qs <-rcu_note_context_switch
migration/0-6 [000] 50.025817: trace_rcu_utilization <-rcu_note_context_switch
migration/0-6 [000] 50.025818: debug_lockdep_rcu_enabled <-__schedule
migration/0-6 [000] 50.025820: debug_lockdep_rcu_enabled <-__schedule
The latency format header has latency information that is pretty meaningless
for most tracers. Although some of the header is useful, and we can add that
later to the default format as well.
What is really useful with the latency format is the irqs-off, need-resched
hard/softirq context and the preempt count.
This commit adds the option irq-info which is on by default that adds this
information:
# tracer: nop
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
<idle>-0 [000] d..2 49.309305: cpuidle_get_driver <-cpuidle_idle_call
<idle>-0 [000] d..2 49.309307: mwait_idle <-cpu_idle
<idle>-0 [000] d..2 49.309309: need_resched <-mwait_idle
<idle>-0 [000] d..2 49.309310: test_ti_thread_flag <-need_resched
<idle>-0 [000] d..2 49.309312: trace_power_start.constprop.13 <-mwait_idle
<idle>-0 [000] d..2 49.309313: trace_cpu_idle <-mwait_idle
<idle>-0 [000] d..2 49.309315: need_resched <-mwait_idle
If a user wants the old format, they can disable the 'irq-info' option:
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
<idle>-0 [000] 49.309305: cpuidle_get_driver <-cpuidle_idle_call
<idle>-0 [000] 49.309307: mwait_idle <-cpu_idle
<idle>-0 [000] 49.309309: need_resched <-mwait_idle
<idle>-0 [000] 49.309310: test_ti_thread_flag <-need_resched
<idle>-0 [000] 49.309312: trace_power_start.constprop.13 <-mwait_idle
<idle>-0 [000] 49.309313: trace_cpu_idle <-mwait_idle
<idle>-0 [000] 49.309315: need_resched <-mwait_idle
Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2011-11-17 18:34:33 +04:00
2021-04-15 21:18:49 +03:00
trace_print_time ( s , iter , iter - > ts ) ;
trace_seq_puts ( s , " : " ) ;
2014-11-12 18:29:54 +03:00
return ! trace_seq_has_overflowed ( s ) ;
2009-02-03 01:29:21 +03:00
}
int trace_print_lat_context ( struct trace_iterator * iter )
{
2020-03-18 00:32:23 +03:00
struct trace_entry * entry , * next_entry ;
2015-09-30 16:42:05 +03:00
struct trace_array * tr = iter - > tr ;
2009-02-03 01:29:21 +03:00
struct trace_seq * s = & iter - > seq ;
2015-09-30 16:42:05 +03:00
unsigned long verbose = ( tr - > trace_flags & TRACE_ITER_VERBOSE ) ;
2020-03-18 00:32:23 +03:00
u64 next_ts ;
2009-02-03 01:29:21 +03:00
2020-03-18 00:32:23 +03:00
next_entry = trace_find_next_entry ( iter , NULL , & next_ts ) ;
2009-02-03 01:29:21 +03:00
if ( ! next_entry )
next_ts = iter - > ts ;
2020-03-18 00:32:23 +03:00
/* trace_find_next_entry() may change iter->ent */
entry = iter - > ent ;
2009-02-03 01:29:21 +03:00
if ( verbose ) {
2009-03-17 02:20:15 +03:00
char comm [ TASK_COMM_LEN ] ;
trace_find_cmdline ( entry - > pid , comm ) ;
2014-11-12 18:29:54 +03:00
trace_seq_printf (
2020-09-04 11:23:31 +03:00
s , " %16s %7d %3d %d %08x %08lx " ,
2014-11-12 18:29:54 +03:00
comm , entry - > pid , iter - > cpu , entry - > flags ,
2021-08-10 16:26:25 +03:00
entry - > preempt_count & 0xf , iter - > idx ) ;
2009-02-03 01:29:21 +03:00
} else {
2014-11-12 18:29:54 +03:00
lat_print_generic ( s , entry , iter - > cpu ) ;
2009-02-03 01:29:21 +03:00
}
2014-11-12 18:29:54 +03:00
lat_print_timestamp ( iter , next_ts ) ;
2012-11-14 00:18:22 +04:00
2014-11-12 18:29:54 +03:00
return ! trace_seq_has_overflowed ( s ) ;
2009-02-03 01:29:21 +03:00
}
2008-12-24 07:24:12 +03:00
/**
* ftrace_find_event - find a registered event
* @ type : the type of event to look for
*
* Returns an event of type @ type otherwise NULL
2009-05-18 15:35:34 +04:00
* Called with trace_event_read_lock ( ) held .
2008-12-24 07:24:12 +03:00
*/
struct trace_event * ftrace_find_event ( int type )
{
struct trace_event * event ;
unsigned key ;
key = type & ( EVENT_HASHSIZE - 1 ) ;
hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
they don't really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small amount of places were using the 'node' parameter, this
was modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch which is mostly the work of Peter Senna Tschudin is here:
@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 05:06:00 +04:00
hlist_for_each_entry ( event , & event_hash [ key ] , node ) {
2008-12-24 07:24:12 +03:00
if ( event - > type = = type )
return event ;
}
return NULL ;
}
2009-04-24 20:20:52 +04:00
static LIST_HEAD ( ftrace_event_list ) ;
static int trace_search_list ( struct list_head * * list )
{
2022-04-27 20:07:31 +03:00
struct trace_event * e = NULL , * iter ;
2020-07-03 05:06:10 +03:00
int next = __TRACE_LAST_TYPE ;
2009-04-24 20:20:52 +04:00
if ( list_empty ( & ftrace_event_list ) ) {
* list = & ftrace_event_list ;
2020-07-03 05:06:09 +03:00
return next ;
2009-04-24 20:20:52 +04:00
}
/*
* We used up all possible max events ,
* lets see if somebody freed one .
*/
2022-04-27 20:07:31 +03:00
list_for_each_entry ( iter , & ftrace_event_list , list ) {
if ( iter - > type ! = next ) {
e = iter ;
2009-04-24 20:20:52 +04:00
break ;
2022-04-27 20:07:31 +03:00
}
2020-07-03 05:06:09 +03:00
next + + ;
2009-04-24 20:20:52 +04:00
}
/* Did we used up all 65 thousand events??? */
2020-07-03 05:06:09 +03:00
if ( next > TRACE_EVENT_TYPE_MAX )
2009-04-24 20:20:52 +04:00
return 0 ;
2022-04-27 20:07:31 +03:00
if ( e )
* list = & e - > list ;
else
* list = & ftrace_event_list ;
2020-07-03 05:06:09 +03:00
return next ;
2009-04-24 20:20:52 +04:00
}
2009-05-18 15:35:34 +04:00
void trace_event_read_lock ( void )
{
2013-03-11 11:14:03 +04:00
down_read ( & trace_event_sem ) ;
2009-05-18 15:35:34 +04:00
}
void trace_event_read_unlock ( void )
{
2013-03-11 11:14:03 +04:00
up_read ( & trace_event_sem ) ;
2009-05-18 15:35:34 +04:00
}
2008-12-24 07:24:12 +03:00
/**
2015-05-05 16:39:12 +03:00
* register_trace_event - register output for an event type
2008-12-24 07:24:12 +03:00
* @ event : the event type to register
*
* Event types are stored in a hash and this hash is used to
* find a way to print an event . If the @ event - > type is set
* then it will use that type , otherwise it will assign a
* type to use .
*
* If you assign your own type , please make sure it is added
* to the trace_type enum in trace . h , to avoid collisions
* with the dynamic types .
*
* Returns the event type number or zero on error .
*/
2015-05-05 16:39:12 +03:00
int register_trace_event ( struct trace_event * event )
2008-12-24 07:24:12 +03:00
{
unsigned key ;
int ret = 0 ;
2013-03-11 11:14:03 +04:00
down_write ( & trace_event_sem ) ;
2008-12-24 07:24:12 +03:00
2009-04-24 20:20:52 +04:00
if ( WARN_ON ( ! event ) )
2009-03-19 22:26:14 +03:00
goto out ;
2010-04-23 02:46:14 +04:00
if ( WARN_ON ( ! event - > funcs ) )
goto out ;
2009-04-24 20:20:52 +04:00
INIT_LIST_HEAD ( & event - > list ) ;
if ( ! event - > type ) {
2009-05-06 14:15:45 +04:00
struct list_head * list = NULL ;
2009-04-24 20:20:52 +04:00
2015-05-13 20:44:36 +03:00
if ( next_event_type > TRACE_EVENT_TYPE_MAX ) {
2009-04-24 20:20:52 +04:00
event - > type = trace_search_list ( & list ) ;
if ( ! event - > type )
goto out ;
} else {
2014-11-24 03:34:19 +03:00
2009-04-24 20:20:52 +04:00
event - > type = next_event_type + + ;
list = & ftrace_event_list ;
}
if ( WARN_ON ( ftrace_find_event ( event - > type ) ) )
goto out ;
list_add_tail ( & event - > list , list ) ;
2022-04-24 16:19:32 +03:00
} else if ( WARN ( event - > type > __TRACE_LAST_TYPE ,
" Need to add type to trace.h " ) ) {
2008-12-24 07:24:12 +03:00
goto out ;
2009-04-24 20:20:52 +04:00
} else {
/* Is this event already used */
if ( ftrace_find_event ( event - > type ) )
goto out ;
}
2008-12-24 07:24:12 +03:00
2010-04-23 02:46:14 +04:00
if ( event - > funcs - > trace = = NULL )
event - > funcs - > trace = trace_nop_print ;
if ( event - > funcs - > raw = = NULL )
event - > funcs - > raw = trace_nop_print ;
if ( event - > funcs - > hex = = NULL )
event - > funcs - > hex = trace_nop_print ;
if ( event - > funcs - > binary = = NULL )
event - > funcs - > binary = trace_nop_print ;
2009-02-05 01:16:39 +03:00
2008-12-24 07:24:12 +03:00
key = event - > type & ( EVENT_HASHSIZE - 1 ) ;
2009-05-18 15:35:34 +04:00
hlist_add_head ( & event - > node , & event_hash [ key ] ) ;
2008-12-24 07:24:12 +03:00
ret = event - > type ;
out :
2013-03-11 11:14:03 +04:00
up_write ( & trace_event_sem ) ;
2008-12-24 07:24:12 +03:00
return ret ;
}
2015-05-05 16:39:12 +03:00
EXPORT_SYMBOL_GPL ( register_trace_event ) ;
2008-12-24 07:24:12 +03:00
2009-06-10 01:29:07 +04:00
/*
2013-03-11 11:14:03 +04:00
* Used by module code with the trace_event_sem held for write .
2009-06-10 01:29:07 +04:00
*/
2015-05-05 16:39:12 +03:00
int __unregister_trace_event ( struct trace_event * event )
2009-06-10 01:29:07 +04:00
{
hlist_del ( & event - > node ) ;
list_del ( & event - > list ) ;
return 0 ;
}
2008-12-24 07:24:12 +03:00
/**
2015-05-05 16:39:12 +03:00
* unregister_trace_event - remove a no longer used event
2008-12-24 07:24:12 +03:00
* @ event : the event to remove
*/
2015-05-05 16:39:12 +03:00
int unregister_trace_event ( struct trace_event * event )
2008-12-24 07:24:12 +03:00
{
2013-03-11 11:14:03 +04:00
down_write ( & trace_event_sem ) ;
2015-05-05 16:39:12 +03:00
__unregister_trace_event ( event ) ;
2013-03-11 11:14:03 +04:00
up_write ( & trace_event_sem ) ;
2008-12-24 07:24:12 +03:00
return 0 ;
}
2015-05-05 16:39:12 +03:00
EXPORT_SYMBOL_GPL ( unregister_trace_event ) ;
2008-12-24 07:24:13 +03:00
/*
* Standard events
*/
2010-04-23 02:46:14 +04:00
enum print_line_t trace_nop_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2014-11-12 18:29:54 +03:00
trace_seq_printf ( & iter - > seq , " type: %d \n " , iter - > ent - > type ) ;
2011-03-25 14:05:18 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( & iter - > seq ) ;
2008-12-24 07:24:13 +03:00
}
2021-04-15 23:34:26 +03:00
static void print_fn_trace ( struct trace_seq * s , unsigned long ip ,
unsigned long parent_ip , int flags )
{
seq_print_ip_sym ( s , ip , flags ) ;
if ( ( flags & TRACE_ITER_PRINT_PARENT ) & & parent_ip ) {
trace_seq_puts ( s , " <- " ) ;
seq_print_ip_sym ( s , parent_ip , flags ) ;
}
}
2008-12-24 07:24:13 +03:00
/* TRACE_FN */
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_fn_trace ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct ftrace_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2021-04-15 23:34:26 +03:00
print_fn_trace ( s , field - > ip , field - > parent_ip , flags ) ;
2014-11-12 18:29:54 +03:00
trace_seq_putc ( s , ' \n ' ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_fn_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct ftrace_entry * field ;
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_printf ( & iter - > seq , " %lx %lx \n " ,
field - > ip ,
field - > parent_ip ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( & iter - > seq ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_fn_hex ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct ftrace_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
SEQ_PUT_HEX_FIELD ( s , field - > ip ) ;
SEQ_PUT_HEX_FIELD ( s , field - > parent_ip ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_fn_bin ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct ftrace_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
SEQ_PUT_FIELD ( s , field - > ip ) ;
SEQ_PUT_FIELD ( s , field - > parent_ip ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_fn_funcs = {
2008-12-24 07:24:13 +03:00
. trace = trace_fn_trace ,
. raw = trace_fn_raw ,
. hex = trace_fn_hex ,
. binary = trace_fn_bin ,
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_fn_event = {
. type = TRACE_FN ,
. funcs = & trace_fn_funcs ,
} ;
2008-12-24 07:24:13 +03:00
/* TRACE_CTX an TRACE_WAKE */
2009-02-04 03:05:50 +03:00
static enum print_line_t trace_ctxwake_print ( struct trace_iterator * iter ,
char * delim )
2008-12-24 07:24:13 +03:00
{
struct ctx_switch_entry * field ;
2009-03-17 02:20:15 +03:00
char comm [ TASK_COMM_LEN ] ;
2008-12-24 07:24:13 +03:00
int S , T ;
2009-03-17 02:20:15 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2017-09-29 14:50:16 +03:00
T = task_index_to_char ( field - > next_state ) ;
S = task_index_to_char ( field - > prev_state ) ;
2009-03-17 02:20:15 +03:00
trace_find_cmdline ( field - > next_pid , comm ) ;
2014-11-12 18:29:54 +03:00
trace_seq_printf ( & iter - > seq ,
2020-09-04 11:23:31 +03:00
" %7d:%3d:%c %s [%03d] %7d:%3d:%c %s \n " ,
2014-11-12 18:29:54 +03:00
field - > prev_pid ,
field - > prev_prio ,
S , delim ,
field - > next_cpu ,
field - > next_pid ,
field - > next_prio ,
T , comm ) ;
return trace_handle_return ( & iter - > seq ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_ctx_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_print ( iter , " ==> " ) ;
2008-12-24 07:24:13 +03:00
}
2009-02-04 03:05:50 +03:00
static enum print_line_t trace_wake_print ( struct trace_iterator * iter ,
2010-04-23 02:46:14 +04:00
int flags , struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_print ( iter , " + " ) ;
2008-12-24 07:24:13 +03:00
}
2009-02-03 01:30:12 +03:00
static int trace_ctxwake_raw ( struct trace_iterator * iter , char S )
2008-12-24 07:24:13 +03:00
{
struct ctx_switch_entry * field ;
int T ;
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
if ( ! S )
2017-09-29 14:50:16 +03:00
S = task_index_to_char ( field - > prev_state ) ;
T = task_index_to_char ( field - > next_state ) ;
2014-11-12 18:29:54 +03:00
trace_seq_printf ( & iter - > seq , " %d %d %c %d %d %d %c \n " ,
field - > prev_pid ,
field - > prev_prio ,
S ,
field - > next_cpu ,
field - > next_pid ,
field - > next_prio ,
T ) ;
return trace_handle_return ( & iter - > seq ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_ctx_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_raw ( iter , 0 ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_wake_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_raw ( iter , ' + ' ) ;
2008-12-24 07:24:13 +03:00
}
2009-02-03 01:30:12 +03:00
static int trace_ctxwake_hex ( struct trace_iterator * iter , char S )
2008-12-24 07:24:13 +03:00
{
struct ctx_switch_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2008-12-24 07:24:13 +03:00
int T ;
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
if ( ! S )
2017-09-29 14:50:16 +03:00
S = task_index_to_char ( field - > prev_state ) ;
T = task_index_to_char ( field - > next_state ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
SEQ_PUT_HEX_FIELD ( s , field - > prev_pid ) ;
SEQ_PUT_HEX_FIELD ( s , field - > prev_prio ) ;
SEQ_PUT_HEX_FIELD ( s , S ) ;
SEQ_PUT_HEX_FIELD ( s , field - > next_cpu ) ;
SEQ_PUT_HEX_FIELD ( s , field - > next_pid ) ;
SEQ_PUT_HEX_FIELD ( s , field - > next_prio ) ;
SEQ_PUT_HEX_FIELD ( s , T ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_ctx_hex ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_hex ( iter , 0 ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_wake_hex ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2009-02-03 01:30:12 +03:00
return trace_ctxwake_hex ( iter , ' + ' ) ;
2008-12-24 07:24:13 +03:00
}
2009-02-04 03:05:50 +03:00
static enum print_line_t trace_ctxwake_bin ( struct trace_iterator * iter ,
2010-04-23 02:46:14 +04:00
int flags , struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct ctx_switch_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
SEQ_PUT_FIELD ( s , field - > prev_pid ) ;
SEQ_PUT_FIELD ( s , field - > prev_prio ) ;
SEQ_PUT_FIELD ( s , field - > prev_state ) ;
SEQ_PUT_FIELD ( s , field - > next_cpu ) ;
SEQ_PUT_FIELD ( s , field - > next_pid ) ;
SEQ_PUT_FIELD ( s , field - > next_prio ) ;
SEQ_PUT_FIELD ( s , field - > next_state ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_ctx_funcs = {
2008-12-24 07:24:13 +03:00
. trace = trace_ctx_print ,
. raw = trace_ctx_raw ,
. hex = trace_ctx_hex ,
. binary = trace_ctxwake_bin ,
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_ctx_event = {
. type = TRACE_CTX ,
. funcs = & trace_ctx_funcs ,
} ;
static struct trace_event_functions trace_wake_funcs = {
2008-12-24 07:24:13 +03:00
. trace = trace_wake_print ,
. raw = trace_wake_raw ,
. hex = trace_wake_hex ,
. binary = trace_ctxwake_bin ,
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_wake_event = {
. type = TRACE_WAKE ,
. funcs = & trace_wake_funcs ,
} ;
2008-12-24 07:24:13 +03:00
/* TRACE_STACK */
2009-02-04 03:05:50 +03:00
static enum print_line_t trace_stack_print ( struct trace_iterator * iter ,
2010-04-23 02:46:14 +04:00
int flags , struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
struct stack_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2011-07-15 00:36:53 +04:00
unsigned long * p ;
unsigned long * end ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2011-07-15 00:36:53 +04:00
end = ( unsigned long * ) ( ( long ) iter - > ent + iter - > ent_size ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_puts ( s , " <stack trace> \n " ) ;
2011-07-15 00:36:53 +04:00
2019-06-10 07:00:16 +03:00
for ( p = field - > caller ; p & & p < end & & * p ! = ULONG_MAX ; p + + ) {
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
if ( trace_seq_has_overflowed ( s ) )
break ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_puts ( s , " => " ) ;
seq_print_ip_sym ( s , * p , flags ) ;
trace_seq_putc ( s , ' \n ' ) ;
}
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_stack_funcs = {
2008-12-24 07:24:13 +03:00
. trace = trace_stack_print ,
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_stack_event = {
. type = TRACE_STACK ,
. funcs = & trace_stack_funcs ,
} ;
2008-12-24 07:24:13 +03:00
/* TRACE_USER_STACK */
2009-02-04 03:05:50 +03:00
static enum print_line_t trace_user_stack_print ( struct trace_iterator * iter ,
2010-04-23 02:46:14 +04:00
int flags , struct trace_event * event )
2008-12-24 07:24:13 +03:00
{
2015-09-30 16:42:05 +03:00
struct trace_array * tr = iter - > tr ;
2008-12-24 07:24:13 +03:00
struct userstack_entry * field ;
2009-02-03 01:30:12 +03:00
struct trace_seq * s = & iter - > seq ;
2015-09-28 17:11:44 +03:00
struct mm_struct * mm = NULL ;
unsigned int i ;
2008-12-24 07:24:13 +03:00
2009-02-03 01:30:12 +03:00
trace_assign_type ( field , iter - > ent ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_puts ( s , " <user stack trace> \n " ) ;
2015-09-28 17:11:44 +03:00
2015-09-30 16:42:05 +03:00
if ( tr - > trace_flags & TRACE_ITER_SYM_USEROBJ ) {
2015-09-28 17:11:44 +03:00
struct task_struct * task ;
/*
* we do the lookup on the thread group leader ,
* since individual threads might have already quit !
*/
rcu_read_lock ( ) ;
task = find_task_by_vpid ( field - > tgid ) ;
if ( task )
mm = get_task_mm ( task ) ;
rcu_read_unlock ( ) ;
}
for ( i = 0 ; i < FTRACE_STACK_ENTRIES ; i + + ) {
unsigned long ip = field - > caller [ i ] ;
2019-06-30 11:54:38 +03:00
if ( ! ip | | trace_seq_has_overflowed ( s ) )
2015-09-28 17:11:44 +03:00
break ;
trace_seq_puts ( s , " => " ) ;
seq_print_user_ip ( s , mm , ip , flags ) ;
trace_seq_putc ( s , ' \n ' ) ;
}
if ( mm )
mmput ( mm ) ;
2008-12-24 07:24:13 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2008-12-24 07:24:13 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_user_stack_funcs = {
2008-12-24 07:24:13 +03:00
. trace = trace_user_stack_print ,
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_user_stack_event = {
. type = TRACE_USER_STACK ,
. funcs = & trace_user_stack_funcs ,
} ;
2016-06-23 19:45:36 +03:00
/* TRACE_HWLAT */
static enum print_line_t
trace_hwlat_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct trace_entry * entry = iter - > ent ;
struct trace_seq * s = & iter - > seq ;
struct hwlat_entry * field ;
trace_assign_type ( field , entry ) ;
2020-02-12 20:21:03 +03:00
trace_seq_printf ( s , " #%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld count:%d " ,
2016-06-23 19:45:36 +03:00
field - > seqnum ,
field - > duration ,
field - > outer_duration ,
2017-05-09 01:59:13 +03:00
( long long ) field - > timestamp . tv_sec ,
2020-02-12 20:21:03 +03:00
field - > timestamp . tv_nsec , field - > count ) ;
2016-06-23 19:45:36 +03:00
2016-08-04 19:49:53 +03:00
if ( field - > nmi_count ) {
/*
* The generic sched_clock ( ) is not NMI safe , thus
* we only record the count and not the time .
*/
if ( ! IS_ENABLED ( CONFIG_GENERIC_SCHED_CLOCK ) )
trace_seq_printf ( s , " nmi-total:%llu " ,
field - > nmi_total_ts ) ;
trace_seq_printf ( s , " nmi-count:%u " ,
field - > nmi_count ) ;
}
trace_seq_putc ( s , ' \n ' ) ;
2016-06-23 19:45:36 +03:00
return trace_handle_return ( s ) ;
}
static enum print_line_t
trace_hwlat_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct hwlat_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
2017-05-09 01:59:13 +03:00
trace_seq_printf ( s , " %llu %lld %lld %09ld %u \n " ,
2016-06-23 19:45:36 +03:00
field - > duration ,
field - > outer_duration ,
2017-05-09 01:59:13 +03:00
( long long ) field - > timestamp . tv_sec ,
2016-06-23 19:45:36 +03:00
field - > timestamp . tv_nsec ,
field - > seqnum ) ;
return trace_handle_return ( s ) ;
}
static struct trace_event_functions trace_hwlat_funcs = {
. trace = trace_hwlat_print ,
. raw = trace_hwlat_raw ,
} ;
static struct trace_event trace_hwlat_event = {
. type = TRACE_HWLAT ,
. funcs = & trace_hwlat_funcs ,
} ;
trace: Add osnoise tracer
In the context of high-performance computing (HPC), the Operating System
Noise (*osnoise*) refers to the interference experienced by an application
due to activities inside the operating system. In the context of Linux,
NMIs, IRQs, SoftIRQs, and any other system thread can cause noise to the
system. Moreover, hardware-related jobs can also cause noise, for example,
via SMIs.
The osnoise tracer leverages the hwlat_detector by running a similar
loop with preemption, SoftIRQs and IRQs enabled, thus allowing all
the sources of *osnoise* during its execution. Using the same approach
of hwlat, osnoise takes note of the entry and exit point of any
source of interferences, increasing a per-cpu interference counter. The
osnoise tracer also saves an interference counter for each source of
interference. The interference counter for NMI, IRQs, SoftIRQs, and
threads is increased anytime the tool observes these interferences' entry
events. When a noise happens without any interference from the operating
system level, the hardware noise counter increases, pointing to a
hardware-related noise. In this way, osnoise can account for any
source of interference. At the end of the period, the osnoise tracer
prints the sum of all noise, the max single noise, the percentage of CPU
available for the thread, and the counters for the noise sources.
Usage
Write the ASCII text "osnoise" into the current_tracer file of the
tracing system (generally mounted at /sys/kernel/tracing).
For example::
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo osnoise > current_tracer
It is possible to follow the trace by reading the trace trace file::
[root@f32 tracing]# cat trace
# tracer: osnoise
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth MAX
# || / SINGLE Interference counters:
# |||| RUNTIME NOISE % OF CPU NOISE +-----------------------------+
# TASK-PID CPU# |||| TIMESTAMP IN US IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD
# | | | |||| | | | | | | | | | |
<...>-859 [000] .... 81.637220: 1000000 190 99.98100 9 18 0 1007 18 1
<...>-860 [001] .... 81.638154: 1000000 656 99.93440 74 23 0 1006 16 3
<...>-861 [002] .... 81.638193: 1000000 5675 99.43250 202 6 0 1013 25 21
<...>-862 [003] .... 81.638242: 1000000 125 99.98750 45 1 0 1011 23 0
<...>-863 [004] .... 81.638260: 1000000 1721 99.82790 168 7 0 1002 49 41
<...>-864 [005] .... 81.638286: 1000000 263 99.97370 57 6 0 1006 26 2
<...>-865 [006] .... 81.638302: 1000000 109 99.98910 21 3 0 1006 18 1
<...>-866 [007] .... 81.638326: 1000000 7816 99.21840 107 8 0 1016 39 19
In addition to the regular trace fields (from TASK-PID to TIMESTAMP), the
tracer prints a message at the end of each period for each CPU that is
running an osnoise/CPU thread. The osnoise specific fields report:
- The RUNTIME IN USE reports the amount of time in microseconds that
the osnoise thread kept looping reading the time.
- The NOISE IN US reports the sum of noise in microseconds observed
by the osnoise tracer during the associated runtime.
- The % OF CPU AVAILABLE reports the percentage of CPU available for
the osnoise thread during the runtime window.
- The MAX SINGLE NOISE IN US reports the maximum single noise observed
during the runtime window.
- The Interference counters display how many each of the respective
interference happened during the runtime window.
Note that the example above shows a high number of HW noise samples.
The reason being is that this sample was taken on a virtual machine,
and the host interference is detected as a hardware interference.
Tracer options
The tracer has a set of options inside the osnoise directory, they are:
- osnoise/cpus: CPUs at which a osnoise thread will execute.
- osnoise/period_us: the period of the osnoise thread.
- osnoise/runtime_us: how long an osnoise thread will look for noise.
- osnoise/stop_tracing_us: stop the system tracing if a single noise
higher than the configured value happens. Writing 0 disables this
option.
- osnoise/stop_tracing_total_us: stop the system tracing if total noise
higher than the configured value happens. Writing 0 disables this
option.
- tracing_threshold: the minimum delta between two time() reads to be
considered as noise, in us. When set to 0, the default value will
be used, which is currently 5 us.
Additional Tracing
In addition to the tracer, a set of tracepoints were added to
facilitate the identification of the osnoise source.
- osnoise:sample_threshold: printed anytime a noise is higher than
the configurable tolerance_ns.
- osnoise:nmi_noise: noise from NMI, including the duration.
- osnoise:irq_noise: noise from an IRQ, including the duration.
- osnoise:softirq_noise: noise from a SoftIRQ, including the
duration.
- osnoise:thread_noise: noise from a thread, including the duration.
Note that all the values are *net values*. For example, if while osnoise
is running, another thread preempts the osnoise thread, it will start a
thread_noise duration at the start. Then, an IRQ takes place, preempting
the thread_noise, starting a irq_noise. When the IRQ ends its execution,
it will compute its duration, and this duration will be subtracted from
the thread_noise, in such a way as to avoid the double accounting of the
IRQ execution. This logic is valid for all sources of noise.
Here is one example of the usage of these tracepoints::
osnoise/8-961 [008] d.h. 5789.857532: irq_noise: local_timer:236 start 5789.857529929 duration 1845 ns
osnoise/8-961 [008] dNh. 5789.858408: irq_noise: local_timer:236 start 5789.858404871 duration 2848 ns
migration/8-54 [008] d... 5789.858413: thread_noise: migration/8:54 start 5789.858409300 duration 3068 ns
osnoise/8-961 [008] .... 5789.858413: sample_threshold: start 5789.858404555 duration 8723 ns interferences 2
In this example, a noise sample of 8 microseconds was reported in the last
line, pointing to two interferences. Looking backward in the trace, the
two previous entries were about the migration thread running after a
timer IRQ execution. The first event is not part of the noise because
it took place one millisecond before.
It is worth noticing that the sum of the duration reported in the
tracepoints is smaller than eight us reported in the sample_threshold.
The reason roots in the overhead of the entry and exit code that happens
before and after any interference execution. This justifies the dual
approach: measuring thread and tracing.
Link: https://lkml.kernel.org/r/e649467042d60e7b62714c9c6751a56299d15119.1624372313.git.bristot@redhat.com
Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Willaims <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
[
Made the following functions static:
trace_irqentry_callback()
trace_irqexit_callback()
trace_intel_irqentry_callback()
trace_intel_irqexit_callback()
Added to include/trace.h:
osnoise_arch_register()
osnoise_arch_unregister()
Fixed define logic for LATENCY_FS_NOTIFY
Reported-by: kernel test robot <lkp@intel.com>
]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2021-06-22 17:42:27 +03:00
/* TRACE_OSNOISE */
static enum print_line_t
trace_osnoise_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct trace_entry * entry = iter - > ent ;
struct trace_seq * s = & iter - > seq ;
struct osnoise_entry * field ;
u64 ratio , ratio_dec ;
u64 net_runtime ;
trace_assign_type ( field , entry ) ;
/*
* compute the available % of cpu time .
*/
net_runtime = field - > runtime - field - > noise ;
ratio = net_runtime * 10000000 ;
do_div ( ratio , field - > runtime ) ;
ratio_dec = do_div ( ratio , 100000 ) ;
trace_seq_printf ( s , " %llu %10llu %3llu.%05llu %7llu " ,
field - > runtime ,
field - > noise ,
ratio , ratio_dec ,
field - > max_sample ) ;
trace_seq_printf ( s , " %6u " , field - > hw_count ) ;
trace_seq_printf ( s , " %6u " , field - > nmi_count ) ;
trace_seq_printf ( s , " %6u " , field - > irq_count ) ;
trace_seq_printf ( s , " %6u " , field - > softirq_count ) ;
trace_seq_printf ( s , " %6u " , field - > thread_count ) ;
trace_seq_putc ( s , ' \n ' ) ;
return trace_handle_return ( s ) ;
}
static enum print_line_t
trace_osnoise_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct osnoise_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
trace_seq_printf ( s , " %lld %llu %llu %u %u %u %u %u \n " ,
field - > runtime ,
field - > noise ,
field - > max_sample ,
field - > hw_count ,
field - > nmi_count ,
field - > irq_count ,
field - > softirq_count ,
field - > thread_count ) ;
return trace_handle_return ( s ) ;
}
static struct trace_event_functions trace_osnoise_funcs = {
. trace = trace_osnoise_print ,
. raw = trace_osnoise_raw ,
} ;
static struct trace_event trace_osnoise_event = {
. type = TRACE_OSNOISE ,
. funcs = & trace_osnoise_funcs ,
} ;
2021-06-22 17:42:28 +03:00
/* TRACE_TIMERLAT */
static enum print_line_t
trace_timerlat_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct trace_entry * entry = iter - > ent ;
struct trace_seq * s = & iter - > seq ;
struct timerlat_entry * field ;
trace_assign_type ( field , entry ) ;
trace_seq_printf ( s , " #%-5u context %6s timer_latency %9llu ns \n " ,
field - > seqnum ,
field - > context ? " thread " : " irq " ,
field - > timer_latency ) ;
return trace_handle_return ( s ) ;
}
static enum print_line_t
trace_timerlat_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct timerlat_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
trace_seq_printf ( s , " %u %d %llu \n " ,
field - > seqnum ,
field - > context ,
field - > timer_latency ) ;
return trace_handle_return ( s ) ;
}
static struct trace_event_functions trace_timerlat_funcs = {
. trace = trace_timerlat_print ,
. raw = trace_timerlat_raw ,
} ;
static struct trace_event trace_timerlat_event = {
. type = TRACE_TIMERLAT ,
. funcs = & trace_timerlat_funcs ,
} ;
2013-03-09 06:02:34 +04:00
/* TRACE_BPUTS */
static enum print_line_t
trace_bputs_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct trace_entry * entry = iter - > ent ;
struct trace_seq * s = & iter - > seq ;
struct bputs_entry * field ;
trace_assign_type ( field , entry ) ;
2014-11-12 18:29:54 +03:00
seq_print_ip_sym ( s , field - > ip , flags ) ;
trace_seq_puts ( s , " : " ) ;
trace_seq_puts ( s , field - > str ) ;
2013-03-09 06:02:34 +04:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2013-03-09 06:02:34 +04:00
}
static enum print_line_t
trace_bputs_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct bputs_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " : %lx : " , field - > ip ) ;
trace_seq_puts ( s , field - > str ) ;
2013-03-09 06:02:34 +04:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2013-03-09 06:02:34 +04:00
}
static struct trace_event_functions trace_bputs_funcs = {
. trace = trace_bputs_print ,
. raw = trace_bputs_raw ,
} ;
static struct trace_event trace_bputs_event = {
. type = TRACE_BPUTS ,
. funcs = & trace_bputs_funcs ,
} ;
2009-03-12 20:24:49 +03:00
/* TRACE_BPRINT */
2009-03-06 19:21:47 +03:00
static enum print_line_t
2010-04-23 02:46:14 +04:00
trace_bprint_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2009-03-06 19:21:47 +03:00
{
struct trace_entry * entry = iter - > ent ;
struct trace_seq * s = & iter - > seq ;
2009-03-12 20:24:49 +03:00
struct bprint_entry * field ;
2009-03-06 19:21:47 +03:00
trace_assign_type ( field , entry ) ;
2014-11-12 18:29:54 +03:00
seq_print_ip_sym ( s , field - > ip , flags ) ;
trace_seq_puts ( s , " : " ) ;
trace_seq_bprintf ( s , field - > fmt , field - > buf ) ;
2009-03-06 19:21:47 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2009-03-06 19:21:47 +03:00
}
2009-03-06 19:21:49 +03:00
2009-03-12 20:24:49 +03:00
static enum print_line_t
2010-04-23 02:46:14 +04:00
trace_bprint_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2009-03-06 19:21:47 +03:00
{
2009-03-12 20:24:49 +03:00
struct bprint_entry * field ;
2009-03-06 19:21:47 +03:00
struct trace_seq * s = & iter - > seq ;
2009-03-06 19:21:49 +03:00
trace_assign_type ( field , iter - > ent ) ;
2009-03-06 19:21:47 +03:00
2014-11-12 18:29:54 +03:00
trace_seq_printf ( s , " : %lx : " , field - > ip ) ;
trace_seq_bprintf ( s , field - > fmt , field - > buf ) ;
2009-03-06 19:21:47 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2009-03-06 19:21:47 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_bprint_funcs = {
. trace = trace_bprint_print ,
. raw = trace_bprint_raw ,
} ;
2009-03-06 19:21:49 +03:00
2009-03-12 20:24:49 +03:00
static struct trace_event trace_bprint_event = {
. type = TRACE_BPRINT ,
2010-04-23 02:46:14 +04:00
. funcs = & trace_bprint_funcs ,
2009-03-12 20:24:49 +03:00
} ;
/* TRACE_PRINT */
static enum print_line_t trace_print_print ( struct trace_iterator * iter ,
2010-04-23 02:46:14 +04:00
int flags , struct trace_event * event )
2009-03-12 20:24:49 +03:00
{
struct print_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
2014-11-12 18:29:54 +03:00
seq_print_ip_sym ( s , field - > ip , flags ) ;
trace_seq_printf ( s , " : %s " , field - > buf ) ;
2009-03-12 20:24:49 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( s ) ;
2009-03-12 20:24:49 +03:00
}
2010-04-23 02:46:14 +04:00
static enum print_line_t trace_print_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
2009-03-12 20:24:49 +03:00
{
struct print_entry * field ;
trace_assign_type ( field , iter - > ent ) ;
2014-11-12 18:29:54 +03:00
trace_seq_printf ( & iter - > seq , " # %lx %s " , field - > ip , field - > buf ) ;
2009-03-12 20:24:49 +03:00
2014-11-12 18:29:54 +03:00
return trace_handle_return ( & iter - > seq ) ;
2009-03-12 20:24:49 +03:00
}
2010-04-23 02:46:14 +04:00
static struct trace_event_functions trace_print_funcs = {
2009-03-06 19:21:49 +03:00
. trace = trace_print_print ,
. raw = trace_print_raw ,
2009-03-06 19:21:47 +03:00
} ;
2010-04-23 02:46:14 +04:00
static struct trace_event trace_print_event = {
. type = TRACE_PRINT ,
. funcs = & trace_print_funcs ,
} ;
2016-07-06 22:25:08 +03:00
static enum print_line_t trace_raw_data ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct raw_data_entry * field ;
int i ;
trace_assign_type ( field , iter - > ent ) ;
trace_seq_printf ( & iter - > seq , " # %x buf: " , field - > id ) ;
for ( i = 0 ; i < iter - > ent_size - offsetof ( struct raw_data_entry , buf ) ; i + + )
trace_seq_printf ( & iter - > seq , " %02x " ,
( unsigned char ) field - > buf [ i ] ) ;
trace_seq_putc ( & iter - > seq , ' \n ' ) ;
return trace_handle_return ( & iter - > seq ) ;
}
static struct trace_event_functions trace_raw_data_funcs = {
. trace = trace_raw_data ,
. raw = trace_raw_data ,
} ;
static struct trace_event trace_raw_data_event = {
. type = TRACE_RAW_DATA ,
. funcs = & trace_raw_data_funcs ,
} ;
2021-04-15 21:18:50 +03:00
static enum print_line_t
trace_func_repeats_raw ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct func_repeats_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
trace_seq_printf ( s , " %lu %lu %u %llu \n " ,
field - > ip ,
field - > parent_ip ,
field - > count ,
FUNC_REPEATS_GET_DELTA_TS ( field ) ) ;
return trace_handle_return ( s ) ;
}
static enum print_line_t
trace_func_repeats_print ( struct trace_iterator * iter , int flags ,
struct trace_event * event )
{
struct func_repeats_entry * field ;
struct trace_seq * s = & iter - > seq ;
trace_assign_type ( field , iter - > ent ) ;
2021-04-15 23:34:26 +03:00
print_fn_trace ( s , field - > ip , field - > parent_ip , flags ) ;
2021-04-15 21:18:50 +03:00
trace_seq_printf ( s , " (repeats: %u, last_ts: " , field - > count ) ;
trace_print_time ( s , iter ,
iter - > ts - FUNC_REPEATS_GET_DELTA_TS ( field ) ) ;
trace_seq_puts ( s , " ) \n " ) ;
return trace_handle_return ( s ) ;
}
static struct trace_event_functions trace_func_repeats_funcs = {
. trace = trace_func_repeats_print ,
. raw = trace_func_repeats_raw ,
} ;
static struct trace_event trace_func_repeats_event = {
. type = TRACE_FUNC_REPEATS ,
. funcs = & trace_func_repeats_funcs ,
} ;
2009-03-12 20:24:49 +03:00
2008-12-24 07:24:13 +03:00
static struct trace_event * events [ ] __initdata = {
& trace_fn_event ,
& trace_ctx_event ,
& trace_wake_event ,
& trace_stack_event ,
& trace_user_stack_event ,
2013-03-09 06:02:34 +04:00
& trace_bputs_event ,
2009-03-12 20:24:49 +03:00
& trace_bprint_event ,
2008-12-24 07:24:13 +03:00
& trace_print_event ,
2016-06-23 19:45:36 +03:00
& trace_hwlat_event ,
trace: Add osnoise tracer
In the context of high-performance computing (HPC), the Operating System
Noise (*osnoise*) refers to the interference experienced by an application
due to activities inside the operating system. In the context of Linux,
NMIs, IRQs, SoftIRQs, and any other system thread can cause noise to the
system. Moreover, hardware-related jobs can also cause noise, for example,
via SMIs.
The osnoise tracer leverages the hwlat_detector by running a similar
loop with preemption, SoftIRQs and IRQs enabled, thus allowing all
the sources of *osnoise* during its execution. Using the same approach
of hwlat, osnoise takes note of the entry and exit point of any
source of interferences, increasing a per-cpu interference counter. The
osnoise tracer also saves an interference counter for each source of
interference. The interference counter for NMI, IRQs, SoftIRQs, and
threads is increased anytime the tool observes these interferences' entry
events. When a noise happens without any interference from the operating
system level, the hardware noise counter increases, pointing to a
hardware-related noise. In this way, osnoise can account for any
source of interference. At the end of the period, the osnoise tracer
prints the sum of all noise, the max single noise, the percentage of CPU
available for the thread, and the counters for the noise sources.
Usage
Write the ASCII text "osnoise" into the current_tracer file of the
tracing system (generally mounted at /sys/kernel/tracing).
For example::
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo osnoise > current_tracer
It is possible to follow the trace by reading the trace trace file::
[root@f32 tracing]# cat trace
# tracer: osnoise
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth MAX
# || / SINGLE Interference counters:
# |||| RUNTIME NOISE % OF CPU NOISE +-----------------------------+
# TASK-PID CPU# |||| TIMESTAMP IN US IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD
# | | | |||| | | | | | | | | | |
<...>-859 [000] .... 81.637220: 1000000 190 99.98100 9 18 0 1007 18 1
<...>-860 [001] .... 81.638154: 1000000 656 99.93440 74 23 0 1006 16 3
<...>-861 [002] .... 81.638193: 1000000 5675 99.43250 202 6 0 1013 25 21
<...>-862 [003] .... 81.638242: 1000000 125 99.98750 45 1 0 1011 23 0
<...>-863 [004] .... 81.638260: 1000000 1721 99.82790 168 7 0 1002 49 41
<...>-864 [005] .... 81.638286: 1000000 263 99.97370 57 6 0 1006 26 2
<...>-865 [006] .... 81.638302: 1000000 109 99.98910 21 3 0 1006 18 1
<...>-866 [007] .... 81.638326: 1000000 7816 99.21840 107 8 0 1016 39 19
In addition to the regular trace fields (from TASK-PID to TIMESTAMP), the
tracer prints a message at the end of each period for each CPU that is
running an osnoise/CPU thread. The osnoise specific fields report:
- The RUNTIME IN USE reports the amount of time in microseconds that
the osnoise thread kept looping reading the time.
- The NOISE IN US reports the sum of noise in microseconds observed
by the osnoise tracer during the associated runtime.
- The % OF CPU AVAILABLE reports the percentage of CPU available for
the osnoise thread during the runtime window.
- The MAX SINGLE NOISE IN US reports the maximum single noise observed
during the runtime window.
- The Interference counters display how many each of the respective
interference happened during the runtime window.
Note that the example above shows a high number of HW noise samples.
The reason being is that this sample was taken on a virtual machine,
and the host interference is detected as a hardware interference.
Tracer options
The tracer has a set of options inside the osnoise directory, they are:
- osnoise/cpus: CPUs at which a osnoise thread will execute.
- osnoise/period_us: the period of the osnoise thread.
- osnoise/runtime_us: how long an osnoise thread will look for noise.
- osnoise/stop_tracing_us: stop the system tracing if a single noise
higher than the configured value happens. Writing 0 disables this
option.
- osnoise/stop_tracing_total_us: stop the system tracing if total noise
higher than the configured value happens. Writing 0 disables this
option.
- tracing_threshold: the minimum delta between two time() reads to be
considered as noise, in us. When set to 0, the default value will
be used, which is currently 5 us.
Additional Tracing
In addition to the tracer, a set of tracepoints were added to
facilitate the identification of the osnoise source.
- osnoise:sample_threshold: printed anytime a noise is higher than
the configurable tolerance_ns.
- osnoise:nmi_noise: noise from NMI, including the duration.
- osnoise:irq_noise: noise from an IRQ, including the duration.
- osnoise:softirq_noise: noise from a SoftIRQ, including the
duration.
- osnoise:thread_noise: noise from a thread, including the duration.
Note that all the values are *net values*. For example, if while osnoise
is running, another thread preempts the osnoise thread, it will start a
thread_noise duration at the start. Then, an IRQ takes place, preempting
the thread_noise, starting a irq_noise. When the IRQ ends its execution,
it will compute its duration, and this duration will be subtracted from
the thread_noise, in such a way as to avoid the double accounting of the
IRQ execution. This logic is valid for all sources of noise.
Here is one example of the usage of these tracepoints::
osnoise/8-961 [008] d.h. 5789.857532: irq_noise: local_timer:236 start 5789.857529929 duration 1845 ns
osnoise/8-961 [008] dNh. 5789.858408: irq_noise: local_timer:236 start 5789.858404871 duration 2848 ns
migration/8-54 [008] d... 5789.858413: thread_noise: migration/8:54 start 5789.858409300 duration 3068 ns
osnoise/8-961 [008] .... 5789.858413: sample_threshold: start 5789.858404555 duration 8723 ns interferences 2
In this example, a noise sample of 8 microseconds was reported in the last
line, pointing to two interferences. Looking backward in the trace, the
two previous entries were about the migration thread running after a
timer IRQ execution. The first event is not part of the noise because
it took place one millisecond before.
It is worth noticing that the sum of the duration reported in the
tracepoints is smaller than eight us reported in the sample_threshold.
The reason roots in the overhead of the entry and exit code that happens
before and after any interference execution. This justifies the dual
approach: measuring thread and tracing.
Link: https://lkml.kernel.org/r/e649467042d60e7b62714c9c6751a56299d15119.1624372313.git.bristot@redhat.com
Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Willaims <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
[
Made the following functions static:
trace_irqentry_callback()
trace_irqexit_callback()
trace_intel_irqentry_callback()
trace_intel_irqexit_callback()
Added to include/trace.h:
osnoise_arch_register()
osnoise_arch_unregister()
Fixed define logic for LATENCY_FS_NOTIFY
Reported-by: kernel test robot <lkp@intel.com>
]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2021-06-22 17:42:27 +03:00
& trace_osnoise_event ,
2021-06-22 17:42:28 +03:00
& trace_timerlat_event ,
2016-07-06 22:25:08 +03:00
& trace_raw_data_event ,
2021-04-15 21:18:50 +03:00
& trace_func_repeats_event ,
2008-12-24 07:24:13 +03:00
NULL
} ;
__init static int init_events ( void )
{
struct trace_event * event ;
int i , ret ;
for ( i = 0 ; events [ i ] ; i + + ) {
event = events [ i ] ;
2015-05-05 16:39:12 +03:00
ret = register_trace_event ( event ) ;
2022-04-24 16:19:32 +03:00
WARN_ONCE ( ! ret , " event %d failed to register " , event - > type ) ;
2008-12-24 07:24:13 +03:00
}
return 0 ;
}
2012-06-01 05:40:05 +04:00
early_initcall ( init_events ) ;