/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * thread-stack.h: Synthesize a thread's stack using call/return events
 * Copyright (c) 2014, Intel Corporation.
 */
# ifndef __PERF_THREAD_STACK_H
# define __PERF_THREAD_STACK_H
# include <sys/types.h>
# include <linux/types.h>
/*
 * Forward declarations of types defined elsewhere. Nothing in this header
 * needs their layout, so their defining headers are not included here.
 *
 * struct branch_stack is declared here because it appears in prototypes
 * below; without a file-scope declaration its tag would be scoped to the
 * prototype alone.
 */
struct thread;
struct comm;
struct ip_callchain;
struct branch_stack;
struct symbol;
struct dso;
struct perf_sample;
struct addr_location;
struct call_path;
/*
 * Call/Return flags. Each flag occupies its own bit.
 *
 * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
 * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
 *                       symbol (e.g. a call/ret combination that the compiler
 *                       optimized into a jmp)
 */
enum {
	CALL_RETURN_NO_CALL	= 1 << 0,
	CALL_RETURN_NO_RETURN	= 1 << 1,
	CALL_RETURN_NON_CALL	= 1 << 2,
};
/**
* struct call_return - paired call / return information .
* @ thread : thread in which call / return occurred
* @ comm : comm in which call / return occurred
* @ cp : call path
* @ call_time : timestamp of call ( if known )
* @ return_time : timestamp of return ( if known )
* @ branch_count : number of branches seen between call and return
2019-05-20 14:37:19 +03:00
* @ insn_count : approx . number of instructions between call and return
* @ cyc_count : approx . number of cycles between call and return
2014-10-30 16:09:45 +02:00
* @ call_ref : external reference to ' call ' sample ( e . g . db_id )
* @ return_ref : external reference to ' return ' sample ( e . g . db_id )
* @ db_id : id used for db - export
2019-02-28 15:00:24 +02:00
* @ parent_db_id : id of parent call used for db - export
2014-10-30 16:09:45 +02:00
* @ flags : Call / Return flags
*/
struct call_return {
struct thread * thread ;
struct comm * comm ;
struct call_path * cp ;
u64 call_time ;
u64 return_time ;
u64 branch_count ;
2019-05-20 14:37:19 +03:00
u64 insn_count ;
u64 cyc_count ;
2014-10-30 16:09:45 +02:00
u64 call_ref ;
u64 return_ref ;
u64 db_id ;
2019-02-28 15:00:24 +02:00
u64 parent_db_id ;
2014-10-30 16:09:45 +02:00
u32 flags ;
} ;
2016-04-28 01:19:10 -07:00
/**
* struct call_return_processor - provides a call - back to consume call - return
* information .
* @ cpr : call path root
* @ process : call - back that accepts call / return information
* @ data : anonymous data for call - back
*/
struct call_return_processor {
struct call_path_root * cpr ;
2019-02-28 15:00:24 +02:00
int ( * process ) ( struct call_return * cr , u64 * parent_db_id , void * data ) ;
2016-04-28 01:19:10 -07:00
void * data ;
} ;
2018-12-21 14:06:19 +02:00
int thread_stack__event ( struct thread * thread , int cpu , u32 flags , u64 from_ip ,
2020-04-29 18:07:43 +03:00
u64 to_ip , u16 insn_len , u64 trace_nr , bool callstack ,
unsigned int br_stack_sz , bool mispred_all ) ;
2018-12-21 14:06:19 +02:00
void thread_stack__set_trace_nr ( struct thread * thread , int cpu , u64 trace_nr ) ;
void thread_stack__sample ( struct thread * thread , int cpu , struct ip_callchain * chain ,
2018-10-31 11:10:42 +02:00
size_t sz , u64 ip , u64 kernel_start ) ;
2020-04-01 13:16:06 +03:00
void thread_stack__sample_late ( struct thread * thread , int cpu ,
struct ip_callchain * chain , size_t sz , u64 ip ,
u64 kernel_start ) ;
2020-04-29 18:07:43 +03:00
void thread_stack__br_sample ( struct thread * thread , int cpu ,
struct branch_stack * dst , unsigned int sz ) ;
2020-04-29 18:07:48 +03:00
void thread_stack__br_sample_late ( struct thread * thread , int cpu ,
struct branch_stack * dst , unsigned int sz ,
u64 sample_ip , u64 kernel_start ) ;
2015-05-29 16:33:30 +03:00
int thread_stack__flush ( struct thread * thread ) ;
2014-10-30 16:09:42 +02:00
void thread_stack__free ( struct thread * thread ) ;
2018-12-21 14:06:19 +02:00
size_t thread_stack__depth ( struct thread * thread , int cpu ) ;
2014-10-30 16:09:42 +02:00
2014-10-30 16:09:45 +02:00
struct call_return_processor *
2019-02-28 15:00:24 +02:00
call_return_processor__new ( int ( * process ) ( struct call_return * cr , u64 * parent_db_id , void * data ) ,
2014-10-30 16:09:45 +02:00
void * data ) ;
void call_return_processor__free ( struct call_return_processor * crp ) ;
int thread_stack__process ( struct thread * thread , struct comm * comm ,
struct perf_sample * sample ,
struct addr_location * from_al ,
struct addr_location * to_al , u64 ref ,
struct call_return_processor * crp ) ;
2014-10-30 16:09:42 +02:00
# endif