f08046cb30
The compiler might optimize a call/ret combination by making it a jmp. However, the thread-stack does not presently cater for that, so such control flow is not visible in the call graph. Make it visible by recording on the stack a branch to the start of a different symbol. Note that this means that when a ret pops the stack, all jmps must be popped off first. Example: $ cat jmp-to-fn.c __attribute__((noinline)) int bar(void) { return -1; } __attribute__((noinline)) int foo(void) { return bar() + 1; } int main() { return foo(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o jmp-to-fn jmp-to-fn.c $ objdump -d jmp-to-fn <SNIP> 0000000000001040 <main>: 1040: 31 c0 xor %eax,%eax 1042: e9 09 01 00 00 jmpq 1150 <foo> <SNIP> 0000000000001140 <bar>: 1140: b8 ff ff ff ff mov $0xffffffff,%eax 1145: c3 retq <SNIP> 0000000000001150 <foo>: 1150: 31 c0 xor %eax,%eax 1152: e8 e9 ff ff ff callq 1140 <bar> 1157: 83 c0 01 add $0x1,%eax 115a: c3 retq <SNIP> $ perf record -o jmp-to-fn.perf.data -e intel_pt/cyc/u ./jmp-to-fn [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB jmp-to-fn.perf.data ] $ perf script -i jmp-to-fn.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py jmp-to-fn.db branches calls 2019-01-08 13:24:58.783069 Creating database... 2019-01-08 13:24:58.794650 Writing records... 2019-01-08 13:24:59.008050 Adding indexes 2019-01-08 13:24:59.015802 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py jmp-to-fn.db Before: main -> bar After: main -> foo -> bar Committer testing: Install the python2-pyside package, then select these menu options on the GUI: "Reports" "Context sensitive callgraphs" Then go on expanding the symbols to get the full picture. Doing this on a fedora:29 with gcc version 8.2.1 20181215 (Red Hat 8.2.1-6) (GCC) gives: jmp-to-fn PID:TID _start (ld-2.28.so) __libc_start_main main foo bar This verifies that the change does indeed fix the problem.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20190109091835.5570-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
106 lines
3.2 KiB
C
106 lines
3.2 KiB
C
/*
 * thread-stack.h: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#ifndef __PERF_THREAD_STACK_H
#define __PERF_THREAD_STACK_H

#include <sys/types.h>

#include <linux/types.h>

/*
 * Forward declarations. Wherever this header uses these types it does so
 * through pointers, so their full definitions are not needed here.
 */
struct thread;
struct comm;
struct ip_callchain;
struct symbol;
struct dso;
struct perf_sample;
struct addr_location;
struct call_path;
struct call_path_root;

/*
 * Call/Return flags.
 *
 * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
 * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
 *                       symbol
 *
 * Each flag is a distinct bit.
 */
enum {
	CALL_RETURN_NO_CALL	= 1 << 0,
	CALL_RETURN_NO_RETURN	= 1 << 1,
	CALL_RETURN_NON_CALL	= 1 << 2,
};

/**
|
|
* struct call_return - paired call/return information.
|
|
* @thread: thread in which call/return occurred
|
|
* @comm: comm in which call/return occurred
|
|
* @cp: call path
|
|
* @call_time: timestamp of call (if known)
|
|
* @return_time: timestamp of return (if known)
|
|
* @branch_count: number of branches seen between call and return
|
|
* @call_ref: external reference to 'call' sample (e.g. db_id)
|
|
* @return_ref: external reference to 'return' sample (e.g. db_id)
|
|
* @db_id: id used for db-export
|
|
* @flags: Call/Return flags
|
|
*/
|
|
struct call_return {
|
|
struct thread *thread;
|
|
struct comm *comm;
|
|
struct call_path *cp;
|
|
u64 call_time;
|
|
u64 return_time;
|
|
u64 branch_count;
|
|
u64 call_ref;
|
|
u64 return_ref;
|
|
u64 db_id;
|
|
u32 flags;
|
|
};
|
|
|
|
/**
 * struct call_return_processor - provides a call-back to consume call-return
 *                                information.
 * @cpr: call path root
 * @process: call-back that accepts call/return information
 * @data: anonymous data for call-back
 */
struct call_return_processor {
	struct call_path_root *cpr;
	int (*process)(struct call_return *cr, void *data);
	void *data;
};

/*
 * Thread-stack entry points taking a thread and a cpu. Their definitions are
 * not part of this header.
 * NOTE(review): parameter semantics are not visible here - see the
 * implementation before relying on them.
 */
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr);
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start);
/*
 * Per-thread stack housekeeping. Definitions live outside this header.
 * NOTE(review): behaviour is presumed from the names (flush / free / depth
 * query) - confirm against the implementation.
 */
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);

/*
 * Construction and destruction of a call_return_processor. The @process and
 * @data arguments have the same types as the struct call_return_processor
 * members of the same name.
 * NOTE(review): allocation and ownership rules are not visible in this
 * header - confirm against the implementation.
 */
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data);
void call_return_processor__free(struct call_return_processor *crp);
/*
 * Takes a sample with its 'from' and 'to' address locations, plus the
 * call_return_processor to use. Defined outside this header.
 * NOTE(review): @ref is presumably the external sample reference stored in
 * call_return.call_ref / return_ref - confirm against the implementation.
 */
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp);

#endif