2011-02-04 09:45:46 -02:00
/*
* Copyright ( C ) 2011 , Red Hat Inc , Arnaldo Carvalho de Melo < acme @ redhat . com >
*
* Parts came from builtin - annotate . c , see those files for further
* copyright notes .
*
* Released under the GPL v2 . ( and only v2 , not any later version )
*/
2017-04-18 10:46:11 -03:00
# include <errno.h>
2017-04-17 15:23:08 -03:00
# include <inttypes.h>
2011-02-04 09:45:46 -02:00
# include "util.h"
2014-02-20 10:32:53 +09:00
# include "ui/ui.h"
# include "sort.h"
2011-02-04 09:45:46 -02:00
# include "build-id.h"
# include "color.h"
2018-03-16 14:33:38 -03:00
# include "config.h"
2011-02-04 09:45:46 -02:00
# include "cache.h"
# include "symbol.h"
# include "debug.h"
# include "annotate.h"
2013-03-05 14:53:21 +09:00
# include "evsel.h"
perf annotate: Add branch stack / basic block
I wanted to know the hottest path through a function and figured the
branch-stack (LBR) information should be able to help out with that.
The below uses the branch-stack to create basic blocks and generate
statistics from them.
from to branch_i
* ----> *
|
| block
v
* ----> *
from to branch_i+1
The blocks are broken down into non-overlapping ranges, while tracking
if the start of each range is an entry point and/or the end of a range
is a branch.
Each block iterates all ranges it covers (while splitting where required
to exactly match the block) and increments the 'coverage' count.
For the range including the branch we increment the taken counter, as
well as the pred counter if flags.predicted.
Using these numbers we can find if an instruction:
- had coverage; given by:
br->coverage / br->sym->max_coverage
This metric ensures each symbol has a 100% spot, which reflects the
observation that each symbol must have a most covered/hottest
block.
- is a branch target: br->is_target && br->start == addr
- for targets, how much of a branch's coverages comes from it:
target->entry / branch->coverage
- is a branch: br->is_branch && br->end == addr
- for branches, how often it was taken:
br->taken / br->coverage
after all, all execution that didn't take the branch would have
incremented the coverage and continued onward to a later branch.
- for branches, how often it was predicted:
br->pred / br->taken
The coverage percentage is used to color the address and asm sections;
for low (<1%) coverage we use NORMAL (uncolored), indicating that these
instructions are not 'important'. For high coverage (>75%) we color the
address RED.
For each branch, we add an asm comment after the instruction with
information on how often it was taken and predicted.
Output looks like (sans color, which does lose a lot of the
information :/)
$ perf record --branch-filter u,any -e cycles:p ./branches 27
$ perf annotate branches
Percent | Source code & Disassembly of branches for cycles:pu (217 samples)
---------------------------------------------------------------------------------
: branches():
0.00 : 40057a: push %rbp
0.00 : 40057b: mov %rsp,%rbp
0.00 : 40057e: sub $0x20,%rsp
0.00 : 400582: mov %rdi,-0x18(%rbp)
0.00 : 400586: mov %rsi,-0x20(%rbp)
0.00 : 40058a: mov -0x18(%rbp),%rax
0.00 : 40058e: mov %rax,-0x10(%rbp)
0.00 : 400592: movq $0x0,-0x8(%rbp)
0.00 : 40059a: jmpq 400656 <branches+0xdc>
1.84 : 40059f: mov -0x10(%rbp),%rax # +100.00%
3.23 : 4005a3: and $0x1,%eax
1.84 : 4005a6: test %rax,%rax
0.00 : 4005a9: je 4005bf <branches+0x45> # -54.50% (p:42.00%)
0.46 : 4005ab: mov 0x200bbe(%rip),%rax # 601170 <acc>
12.90 : 4005b2: add $0x1,%rax
2.30 : 4005b6: mov %rax,0x200bb3(%rip) # 601170 <acc>
0.46 : 4005bd: jmp 4005d1 <branches+0x57> # -100.00% (p:100.00%)
0.92 : 4005bf: mov 0x200baa(%rip),%rax # 601170 <acc> # +49.54%
13.82 : 4005c6: sub $0x1,%rax
0.46 : 4005ca: mov %rax,0x200b9f(%rip) # 601170 <acc>
2.30 : 4005d1: mov -0x10(%rbp),%rax # +50.46%
0.46 : 4005d5: mov %rax,%rdi
0.46 : 4005d8: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 4005dd: mov %rax,-0x10(%rbp) # +100.00%
0.92 : 4005e1: mov -0x18(%rbp),%rax
0.00 : 4005e5: and $0x1,%eax
0.00 : 4005e8: test %rax,%rax
0.00 : 4005eb: je 4005ff <branches+0x85> # -100.00% (p:100.00%)
0.00 : 4005ed: mov 0x200b7c(%rip),%rax # 601170 <acc>
0.00 : 4005f4: shr $0x2,%rax
0.00 : 4005f8: mov %rax,0x200b71(%rip) # 601170 <acc>
0.00 : 4005ff: mov -0x10(%rbp),%rax # +100.00%
7.37 : 400603: and $0x1,%eax
3.69 : 400606: test %rax,%rax
0.00 : 400609: jne 400612 <branches+0x98> # -59.25% (p:42.99%)
1.84 : 40060b: mov $0x1,%eax
14.29 : 400610: jmp 400617 <branches+0x9d> # -100.00% (p:100.00%)
1.38 : 400612: mov $0x0,%eax # +57.65%
10.14 : 400617: test %al,%al # +42.35%
0.00 : 400619: je 40062f <branches+0xb5> # -57.65% (p:100.00%)
0.46 : 40061b: mov 0x200b4e(%rip),%rax # 601170 <acc>
2.76 : 400622: sub $0x1,%rax
0.00 : 400626: mov %rax,0x200b43(%rip) # 601170 <acc>
0.46 : 40062d: jmp 400641 <branches+0xc7> # -100.00% (p:100.00%)
0.92 : 40062f: mov 0x200b3a(%rip),%rax # 601170 <acc> # +56.13%
2.30 : 400636: add $0x1,%rax
0.92 : 40063a: mov %rax,0x200b2f(%rip) # 601170 <acc>
0.92 : 400641: mov -0x10(%rbp),%rax # +43.87%
2.30 : 400645: mov %rax,%rdi
0.00 : 400648: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 40064d: mov %rax,-0x10(%rbp) # +100.00%
1.84 : 400651: addq $0x1,-0x8(%rbp)
0.92 : 400656: mov -0x8(%rbp),%rax
5.07 : 40065a: cmp -0x20(%rbp),%rax
0.00 : 40065e: jb 40059f <branches+0x25> # -100.00% (p:100.00%)
0.00 : 400664: nop
0.00 : 400665: leaveq
0.00 : 400666: retq
(Note: the --branch-filter u,any was used to avoid spurious target and
branch points due to interrupts/faults, they show up as very small -/+
annotations on 'weird' locations)
Committer note:
Please take a look at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
To see the colors.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
[ Moved sym->max_coverage to 'struct annotate', aka symbol__annotate(sym) ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-05 16:08:12 -03:00
# include "block-range.h"
2017-04-17 16:51:59 -03:00
# include "string2.h"
2016-11-16 15:39:50 -03:00
# include "arch/common.h"
2014-11-12 18:05:26 -08:00
# include <regex.h>
2011-02-08 13:27:39 -02:00
# include <pthread.h>
2012-10-27 23:18:29 +02:00
# include <linux/bitops.h>
2017-04-17 11:39:06 -03:00
# include <linux/kernel.h>
2011-02-04 09:45:46 -02:00
2018-03-15 19:12:39 -03:00
/* FIXME: For the HE_COLORSET */
# include "ui/browser.h"
/*
* FIXME : Using the same values as slang . h ,
* but that header may not be available everywhere
*/
2018-03-15 23:14:51 -03:00
# define LARROW_CHAR ((unsigned char)',')
# define RARROW_CHAR ((unsigned char)'+')
# define DARROW_CHAR ((unsigned char)'.')
# define UARROW_CHAR ((unsigned char)'-')
2018-03-15 19:12:39 -03:00
2017-04-17 16:10:49 -03:00
# include "sane_ctype.h"
2018-03-16 14:33:38 -03:00
/*
 * Default annotation options: use_offset and jump_arrows both start out
 * enabled. Members not named in the initializer are zero-initialized.
 */
struct annotation_options annotation__default_options = {
	.jump_arrows	= true,
	.use_offset	= true,
};
2011-09-15 14:31:41 -07:00
const char * disassembler_style ;
2012-09-04 12:32:30 +02:00
const char * objdump_path ;
2014-11-12 18:05:26 -08:00
static regex_t file_lineno ;
2011-09-15 14:31:41 -07:00
2016-11-24 11:16:06 -03:00
static struct ins_ops * ins__find ( struct arch * arch , const char * name ) ;
2016-11-24 11:37:08 -03:00
static void ins__sort ( struct arch * arch ) ;
2016-11-24 11:16:06 -03:00
static int disasm_line__parse ( char * line , const char * * namep , char * * rawp ) ;
2012-05-12 13:15:34 -03:00
2016-11-16 15:39:50 -03:00
struct arch {
const char * name ;
2016-11-17 12:31:51 -03:00
struct ins * instructions ;
size_t nr_instructions ;
2016-11-24 11:37:08 -03:00
size_t nr_instructions_allocated ;
struct ins_ops * ( * associate_instruction_ops ) ( struct arch * arch , const char * name ) ;
2016-11-17 12:31:51 -03:00
bool sorted_instructions ;
2016-11-18 12:34:26 -03:00
bool initialized ;
void * priv ;
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed /retired together.
While with sampling this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So for the fused instruction pair, they
could be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
unsigned int model ;
unsigned int family ;
2017-10-11 17:01:24 +02:00
int ( * init ) ( struct arch * arch , char * cpuid ) ;
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed /retired together.
While with sampling this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So for the fused instruction pair, they
could be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
bool ( * ins_is_fused ) ( struct arch * arch , const char * ins1 ,
const char * ins2 ) ;
2016-11-16 15:39:50 -03:00
struct {
char comment_char ;
2016-11-16 15:50:38 -03:00
char skip_functions_char ;
2016-11-16 15:39:50 -03:00
} objdump ;
} ;
2016-11-17 12:31:51 -03:00
static struct ins_ops call_ops ;
static struct ins_ops dec_ops ;
static struct ins_ops jump_ops ;
static struct ins_ops mov_ops ;
static struct ins_ops nop_ops ;
static struct ins_ops lock_ops ;
static struct ins_ops ret_ops ;
2016-11-24 11:37:08 -03:00
static int arch__grow_instructions ( struct arch * arch )
{
struct ins * new_instructions ;
size_t new_nr_allocated ;
if ( arch - > nr_instructions_allocated = = 0 & & arch - > instructions )
goto grow_from_non_allocated_table ;
new_nr_allocated = arch - > nr_instructions_allocated + 128 ;
new_instructions = realloc ( arch - > instructions , new_nr_allocated * sizeof ( struct ins ) ) ;
if ( new_instructions = = NULL )
return - 1 ;
out_update_instructions :
arch - > instructions = new_instructions ;
arch - > nr_instructions_allocated = new_nr_allocated ;
return 0 ;
grow_from_non_allocated_table :
new_nr_allocated = arch - > nr_instructions + 128 ;
new_instructions = calloc ( new_nr_allocated , sizeof ( struct ins ) ) ;
if ( new_instructions = = NULL )
return - 1 ;
memcpy ( new_instructions , arch - > instructions , arch - > nr_instructions ) ;
goto out_update_instructions ;
}
2016-11-18 16:54:10 -03:00
static int arch__associate_ins_ops ( struct arch * arch , const char * name , struct ins_ops * ops )
2016-11-24 11:37:08 -03:00
{
struct ins * ins ;
if ( arch - > nr_instructions = = arch - > nr_instructions_allocated & &
arch__grow_instructions ( arch ) )
return - 1 ;
ins = & arch - > instructions [ arch - > nr_instructions ] ;
ins - > name = strdup ( name ) ;
if ( ! ins - > name )
return - 1 ;
ins - > ops = ops ;
arch - > nr_instructions + + ;
ins__sort ( arch ) ;
return 0 ;
}
2016-11-17 12:31:51 -03:00
# include "arch/arm/annotate/instructions.c"
perf annotate: AArch64 support
This is a regex converted version from the original:
https://lkml.org/lkml/2016/5/19/461
Add basic support to recognise AArch64 assembly. This allows perf to
identify AArch64 instructions that branch to other parts within the
same function, thereby properly annotating them.
Rebased onto new cross-arch annotation bits:
https://lkml.org/lkml/2016/11/25/546
Sample output:
security_file_permission vmlinux
5.80 │ ← ret ▒
│70: ldr w0, [x21,#68] ▒
4.44 │ ↓ tbnz d0 ▒
│ mov w0, #0x24 // #36 ▒
1.37 │ ands w0, w22, w0 ▒
│ ↑ b.eq 60 ▒
1.37 │ ↓ tbnz e4 ▒
│ mov w19, #0x20000 // #131072 ▒
1.02 │ ↓ tbz ec ▒
│90:┌─→ldr x3, [x21,#24] ▒
1.37 │ │ add x21, x21, #0x10 ▒
│ │ mov w2, w19 ▒
1.02 │ │ mov x0, x21 ▒
│ │ mov x1, x3 ▒
1.71 │ │ ldr x20, [x3,#48] ▒
│ │→ bl __fsnotify_parent ▒
0.68 │ │↑ cbnz 60 ▒
│ │ mov x2, x21 ▒
1.37 │ │ mov w1, w19 ▒
│ │ mov x0, x20 ▒
0.68 │ │ mov w5, #0x0 // #0 ▒
│ │ mov x4, #0x0 // #0 ▒
1.71 │ │ mov w3, #0x1 // #1 ▒
│ │→ bl fsnotify ▒
1.37 │ │↑ b 60 ▒
│d0:│ mov w0, #0x0 // #0 ▒
│ │ ldp x19, x20, [sp,#16] ▒
│ │ ldp x21, x22, [sp,#32] ▒
│ │ ldp x29, x30, [sp],#48 ▒
│ │← ret ▒
│e4:│ mov w19, #0x10000 // #65536 ▒
│ └──b 90 ◆
│ec: brk #0x800 ▒
Press 'h' for help on key bindings
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Signed-off-by: Chris Ryder <chris.ryder@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/20161130092344.012e18e3e623bea395162f95@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-11-30 09:23:44 -06:00
# include "arch/arm64/annotate/instructions.c"
2016-11-17 12:31:51 -03:00
# include "arch/x86/annotate/instructions.c"
perf annotate: Initial PowerPC support
Support the PowerPC architecture using the ins_ops association
method.
Committer notes:
Testing it with a perf.data file collected on a PowerPC machine and
cross-annotated on a x86_64 workstation, using the associated vmlinux
file:
$ perf report -i perf.data.f22vm.powerdev --vmlinux vmlinux.powerpc
.ktime_get vmlinux.powerpc
│ clrldi r9,r28,63
8.57 │ ┌──bne e0 <- TUI cursor positioned here
│54:│ lwsync
2.86 │ │ std r2,40(r1)
│ │ ld r9,144(r31)
│ │ ld r3,136(r31)
│ │ ld r30,184(r31)
│ │ ld r10,0(r9)
│ │ mtctr r10
│ │ ld r2,8(r9)
8.57 │ │→ bctrl
│ │ ld r2,40(r1)
│ │ ld r10,160(r31)
│ │ ld r5,152(r31)
│ │ lwz r7,168(r31)
│ │ ld r9,176(r31)
8.57 │ │ lwz r6,172(r31)
│ │ lwsync
2.86 │ │ lwz r8,128(r31)
│ │ cmpw cr7,r8,r28
2.86 │ │↑ bne 48
│ │ subf r10,r10,r3
│ │ mr r3,r29
│ │ and r10,r10,r5
2.86 │ │ mulld r10,r10,r7
│ │ add r9,r10,r9
│ │ srd r9,r9,r6
│ │ add r9,r9,r30
│ │ std r9,0(r29)
│ │ addi r1,r1,144
│ │ ld r0,16(r1)
│ │ ld r28,-32(r1)
│ │ ld r29,-24(r1)
│ │ ld r30,-16(r1)
│ │ mtlr r0
│ │ ld r31,-8(r1)
│ │← blr
5.71 │e0:└─→mr r1,r1
11.43 │ mr r2,r2
11.43 │ lwz r28,128(r31)
Press 'h' for help on key bindings
$ perf report -i perf.data.f22vm.powerdev --header-only
# ========
# captured on: Thu Nov 24 12:40:38 2016
# hostname : pdev-f22-qemu
# os release : 4.4.10-200.fc22.ppc64
# perf version : 4.9.rc1.g6298ce
# arch : ppc64
# nrcpus online : 48
# nrcpus avail : 48
# cpudesc : POWER7 (architected), altivec supported
# cpuid : 74,513
# total memory : 4158976 kB
# cmdline : /home/ravi/Workspace/linux/tools/perf/perf record -a
# event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1
# HEADER_CPU_TOPOLOGY info available, use -I to display
# HEADER_NUMA_TOPOLOGY info available, use -I to display
# pmu mappings: cpu = 4, software = 1, tracepoint = 2, breakpoint = 5
# missing features: HEADER_TRACING_DATA HEADER_BRANCH_STACK HEADER_GROUP_DESC HEADER_AUXTRACE HEADER_STAT HEADER_CACHE
# ========
#
$
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/n/tip-tbjnp40ddoxxl474uvhwi6g4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-11-23 21:33:46 +05:30
# include "arch/powerpc/annotate/instructions.c"
2017-04-06 09:51:52 +02:00
# include "arch/s390/annotate/instructions.c"
2016-11-17 12:31:51 -03:00
2016-11-16 15:39:50 -03:00
static struct arch architectures [ ] = {
{
. name = " arm " ,
2016-11-18 16:54:10 -03:00
. init = arm__annotate_init ,
2016-11-16 15:39:50 -03:00
} ,
perf annotate: AArch64 support
This is a regex converted version from the original:
https://lkml.org/lkml/2016/5/19/461
Add basic support to recognise AArch64 assembly. This allows perf to
identify AArch64 instructions that branch to other parts within the
same function, thereby properly annotating them.
Rebased onto new cross-arch annotation bits:
https://lkml.org/lkml/2016/11/25/546
Sample output:
security_file_permission vmlinux
5.80 │ ← ret ▒
│70: ldr w0, [x21,#68] ▒
4.44 │ ↓ tbnz d0 ▒
│ mov w0, #0x24 // #36 ▒
1.37 │ ands w0, w22, w0 ▒
│ ↑ b.eq 60 ▒
1.37 │ ↓ tbnz e4 ▒
│ mov w19, #0x20000 // #131072 ▒
1.02 │ ↓ tbz ec ▒
│90:┌─→ldr x3, [x21,#24] ▒
1.37 │ │ add x21, x21, #0x10 ▒
│ │ mov w2, w19 ▒
1.02 │ │ mov x0, x21 ▒
│ │ mov x1, x3 ▒
1.71 │ │ ldr x20, [x3,#48] ▒
│ │→ bl __fsnotify_parent ▒
0.68 │ │↑ cbnz 60 ▒
│ │ mov x2, x21 ▒
1.37 │ │ mov w1, w19 ▒
│ │ mov x0, x20 ▒
0.68 │ │ mov w5, #0x0 // #0 ▒
│ │ mov x4, #0x0 // #0 ▒
1.71 │ │ mov w3, #0x1 // #1 ▒
│ │→ bl fsnotify ▒
1.37 │ │↑ b 60 ▒
│d0:│ mov w0, #0x0 // #0 ▒
│ │ ldp x19, x20, [sp,#16] ▒
│ │ ldp x21, x22, [sp,#32] ▒
│ │ ldp x29, x30, [sp],#48 ▒
│ │← ret ▒
│e4:│ mov w19, #0x10000 // #65536 ▒
│ └──b 90 ◆
│ec: brk #0x800 ▒
Press 'h' for help on key bindings
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Signed-off-by: Chris Ryder <chris.ryder@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/20161130092344.012e18e3e623bea395162f95@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-11-30 09:23:44 -06:00
{
. name = " arm64 " ,
. init = arm64__annotate_init ,
} ,
2016-11-16 15:39:50 -03:00
{
. name = " x86 " ,
2017-10-11 17:01:24 +02:00
. init = x86__annotate_init ,
2016-11-17 12:31:51 -03:00
. instructions = x86__instructions ,
. nr_instructions = ARRAY_SIZE ( x86__instructions ) ,
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed /retired together.
While with sampling this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So for the fused instruction pair, they
could be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
. ins_is_fused = x86__ins_is_fused ,
2016-11-16 15:39:50 -03:00
. objdump = {
. comment_char = ' # ' ,
} ,
} ,
perf annotate: Initial PowerPC support
Support the PowerPC architecture using the ins_ops association
method.
Committer notes:
Testing it with a perf.data file collected on a PowerPC machine and
cross-annotated on a x86_64 workstation, using the associated vmlinux
file:
$ perf report -i perf.data.f22vm.powerdev --vmlinux vmlinux.powerpc
.ktime_get vmlinux.powerpc
│ clrldi r9,r28,63
8.57 │ ┌──bne e0 <- TUI cursor positioned here
│54:│ lwsync
2.86 │ │ std r2,40(r1)
│ │ ld r9,144(r31)
│ │ ld r3,136(r31)
│ │ ld r30,184(r31)
│ │ ld r10,0(r9)
│ │ mtctr r10
│ │ ld r2,8(r9)
8.57 │ │→ bctrl
│ │ ld r2,40(r1)
│ │ ld r10,160(r31)
│ │ ld r5,152(r31)
│ │ lwz r7,168(r31)
│ │ ld r9,176(r31)
8.57 │ │ lwz r6,172(r31)
│ │ lwsync
2.86 │ │ lwz r8,128(r31)
│ │ cmpw cr7,r8,r28
2.86 │ │↑ bne 48
│ │ subf r10,r10,r3
│ │ mr r3,r29
│ │ and r10,r10,r5
2.86 │ │ mulld r10,r10,r7
│ │ add r9,r10,r9
│ │ srd r9,r9,r6
│ │ add r9,r9,r30
│ │ std r9,0(r29)
│ │ addi r1,r1,144
│ │ ld r0,16(r1)
│ │ ld r28,-32(r1)
│ │ ld r29,-24(r1)
│ │ ld r30,-16(r1)
│ │ mtlr r0
│ │ ld r31,-8(r1)
│ │← blr
5.71 │e0:└─→mr r1,r1
11.43 │ mr r2,r2
11.43 │ lwz r28,128(r31)
Press 'h' for help on key bindings
$ perf report -i perf.data.f22vm.powerdev --header-only
# ========
# captured on: Thu Nov 24 12:40:38 2016
# hostname : pdev-f22-qemu
# os release : 4.4.10-200.fc22.ppc64
# perf version : 4.9.rc1.g6298ce
# arch : ppc64
# nrcpus online : 48
# nrcpus avail : 48
# cpudesc : POWER7 (architected), altivec supported
# cpuid : 74,513
# total memory : 4158976 kB
# cmdline : /home/ravi/Workspace/linux/tools/perf/perf record -a
# event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1
# HEADER_CPU_TOPOLOGY info available, use -I to display
# HEADER_NUMA_TOPOLOGY info available, use -I to display
# pmu mappings: cpu = 4, software = 1, tracepoint = 2, breakpoint = 5
# missing features: HEADER_TRACING_DATA HEADER_BRANCH_STACK HEADER_GROUP_DESC HEADER_AUXTRACE HEADER_STAT HEADER_CACHE
# ========
#
$
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/n/tip-tbjnp40ddoxxl474uvhwi6g4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-11-23 21:33:46 +05:30
{
. name = " powerpc " ,
. init = powerpc__annotate_init ,
} ,
2017-04-06 09:51:51 +02:00
{
. name = " s390 " ,
2017-04-06 09:51:52 +02:00
. init = s390__annotate_init ,
2017-04-06 09:51:51 +02:00
. objdump = {
. comment_char = ' # ' ,
} ,
} ,
2016-11-16 15:39:50 -03:00
} ;
2012-05-12 13:26:20 -03:00
static void ins__delete ( struct ins_operands * ops )
{
2015-03-05 15:27:28 -03:00
if ( ops = = NULL )
return ;
2013-12-27 16:55:14 -03:00
zfree ( & ops - > source . raw ) ;
zfree ( & ops - > source . name ) ;
zfree ( & ops - > target . raw ) ;
zfree ( & ops - > target . name ) ;
2012-05-12 13:26:20 -03:00
}
2012-05-07 18:54:16 -03:00
/*
 * Default formatter for an instruction: writes into @bf (at most @size bytes,
 * via scnprintf()) the instruction name, left-justified in a field of minimum
 * width 6 and not truncated, followed by the raw operand string ops->raw.
 *
 * Returns whatever scnprintf() returns.
 */
static int ins__raw_scnprintf ( struct ins * ins , char * bf , size_t size ,
struct ins_operands * ops )
{
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s %s " , ins - > name , ops - > raw ) ;
2012-05-07 18:54:16 -03:00
}
int ins__scnprintf ( struct ins * ins , char * bf , size_t size ,
struct ins_operands * ops )
{
if ( ins - > ops - > scnprintf )
return ins - > ops - > scnprintf ( ins , bf , size , ops ) ;
return ins__raw_scnprintf ( ins , bf , size , ops ) ;
}
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed/retired together.
With sampling, this can result in the sample sometimes landing on the
JCC and sometimes on the CMP. So the instructions of a fused pair
should be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
bool ins__is_fused ( struct arch * arch , const char * ins1 , const char * ins2 )
{
if ( ! arch | | ! arch - > ins_is_fused )
return false ;
return arch - > ins_is_fused ( arch , ins1 , ins2 ) ;
}
2016-11-16 15:50:38 -03:00
/*
 * Parse the operand text of a call instruction (ops->raw) into ops->target.
 *
 * The leading hexadecimal number is stored in ops->target.addr. If a '<'
 * follows, the text between '<' and the next '>' is duplicated into
 * ops->target.name. Without a '<' the operand is handled as an indirect call:
 * when a '*' is present, the hexadecimal number right after it replaces
 * ops->target.addr, and ops->target.name is left untouched.
 *
 * In both cases the address is then converted with map__objdump_2mem() and
 * looked up with map_groups__find_ams(); ops->target.sym is set only when the
 * lookup succeeds and the looked-up address converts back to exactly
 * ops->target.addr.
 *
 * Returns 0 on success, -1 when the name contains the arch's
 * objdump.skip_functions_char, when the closing '>' is missing, or when
 * strdup() fails.
 */
static int call__parse ( struct arch * arch , struct ins_operands * ops , struct map * map )
2012-04-18 16:07:38 -03:00
{
2012-04-20 15:26:47 -03:00
char * endptr , * tok , * name ;
2018-03-02 11:59:36 -03:00
struct addr_map_symbol target = {
. map = map ,
} ;
2012-04-20 15:26:47 -03:00
2012-04-25 08:00:23 -03:00
/* endptr is left pointing into ops->raw, right after the parsed number. */
ops - > target . addr = strtoull ( ops - > raw , & endptr , 16 ) ;
2012-04-20 15:26:47 -03:00
name = strchr ( endptr , ' < ' ) ;
if ( name = = NULL )
goto indirect_call ;
/* Step over the '<' itself. */
name + + ;
2016-11-16 15:50:38 -03:00
/* Only checked when the arch sets a non-zero skip_functions_char. */
if ( arch - > objdump . skip_functions_char & &
strchr ( name , arch - > objdump . skip_functions_char ) )
2015-12-06 23:07:13 +00:00
return - 1 ;
2012-04-20 15:26:47 -03:00
tok = strchr ( name , ' > ' ) ;
if ( tok = = NULL )
return - 1 ;
/*
 * Temporarily terminate the string at '>' so that strdup() copies only
 * the name; the '>' is put back right after, leaving ops->raw intact.
 */
* tok = ' \0 ' ;
2012-04-25 08:00:23 -03:00
ops - > target . name = strdup ( name ) ;
2012-04-20 15:26:47 -03:00
* tok = ' > ' ;
2018-03-02 11:59:36 -03:00
if ( ops - > target . name = = NULL )
return - 1 ;
/* Reached by fall-through above and by the jump back from indirect_call. */
find_target :
target . addr = map__objdump_2mem ( map , ops - > target . addr ) ;
2012-04-20 15:26:47 -03:00
2018-03-02 11:59:36 -03:00
/* Accept the symbol only if its address round-trips to ops->target.addr. */
if ( map_groups__find_ams ( & target ) = = 0 & &
map__rip_2objdump ( target . map , map - > map_ip ( target . map , target . addr ) ) = = ops - > target . addr )
ops - > target . sym = target . sym ;
2012-05-11 12:28:55 -03:00
2012-04-18 16:07:38 -03:00
return 0 ;
2018-03-02 11:59:36 -03:00
/* No '<' in the operand: use the number after '*' as the address, if any. */
indirect_call :
tok = strchr ( endptr , ' * ' ) ;
if ( tok ! = NULL )
ops - > target . addr = strtoull ( tok + 1 , NULL , 16 ) ;
goto find_target ;
2012-04-18 16:07:38 -03:00
}
2012-04-20 15:26:47 -03:00
static int call__scnprintf ( struct ins * ins , char * bf , size_t size ,
2012-05-07 18:54:16 -03:00
struct ins_operands * ops )
2012-04-20 15:26:47 -03:00
{
2018-03-02 11:59:36 -03:00
if ( ops - > target . sym )
return scnprintf ( bf , size , " %-6s %s " , ins - > name , ops - > target . sym - > name ) ;
2012-04-20 15:26:47 -03:00
2012-05-11 12:28:55 -03:00
if ( ops - > target . addr = = 0 )
return ins__raw_scnprintf ( ins , bf , size , ops ) ;
perf annotate: Use ops->target.name when available for unresolved call targets
There is a bug where when using 'perf annotate timerqueue_add' the
target for its only routine called with the 'callq' instruction,
'rb_insert_color', doesn't get resolved from its address when parsing
that 'callq' instruction.
That symbol resolution works when using 'perf report --tui' and then
doing annotation for 'timerqueue_add' from there, the vmlinux
dso->symbols rb_tree somehow gets in a state that we can't find that
address, that is a bug that has to be further investigated.
But since the objdump output has the function name, i.e. the raw objdump
disassembled line looks like:
So, before:
# perf annotate timerqueue_add
│ mov %rbx,%rdi
│ mov %rbx,(%rdx)
│ → callq *ffffffff8184dc80
│ mov 0x8(%rbp),%rdx
│ test %rdx,%rdx
│ ↓ je 67
# perf report
│ mov %rbx,%rdi
│ mov %rbx,(%rdx)
│ → callq rb_insert_color
│ mov 0x8(%rbp),%rdx
│ test %rdx,%rdx
│ ↓ je 67
And after both look the same:
# perf annotate timerqueue_add
│ mov %rbx,%rdi
│ mov %rbx,(%rdx)
│ → callq rb_insert_color
│ mov 0x8(%rbp),%rdx
│ test %rdx,%rdx
│ ↓ je 67
From 'perf report' one can annotate and navigate to that 'rb_insert_color'
function, but not directly from 'perf annotate timerqueue_add', that
remains to be investigated and fixed.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-nkktz6355rhqtq7o8atr8f8r@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-16 13:28:09 -03:00
if ( ops - > target . name )
return scnprintf ( bf , size , " %-6s %s " , ins - > name , ops - > target . name ) ;
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s *% " PRIx64 , ins - > name , ops - > target . addr ) ;
2012-04-20 15:26:47 -03:00
}
2012-04-18 16:07:38 -03:00
static struct ins_ops call_ops = {
2012-04-20 15:26:47 -03:00
. parse = call__parse ,
. scnprintf = call__scnprintf ,
2012-04-18 16:07:38 -03:00
} ;
bool ins__is_call ( const struct ins * ins )
{
2018-03-07 14:43:25 +01:00
return ins - > ops = = & call_ops | | ins - > ops = = & s390_call_ops ;
2012-04-18 16:07:38 -03:00
}
2016-11-16 15:39:50 -03:00
/*
 * Parse the operands of a jump/branch instruction.
 *
 * The target address is read as a hexadecimal number:
 *   - with no comma in ops->raw, from the start of the operand string;
 *   - otherwise from the text after the first comma and, if that parses
 *     to 0, from the text after the second comma (when there is one).
 *
 * If ops->raw contains a '+', the hexadecimal number following it is
 * stored in ops->target.offset and ops->target.offset_avail is set;
 * otherwise offset_avail is cleared.
 *
 * Always returns 0.
 */
static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
{
	const char *plus = strchr(ops->raw, '+');
	const char *comma = strchr(ops->raw, ',');

	/*
	 * skip over possible up to 2 operands to get to address, e.g.:
	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
	 *
	 * The pointers are tested before being advanced: incrementing a
	 * NULL pointer is undefined behaviour.
	 */
	if (comma != NULL) {
		ops->target.addr = strtoull(comma + 1, NULL, 16);
		if (!ops->target.addr) {
			comma = strchr(comma + 1, ',');
			if (comma != NULL)
				ops->target.addr = strtoull(comma + 1, NULL, 16);
		}
	} else {
		ops->target.addr = strtoull(ops->raw, NULL, 16);
	}

	if (plus != NULL) {
		ops->target.offset = strtoull(plus + 1, NULL, 16);
		ops->target.offset_avail = true;
	} else {
		ops->target.offset_avail = false;
	}

	return 0;
}
2012-04-20 14:38:46 -03:00
static int jump__scnprintf ( struct ins * ins , char * bf , size_t size ,
2012-05-07 18:54:16 -03:00
struct ins_operands * ops )
2012-04-19 10:16:27 -03:00
{
perf annotate: Fix branch instruction with multiple operands
'perf annotate' is dropping the cr* fields from branch instructions.
Fix it by adding support to display branch instructions having
multiple operands.
Power Arch objdump of int_sqrt:
20.36 | c0000000004d2694: subf r10,r10,r3
| c0000000004d2698: v bgt cr6,c0000000004d26a0 <int_sqrt+0x40>
1.82 | c0000000004d269c: mr r3,r10
29.18 | c0000000004d26a0: mr r10,r8
| c0000000004d26a4: v bgt cr7,c0000000004d26ac <int_sqrt+0x4c>
| c0000000004d26a8: mr r10,r7
Power Arch Before Patch:
20.36 | subf r10,r10,r3
| v bgt 40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt 4c
| mr r10,r7
Power Arch After patch:
20.36 | subf r10,r10,r3
| v bgt cr6,40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt cr7,4c
| mr r10,r7
Also support AArch64 conditional branch instructions, which can
have up to three operands:
Aarch64 Non-simplified (raw objdump) view:
│ffff0000083cd11c: ↑ cbz w0, ffff0000083cd100 <security_fil▒
...
4.44 │ffff000│083cd134: ↓ tbnz w0, #26, ffff0000083cd190 <securit▒
...
1.37 │ffff000│083cd144: ↓ tbnz w22, #5, ffff0000083cd1a4 <securit▒
│ffff000│083cd148: mov w19, #0x20000 //▒
1.02 │ffff000│083cd14c: ↓ tbz w22, #2, ffff0000083cd1ac <securit▒
...
0.68 │ffff000└──3cd16c: ↑ cbnz w0, ffff0000083cd120 <security_fil▒
Aarch64 Simplified, before this patch:
│ ↑ cbz 40
...
4.44 │ │↓ tbnz w0, #26, ffff0000083cd190 <security_file_permiss▒
...
1.37 │ │↓ tbnz w22, #5, ffff0000083cd1a4 <security_file_permiss▒
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ffff0000083cd1ac <security_file_permiss▒
...
0.68 │ └──cbnz 60
the cbz operand is missing, and the tbz doesn't get simplified processing
at all because the parsing function failed to match an address.
Aarch64 Simplified, After this patch applied:
│ ↑ cbz w0, 40
...
4.44 │ │↓ tbnz w0, #26, d0
...
1.37 │ │↓ tbnz w22, #5, e4
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ec
...
0.68 │ └──cbnz w0, 60
Originally-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Tested-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reported-by: Anton Blanchard <anton@samba.org>
Reported-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20170601092959.f60d98912e8a1b66fd1e4c0e@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-06-01 09:29:59 -05:00
const char * c = strchr ( ops - > raw , ' , ' ) ;
perf annotate: Fix jump target outside of function address range
If jump target is outside of function range, perf is not handling it
correctly. Especially when target address is lesser than function start
address, target offset will be negative. But, target address declared to
be unsigned, converts negative number into 2's complement. See below
example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is
lesser than function start address(34cf0).
34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
Objdump output:
0000000000034cf0 <__sigaction>:
__GI___sigaction():
34cf0: lea -0x20(%rdi),%eax
34cf3: cmp $0x1,%eax
34cf6: jbe 34d00 <__sigaction+0x10>
34cf8: jmpq 34ac0 <__GI___libc_sigaction>
34cfd: nopl (%rax)
34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8>
34d07: movl $0x16,%fs:(%rax)
34d0e: mov $0xffffffff,%eax
34d13: retq
perf annotate before applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp $0x1,%eax
v jbe 10
v jmpq fffffffffffffdd0
nop
10: mov _DYNAMIC+0x2e8,%rax
movl $0x16,%fs:(%rax)
mov $0xffffffff,%eax
retq
perf annotate after applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp $0x1,%eax
v jbe 10
^ jmpq 34ac0 <__GI___libc_sigaction>
nop
10: mov _DYNAMIC+0x2e8,%rax
movl $0x16,%fs:(%rax)
mov $0xffffffff,%eax
retq
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chris Riyder <chris.ryder@arm.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-05 21:26:47 +05:30
if ( ! ops - > target . addr | | ops - > target . offset < 0 )
perf annotate: Show raw form for jump instruction with indirect target
For jump instructions that does not include target address as direct operand,
show the original disassembled line for them. This is needed for certain
powerpc jump instructions that use target address in a register (such as bctr,
btar, ...).
Before:
ld r12,32088(r12)
mtctr r12
v bctr ffffffffffffca2c
std r2,24(r1)
addis r12,r2,-1
After:
ld r12,32088(r12)
mtctr r12
v bctr
std r2,24(r1)
addis r12,r2,-1
Committer notes:
Testing it using a perf.data file and vmlinux for powerpc64,
cross-annotating it on a x86_64 workstation:
Before:
.__bpf_prog_run vmlinux.powerpc
│ std r10,512(r9) ▒
│ lbz r9,0(r31) ▒
│ rldicr r9,r9,3,60 ▒
│ ldx r9,r30,r9 ▒
│ mtctr r9 ▒
100.00 │ ↓ bctr 3fffffffffe01510 ▒
│ lwa r10,4(r31) ▒
│ lwz r9,0(r31) ▒
<SNIP>
Invalid jump offset: 3fffffffffe01510
After:
.__bpf_prog_run vmlinux.powerpc
│ std r10,512(r9) ▒
│ lbz r9,0(r31) ▒
│ rldicr r9,r9,3,60 ▒
│ ldx r9,r30,r9 ▒
│ mtctr r9 ▒
100.00 │ ↓ bctr ▒
│ lwa r10,4(r31) ▒
│ lwz r9,0(r31) ▒
<SNIP>
Invalid jump offset: 3fffffffffe01510
This, in turn, uncovers another problem with jumps without operands, the
ENTER/-> operation, to jump to the target, still continues using the bogus
target :-)
BTW, this was the file used for the above tests:
[acme@jouet ravi_bangoria]$ perf report --header-only -i perf.data.f22vm.powerdev
# ========
# captured on: Thu Nov 24 12:40:38 2016
# hostname : pdev-f22-qemu
# os release : 4.4.10-200.fc22.ppc64
# perf version : 4.9.rc1.g6298ce
# arch : ppc64
# nrcpus online : 48
# nrcpus avail : 48
# cpudesc : POWER7 (architected), altivec supported
# cpuid : 74,513
# total memory : 4158976 kB
# cmdline : /home/ravi/Workspace/linux/tools/perf/perf record -a
# event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, disabled = 1, inherit = 1, mmap = 1, c
# HEADER_CPU_TOPOLOGY info available, use -I to display
# HEADER_NUMA_TOPOLOGY info available, use -I to display
# pmu mappings: cpu = 4, software = 1, tracepoint = 2, breakpoint = 5
# missing features: HEADER_TRACING_DATA HEADER_BRANCH_STACK HEADER_GROUP_DESC HEADER_AUXTRACE HEADER_STAT HEADER_CACHE
# ========
#
[acme@jouet ravi_bangoria]$
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chris Riyder <chris.ryder@arm.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1480953407-7605-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-05 21:26:45 +05:30
return ins__raw_scnprintf ( ins , bf , size , ops ) ;
perf annotate: Fix branch instruction with multiple operands
'perf annotate' is dropping the cr* fields from branch instructions.
Fix it by adding support to display branch instructions having
multiple operands.
Power Arch objdump of int_sqrt:
20.36 | c0000000004d2694: subf r10,r10,r3
| c0000000004d2698: v bgt cr6,c0000000004d26a0 <int_sqrt+0x40>
1.82 | c0000000004d269c: mr r3,r10
29.18 | c0000000004d26a0: mr r10,r8
| c0000000004d26a4: v bgt cr7,c0000000004d26ac <int_sqrt+0x4c>
| c0000000004d26a8: mr r10,r7
Power Arch Before Patch:
20.36 | subf r10,r10,r3
| v bgt 40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt 4c
| mr r10,r7
Power Arch After patch:
20.36 | subf r10,r10,r3
| v bgt cr6,40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt cr7,4c
| mr r10,r7
Also support AArch64 conditional branch instructions, which can
have up to three operands:
Aarch64 Non-simplified (raw objdump) view:
│ffff0000083cd11c: ↑ cbz w0, ffff0000083cd100 <security_fil▒
...
4.44 │ffff000│083cd134: ↓ tbnz w0, #26, ffff0000083cd190 <securit▒
...
1.37 │ffff000│083cd144: ↓ tbnz w22, #5, ffff0000083cd1a4 <securit▒
│ffff000│083cd148: mov w19, #0x20000 //▒
1.02 │ffff000│083cd14c: ↓ tbz w22, #2, ffff0000083cd1ac <securit▒
...
0.68 │ffff000└──3cd16c: ↑ cbnz w0, ffff0000083cd120 <security_fil▒
Aarch64 Simplified, before this patch:
│ ↑ cbz 40
...
4.44 │ │↓ tbnz w0, #26, ffff0000083cd190 <security_file_permiss▒
...
1.37 │ │↓ tbnz w22, #5, ffff0000083cd1a4 <security_file_permiss▒
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ffff0000083cd1ac <security_file_permiss▒
...
0.68 │ └──cbnz 60
the cbz operand is missing, and the tbz doesn't get simplified processing
at all because the parsing function failed to match an address.
Aarch64 Simplified, After this patch applied:
│ ↑ cbz w0, 40
...
4.44 │ │↓ tbnz w0, #26, d0
...
1.37 │ │↓ tbnz w22, #5, e4
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ec
...
0.68 │ └──cbnz w0, 60
Originally-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Tested-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reported-by: Anton Blanchard <anton@samba.org>
Reported-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20170601092959.f60d98912e8a1b66fd1e4c0e@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-06-01 09:29:59 -05:00
if ( c ! = NULL ) {
const char * c2 = strchr ( c + 1 , ' , ' ) ;
/* check for 3-op insn */
if ( c2 ! = NULL )
c = c2 ;
c + + ;
/* mirror arch objdump's space-after-comma style */
if ( * c = = ' ' )
c + + ;
}
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s %.*s% " PRIx64 ,
perf annotate: Fix branch instruction with multiple operands
'perf annotate' is dropping the cr* fields from branch instructions.
Fix it by adding support to display branch instructions having
multiple operands.
Power Arch objdump of int_sqrt:
20.36 | c0000000004d2694: subf r10,r10,r3
| c0000000004d2698: v bgt cr6,c0000000004d26a0 <int_sqrt+0x40>
1.82 | c0000000004d269c: mr r3,r10
29.18 | c0000000004d26a0: mr r10,r8
| c0000000004d26a4: v bgt cr7,c0000000004d26ac <int_sqrt+0x4c>
| c0000000004d26a8: mr r10,r7
Power Arch Before Patch:
20.36 | subf r10,r10,r3
| v bgt 40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt 4c
| mr r10,r7
Power Arch After patch:
20.36 | subf r10,r10,r3
| v bgt cr6,40
1.82 | mr r3,r10
29.18 | 40: mr r10,r8
| v bgt cr7,4c
| mr r10,r7
Also support AArch64 conditional branch instructions, which can
have up to three operands:
Aarch64 Non-simplified (raw objdump) view:
│ffff0000083cd11c: ↑ cbz w0, ffff0000083cd100 <security_fil▒
...
4.44 │ffff000│083cd134: ↓ tbnz w0, #26, ffff0000083cd190 <securit▒
...
1.37 │ffff000│083cd144: ↓ tbnz w22, #5, ffff0000083cd1a4 <securit▒
│ffff000│083cd148: mov w19, #0x20000 //▒
1.02 │ffff000│083cd14c: ↓ tbz w22, #2, ffff0000083cd1ac <securit▒
...
0.68 │ffff000└──3cd16c: ↑ cbnz w0, ffff0000083cd120 <security_fil▒
Aarch64 Simplified, before this patch:
│ ↑ cbz 40
...
4.44 │ │↓ tbnz w0, #26, ffff0000083cd190 <security_file_permiss▒
...
1.37 │ │↓ tbnz w22, #5, ffff0000083cd1a4 <security_file_permiss▒
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ffff0000083cd1ac <security_file_permiss▒
...
0.68 │ └──cbnz 60
the cbz operand is missing, and the tbz doesn't get simplified processing
at all because the parsing function failed to match an address.
Aarch64 Simplified, After this patch applied:
│ ↑ cbz w0, 40
...
4.44 │ │↓ tbnz w0, #26, d0
...
1.37 │ │↓ tbnz w22, #5, e4
│ │ mov w19, #0x20000 // #131072
1.02 │ │↓ tbz w22, #2, ec
...
0.68 │ └──cbnz w0, 60
Originally-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Tested-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reported-by: Anton Blanchard <anton@samba.org>
Reported-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20170601092959.f60d98912e8a1b66fd1e4c0e@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-06-01 09:29:59 -05:00
ins - > name , c ? c - ops - > raw : 0 , ops - > raw ,
ops - > target . offset ) ;
2012-04-19 10:16:27 -03:00
}
2012-04-18 13:58:34 -03:00
static struct ins_ops jump_ops = {
2012-04-20 14:38:46 -03:00
. parse = jump__parse ,
. scnprintf = jump__scnprintf ,
2012-04-18 13:58:34 -03:00
} ;
bool ins__is_jump ( const struct ins * ins )
{
return ins - > ops = = & jump_ops ;
}
2012-05-11 16:48:49 -03:00
/*
 * Pull the address and the "<symbol>" name out of a disassembly comment,
 * but only for operands that are RIP-relative.
 *
 * @raw:     operand text; must contain "(%rip)" for anything to happen
 * @comment: text to parse, expected to start with a hexadecimal address
 * @addrp:   receives the parsed address
 * @namep:   receives a strdup()ed copy of the text between '<' and '>'
 *
 * Returns -1 only when an address was parsed but no '<' follows it,
 * 0 in every other case (including "nothing found").
 */
static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
{
	char *after_addr, *sym_begin, *sym_end;

	if (!strstr(raw, "(%rip)"))
		return 0;

	*addrp = strtoull(comment, &after_addr, 16);
	/* No hex digits consumed: there is no address in the comment. */
	if (after_addr == comment)
		return 0;

	sym_begin = strchr(after_addr, '<');
	if (!sym_begin)
		return -1;
	sym_begin++;

	sym_end = strchr(sym_begin, '>');
	if (sym_end) {
		/* Terminate at '>' just long enough to duplicate the name. */
		*sym_end = '\0';
		*namep = strdup(sym_begin);
		*sym_end = '>';
	}

	return 0;
}
2016-11-16 15:39:50 -03:00
static int lock__parse ( struct arch * arch , struct ins_operands * ops , struct map * map )
2012-05-12 13:15:34 -03:00
{
ops - > locked . ops = zalloc ( sizeof ( * ops - > locked . ops ) ) ;
if ( ops - > locked . ops = = NULL )
return 0 ;
2016-11-24 11:16:06 -03:00
if ( disasm_line__parse ( ops - > raw , & ops - > locked . ins . name , & ops - > locked . ops - > raw ) < 0 )
2012-05-12 13:15:34 -03:00
goto out_free_ops ;
2016-11-24 11:16:06 -03:00
ops - > locked . ins . ops = ins__find ( arch , ops - > locked . ins . name ) ;
2015-01-18 20:00:21 +01:00
2016-11-24 11:16:06 -03:00
if ( ops - > locked . ins . ops = = NULL )
2012-11-10 02:27:13 +09:00
goto out_free_ops ;
2012-05-12 13:15:34 -03:00
2016-11-24 11:16:06 -03:00
if ( ops - > locked . ins . ops - > parse & &
ops - > locked . ins . ops - > parse ( arch , ops - > locked . ops , map ) < 0 )
2015-01-18 20:00:20 +01:00
goto out_free_ops ;
2012-05-12 13:15:34 -03:00
return 0 ;
out_free_ops :
2013-12-26 17:41:15 -03:00
zfree ( & ops - > locked . ops ) ;
2012-05-12 13:15:34 -03:00
return 0 ;
}
/*
 * Format a lock-prefixed instruction into bf (at most size bytes).
 * When the nested instruction has no ops handler (locked.ins.ops is NULL)
 * the raw printer is used instead. Otherwise the prefix name is printed
 * first and the nested instruction is appended right after it; the return
 * value is the sum of both printed lengths.
 */
static int lock__scnprintf ( struct ins * ins , char * bf , size_t size ,
struct ins_operands * ops )
{
int printed ;
2016-11-24 11:16:06 -03:00
if ( ops - > locked . ins . ops = = NULL )
2012-05-12 13:15:34 -03:00
return ins__raw_scnprintf ( ins , bf , size , ops ) ;
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
/* Prefix name first; the nested instruction is written at bf + printed. */
printed = scnprintf ( bf , size , " %-6s " , ins - > name ) ;
2016-11-24 11:16:06 -03:00
return printed + ins__scnprintf ( & ops - > locked . ins , bf + printed ,
2012-05-12 13:15:34 -03:00
size - printed , ops - > locked . ops ) ;
}
2012-05-12 13:26:20 -03:00
static void lock__delete ( struct ins_operands * ops )
{
2016-11-24 11:16:06 -03:00
struct ins * ins = & ops - > locked . ins ;
2015-01-18 20:00:21 +01:00
2016-11-24 11:16:06 -03:00
if ( ins - > ops & & ins - > ops - > free )
2015-01-18 20:00:21 +01:00
ins - > ops - > free ( ops - > locked . ops ) ;
else
ins__delete ( ops - > locked . ops ) ;
2013-12-27 16:55:14 -03:00
zfree ( & ops - > locked . ops ) ;
zfree ( & ops - > target . raw ) ;
zfree ( & ops - > target . name ) ;
2012-05-12 13:26:20 -03:00
}
2012-05-12 13:15:34 -03:00
static struct ins_ops lock_ops = {
2012-05-12 13:26:20 -03:00
. free = lock__delete ,
2012-05-12 13:15:34 -03:00
. parse = lock__parse ,
. scnprintf = lock__scnprintf ,
} ;
2016-11-16 15:39:50 -03:00
/*
 * Split a two-operand "source,target" string (ops->raw) into
 * ops->source.raw and ops->target.raw, both strdup()ed.
 *
 * The target ends before the arch's objdump comment character, if any,
 * with trailing whitespace removed. When a comment exists it is offered to
 * comment__symbol() for both operands so address/name can be filled in.
 *
 * Returns 0 on success, -1 when there is no comma or a strdup() fails.
 */
static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused)
{
	char *comma = strchr(ops->raw, ',');
	char *target, *comment, *end, saved;

	if (!comma)
		return -1;

	/* Cut at the comma just long enough to copy the source operand. */
	*comma = '\0';
	ops->source.raw = strdup(ops->raw);
	*comma = ',';
	if (!ops->source.raw)
		return -1;

	target = comma + 1;
	comment = strchr(target, arch->objdump.comment_char);

	/* Start from the last character before the comment (or before the NUL). */
	end = comment ? comment - 1 : strchr(target, '\0') - 1;
	while (end > target && isspace(end[0]))
		end--;
	end++;

	/* Same trick as above: terminate, duplicate, restore. */
	saved = *end;
	*end = '\0';
	ops->target.raw = strdup(target);
	*end = saved;

	if (!ops->target.raw) {
		zfree(&ops->source.raw);
		return -1;
	}

	if (comment) {
		comment = ltrim(comment);
		comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
		comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
	}

	return 0;
}
/*
 * Format a two-operand instruction into bf (at most size bytes): the
 * instruction name followed by source and target. For each operand the
 * resolved name is used when it is set, otherwise the raw operand text
 * (GNU "?:" shorthand). Returns what scnprintf() returns.
 */
static int mov__scnprintf ( struct ins * ins , char * bf , size_t size ,
struct ins_operands * ops )
{
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s %s,%s " , ins - > name ,
2012-05-11 16:48:49 -03:00
ops - > source . name ? : ops - > source . raw ,
ops - > target . name ? : ops - > target . raw ) ;
}
/* Handlers for two-operand "source,target" instructions. */
static struct ins_ops mov_ops = {
	.scnprintf = mov__scnprintf,
	.parse	   = mov__parse,
};
2016-11-16 15:39:50 -03:00
/*
 * Parse a single-operand instruction: the operand is the leading run of
 * non-whitespace characters of ops->raw and is strdup()ed into
 * ops->target.raw. If the arch's objdump comment character follows, the
 * comment is handed to comment__symbol() to fill target.addr/target.name.
 *
 * Returns 0 on success, -1 when the strdup() fails.
 */
static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
{
	char *operand = ops->raw;
	char *end = operand;
	char *comment, saved;

	while (end[0] != '\0' && !isspace(end[0]))
		end++;

	/* Terminate at the end of the operand only while duplicating it. */
	saved = *end;
	*end = '\0';
	ops->target.raw = strdup(operand);
	*end = saved;

	if (!ops->target.raw)
		return -1;

	comment = strchr(end, arch->objdump.comment_char);
	if (comment) {
		comment = ltrim(comment);
		comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
	}

	return 0;
}
/*
 * Format a single-operand instruction into bf (at most size bytes): the
 * instruction name followed by the target's resolved name, or by its raw
 * text when no name is set. Returns what scnprintf() returns.
 */
static int dec__scnprintf ( struct ins * ins , char * bf , size_t size ,
struct ins_operands * ops )
{
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s %s " , ins - > name ,
2012-05-11 17:21:09 -03:00
ops - > target . name ? : ops - > target . raw ) ;
}
/* Handlers for single-operand instructions. */
static struct ins_ops dec_ops = {
	.scnprintf = dec__scnprintf,
	.parse	   = dec__parse,
};
2012-09-11 01:15:03 +03:00
/*
 * Printer for nop instructions: ignores both the instruction and its
 * operands and always formats the fixed "nop" mnemonic into bf.
 */
static int nop__scnprintf ( struct ins * ins __maybe_unused , char * bf , size_t size ,
struct ins_operands * ops __maybe_unused )
2012-05-07 18:57:02 -03:00
{
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s " , " nop " ) ;
2012-05-07 18:57:02 -03:00
}
/* Ops table for nop instructions: only a printer, no parse or free hook. */
static struct ins_ops nop_ops = {
. scnprintf = nop__scnprintf ,
} ;
2016-06-24 17:23:58 +05:30
/*
 * Ops table for return instructions: printed with the raw printer. Its
 * address also serves as the identity checked by ins__is_ret().
 */
static struct ins_ops ret_ops = {
. scnprintf = ins__raw_scnprintf ,
} ;
bool ins__is_ret ( const struct ins * ins )
{
return ins - > ops = = & ret_ops ;
}
2017-07-07 13:06:35 +08:00
bool ins__is_lock ( const struct ins * ins )
{
return ins - > ops = = & lock_ops ;
}
2016-05-19 17:59:46 +01:00
static int ins__key_cmp ( const void * name , const void * insp )
2012-04-18 13:58:34 -03:00
{
const struct ins * ins = insp ;
return strcmp ( name , ins - > name ) ;
}
2016-05-19 17:59:46 +01:00
static int ins__cmp ( const void * a , const void * b )
{
const struct ins * ia = a ;
const struct ins * ib = b ;
return strcmp ( ia - > name , ib - > name ) ;
}
2016-11-17 12:31:51 -03:00
static void ins__sort ( struct arch * arch )
2016-05-19 17:59:46 +01:00
{
2016-11-17 12:31:51 -03:00
const int nmemb = arch - > nr_instructions ;
2016-05-19 17:59:46 +01:00
2016-11-17 12:31:51 -03:00
qsort ( arch - > instructions , nmemb , sizeof ( struct ins ) , ins__cmp ) ;
2016-05-19 17:59:46 +01:00
}
2016-11-24 11:37:08 -03:00
static struct ins_ops * __ins__find ( struct arch * arch , const char * name )
2012-04-18 13:58:34 -03:00
{
2016-11-24 11:16:06 -03:00
struct ins * ins ;
2016-11-17 12:31:51 -03:00
const int nmemb = arch - > nr_instructions ;
2016-05-19 17:59:46 +01:00
2016-11-17 12:31:51 -03:00
if ( ! arch - > sorted_instructions ) {
ins__sort ( arch ) ;
arch - > sorted_instructions = true ;
2016-05-19 17:59:46 +01:00
}
2012-04-18 13:58:34 -03:00
2016-11-24 11:16:06 -03:00
ins = bsearch ( name , arch - > instructions , nmemb , sizeof ( struct ins ) , ins__key_cmp ) ;
return ins ? ins - > ops : NULL ;
2012-04-18 13:58:34 -03:00
}
2016-11-24 11:37:08 -03:00
static struct ins_ops * ins__find ( struct arch * arch , const char * name )
{
struct ins_ops * ops = __ins__find ( arch , name ) ;
if ( ! ops & & arch - > associate_instruction_ops )
ops = arch - > associate_instruction_ops ( arch , name ) ;
return ops ;
}
2016-11-16 15:39:50 -03:00
static int arch__key_cmp ( const void * name , const void * archp )
{
const struct arch * arch = archp ;
return strcmp ( name , arch - > name ) ;
}
static int arch__cmp ( const void * a , const void * b )
{
const struct arch * aa = a ;
const struct arch * ab = b ;
return strcmp ( aa - > name , ab - > name ) ;
}
/* Sort the global architectures table by name. */
static void arch__sort(void)
{
	const int count = ARRAY_SIZE(architectures);

	qsort(architectures, count, sizeof(struct arch), arch__cmp);
}
/*
 * Look an architecture up by name in the global table, sorting the table
 * the first time this is called. Returns NULL when no entry matches.
 */
static struct arch *arch__find(const char *name)
{
	static bool sorted;
	const int count = ARRAY_SIZE(architectures);

	if (!sorted) {
		arch__sort();
		sorted = true;
	}

	return bsearch(name, architectures, count, sizeof(struct arch), arch__key_cmp);
}
2011-11-11 22:17:32 -02:00
int symbol__alloc_hist ( struct symbol * sym )
2011-02-08 13:27:39 -02:00
{
struct annotation * notes = symbol__annotation ( sym ) ;
2017-10-24 19:50:06 +05:30
size_t size = symbol__size ( sym ) ;
2012-07-19 20:05:25 -07:00
size_t sizeof_sym_hist ;
2017-10-24 19:50:06 +05:30
/*
* Add buffer of one element for zero length symbol .
* When sample is taken from first instruction of
* zero length symbol , perf still resolves it and
* shows symbol name in perf report and allows to
* annotate it .
*/
if ( size = = 0 )
size = 1 ;
2012-07-19 20:05:25 -07:00
/* Check for overflow when calculating sizeof_sym_hist */
2017-07-20 06:36:45 +09:00
if ( size > ( SIZE_MAX - sizeof ( struct sym_hist ) ) / sizeof ( struct sym_hist_entry ) )
2012-07-19 20:05:25 -07:00
return - 1 ;
2017-07-20 06:36:45 +09:00
sizeof_sym_hist = ( sizeof ( struct sym_hist ) + size * sizeof ( struct sym_hist_entry ) ) ;
2012-07-19 20:05:25 -07:00
/* Check for overflow in zalloc argument */
if ( sizeof_sym_hist > ( SIZE_MAX - sizeof ( * notes - > src ) )
/ symbol_conf . nr_events )
return - 1 ;
2011-02-08 13:27:39 -02:00
2011-11-11 22:17:32 -02:00
notes - > src = zalloc ( sizeof ( * notes - > src ) + symbol_conf . nr_events * sizeof_sym_hist ) ;
2011-02-08 13:27:39 -02:00
if ( notes - > src = = NULL )
return - 1 ;
notes - > src - > sizeof_sym_hist = sizeof_sym_hist ;
2011-11-11 22:17:32 -02:00
notes - > src - > nr_histograms = symbol_conf . nr_events ;
2011-02-08 13:27:39 -02:00
INIT_LIST_HEAD ( & notes - > src - > source ) ;
return 0 ;
2011-02-04 09:45:46 -02:00
}
2015-07-18 08:24:48 -07:00
/* The cycles histogram is lazily allocated. */
static int symbol__alloc_hist_cycles ( struct symbol * sym )
{
struct annotation * notes = symbol__annotation ( sym ) ;
const size_t size = symbol__size ( sym ) ;
notes - > src - > cycles_hist = calloc ( size , sizeof ( struct cyc_hist ) ) ;
if ( notes - > src - > cycles_hist = = NULL )
return - 1 ;
return 0 ;
}
2011-02-06 14:54:44 -02:00
void symbol__annotate_zero_histograms ( struct symbol * sym )
{
struct annotation * notes = symbol__annotation ( sym ) ;
2011-02-08 13:27:39 -02:00
pthread_mutex_lock ( & notes - > lock ) ;
2015-07-18 08:24:48 -07:00
if ( notes - > src ! = NULL ) {
2011-02-08 13:27:39 -02:00
memset ( notes - > src - > histograms , 0 ,
notes - > src - > nr_histograms * notes - > src - > sizeof_sym_hist ) ;
2015-07-18 08:24:48 -07:00
if ( notes - > src - > cycles_hist )
memset ( notes - > src - > cycles_hist , 0 ,
symbol__size ( sym ) * sizeof ( struct cyc_hist ) ) ;
}
2011-02-08 13:27:39 -02:00
pthread_mutex_unlock ( & notes - > lock ) ;
2011-02-06 14:54:44 -02:00
}
2015-07-18 08:24:48 -07:00
/*
 * Record one basic-block cycle measurement in the cycles histogram slot at
 * @offset (the offset of the block's final jump).
 *
 * @start:      offset where the block starts (only meaningful with @have_start)
 * @cycles:     cycles measured for the block
 * @have_start: non-zero when @start is valid
 *
 * The aggregate counters always take the sample. The per-block counters
 * track a single block per jump: blocks may overlap, and the longest one
 * (smallest start) wins, so shorter ones are dropped and a longer one
 * resets what was accumulated so far. Always returns 0.
 */
static int __symbol__account_cycles(struct annotation *notes,
				    u64 start,
				    unsigned offset, unsigned cycles,
				    unsigned have_start)
{
	struct cyc_hist *slot = &notes->src->cycles_hist[offset];

	slot->num_aggr++;
	slot->cycles_aggr += cycles;

	/* A sample without a start never replaces one that has a start. */
	if (!have_start && slot->have_start)
		return 0;

	if (slot->num && have_start) {
		if (!slot->have_start || slot->start > start) {
			/* Longer block than the recorded one: start over. */
			slot->have_start = 0;
			slot->cycles = 0;
			slot->num = 0;
			if (slot->reset < 0xffff)
				slot->reset++;
		} else if (slot->start < start) {
			/* Shorter block than the recorded one: ignore it. */
			return 0;
		}
	}

	slot->have_start = have_start;
	slot->start = start;
	slot->cycles += cycles;
	slot->num++;
	return 0;
}
2013-12-18 15:37:41 -03:00
/*
 * Account one sample at addr in histogram evidx of notes.
 * Both the histogram totals and the per-offset entry get their nr_samples
 * incremented and sample->period added. Returns 0, or -ERANGE when addr
 * is not accepted for sym (see the range check below).
 */
static int __symbol__inc_addr_samples ( struct symbol * sym , struct map * map ,
2017-07-20 16:28:53 -03:00
struct annotation * notes , int evidx , u64 addr ,
2017-07-20 17:18:05 -03:00
struct perf_sample * sample )
2011-02-04 09:45:46 -02:00
{
2011-02-04 13:43:24 -02:00
unsigned offset ;
2011-02-04 09:45:46 -02:00
struct sym_hist * h ;
pr_debug3 ( " %s: addr=%# " PRIx64 " \n " , __func__ , map - > unmap_ip ( map , addr ) ) ;
2016-11-22 14:10:50 +05:30
/*
 * Reject addresses outside [start, end). The second clause makes one
 * exception: a zero length symbol (start == end) accepts addr == end.
 */
if ( ( addr < sym - > start | | addr > = sym - > end ) & &
( addr ! = sym - > end | | sym - > start ! = sym - > end ) ) {
2015-10-21 15:45:13 -03:00
pr_debug ( " %s(%d): ERANGE! sym->name=%s, start=%# " PRIx64 " , addr=%# " PRIx64 " , end=%# " PRIx64 " \n " ,
__func__ , __LINE__ , sym - > name , sym - > start , addr , sym - > end ) ;
2012-03-27 12:55:57 -03:00
return - ERANGE ;
2015-10-21 15:45:13 -03:00
}
2011-02-04 09:45:46 -02:00
2011-02-04 13:43:24 -02:00
/* Histogram entries are indexed by byte offset from the symbol start. */
offset = addr - sym - > start ;
h = annotation__histogram ( notes , evidx ) ;
2017-07-20 06:36:51 +09:00
h - > nr_samples + + ;
2017-07-20 06:36:45 +09:00
h - > addr [ offset ] . nr_samples + + ;
2017-07-20 17:18:05 -03:00
h - > period + = sample - > period ;
h - > addr [ offset ] . period + = sample - > period ;
2011-02-04 09:45:46 -02:00
pr_debug3 ( " %# " PRIx64 " %s: period++ [addr: %# " PRIx64 " , %# " PRIx64
2017-07-20 17:18:05 -03:00
" , evidx=%d] => nr_samples: % " PRIu64 " , period: % " PRIu64 " \n " ,
sym - > start , sym - > name , addr , addr - sym - > start , evidx ,
h - > addr [ offset ] . nr_samples , h - > addr [ offset ] . period ) ;
2011-02-04 09:45:46 -02:00
return 0 ;
}
2015-07-18 08:24:48 -07:00
static struct annotation * symbol__get_annotation ( struct symbol * sym , bool cycles )
2015-05-27 10:51:46 -07:00
{
struct annotation * notes = symbol__annotation ( sym ) ;
if ( notes - > src = = NULL ) {
if ( symbol__alloc_hist ( sym ) < 0 )
return NULL ;
}
2015-07-18 08:24:48 -07:00
if ( ! notes - > src - > cycles_hist & & cycles ) {
if ( symbol__alloc_hist_cycles ( sym ) < 0 )
return NULL ;
}
2015-05-27 10:51:46 -07:00
return notes ;
}
2013-12-18 17:12:24 -03:00
/*
 * Account @sample at @addr for @sym in histogram @evidx.
 * A NULL @sym is silently accepted (returns 0); -ENOMEM is returned when
 * the annotation histograms cannot be allocated.
 */
static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
				    int evidx, u64 addr,
				    struct perf_sample *sample)
{
	struct annotation *notes;

	if (!sym)
		return 0;

	notes = symbol__get_annotation(sym, false);

	return notes ? __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample)
		     : -ENOMEM;
}
2015-07-18 08:24:48 -07:00
/*
 * Account @cycles for the basic block ending at @addr inside @sym.
 *
 * @start is the block's start address, or 0 when unknown. It is dropped
 * (treated as unknown) when it is not below @addr.
 *
 * Returns 0 for a NULL @sym, -ENOMEM when the histograms cannot be
 * allocated, -ERANGE when @addr or a non-zero @start lies outside
 * [sym->start, sym->end).
 */
static int symbol__account_cycles(u64 addr, u64 start,
				  struct symbol *sym, unsigned cycles)
{
	struct annotation *notes;
	u64 start_offset = 0;
	unsigned have_start = 0;
	unsigned offset;

	if (!sym)
		return 0;

	notes = symbol__get_annotation(sym, true);
	if (!notes)
		return -ENOMEM;

	if (addr < sym->start || addr >= sym->end)
		return -ERANGE;

	if (start) {
		if (start < sym->start || start >= sym->end)
			return -ERANGE;
		if (start < addr) {
			start_offset = start - sym->start;
			have_start = 1;
		}
	}

	offset = addr - sym->start;
	return __symbol__account_cycles(notes, start_offset, offset, cycles, have_start);
}
/*
 * Account cycles for the block that ends at ams, optionally starting at
 * start. Does nothing (returns 0) when cycles is 0; otherwise returns
 * whatever symbol__account_cycles() returns, logging a debug message on
 * error.
 */
int addr_map_symbol__account_cycles ( struct addr_map_symbol * ams ,
struct addr_map_symbol * start ,
unsigned cycles )
{
2015-08-14 10:11:34 +03:00
u64 saddr = 0 ;
2015-07-18 08:24:48 -07:00
int err ;
if ( ! cycles )
return 0 ;
/*
* Only set start when IPC can be computed . We can only
* compute it when the basic block is completely in a single
* function .
* Special case the case when the jump is elsewhere , but
* it starts on the function start .
*/
if ( start & &
( start - > sym = = ams - > sym | |
( ams - > sym & &
start - > addr = = ams - > sym - > start + ams - > map - > start ) ) )
saddr = start - > al_addr ;
/* saddr still 0 here means no usable start was found; only logged. */
if ( saddr = = 0 )
2015-08-14 10:11:34 +03:00
pr_debug2 ( " BB with bad start: addr % " PRIx64 " start % " PRIx64 " sym % " PRIx64 " saddr % " PRIx64 " \n " ,
2015-07-18 08:24:48 -07:00
ams - > addr ,
start ? start - > addr : 0 ,
ams - > sym ? ams - > sym - > start + ams - > map - > start : 0 ,
saddr ) ;
err = symbol__account_cycles ( ams - > al_addr , saddr , ams - > sym , cycles ) ;
if ( err )
pr_debug2 ( " account_cycles failed %d \n " , err ) ;
return err ;
}
2018-03-15 11:46:23 -03:00
/*
 * Count the offsets in [start, end] (both inclusive) that have an entry
 * in notes->offsets[].
 */
static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
{
	unsigned count = 0;
	u64 off;

	for (off = start; off <= end; off++)
		count += notes->offsets[off] ? 1 : 0;

	return count;
}
/*
 * Compute the IPC of the block [start, end] from the cycle histogram slot
 * @ch and store it in every annotation line of that range.
 *
 * IPC is the number of lines in the range divided by the average cycles
 * per recorded block (ch->cycles / ch->num). Nothing is stored when any of
 * those is zero, or when the slot was reset too often.
 */
static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
{
	unsigned nr_insn = annotation__count_insn(notes, start, end);
	float ipc;
	u64 off;

	if (!nr_insn || !ch->num || !ch->cycles)
		return;

	ipc = nr_insn / ((double)ch->cycles / (double)ch->num);

	/* Hide data when there are too many overlaps. */
	if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
		return;

	for (off = start; off <= end; off++) {
		struct annotation_line *al = notes->offsets[off];

		if (al)
			al->ipc = ipc;
	}
}
void annotation__compute_ipc ( struct annotation * notes , size_t size )
{
u64 offset ;
if ( ! notes - > src | | ! notes - > src - > cycles_hist )
return ;
pthread_mutex_lock ( & notes - > lock ) ;
for ( offset = 0 ; offset < size ; + + offset ) {
struct cyc_hist * ch ;
ch = & notes - > src - > cycles_hist [ offset ] ;
if ( ch & & ch - > cycles ) {
struct annotation_line * al ;
if ( ch - > have_start )
annotation__count_and_fill ( notes , ch - > start , offset , ch ) ;
al = notes - > offsets [ offset ] ;
if ( al & & ch - > num_aggr )
al - > cycles = ch - > cycles_aggr / ch - > num_aggr ;
notes - > have_cycles = true ;
}
}
pthread_mutex_unlock ( & notes - > lock ) ;
}
2017-07-20 16:28:53 -03:00
int addr_map_symbol__inc_samples ( struct addr_map_symbol * ams , struct perf_sample * sample ,
int evidx )
2013-12-18 16:48:29 -03:00
{
2017-07-20 16:28:53 -03:00
return symbol__inc_addr_samples ( ams - > sym , ams - > map , evidx , ams - > al_addr , sample ) ;
2013-12-18 16:48:29 -03:00
}
2017-07-20 16:28:53 -03:00
/* Account @sample at address @ip for the symbol/map of hist entry @he. */
int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
				 int evidx, u64 ip)
{
	struct symbol *sym = he->ms.sym;
	struct map *map = he->ms.map;

	return symbol__inc_addr_samples(sym, map, evidx, ip, sample);
}
2016-11-16 15:39:50 -03:00
static void disasm_line__init_ins ( struct disasm_line * dl , struct arch * arch , struct map * map )
2012-04-18 13:58:34 -03:00
{
2016-11-24 11:16:06 -03:00
dl - > ins . ops = ins__find ( arch , dl - > ins . name ) ;
2012-04-18 13:58:34 -03:00
2016-11-24 11:16:06 -03:00
if ( ! dl - > ins . ops )
2012-04-18 13:58:34 -03:00
return ;
2016-11-24 11:16:06 -03:00
if ( dl - > ins . ops - > parse & & dl - > ins . ops - > parse ( arch , & dl - > ops , map ) < 0 )
dl - > ins . ops = NULL ;
2012-04-18 13:58:34 -03:00
}
2016-11-24 11:16:06 -03:00
/*
 * Split a disassembly line into instruction name and operand text.
 *
 * @line:  the line; leading whitespace is skipped
 * @namep: receives a strdup()ed copy of the first whitespace-delimited
 *         word; the caller owns it. Set to NULL on allocation failure.
 * @rawp:  receives a pointer INTO @line at the first non-blank character
 *         after the name (not a copy)
 *
 * @line is modified only transiently and is always restored before
 * returning. Returns 0 on success, -1 when the line is empty or the name
 * cannot be duplicated.
 */
static int disasm_line__parse(char *line, const char **namep, char **rawp)
{
	char tmp, *name = ltrim(line);

	if (name[0] == '\0')
		return -1;

	*rawp = name + 1;

	while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
		++*rawp;

	/* Terminate the name in place just long enough to duplicate it. */
	tmp = (*rawp)[0];
	(*rawp)[0] = '\0';
	*namep = strdup(name);
	(*rawp)[0] = tmp;

	/*
	 * On failure *namep is already NULL and there is nothing to free.
	 * In particular namep itself must not be passed to free(): it points
	 * at the caller's variable, not at heap memory.
	 */
	if (*namep == NULL)
		return -1;

	*rawp = ltrim(*rawp);

	return 0;
}
2017-10-11 17:01:29 +02:00
/*
 * Arguments bundled for creating annotation/disasm lines.
 * As used in this file: privsize is added to the private area reserved in
 * front of each disasm_line; arch and map are passed to the instruction
 * parser; evsel decides how many sample slots a line gets; offset, line
 * and line_nr are copied into the new annotation_line (an offset of -1
 * makes disasm_line__new() skip instruction parsing).
 */
struct annotate_args {
size_t privsize ;
2017-10-11 17:01:30 +02:00
struct arch * arch ;
2017-10-11 17:01:31 +02:00
struct map * map ;
2017-10-11 17:01:33 +02:00
struct perf_evsel * evsel ;
2017-10-11 17:01:32 +02:00
s64 offset ;
char * line ;
int line_nr ;
2017-10-11 17:01:29 +02:00
} ;
2017-10-11 17:01:37 +02:00
static void annotation_line__delete ( struct annotation_line * al )
{
void * ptr = ( void * ) al - al - > privsize ;
2017-10-11 17:01:41 +02:00
free_srcline ( al - > path ) ;
2017-10-11 17:01:37 +02:00
zfree ( & al - > line ) ;
free ( ptr ) ;
}
/*
* Allocating the annotation line data with following
* structure :
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* private space | struct annotation_line
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* Size of the private space is stored in ' struct annotation_line ' .
*
*/
static struct annotation_line *
annotation_line__new ( struct annotate_args * args , size_t privsize )
{
struct annotation_line * al ;
2017-10-11 17:01:39 +02:00
struct perf_evsel * evsel = args - > evsel ;
2017-10-11 17:01:37 +02:00
size_t size = privsize + sizeof ( * al ) ;
2017-10-11 17:01:39 +02:00
int nr = 1 ;
if ( perf_evsel__is_group_event ( evsel ) )
nr = evsel - > nr_members ;
size + = sizeof ( al - > samples [ 0 ] ) * nr ;
2017-10-11 17:01:37 +02:00
al = zalloc ( size ) ;
if ( al ) {
al = ( void * ) al + privsize ;
al - > privsize = privsize ;
al - > offset = args - > offset ;
al - > line = strdup ( args - > line ) ;
al - > line_nr = args - > line_nr ;
2017-10-11 17:01:39 +02:00
al - > samples_nr = nr ;
2017-10-11 17:01:37 +02:00
}
return al ;
}
/*
* Allocating the disasm annotation line data with
* following structure :
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* privsize space | struct disasm_line | struct annotation_line
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* We have ' struct annotation_line ' member as last member
* of ' struct disasm_line ' to have an easy access .
*
*/
2017-10-11 17:01:32 +02:00
static struct disasm_line * disasm_line__new ( struct annotate_args * args )
2011-02-04 09:45:46 -02:00
{
2017-10-11 17:01:37 +02:00
struct disasm_line * dl = NULL ;
struct annotation_line * al ;
size_t privsize = args - > privsize + offsetof ( struct disasm_line , al ) ;
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:37 +02:00
al = annotation_line__new ( args , privsize ) ;
if ( al ! = NULL ) {
dl = disasm_line ( al ) ;
2017-10-11 17:01:26 +02:00
if ( dl - > al . line = = NULL )
2012-04-02 12:59:01 -03:00
goto out_delete ;
2012-04-15 15:52:18 -03:00
2017-10-11 17:01:32 +02:00
if ( args - > offset ! = - 1 ) {
2017-10-11 17:01:26 +02:00
if ( disasm_line__parse ( dl - > al . line , & dl - > ins . name , & dl - > ops . raw ) < 0 )
2012-04-15 15:52:18 -03:00
goto out_free_line ;
2017-10-11 17:01:31 +02:00
disasm_line__init_ins ( dl , args - > arch , args - > map ) ;
2012-04-15 15:52:18 -03:00
}
2011-02-04 09:45:46 -02:00
}
2012-04-15 15:24:39 -03:00
return dl ;
2012-04-15 15:52:18 -03:00
out_free_line :
2017-10-11 17:01:26 +02:00
zfree ( & dl - > al . line ) ;
2012-04-02 12:59:01 -03:00
out_delete :
2012-04-15 15:24:39 -03:00
free ( dl ) ;
2012-04-02 12:59:01 -03:00
return NULL ;
2011-02-04 09:45:46 -02:00
}
2012-04-15 15:24:39 -03:00
void disasm_line__free ( struct disasm_line * dl )
2011-02-04 09:45:46 -02:00
{
2016-11-24 11:16:06 -03:00
if ( dl - > ins . ops & & dl - > ins . ops - > free )
dl - > ins . ops - > free ( & dl - > ops ) ;
2012-05-12 13:26:20 -03:00
else
ins__delete ( & dl - > ops ) ;
2016-11-24 11:16:06 -03:00
free ( ( void * ) dl - > ins . name ) ;
dl - > ins . name = NULL ;
2017-10-11 17:01:37 +02:00
annotation_line__delete ( & dl - > al ) ;
2011-02-04 09:45:46 -02:00
}
2012-05-07 18:54:16 -03:00
int disasm_line__scnprintf ( struct disasm_line * dl , char * bf , size_t size , bool raw )
{
2016-11-24 11:16:06 -03:00
if ( raw | | ! dl - > ins . ops )
perf annotate: Do not truncate instruction names at 6 chars
There are many instructions, esp on PowerPC, whose mnemonics are longer
than 6 characters. Using precision limit causes truncation of such
mnemonics.
Fix this by removing precision limit. Note that, 'width' is still 6, so
alignment won't get affected for length <= 6.
Before:
li r11,-1
xscvdp vs1,vs1
add. r10,r10,r11
After:
li r11,-1
xscvdpsxds vs1,vs1
add. r10,r10,r11
Reported-by: Donald Stence <dstence@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-11-14 08:55:40 +05:30
return scnprintf ( bf , size , " %-6s %s " , dl - > ins . name , dl - > ops . raw ) ;
2012-05-07 18:54:16 -03:00
2016-11-24 11:16:06 -03:00
return ins__scnprintf ( & dl - > ins , bf , size , & dl - > ops ) ;
2012-05-07 18:54:16 -03:00
}
2017-10-11 17:01:35 +02:00
static void annotation_line__add ( struct annotation_line * al , struct list_head * head )
2011-02-04 09:45:46 -02:00
{
2017-10-11 17:01:35 +02:00
list_add_tail ( & al - > node , head ) ;
2011-02-04 09:45:46 -02:00
}
2017-10-11 17:01:34 +02:00
struct annotation_line *
annotation_line__next ( struct annotation_line * pos , struct list_head * head )
2011-02-04 09:45:46 -02:00
{
2017-10-11 17:01:34 +02:00
list_for_each_entry_continue ( pos , head , node )
if ( pos - > offset > = 0 )
2011-02-04 09:45:46 -02:00
return pos ;
return NULL ;
}
perf annotate: Add branch stack / basic block
I wanted to know the hottest path through a function and figured the
branch-stack (LBR) information should be able to help out with that.
The below uses the branch-stack to create basic blocks and generate
statistics from them.
from to branch_i
* ----> *
|
| block
v
* ----> *
from to branch_i+1
The blocks are broken down into non-overlapping ranges, while tracking
if the start of each range is an entry point and/or the end of a range
is a branch.
Each block iterates all ranges it covers (while splitting where required
to exactly match the block) and increments the 'coverage' count.
For the range including the branch we increment the taken counter, as
well as the pred counter if flags.predicted.
Using these numbers we can find if an instruction:
- had coverage; given by:
br->coverage / br->sym->max_coverage
This metric ensures each symbol has a 100% spot, which reflects the
observation that each symbol must have a most covered/hottest
block.
- is a branch target: br->is_target && br->start == addr
- for targets, how much of a branch's coverages comes from it:
target->entry / branch->coverage
- is a branch: br->is_branch && br->end == addr
- for branches, how often it was taken:
br->taken / br->coverage
after all, all execution that didn't take the branch would have
incremented the coverage and continued onward to a later branch.
- for branches, how often it was predicted:
br->pred / br->taken
The coverage percentage is used to color the address and asm sections;
for low (<1%) coverage we use NORMAL (uncolored), indicating that these
instructions are not 'important'. For high coverage (>75%) we color the
address RED.
For each branch, we add an asm comment after the instruction with
information on how often it was taken and predicted.
Output looks like (sans color, which does lose a lot of the
information :/)
$ perf record --branch-filter u,any -e cycles:p ./branches 27
$ perf annotate branches
Percent | Source code & Disassembly of branches for cycles:pu (217 samples)
---------------------------------------------------------------------------------
: branches():
0.00 : 40057a: push %rbp
0.00 : 40057b: mov %rsp,%rbp
0.00 : 40057e: sub $0x20,%rsp
0.00 : 400582: mov %rdi,-0x18(%rbp)
0.00 : 400586: mov %rsi,-0x20(%rbp)
0.00 : 40058a: mov -0x18(%rbp),%rax
0.00 : 40058e: mov %rax,-0x10(%rbp)
0.00 : 400592: movq $0x0,-0x8(%rbp)
0.00 : 40059a: jmpq 400656 <branches+0xdc>
1.84 : 40059f: mov -0x10(%rbp),%rax # +100.00%
3.23 : 4005a3: and $0x1,%eax
1.84 : 4005a6: test %rax,%rax
0.00 : 4005a9: je 4005bf <branches+0x45> # -54.50% (p:42.00%)
0.46 : 4005ab: mov 0x200bbe(%rip),%rax # 601170 <acc>
12.90 : 4005b2: add $0x1,%rax
2.30 : 4005b6: mov %rax,0x200bb3(%rip) # 601170 <acc>
0.46 : 4005bd: jmp 4005d1 <branches+0x57> # -100.00% (p:100.00%)
0.92 : 4005bf: mov 0x200baa(%rip),%rax # 601170 <acc> # +49.54%
13.82 : 4005c6: sub $0x1,%rax
0.46 : 4005ca: mov %rax,0x200b9f(%rip) # 601170 <acc>
2.30 : 4005d1: mov -0x10(%rbp),%rax # +50.46%
0.46 : 4005d5: mov %rax,%rdi
0.46 : 4005d8: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 4005dd: mov %rax,-0x10(%rbp) # +100.00%
0.92 : 4005e1: mov -0x18(%rbp),%rax
0.00 : 4005e5: and $0x1,%eax
0.00 : 4005e8: test %rax,%rax
0.00 : 4005eb: je 4005ff <branches+0x85> # -100.00% (p:100.00%)
0.00 : 4005ed: mov 0x200b7c(%rip),%rax # 601170 <acc>
0.00 : 4005f4: shr $0x2,%rax
0.00 : 4005f8: mov %rax,0x200b71(%rip) # 601170 <acc>
0.00 : 4005ff: mov -0x10(%rbp),%rax # +100.00%
7.37 : 400603: and $0x1,%eax
3.69 : 400606: test %rax,%rax
0.00 : 400609: jne 400612 <branches+0x98> # -59.25% (p:42.99%)
1.84 : 40060b: mov $0x1,%eax
14.29 : 400610: jmp 400617 <branches+0x9d> # -100.00% (p:100.00%)
1.38 : 400612: mov $0x0,%eax # +57.65%
10.14 : 400617: test %al,%al # +42.35%
0.00 : 400619: je 40062f <branches+0xb5> # -57.65% (p:100.00%)
0.46 : 40061b: mov 0x200b4e(%rip),%rax # 601170 <acc>
2.76 : 400622: sub $0x1,%rax
0.00 : 400626: mov %rax,0x200b43(%rip) # 601170 <acc>
0.46 : 40062d: jmp 400641 <branches+0xc7> # -100.00% (p:100.00%)
0.92 : 40062f: mov 0x200b3a(%rip),%rax # 601170 <acc> # +56.13%
2.30 : 400636: add $0x1,%rax
0.92 : 40063a: mov %rax,0x200b2f(%rip) # 601170 <acc>
0.92 : 400641: mov -0x10(%rbp),%rax # +43.87%
2.30 : 400645: mov %rax,%rdi
0.00 : 400648: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 40064d: mov %rax,-0x10(%rbp) # +100.00%
1.84 : 400651: addq $0x1,-0x8(%rbp)
0.92 : 400656: mov -0x8(%rbp),%rax
5.07 : 40065a: cmp -0x20(%rbp),%rax
0.00 : 40065e: jb 40059f <branches+0x25> # -100.00% (p:100.00%)
0.00 : 400664: nop
0.00 : 400665: leaveq
0.00 : 400666: retq
(Note: the --branch-filter u,any was used to avoid spurious target and
branch points due to interrupts/faults, they show up as very small -/+
annotations on 'weird' locations)
Committer note:
Please take a look at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
To see the colors.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
[ Moved sym->max_coverage to 'struct annotate', aka symbol__annotate(sym) ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-05 16:08:12 -03:00
/*
 * Pick the color of the address column from the coverage of @br:
 * red above 75%, normal (dull) below 1%, magenta for anything else,
 * including a negative coverage value.
 */
static const char *annotate__address_color(struct block_range *br)
{
	const double coverage = block_range__coverage(br);

	if (!(coverage >= 0))
		return PERF_COLOR_MAGENTA;

	/* mark red for >75% coverage */
	if (coverage > 0.75)
		return PERF_COLOR_RED;

	/* mark dull for <1% coverage, magenta for the range in between */
	return coverage < 0.01 ? PERF_COLOR_NORMAL : PERF_COLOR_MAGENTA;
}
/*
 * Pick the color of the disassembly text from the coverage of @br:
 * normal (dull) for a non-negative coverage below 1%, blue otherwise.
 */
static const char *annotate__asm_color(struct block_range *br)
{
	const double coverage = block_range__coverage(br);

	/* mark dull for <1% coverage */
	if (coverage >= 0 && coverage < 0.01)
		return PERF_COLOR_NORMAL;

	return PERF_COLOR_BLUE;
}
/*
 * Print the branch statistics that trail an instruction as an asm comment.
 *
 * @br:   block range found for @addr; nothing is printed when it is NULL
 * @addr: address of the instruction being printed
 *
 * If @br is a branch target that starts at @addr, print "+P%", where P is
 * br->entry as a percentage of the coverage of the next range that ends in
 * a branch.  If @br ends in a branch at @addr, print "-P% (p:Q%)", where P
 * is taken/coverage and Q is pred/taken.  In both cases values of 0.1% or
 * less are skipped, and the first value printed is preceded by a comment
 * marker.
 */
static void annotate__branch_printf(struct block_range *br, u64 addr)
{
	bool need_marker = true;

	if (br == NULL)
		return;

	if (br->is_target && br->start == addr) {
		struct block_range *branch;
		double joined;

		/* Find matching branch to our target. */
		for (branch = br; !branch->is_branch; branch = block_range__next(branch))
			;

		/*
		 * The percentage of coverage joined at this target in relation
		 * to the next branch.
		 */
		joined = 100 * (double)br->entry / branch->coverage;
		if (joined > 0.1) {
			/* Nothing can have been printed before this point. */
			need_marker = false;
			printf(" \t # ");
			printf(" +%.2f%% ", joined);
		}
	}

	if (br->is_branch && br->end == addr) {
		double taken = 100 * (double)br->taken / br->coverage;

		if (taken > 0.1) {
			if (need_marker)
				printf(" \t # ");

			/*
			 * The percentage of coverage leaving at this branch, and
			 * its prediction ratio.
			 */
			printf(" -%.2f%% (p:%.2f%%) ", taken,
			       100 * (double)br->pred / br->taken);
		}
	}
}
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns; look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
/*
 * Print one disassembly line to stdout: address, line text and the
 * trailing branch statistics comment.
 *
 * @dl:             line to print
 * @start:          base address; the line's offset is added to it
 * @addr_fmt_width: field width used for the address column, so that
 *                  source and offset lines stay aligned without assuming
 *                  a fixed address width
 *
 * The address and the text are colored according to the coverage of the
 * block range found for the address.  Always returns 0.
 */
static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
{
	s64 offset = dl->al.offset;
	const u64 addr = start + offset;
	struct block_range *br = block_range__find(addr);

	/*
	 * The '*' width must be immediately followed by the PRIx64 length
	 * modifier and conversion; a space in between does not form a valid
	 * conversion specification.
	 */
	color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 " : ",
		      addr_fmt_width, addr);
	color_fprintf(stdout, annotate__asm_color(br), " %s ", dl->al.line);
	annotate__branch_printf(br, addr);

	return 0;
}
static int
annotation_line__print ( struct annotation_line * al , struct symbol * sym , u64 start ,
struct perf_evsel * evsel , u64 len , int min_pcnt , int printed ,
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes bigger different when displaying script sources, where the
comment lines looks oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones se
more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
int max_lines , struct annotation_line * queue , int addr_fmt_width )
2017-10-11 17:01:47 +02:00
{
struct disasm_line * dl = container_of ( al , struct disasm_line , al ) ;
2011-02-04 09:45:46 -02:00
static const char * prev_line ;
static const char * prev_color ;
2017-10-11 17:01:47 +02:00
if ( al - > offset ! = - 1 ) {
2017-10-11 17:01:42 +02:00
double max_percent = 0.0 ;
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
int i , nr_percent = 1 ;
2011-02-04 09:45:46 -02:00
const char * color ;
struct annotation * notes = symbol__annotation ( sym ) ;
2011-02-08 13:27:39 -02:00
2017-10-11 17:01:47 +02:00
for ( i = 0 ; i < al - > samples_nr ; i + + ) {
struct annotation_data * sample = & al - > samples [ i ] ;
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:42 +02:00
if ( sample - > percent > max_percent )
max_percent = sample - > percent ;
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
}
if ( max_percent < min_pcnt )
2011-02-06 14:54:44 -02:00
return - 1 ;
2011-02-08 15:01:39 -02:00
if ( max_lines & & printed > = max_lines )
2011-02-06 14:54:44 -02:00
return 1 ;
2011-02-05 15:37:31 -02:00
2011-02-08 15:29:25 -02:00
if ( queue ! = NULL ) {
2017-10-11 17:01:47 +02:00
list_for_each_entry_from ( queue , & notes - > src - > source , node ) {
if ( queue = = al )
2011-02-08 15:29:25 -02:00
break ;
2017-10-11 17:01:47 +02:00
annotation_line__print ( queue , sym , start , evsel , len ,
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes bigger different when displaying script sources, where the
comment lines looks oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones se
more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
0 , 0 , 1 , NULL , addr_fmt_width ) ;
2011-02-08 15:29:25 -02:00
}
}
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
color = get_percent_color ( max_percent ) ;
2011-02-04 09:45:46 -02:00
/*
* Also color the filename and line if needed , with
* the same color than the percentage . Don ' t print it
* twice for close colored addr with the same filename : line
*/
2017-10-11 17:01:47 +02:00
if ( al - > path ) {
if ( ! prev_line | | strcmp ( prev_line , al - > path )
2011-02-04 09:45:46 -02:00
| | color ! = prev_color ) {
2017-10-11 17:01:47 +02:00
color_fprintf ( stdout , color , " %s " , al - > path ) ;
prev_line = al - > path ;
2011-02-04 09:45:46 -02:00
prev_color = color ;
}
}
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
for ( i = 0 ; i < nr_percent ; i + + ) {
2017-10-11 17:01:47 +02:00
struct annotation_data * sample = & al - > samples [ i ] ;
2017-10-11 17:01:42 +02:00
color = get_percent_color ( sample - > percent ) ;
2015-06-19 16:10:43 -03:00
if ( symbol_conf . show_total_period )
2017-07-26 17:16:46 -03:00
color_fprintf ( stdout , color , " %11 " PRIu64 ,
2017-10-11 17:01:42 +02:00
sample - > he . period ) ;
2017-08-18 17:46:48 +09:00
else if ( symbol_conf . show_nr_samples )
color_fprintf ( stdout , color , " %7 " PRIu64 ,
2017-10-11 17:01:42 +02:00
sample - > he . nr_samples ) ;
2015-06-19 16:10:43 -03:00
else
2017-10-11 17:01:42 +02:00
color_fprintf ( stdout , color , " %7.2f " , sample - > percent ) ;
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
}
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes bigger different when displaying script sources, where the
comment lines looks oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones se
more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
printf ( " : " ) ;
perf annotate: Add branch stack / basic block
I wanted to know the hottest path through a function and figured the
branch-stack (LBR) information should be able to help out with that.
The below uses the branch-stack to create basic blocks and generate
statistics from them.
from to branch_i
* ----> *
|
| block
v
* ----> *
from to branch_i+1
The blocks are broken down into non-overlapping ranges, while tracking
if the start of each range is an entry point and/or the end of a range
is a branch.
Each block iterates all ranges it covers (while splitting where required
to exactly match the block) and increments the 'coverage' count.
For the range including the branch we increment the taken counter, as
well as the pred counter if flags.predicted.
Using these numbers we can find if an instruction:
- had coverage; given by:
br->coverage / br->sym->max_coverage
This metric ensures each symbol has a 100% spot, which reflects the
observation that each symbol must have a most covered/hottest
block.
- is a branch target: br->is_target && br->start == addr
- for targets, how much of a branch's coverages comes from it:
target->entry / branch->coverage
- is a branch: br->is_branch && br->end == addr
- for branches, how often it was taken:
br->taken / br->coverage
after all, all execution that didn't take the branch would have
incremented the coverage and continued onward to a later branch.
- for branches, how often it was predicted:
br->pred / br->taken
The coverage percentage is used to color the address and asm sections;
for low (<1%) coverage we use NORMAL (uncolored), indicating that these
instructions are not 'important'. For high coverage (>75%) we color the
address RED.
For each branch, we add an asm comment after the instruction with
information on how often it was taken and predicted.
Output looks like (sans color, which does lose a lot of the
information :/)
$ perf record --branch-filter u,any -e cycles:p ./branches 27
$ perf annotate branches
Percent | Source code & Disassembly of branches for cycles:pu (217 samples)
---------------------------------------------------------------------------------
: branches():
0.00 : 40057a: push %rbp
0.00 : 40057b: mov %rsp,%rbp
0.00 : 40057e: sub $0x20,%rsp
0.00 : 400582: mov %rdi,-0x18(%rbp)
0.00 : 400586: mov %rsi,-0x20(%rbp)
0.00 : 40058a: mov -0x18(%rbp),%rax
0.00 : 40058e: mov %rax,-0x10(%rbp)
0.00 : 400592: movq $0x0,-0x8(%rbp)
0.00 : 40059a: jmpq 400656 <branches+0xdc>
1.84 : 40059f: mov -0x10(%rbp),%rax # +100.00%
3.23 : 4005a3: and $0x1,%eax
1.84 : 4005a6: test %rax,%rax
0.00 : 4005a9: je 4005bf <branches+0x45> # -54.50% (p:42.00%)
0.46 : 4005ab: mov 0x200bbe(%rip),%rax # 601170 <acc>
12.90 : 4005b2: add $0x1,%rax
2.30 : 4005b6: mov %rax,0x200bb3(%rip) # 601170 <acc>
0.46 : 4005bd: jmp 4005d1 <branches+0x57> # -100.00% (p:100.00%)
0.92 : 4005bf: mov 0x200baa(%rip),%rax # 601170 <acc> # +49.54%
13.82 : 4005c6: sub $0x1,%rax
0.46 : 4005ca: mov %rax,0x200b9f(%rip) # 601170 <acc>
2.30 : 4005d1: mov -0x10(%rbp),%rax # +50.46%
0.46 : 4005d5: mov %rax,%rdi
0.46 : 4005d8: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 4005dd: mov %rax,-0x10(%rbp) # +100.00%
0.92 : 4005e1: mov -0x18(%rbp),%rax
0.00 : 4005e5: and $0x1,%eax
0.00 : 4005e8: test %rax,%rax
0.00 : 4005eb: je 4005ff <branches+0x85> # -100.00% (p:100.00%)
0.00 : 4005ed: mov 0x200b7c(%rip),%rax # 601170 <acc>
0.00 : 4005f4: shr $0x2,%rax
0.00 : 4005f8: mov %rax,0x200b71(%rip) # 601170 <acc>
0.00 : 4005ff: mov -0x10(%rbp),%rax # +100.00%
7.37 : 400603: and $0x1,%eax
3.69 : 400606: test %rax,%rax
0.00 : 400609: jne 400612 <branches+0x98> # -59.25% (p:42.99%)
1.84 : 40060b: mov $0x1,%eax
14.29 : 400610: jmp 400617 <branches+0x9d> # -100.00% (p:100.00%)
1.38 : 400612: mov $0x0,%eax # +57.65%
10.14 : 400617: test %al,%al # +42.35%
0.00 : 400619: je 40062f <branches+0xb5> # -57.65% (p:100.00%)
0.46 : 40061b: mov 0x200b4e(%rip),%rax # 601170 <acc>
2.76 : 400622: sub $0x1,%rax
0.00 : 400626: mov %rax,0x200b43(%rip) # 601170 <acc>
0.46 : 40062d: jmp 400641 <branches+0xc7> # -100.00% (p:100.00%)
0.92 : 40062f: mov 0x200b3a(%rip),%rax # 601170 <acc> # +56.13%
2.30 : 400636: add $0x1,%rax
0.92 : 40063a: mov %rax,0x200b2f(%rip) # 601170 <acc>
0.92 : 400641: mov -0x10(%rbp),%rax # +43.87%
2.30 : 400645: mov %rax,%rdi
0.00 : 400648: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 40064d: mov %rax,-0x10(%rbp) # +100.00%
1.84 : 400651: addq $0x1,-0x8(%rbp)
0.92 : 400656: mov -0x8(%rbp),%rax
5.07 : 40065a: cmp -0x20(%rbp),%rax
0.00 : 40065e: jb 40059f <branches+0x25> # -100.00% (p:100.00%)
0.00 : 400664: nop
0.00 : 400665: leaveq
0.00 : 400666: retq
(Note: the --branch-filter u,any was used to avoid spurious target and
branch points due to interrupts/faults, they show up as very small -/+
annotations on 'weird' locations)
Committer note:
Please take a look at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
To see the colors.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
[ Moved sym->max_coverage to 'struct annotate', aka symbol__annotate(sym) ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-05 16:08:12 -03:00
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns; look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
disasm_line__print ( dl , start , addr_fmt_width ) ;
perf annotate: Add branch stack / basic block
I wanted to know the hottest path through a function and figured the
branch-stack (LBR) information should be able to help out with that.
The below uses the branch-stack to create basic blocks and generate
statistics from them.
from to branch_i
* ----> *
|
| block
v
* ----> *
from to branch_i+1
The blocks are broken down into non-overlapping ranges, while tracking
if the start of each range is an entry point and/or the end of a range
is a branch.
Each block iterates all ranges it covers (while splitting where required
to exactly match the block) and increments the 'coverage' count.
For the range including the branch we increment the taken counter, as
well as the pred counter if flags.predicted.
Using these numbers we can find if an instruction:
- had coverage; given by:
br->coverage / br->sym->max_coverage
This metric ensures each symbol has a 100% spot, which reflects the
observation that each symbol must have a most covered/hottest
block.
- is a branch target: br->is_target && br->start == addr
- for targets, how much of a branch's coverages comes from it:
target->entry / branch->coverage
- is a branch: br->is_branch && br->end == addr
- for branches, how often it was taken:
br->taken / br->coverage
after all, all execution that didn't take the branch would have
incremented the coverage and continued onward to a later branch.
- for branches, how often it was predicted:
br->pred / br->taken
The coverage percentage is used to color the address and asm sections;
for low (<1%) coverage we use NORMAL (uncolored), indicating that these
instructions are not 'important'. For high coverage (>75%) we color the
address RED.
For each branch, we add an asm comment after the instruction with
information on how often it was taken and predicted.
Output looks like (sans color, which does lose a lot of the
information :/)
$ perf record --branch-filter u,any -e cycles:p ./branches 27
$ perf annotate branches
Percent | Source code & Disassembly of branches for cycles:pu (217 samples)
---------------------------------------------------------------------------------
: branches():
0.00 : 40057a: push %rbp
0.00 : 40057b: mov %rsp,%rbp
0.00 : 40057e: sub $0x20,%rsp
0.00 : 400582: mov %rdi,-0x18(%rbp)
0.00 : 400586: mov %rsi,-0x20(%rbp)
0.00 : 40058a: mov -0x18(%rbp),%rax
0.00 : 40058e: mov %rax,-0x10(%rbp)
0.00 : 400592: movq $0x0,-0x8(%rbp)
0.00 : 40059a: jmpq 400656 <branches+0xdc>
1.84 : 40059f: mov -0x10(%rbp),%rax # +100.00%
3.23 : 4005a3: and $0x1,%eax
1.84 : 4005a6: test %rax,%rax
0.00 : 4005a9: je 4005bf <branches+0x45> # -54.50% (p:42.00%)
0.46 : 4005ab: mov 0x200bbe(%rip),%rax # 601170 <acc>
12.90 : 4005b2: add $0x1,%rax
2.30 : 4005b6: mov %rax,0x200bb3(%rip) # 601170 <acc>
0.46 : 4005bd: jmp 4005d1 <branches+0x57> # -100.00% (p:100.00%)
0.92 : 4005bf: mov 0x200baa(%rip),%rax # 601170 <acc> # +49.54%
13.82 : 4005c6: sub $0x1,%rax
0.46 : 4005ca: mov %rax,0x200b9f(%rip) # 601170 <acc>
2.30 : 4005d1: mov -0x10(%rbp),%rax # +50.46%
0.46 : 4005d5: mov %rax,%rdi
0.46 : 4005d8: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 4005dd: mov %rax,-0x10(%rbp) # +100.00%
0.92 : 4005e1: mov -0x18(%rbp),%rax
0.00 : 4005e5: and $0x1,%eax
0.00 : 4005e8: test %rax,%rax
0.00 : 4005eb: je 4005ff <branches+0x85> # -100.00% (p:100.00%)
0.00 : 4005ed: mov 0x200b7c(%rip),%rax # 601170 <acc>
0.00 : 4005f4: shr $0x2,%rax
0.00 : 4005f8: mov %rax,0x200b71(%rip) # 601170 <acc>
0.00 : 4005ff: mov -0x10(%rbp),%rax # +100.00%
7.37 : 400603: and $0x1,%eax
3.69 : 400606: test %rax,%rax
0.00 : 400609: jne 400612 <branches+0x98> # -59.25% (p:42.99%)
1.84 : 40060b: mov $0x1,%eax
14.29 : 400610: jmp 400617 <branches+0x9d> # -100.00% (p:100.00%)
1.38 : 400612: mov $0x0,%eax # +57.65%
10.14 : 400617: test %al,%al # +42.35%
0.00 : 400619: je 40062f <branches+0xb5> # -57.65% (p:100.00%)
0.46 : 40061b: mov 0x200b4e(%rip),%rax # 601170 <acc>
2.76 : 400622: sub $0x1,%rax
0.00 : 400626: mov %rax,0x200b43(%rip) # 601170 <acc>
0.46 : 40062d: jmp 400641 <branches+0xc7> # -100.00% (p:100.00%)
0.92 : 40062f: mov 0x200b3a(%rip),%rax # 601170 <acc> # +56.13%
2.30 : 400636: add $0x1,%rax
0.92 : 40063a: mov %rax,0x200b2f(%rip) # 601170 <acc>
0.92 : 400641: mov -0x10(%rbp),%rax # +43.87%
2.30 : 400645: mov %rax,%rdi
0.00 : 400648: callq 400526 <lfsr> # -100.00% (p:100.00%)
0.00 : 40064d: mov %rax,-0x10(%rbp) # +100.00%
1.84 : 400651: addq $0x1,-0x8(%rbp)
0.92 : 400656: mov -0x8(%rbp),%rax
5.07 : 40065a: cmp -0x20(%rbp),%rax
0.00 : 40065e: jb 40059f <branches+0x25> # -100.00% (p:100.00%)
0.00 : 400664: nop
0.00 : 400665: leaveq
0.00 : 400666: retq
(Note: the --branch-filter u,any was used to avoid spurious target and
branch points due to interrupts/faults, they show up as very small -/+
annotations on 'weird' locations)
Committer note:
Please take a look at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
To see the colors.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
[ Moved sym->max_coverage to 'struct annotate', aka symbol__annotate(sym) ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-05 16:08:12 -03:00
printf ( " \n " ) ;
2011-02-08 15:01:39 -02:00
} else if ( max_lines & & printed > = max_lines )
2011-02-06 14:54:44 -02:00
return 1 ;
else {
2017-07-26 17:16:46 -03:00
int width = symbol_conf . show_total_period ? 12 : 8 ;
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
2011-02-08 15:29:25 -02:00
if ( queue )
return - 1 ;
2013-03-05 14:53:26 +09:00
if ( perf_evsel__is_group_event ( evsel ) )
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
width * = evsel - > nr_members ;
2017-10-11 17:01:47 +02:00
if ( ! * al - > line )
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
printf ( " %*s: \n " , width , " " ) ;
2011-02-04 09:45:46 -02:00
else
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns; look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
printf ( " %*s: %*s %s \n " , width , " " , addr_fmt_width , " " , al - > line ) ;
2011-02-04 09:45:46 -02:00
}
2011-02-06 14:54:44 -02:00
return 0 ;
2011-02-04 09:45:46 -02:00
}
2013-03-05 14:53:22 +09:00
/*
 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
 * which looks like following
 *
 *  0000000000415500 <_init>:
 *    415500:       sub    $0x8,%rsp
 *    415504:       mov    0x2f5ad5(%rip),%rax        # 70afe0 <_DYNAMIC+0x2f8>
 *    41550b:       test   %rax,%rax
 *    41550e:       je     415515 <_init+0x15>
 *    415510:       callq  416e70 <__gmon_start__@plt>
 *    415515:       add    $0x8,%rsp
 *    415519:       retq
 *
 * it will be parsed and saved into struct disasm_line as
 *  <offset>       <name>  <ops.raw>
 *
 * The offset will be a relative offset from the start of the symbol and -1
 * means that it's not a disassembly line so should be treated differently.
 * The ops.raw part will be parsed further according to type of the instruction.
 *
 * One line is read from @file per call.  A line matching the file_lineno
 * regex ("/filename:linenr") only updates *@line_nr and creates no
 * disasm_line.  Any other line is passed to disasm_line__new() through @args
 * (offset, line, line_nr) and the result is appended to the symbol's
 * annotation source list; *@line_nr is then incremented.
 *
 * Returns 0 when the line was consumed, -1 when getline() fails or
 * disasm_line__new() returns NULL.  The getline() buffer is released on every
 * return path.
 */
2017-10-11 17:01:31 +02:00
static int symbol__parse_objdump_line ( struct symbol * sym , FILE * file ,
2017-10-11 17:01:29 +02:00
struct annotate_args * args ,
2014-11-12 18:05:26 -08:00
int * line_nr )
2011-02-04 09:45:46 -02:00
{
2017-10-11 17:01:31 +02:00
struct map * map = args - > map ;
2011-02-08 13:27:39 -02:00
struct annotation * notes = symbol__annotation ( sym ) ;
2012-04-15 15:24:39 -03:00
struct disasm_line * dl ;
2017-04-08 09:52:24 +09:00
char * line = NULL , * parsed_line , * tmp , * tmp2 ;
2011-02-04 09:45:46 -02:00
size_t line_len ;
s64 line_ip , offset = - 1 ;
2014-11-12 18:05:26 -08:00
regmatch_t match [ 2 ] ;
2011-02-04 09:45:46 -02:00
/* getline() may have allocated the buffer even when it reports failure */
if ( getline ( & line , & line_len , file ) < 0 ) {
free ( line ) ;
return - 1 ;
}
if ( ! line )
return - 1 ;
line_ip = - 1 ;
2017-04-08 09:52:24 +09:00
parsed_line = rtrim ( line ) ;
2011-02-04 09:45:46 -02:00
2014-11-12 18:05:26 -08:00
/* /filename:linenr ? Save line number and ignore. */
2017-04-08 09:52:25 +09:00
if ( regexec ( & file_lineno , parsed_line , 2 , match , 0 ) = = 0 ) {
* line_nr = atoi ( parsed_line + match [ 1 ] . rm_so ) ;
2014-11-12 18:05:26 -08:00
/* parsed_line is not used past this point, so the buffer can go */
free ( line ) ;
return 0 ;
}
2017-04-08 09:52:24 +09:00
tmp = ltrim ( parsed_line ) ;
2011-02-04 09:45:46 -02:00
if ( * tmp ) {
/*
* Parse hexa addresses followed by ' : '
*/
line_ip = strtoull ( tmp , & tmp2 , 16 ) ;
if ( * tmp2 ! = ' : ' | | tmp = = tmp2 | | tmp2 [ 1 ] = = ' \0 ' )
line_ip = - 1 ;
}
if ( line_ip ! = - 1 ) {
u64 start = map__rip_2objdump ( map , sym - > start ) ,
end = map__rip_2objdump ( map , sym - > end ) ;
/* offset is relative to the symbol start; addresses outside [start, end) get -1 */
offset = line_ip - start ;
2014-10-14 17:19:44 -03:00
if ( ( u64 ) line_ip < start | | ( u64 ) line_ip > = end )
2011-02-04 09:45:46 -02:00
offset = - 1 ;
2012-04-02 12:59:01 -03:00
else
parsed_line = tmp2 + 1 ;
2012-04-11 17:04:59 -03:00
}
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:32 +02:00
args - > offset = offset ;
args - > line = parsed_line ;
args - > line_nr = * line_nr ;
dl = disasm_line__new ( args ) ;
2012-04-02 12:59:01 -03:00
/* NOTE(review): freeing here presumes disasm_line__new() kept no pointer into line - confirm */
free ( line ) ;
2014-11-12 18:05:26 -08:00
( * line_nr ) + + ;
2012-04-02 12:59:01 -03:00
2012-04-15 15:24:39 -03:00
if ( dl = = NULL )
2011-02-04 09:45:46 -02:00
return - 1 ;
2012-04-02 12:59:01 -03:00
perf annotate: Fix jump target outside of function address range
If jump target is outside of function range, perf is not handling it
correctly. Especially when target address is lesser than function start
address, target offset will be negative. But, target address declared to
be unsigned, converts negative number into 2's complement. See below
example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is
lesser than function start address(34cf0).
34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
Objdump output:
0000000000034cf0 <__sigaction>:
__GI___sigaction():
34cf0: lea -0x20(%rdi),%eax
34cf3: cmp -bashx1,%eax
34cf6: jbe 34d00 <__sigaction+0x10>
34cf8: jmpq 34ac0 <__GI___libc_sigaction>
34cfd: nopl (%rax)
34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8>
34d07: movl -bashx16,%fs:(%rax)
34d0e: mov -bashxffffffff,%eax
34d13: retq
perf annotate before applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp -bashx1,%eax
v jbe 10
v jmpq fffffffffffffdd0
nop
10: mov _DYNAMIC+0x2e8,%rax
movl -bashx16,%fs:(%rax)
mov -bashxffffffff,%eax
retq
perf annotate after applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp -bashx1,%eax
v jbe 10
^ jmpq 34ac0 <__GI___libc_sigaction>
nop
10: mov _DYNAMIC+0x2e8,%rax
movl -bashx16,%fs:(%rax)
mov -bashxffffffff,%eax
retq
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chris Riyder <chris.ryder@arm.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-05 21:26:47 +05:30
/* store the target as an offset from the symbol's objdump start address */
if ( ! disasm_line__has_offset ( dl ) ) {
2013-08-07 14:38:54 +03:00
dl - > ops . target . offset = dl - > ops . target . addr -
map__rip_2objdump ( map , sym - > start ) ;
perf annotate: Fix jump target outside of function address range
If jump target is outside of function range, perf is not handling it
correctly. Especially when target address is lesser than function start
address, target offset will be negative. But, target address declared to
be unsigned, converts negative number into 2's complement. See below
example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is
lesser than function start address(34cf0).
34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
Objdump output:
0000000000034cf0 <__sigaction>:
__GI___sigaction():
34cf0: lea -0x20(%rdi),%eax
34cf3: cmp -bashx1,%eax
34cf6: jbe 34d00 <__sigaction+0x10>
34cf8: jmpq 34ac0 <__GI___libc_sigaction>
34cfd: nopl (%rax)
34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8>
34d07: movl -bashx16,%fs:(%rax)
34d0e: mov -bashxffffffff,%eax
34d13: retq
perf annotate before applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp -bashx1,%eax
v jbe 10
v jmpq fffffffffffffdd0
nop
10: mov _DYNAMIC+0x2e8,%rax
movl -bashx16,%fs:(%rax)
mov -bashxffffffff,%eax
retq
perf annotate after applying patch:
__GI___sigaction /usr/lib64/libc-2.22.so
lea -0x20(%rdi),%eax
cmp -bashx1,%eax
v jbe 10
^ jmpq 34ac0 <__GI___libc_sigaction>
nop
10: mov _DYNAMIC+0x2e8,%rax
movl -bashx16,%fs:(%rax)
mov -bashxffffffff,%eax
retq
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chris Riyder <chris.ryder@arm.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-05 21:26:47 +05:30
dl - > ops . target . offset_avail = true ;
}
2013-08-07 14:38:54 +03:00
2018-03-02 11:59:36 -03:00
/* kcore has no symbols, so add the call target symbol */
if ( dl - > ins . ops & & ins__is_call ( & dl - > ins ) & & ! dl - > ops . target . sym ) {
2013-10-14 13:43:40 +03:00
struct addr_map_symbol target = {
. map = map ,
. addr = dl - > ops . target . addr ,
} ;
2016-09-01 19:25:52 -03:00
/* only accept the lookup when the address is exactly the start of the found symbol */
if ( ! map_groups__find_ams ( & target ) & &
2013-10-14 13:43:40 +03:00
target . sym - > start = = target . al_addr )
2018-03-02 11:59:36 -03:00
dl - > ops . target . sym = target . sym ;
2013-08-07 14:38:57 +03:00
}
2017-10-11 17:01:35 +02:00
annotation_line__add ( & dl - > al , & notes - > src - > source ) ;
2011-02-04 09:45:46 -02:00
return 0 ;
}
2014-11-12 18:05:26 -08:00
/*
 * Constructor-attribute function: compiles the pattern below into the
 * file_lineno regex with REG_EXTENDED.  symbol__parse_objdump_line() passes
 * file_lineno to regexec().
 *
 * NOTE(review): the return value of regcomp() is not checked here.
 */
static __attribute__ ( ( constructor ) ) void symbol__init_regexpr ( void )
{
regcomp ( & file_lineno , " ^/[^:]+:([0-9]+) " , REG_EXTENDED ) ;
}
2013-08-07 14:38:56 +03:00
/*
 * Remove trailing nop entries from the symbol's annotation source list.
 *
 * Walks the list from its tail: each last entry that is recognised as a nop
 * is unlinked and freed, and the walk stops (returns) at the first entry
 * that is not a nop or when the list becomes empty.
 */
static void delete_last_nop ( struct symbol * sym )
{
struct annotation * notes = symbol__annotation ( sym ) ;
struct list_head * list = & notes - > src - > source ;
struct disasm_line * dl ;
while ( ! list_empty ( list ) ) {
2017-10-11 17:01:25 +02:00
/* list->prev is the current last entry */
dl = list_entry ( list - > prev , struct disasm_line , al . node ) ;
2013-08-07 14:38:56 +03:00
2016-11-24 11:16:06 -03:00
if ( dl - > ins . ops ) {
/* instruction has an ops table: it is a nop only if that table is nop_ops */
if ( dl - > ins . ops ! = & nop_ops )
2013-08-07 14:38:56 +03:00
return ;
} else {
2017-10-11 17:01:26 +02:00
/* no ops table: fall back to looking for a nop mnemonic in the line text */
if ( ! strstr ( dl - > al . line , " nop " ) & &
! strstr ( dl - > al . line , " nopl " ) & &
! strstr ( dl - > al . line , " nopw " ) )
2013-08-07 14:38:56 +03:00
return ;
}
2017-10-11 17:01:25 +02:00
list_del ( & dl - > al . node ) ;
2013-08-07 14:38:56 +03:00
disasm_line__free ( dl ) ;
}
}
2016-07-29 16:27:18 -03:00
/*
 * Format a human readable message for a disassembly error code into @buf.
 *
 * @sym:    unused (marked __maybe_unused)
 * @map:    map whose dso supplies the build id for the "no vmlinux" message
 * @errnum: error code; values >= 0 are handed to str_error_r(), negative
 *          values are looked up in the switch below
 * @buf:    destination buffer
 * @buflen: size of @buf; must not be 0 (BUG_ON)
 *
 * Always returns 0.
 */
int symbol__strerror_disassemble ( struct symbol * sym __maybe_unused , struct map * map ,
int errnum , char * buf , size_t buflen )
{
struct dso * dso = map - > dso ;
BUG_ON ( buflen = = 0 ) ;
if ( errnum > = 0 ) {
str_error_r ( errnum , buf , buflen ) ;
return 0 ;
}
switch ( errnum ) {
case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX : {
/* bf starts with a fixed prefix; the build id text is written at offset 15 */
char bf [ SBUILD_ID_SIZE + 15 ] = " with build id " ;
char * build_id_msg = NULL ;
if ( dso - > has_build_id ) {
build_id__sprintf ( dso - > build_id ,
sizeof ( dso - > build_id ) , bf + 15 ) ;
build_id_msg = bf ;
}
/* build_id_msg stays NULL without a build id, so the fallback literal is printed for %s */
scnprintf ( buf , buflen ,
" No vmlinux file%s \n was found in the path. \n \n "
" Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability. \n \n "
" Please use: \n \n "
" perf buildid-cache -vu vmlinux \n \n "
" or: \n \n "
" --vmlinux vmlinux \n " , build_id_msg ? : " " ) ;
}
break ;
default :
scnprintf ( buf , buflen , " Internal error: Invalid %d error code \n " , errnum ) ;
break ;
}
return 0 ;
}
2016-08-09 15:32:53 -03:00
/*
 * Choose the file to disassemble for @dso and write its path into @filename
 * (at most @filename_size bytes).
 *
 * The build-id cache entry is tried first.  The code falls back to
 * dso->long_name (joined with the symfs) when there is no build-id filename,
 * the dso is kcore, the build-id path cannot be read as a symlink, the link
 * target contains DSO__NAME_KALLSYMS, or the cached file is not readable.
 *
 * Returns 0 on success, SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX when the dso only
 * has kallsyms and is not kcore, ENOMEM when the dso has a build id but no
 * build-id filename could be produced, and -1 when strdup() fails.
 */
static int dso__disassemble_filename ( struct dso * dso , char * filename , size_t filename_size )
2011-02-04 09:45:46 -02:00
{
2016-08-09 15:32:53 -03:00
/*
 * readlink() does not NUL-terminate its output.  Start from an all-zero
 * buffer and let readlink() fill at most sizeof - 1 bytes so that the
 * strstr() below always sees a terminated string.
 */
char linkname [ PATH_MAX ] = { 0 } ;
char * build_id_filename ;
2017-03-27 16:10:36 +09:00
char * build_id_path = NULL ;
2017-06-08 16:31:01 +09:00
char * pos ;
2011-02-04 09:45:46 -02:00
2016-08-09 14:56:13 -03:00
if ( dso - > symtab_type = = DSO_BINARY_TYPE__KALLSYMS & &
! dso__is_kcore ( dso ) )
2016-08-09 15:32:53 -03:00
return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX ;
2016-08-09 14:56:13 -03:00
2017-07-05 18:48:13 -07:00
build_id_filename = dso__build_id_filename ( dso , NULL , 0 , false ) ;
2016-08-09 15:32:53 -03:00
if ( build_id_filename ) {
__symbol__join_symfs ( filename , filename_size , build_id_filename ) ;
free ( build_id_filename ) ;
2016-08-09 15:16:37 -03:00
} else {
2016-07-29 16:27:18 -03:00
if ( dso - > has_build_id )
return ENOMEM ;
2011-02-04 09:45:46 -02:00
/* build_id_path is still NULL here, so the free() at the end is a no-op */
goto fallback ;
2016-08-09 15:16:37 -03:00
}
2017-03-27 16:10:36 +09:00
build_id_path = strdup ( filename ) ;
if ( ! build_id_path )
return - 1 ;
2017-06-08 16:31:01 +09:00
/*
* old style build - id cache has name of XX / XXXXXXX . . while
* new style has XX / XXXXXXX . . / { elf , kallsyms , vdso } .
* extract the build - id part of dirname in the new style only .
*/
pos = strrchr ( build_id_path , ' / ' ) ;
/* NOTE(review): the result of dirname() is discarded, so this presumably relies on in-place truncation of build_id_path - confirm */
if ( pos & & strlen ( pos ) < SBUILD_ID_SIZE - 2 )
dirname ( build_id_path ) ;
2017-03-27 16:10:36 +09:00
2016-08-09 15:16:37 -03:00
if ( dso__is_kcore ( dso ) | |
2017-03-27 16:10:36 +09:00
readlink ( build_id_path , linkname , sizeof ( linkname ) - 1 ) < 0 | |
2016-08-09 15:32:53 -03:00
strstr ( linkname , DSO__NAME_KALLSYMS ) | |
access ( filename , R_OK ) ) {
2011-02-04 09:45:46 -02:00
fallback :
/*
* If we don ' t have build - ids or the build - id file isn ' t in the
* cache , or is just a kallsyms file , well , lets hope that this
* DSO is the same as when ' perf record ' ran .
*/
2016-08-09 15:32:53 -03:00
__symbol__join_symfs ( filename , filename_size , dso - > long_name ) ;
2011-02-04 09:45:46 -02:00
}
2017-03-27 16:10:36 +09:00
free ( build_id_path ) ;
2016-08-09 15:32:53 -03:00
return 0 ;
}
2017-10-11 17:01:31 +02:00
static int symbol__disassemble ( struct symbol * sym , struct annotate_args * args )
2016-08-09 15:32:53 -03:00
{
2017-10-11 17:01:31 +02:00
struct map * map = args - > map ;
2016-08-09 15:32:53 -03:00
struct dso * dso = map - > dso ;
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
char * command ;
2016-08-09 15:32:53 -03:00
FILE * file ;
char symfs_filename [ PATH_MAX ] ;
struct kcore_extract kce ;
bool delete_extract = false ;
int stdout_fd [ 2 ] ;
int lineno = 0 ;
int nline ;
pid_t pid ;
int err = dso__disassemble_filename ( dso , symfs_filename , sizeof ( symfs_filename ) ) ;
if ( err )
return err ;
2011-02-04 09:45:46 -02:00
pr_debug ( " %s: filename=%s, sym=%s, start=%# " PRIx64 " , end=%# " PRIx64 " \n " , __func__ ,
2016-08-09 15:16:37 -03:00
symfs_filename , sym - > name , map - > unmap_ip ( map , sym - > start ) ,
2011-02-04 09:45:46 -02:00
map - > unmap_ip ( map , sym - > end ) ) ;
pr_debug ( " annotating [%p] %30s : [%p] %30s \n " ,
dso , dso - > long_name , sym , sym - > name ) ;
2013-10-09 15:01:12 +03:00
if ( dso__is_kcore ( dso ) ) {
kce . kcore_filename = symfs_filename ;
kce . addr = map__rip_2objdump ( map , sym - > start ) ;
kce . offs = sym - > start ;
2014-10-14 17:19:44 -03:00
kce . len = sym - > end - sym - > start ;
2013-10-09 15:01:12 +03:00
if ( ! kcore_extract__create ( & kce ) ) {
delete_extract = true ;
strlcpy ( symfs_filename , kce . extract_filename ,
sizeof ( symfs_filename ) ) ;
}
2015-03-02 12:56:12 -05:00
} else if ( dso__needs_decompress ( dso ) ) {
2017-06-08 16:31:04 +09:00
char tmp [ KMOD_DECOMP_LEN ] ;
2015-03-02 12:56:12 -05:00
2017-06-08 16:31:04 +09:00
if ( dso__decompress_kmodule_path ( dso , symfs_filename ,
tmp , sizeof ( tmp ) ) < 0 )
2016-08-09 15:16:37 -03:00
goto out ;
2015-03-02 12:56:12 -05:00
strcpy ( symfs_filename , tmp ) ;
2013-10-09 15:01:12 +03:00
}
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
err = asprintf ( & command ,
2012-09-04 12:32:30 +02:00
" %s %s%s --start-address=0x%016 " PRIx64
2011-05-17 17:32:07 +02:00
" --stop-address=0x%016 " PRIx64
2017-05-05 15:44:17 +05:30
" -l -d %s %s -C \" %s \" 2>/dev/null|grep -v \" %s: \" |expand " ,
2012-09-04 12:32:30 +02:00
objdump_path ? objdump_path : " objdump " ,
2011-09-15 14:31:41 -07:00
disassembler_style ? " -M " : " " ,
disassembler_style ? disassembler_style : " " ,
2011-02-04 09:45:46 -02:00
map__rip_2objdump ( map , sym - > start ) ,
2014-10-14 17:19:44 -03:00
map__rip_2objdump ( map , sym - > end ) ,
2011-05-17 17:32:07 +02:00
symbol_conf . annotate_asm_raw ? " " : " --no-show-raw " ,
symbol_conf . annotate_src ? " -S " : " " ,
2016-08-09 15:16:37 -03:00
symfs_filename , symfs_filename ) ;
2011-02-04 09:45:46 -02:00
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
if ( err < 0 ) {
pr_err ( " Failure allocating memory for the command to run \n " ) ;
goto out_remove_tmp ;
}
2011-02-04 09:45:46 -02:00
pr_debug ( " Executing: %s \n " , command ) ;
2016-06-15 15:48:08 -03:00
err = - 1 ;
if ( pipe ( stdout_fd ) < 0 ) {
pr_err ( " Failure creating the pipe to run %s \n " , command ) ;
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
goto out_free_command ;
2016-06-15 15:48:08 -03:00
}
pid = fork ( ) ;
if ( pid < 0 ) {
pr_err ( " Failure forking to run %s \n " , command ) ;
goto out_close_stdout ;
}
if ( pid = = 0 ) {
close ( stdout_fd [ 0 ] ) ;
dup2 ( stdout_fd [ 1 ] , 1 ) ;
close ( stdout_fd [ 1 ] ) ;
execl ( " /bin/sh " , " sh " , " -c " , command , NULL ) ;
perror ( command ) ;
exit ( - 1 ) ;
}
close ( stdout_fd [ 1 ] ) ;
file = fdopen ( stdout_fd [ 0 ] , " r " ) ;
2015-11-05 19:06:07 -08:00
if ( ! file ) {
2016-06-15 15:48:08 -03:00
pr_err ( " Failure creating FILE stream for %s \n " , command ) ;
2015-11-05 19:06:07 -08:00
/*
* If we were using debug info should retry with
* original binary .
*/
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
goto out_free_command ;
2015-11-05 19:06:07 -08:00
}
2011-02-04 09:45:46 -02:00
2015-11-05 19:06:07 -08:00
nline = 0 ;
while ( ! feof ( file ) ) {
2017-03-21 16:00:50 -03:00
/*
* The source code line number ( lineno ) needs to be kept in
* accross calls to symbol__parse_objdump_line ( ) , so that it
* can associate it with the instructions till the next one .
* See disasm_line__new ( ) and struct disasm_line : : line_nr .
*/
2017-10-11 17:01:31 +02:00
if ( symbol__parse_objdump_line ( sym , file , args , & lineno ) < 0 )
2011-02-04 09:45:46 -02:00
break ;
2015-11-05 19:06:07 -08:00
nline + + ;
}
if ( nline = = 0 )
pr_err ( " No output from %s \n " , command ) ;
2011-02-04 09:45:46 -02:00
2013-08-07 14:38:56 +03:00
/*
* kallsyms does not have symbol sizes so there may a nop at the end .
* Remove it .
*/
if ( dso__is_kcore ( dso ) )
delete_last_nop ( sym ) ;
2016-06-15 15:48:08 -03:00
fclose ( file ) ;
err = 0 ;
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
out_free_command :
free ( command ) ;
2015-03-02 12:56:12 -05:00
out_remove_tmp :
2016-06-15 15:48:08 -03:00
close ( stdout_fd [ 0 ] ) ;
2015-03-02 12:56:12 -05:00
if ( dso__needs_decompress ( dso ) )
unlink ( symfs_filename ) ;
2016-08-09 15:16:37 -03:00
2013-10-09 15:01:12 +03:00
if ( delete_extract )
kcore_extract__delete ( & kce ) ;
2016-08-09 14:56:13 -03:00
out :
2011-02-04 09:45:46 -02:00
return err ;
2016-06-15 15:48:08 -03:00
out_close_stdout :
close ( stdout_fd [ 1 ] ) ;
perf annotate: Use asprintf when formatting objdump command line
We were using a local buffer with an arbitrary size, that would have to
get increased to avoid truncation as warned by gcc 8:
util/annotate.c: In function 'symbol__disassemble':
util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=]
"%s %s%s --start-address=0x%016" PRIx64
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
util/annotate.c:1498:20:
symfs_filename, symfs_filename);
~~~~~~~~~~~~~~
util/annotate.c:1490:50: note: format string is defined here
" -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
^~
In file included from /usr/include/stdio.h:861,
from util/color.h:5,
from util/sort.h:8,
from util/annotate.c:14:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192
return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
__bos (__s), __fmt, __va_arg_pack ());
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
So switch to asprintf, that will make sure enough space is available.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-14 10:34:11 -03:00
goto out_free_command ;
2011-02-04 09:45:46 -02:00
}
2017-10-11 17:01:40 +02:00
/*
 * Sum the histogram entries in [@offset, @end) and store the totals and the
 * resulting percentage in @sample.
 *
 * @hist:   histogram for one event.
 * @sample: destination; left untouched when @hist has no samples at all.
 * @offset: first histogram index to include.
 * @end:    index one past the last entry to include.
 *
 * The percentage is the summed sample count relative to hist->nr_samples.
 */
static void calc_percent(struct sym_hist *hist,
			 struct annotation_data *sample,
			 s64 offset, s64 end)
{
	/* 64-bit like the per-address counters, so the sum cannot truncate. */
	u64 hits = 0;
	u64 period = 0;

	for (; offset < end; ++offset) {
		hits += hist->addr[offset].nr_samples;
		period += hist->addr[offset].period;
	}

	/* Guard against dividing by zero for an empty histogram. */
	if (hist->nr_samples) {
		sample->he.period = period;
		sample->he.nr_samples = hits;
		sample->percent = 100.0 * hits / hist->nr_samples;
	}
}
2017-11-15 12:05:59 +01:00
/*
 * Fill in the per-event sample data of every annotation line that has an
 * offset.  A line covers the histogram range from its own offset up to the
 * offset of the line returned by annotation_line__next(), or up to @len
 * when there is no such line.
 */
static void annotation__calc_percent(struct annotation *notes,
				     struct perf_evsel *evsel, s64 len)
{
	struct annotation_line *al;

	list_for_each_entry(al, &notes->src->source, node) {
		struct annotation_line *following;
		s64 stop;
		int idx;

		/* Lines with offset -1 are skipped. */
		if (al->offset == -1)
			continue;

		following = annotation_line__next(al, &notes->src->source);
		stop = following ? following->offset : len;

		for (idx = 0; idx < al->samples_nr; idx++) {
			struct sym_hist *hist = annotation__histogram(notes, evsel->idx + idx);

			calc_percent(hist, &al->samples[idx], al->offset, stop);
		}
	}
}
2017-11-15 12:05:59 +01:00
/*
 * Compute the per-line percentages for @sym and @evsel, using the symbol's
 * annotation and the symbol size as the end of the last line's range.
 */
void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
{
	struct annotation *annotation = symbol__annotation(sym);

	annotation__calc_percent(annotation, evsel, symbol__size(sym));
}
2017-10-11 17:01:28 +02:00
int symbol__annotate ( struct symbol * sym , struct map * map ,
2017-10-11 17:01:33 +02:00
struct perf_evsel * evsel , size_t privsize ,
2017-12-11 12:46:11 -03:00
struct arch * * parch )
2017-10-11 17:01:28 +02:00
{
2017-10-11 17:01:29 +02:00
struct annotate_args args = {
. privsize = privsize ,
2017-10-11 17:01:31 +02:00
. map = map ,
2017-10-11 17:01:33 +02:00
. evsel = evsel ,
2017-10-11 17:01:29 +02:00
} ;
2017-12-11 12:46:11 -03:00
struct perf_env * env = perf_evsel__env ( evsel ) ;
2017-12-11 12:52:17 -03:00
const char * arch_name = perf_env__arch ( env ) ;
2017-10-11 17:01:28 +02:00
struct arch * arch ;
int err ;
if ( ! arch_name )
return - 1 ;
2017-10-11 17:01:30 +02:00
args . arch = arch = arch__find ( arch_name ) ;
2017-10-11 17:01:28 +02:00
if ( arch = = NULL )
return - ENOTSUP ;
if ( parch )
* parch = arch ;
if ( arch - > init ) {
2017-12-11 12:46:11 -03:00
err = arch - > init ( arch , env ? env - > cpuid : NULL ) ;
2017-10-11 17:01:28 +02:00
if ( err ) {
pr_err ( " %s: failed to initialize %s arch priv area \n " , __func__ , arch - > name ) ;
return err ;
}
}
2017-11-15 12:20:08 +01:00
return symbol__disassemble ( sym , & args ) ;
2017-10-11 17:01:28 +02:00
}
2017-10-11 17:01:41 +02:00
static void insert_source_line ( struct rb_root * root , struct annotation_line * al )
2011-02-04 09:45:46 -02:00
{
2017-10-11 17:01:41 +02:00
struct annotation_line * iter ;
2011-02-04 09:45:46 -02:00
struct rb_node * * p = & root - > rb_node ;
struct rb_node * parent = NULL ;
2013-03-05 14:53:28 +09:00
int i , ret ;
2011-02-04 09:45:46 -02:00
while ( * p ! = NULL ) {
parent = * p ;
2017-10-11 17:01:41 +02:00
iter = rb_entry ( parent , struct annotation_line , rb_node ) ;
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:41 +02:00
ret = strcmp ( iter - > path , al - > path ) ;
2012-11-09 14:58:49 +09:00
if ( ret = = 0 ) {
2017-10-11 17:01:41 +02:00
for ( i = 0 ; i < al - > samples_nr ; i + + )
iter - > samples [ i ] . percent_sum + = al - > samples [ i ] . percent ;
2012-11-09 14:58:49 +09:00
return ;
}
if ( ret < 0 )
p = & ( * p ) - > rb_left ;
else
p = & ( * p ) - > rb_right ;
}
2017-10-11 17:01:41 +02:00
for ( i = 0 ; i < al - > samples_nr ; i + + )
al - > samples [ i ] . percent_sum = al - > samples [ i ] . percent ;
2012-11-09 14:58:49 +09:00
2017-10-11 17:01:41 +02:00
rb_link_node ( & al - > rb_node , parent , p ) ;
rb_insert_color ( & al - > rb_node , root ) ;
2012-11-09 14:58:49 +09:00
}
2017-10-11 17:01:41 +02:00
static int cmp_source_line ( struct annotation_line * a , struct annotation_line * b )
2013-03-05 14:53:28 +09:00
{
int i ;
2017-10-11 17:01:41 +02:00
for ( i = 0 ; i < a - > samples_nr ; i + + ) {
2015-06-19 16:36:12 -03:00
if ( a - > samples [ i ] . percent_sum = = b - > samples [ i ] . percent_sum )
2013-03-05 14:53:28 +09:00
continue ;
2015-06-19 16:36:12 -03:00
return a - > samples [ i ] . percent_sum > b - > samples [ i ] . percent_sum ;
2013-03-05 14:53:28 +09:00
}
return 0 ;
}
2017-10-11 17:01:41 +02:00
static void __resort_source_line ( struct rb_root * root , struct annotation_line * al )
2012-11-09 14:58:49 +09:00
{
2017-10-11 17:01:41 +02:00
struct annotation_line * iter ;
2012-11-09 14:58:49 +09:00
struct rb_node * * p = & root - > rb_node ;
struct rb_node * parent = NULL ;
while ( * p ! = NULL ) {
parent = * p ;
2017-10-11 17:01:41 +02:00
iter = rb_entry ( parent , struct annotation_line , rb_node ) ;
2012-11-09 14:58:49 +09:00
2017-10-11 17:01:41 +02:00
if ( cmp_source_line ( al , iter ) )
2011-02-04 09:45:46 -02:00
p = & ( * p ) - > rb_left ;
else
p = & ( * p ) - > rb_right ;
}
2017-10-11 17:01:41 +02:00
rb_link_node ( & al - > rb_node , parent , p ) ;
rb_insert_color ( & al - > rb_node , root ) ;
2011-02-04 09:45:46 -02:00
}
2012-11-09 14:58:49 +09:00
static void resort_source_line ( struct rb_root * dest_root , struct rb_root * src_root )
{
2017-10-11 17:01:41 +02:00
struct annotation_line * al ;
2012-11-09 14:58:49 +09:00
struct rb_node * node ;
node = rb_first ( src_root ) ;
while ( node ) {
struct rb_node * next ;
2017-10-11 17:01:41 +02:00
al = rb_entry ( node , struct annotation_line , rb_node ) ;
2012-11-09 14:58:49 +09:00
next = rb_next ( node ) ;
rb_erase ( node , src_root ) ;
2017-10-11 17:01:41 +02:00
__resort_source_line ( dest_root , al ) ;
2012-11-09 14:58:49 +09:00
node = next ;
}
}
2011-02-04 09:45:46 -02:00
/*
 * Print the per-source-line summary held in the rbtree @root to stdout.
 *
 * @root:     tree of struct annotation_line nodes, walked with
 *            rb_first()/rb_next().
 * @filename: name shown in the summary header.
 *
 * Each node is printed as one row: the percent_sum of every event followed
 * by the line's path.  An empty tree prints a single "Nothing higher than"
 * line instead.
 */
static void print_summary ( struct rb_root * root , const char * filename )
{
2017-10-11 17:01:41 +02:00
struct annotation_line * al ;
2011-02-04 09:45:46 -02:00
struct rb_node * node ;
printf ( " \n Sorted summary for file %s \n " , filename ) ;
printf ( " ---------------------------------------------- \n \n " ) ;
if ( RB_EMPTY_ROOT ( root ) ) {
printf ( " Nothing higher than %1.1f%% \n " , MIN_GREEN ) ;
return ;
}
node = rb_first ( root ) ;
while ( node ) {
2013-03-05 14:53:28 +09:00
double percent , percent_max = 0.0 ;
2011-02-04 09:45:46 -02:00
const char * color ;
char * path ;
2013-03-05 14:53:28 +09:00
int i ;
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:41 +02:00
al = rb_entry ( node , struct annotation_line , rb_node ) ;
/* One column per event, each colored by its own value. */
for ( i = 0 ; i < al - > samples_nr ; i + + ) {
percent = al - > samples [ i ] . percent_sum ;
2013-03-05 14:53:28 +09:00
color = get_percent_color ( percent ) ;
color_fprintf ( stdout , color , " %7.2f " , percent ) ;
if ( percent > percent_max )
percent_max = percent ;
}
2017-10-11 17:01:41 +02:00
path = al - > path ;
2013-03-05 14:53:28 +09:00
/* The path is colored according to the largest column of the row. */
color = get_percent_color ( percent_max ) ;
2013-09-11 14:09:28 +09:00
color_fprintf ( stdout , color , " %s \n " , path ) ;
2011-02-04 09:45:46 -02:00
node = rb_next ( node ) ;
}
}
2013-03-05 14:53:21 +09:00
/*
 * Dump the raw histogram of @sym for @evsel to stdout.
 *
 * Prints one line per offset inside the symbol that has a non-zero sample
 * count (address = sym->start + offset, followed by the count), and then a
 * final line with the histogram's total nr_samples.
 */
static void symbol__annotate_hits ( struct symbol * sym , struct perf_evsel * evsel )
2011-02-04 09:45:46 -02:00
{
struct annotation * notes = symbol__annotation ( sym ) ;
2013-03-05 14:53:21 +09:00
struct sym_hist * h = annotation__histogram ( notes , evsel - > idx ) ;
2012-04-19 10:57:06 -03:00
u64 len = symbol__size ( sym ) , offset ;
2011-02-04 09:45:46 -02:00
/* Only offsets that were actually hit are printed. */
for ( offset = 0 ; offset < len ; + + offset )
2017-07-20 06:36:45 +09:00
if ( h - > addr [ offset ] . nr_samples ! = 0 )
2011-02-04 09:45:46 -02:00
printf ( " %* " PRIx64 " : % " PRIu64 " \n " , BITS_PER_LONG / 2 ,
2017-07-20 06:36:45 +09:00
sym - > start + offset , h - > addr [ offset ] . nr_samples ) ;
2017-07-20 06:36:51 +09:00
/* Not part of the loop above: printed once, after all offsets. */
printf ( " %*s: % " PRIu64 " \n " , BITS_PER_LONG / 2 , " h->nr_samples " , h - > nr_samples ) ;
2011-02-04 09:45:46 -02:00
}
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns; look at the last offset to get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
/*
 * Return the number of characters needed to print, in hex, the address of
 * the last line in @lines that has an offset (@start + offset).
 * Returns 0 when no line has an offset.
 */
static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
{
	struct annotation_line *al;
	char scratch[32];

	/* Walk backwards: the first line with an offset is the last address. */
	list_for_each_entry_reverse(al, lines, node) {
		if (al->offset == -1)
			continue;

		return scnprintf(scratch, sizeof(scratch), "%" PRIx64, start + al->offset);
	}

	return 0;
}
2013-03-05 14:53:21 +09:00
int symbol__annotate_printf ( struct symbol * sym , struct map * map ,
struct perf_evsel * evsel , bool full_paths ,
int min_pcnt , int max_lines , int context )
2011-02-04 09:45:46 -02:00
{
struct dso * dso = map - > dso ;
2012-09-08 09:06:50 -06:00
char * filename ;
const char * d_filename ;
2014-03-18 11:50:21 -03:00
const char * evsel_name = perf_evsel__name ( evsel ) ;
2011-02-08 13:27:39 -02:00
struct annotation * notes = symbol__annotation ( sym ) ;
2016-06-30 10:29:55 +02:00
struct sym_hist * h = annotation__histogram ( notes , evsel - > idx ) ;
2017-10-11 17:01:46 +02:00
struct annotation_line * pos , * queue = NULL ;
2012-04-02 12:59:01 -03:00
u64 start = map__rip_2objdump ( map , sym - > start ) ;
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns; look at the last offset to get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
int printed = 2 , queue_len = 0 , addr_fmt_width ;
2011-02-06 14:54:44 -02:00
int more = 0 ;
2011-02-04 09:45:46 -02:00
u64 len ;
2017-07-26 17:16:46 -03:00
int width = symbol_conf . show_total_period ? 12 : 8 ;
2016-06-30 09:17:26 -03:00
int graph_dotted_len ;
2011-02-04 09:45:46 -02:00
2012-09-08 09:06:50 -06:00
filename = strdup ( dso - > long_name ) ;
if ( ! filename )
return - ENOMEM ;
2011-02-04 09:45:46 -02:00
if ( full_paths )
d_filename = filename ;
else
d_filename = basename ( filename ) ;
2012-04-19 10:57:06 -03:00
len = symbol__size ( sym ) ;
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
2013-03-05 14:53:26 +09:00
if ( perf_evsel__is_group_event ( evsel ) )
perf annotate: Add basic support to event group view
Add --group option to enable event grouping. When enabled, all the
group members information will be shown with the leader so skip
non-leader events.
It only supports --stdio output currently. Later patches will extend
additional features.
$ perf annotate --group --stdio
...
Percent | Source code & Disassembly of libpthread-2.15.so
--------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 000000387dc0aa50 <__pthread_mutex_unlock_usercnt>:
8.08 2.40 5.29 : 387dc0aa50: mov %rdi,%rdx
0.00 0.00 0.00 : 387dc0aa53: mov 0x10(%rdi),%edi
0.00 0.00 0.00 : 387dc0aa56: mov %edi,%eax
0.00 0.80 0.00 : 387dc0aa58: and $0x7f,%eax
3.03 2.40 3.53 : 387dc0aa5b: test $0x7c,%dil
0.00 0.00 0.00 : 387dc0aa5f: jne 387dc0aaa9 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa61: test %eax,%eax
0.00 0.00 0.00 : 387dc0aa63: jne 387dc0aa85 <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa65: and $0x80,%edi
0.00 0.00 0.00 : 387dc0aa6b: test %esi,%esi
3.03 5.60 7.06 : 387dc0aa6d: movl $0x0,0x8(%rdx)
0.00 0.00 0.59 : 387dc0aa74: je 387dc0aa7a <__pthread_mutex_unlock_use
0.00 0.00 0.00 : 387dc0aa76: subl $0x1,0xc(%rdx)
2.02 5.60 1.18 : 387dc0aa7a: mov %edi,%esi
0.00 0.00 0.00 : 387dc0aa7c: lock decl (%rdx)
83.84 83.20 82.35 : 387dc0aa7f: jne 387dc0aada <_L_unlock_586>
0.00 0.00 0.00 : 387dc0aa81: nop
0.00 0.00 0.00 : 387dc0aa82: xor %eax,%eax
0.00 0.00 0.00 : 387dc0aa84: retq
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1362462812-30885-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2013-03-05 14:53:25 +09:00
width * = evsel - > nr_members ;
2011-02-04 09:45:46 -02:00
2016-06-30 10:29:55 +02:00
graph_dotted_len = printf ( " %-*.*s| Source code & Disassembly of %s for %s (% " PRIu64 " samples) \n " ,
2017-08-18 17:46:48 +09:00
width , width , symbol_conf . show_total_period ? " Period " :
symbol_conf . show_nr_samples ? " Samples " : " Percent " ,
2017-07-25 06:28:42 +09:00
d_filename , evsel_name , h - > nr_samples ) ;
2014-03-18 11:50:21 -03:00
2016-06-30 09:17:26 -03:00
printf ( " %-*.*s---- \n " ,
2014-03-18 11:50:21 -03:00
graph_dotted_len , graph_dotted_len , graph_dotted_line ) ;
2011-02-04 09:45:46 -02:00
2017-02-17 17:17:38 +09:00
if ( verbose > 0 )
2013-03-05 14:53:21 +09:00
symbol__annotate_hits ( sym , evsel ) ;
2011-02-04 09:45:46 -02:00
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
addr_fmt_width = annotated_source__addr_fmt_width ( & notes - > src - > source , start ) ;
2017-10-11 17:01:46 +02:00
list_for_each_entry ( pos , & notes - > src - > source , node ) {
int err ;
2011-02-08 15:29:25 -02:00
if ( context & & queue = = NULL ) {
queue = pos ;
queue_len = 0 ;
}
2017-10-11 17:01:46 +02:00
err = annotation_line__print ( pos , sym , start , evsel , len ,
min_pcnt , printed , max_lines ,
perf annotate: Align source and offset lines
Align source with offset lines, which are more advanced, because of the
address column.
Before:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
After:
: static void *worker_thread(void *__tdata)
: {
0.00 : 48a971: push %rbp
0.00 : 48a972: mov %rsp,%rbp
0.00 : 48a975: sub $0x30,%rsp
0.00 : 48a979: mov %rdi,-0x28(%rbp)
0.00 : 48a97d: mov %fs:0x28,%rax
0.00 : 48a986: mov %rax,-0x8(%rbp)
0.00 : 48a98a: xor %eax,%eax
: struct thread_data *td = __tdata;
0.00 : 48a98c: mov -0x28(%rbp),%rax
0.00 : 48a990: mov %rax,-0x10(%rbp)
: int m = 0, i;
0.00 : 48a994: movl $0x0,-0x1c(%rbp)
: int ret;
:
: for (i = 0; i < loops; i++) {
0.00 : 48a99b: movl $0x0,-0x18(%rbp)
It makes a bigger difference when displaying script sources, where the
comment lines look oddly shifted from the lines which actually hold
code. I'll send script support separately.
Committer note:
Do not use a fixed column width for the addresses, as kernel ones use
more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-10-11 17:01:58 +02:00
queue , addr_fmt_width ) ;
2017-10-11 17:01:46 +02:00
switch ( err ) {
2011-02-06 14:54:44 -02:00
case 0 :
+ + printed ;
2011-02-08 15:29:25 -02:00
if ( context ) {
printed + = queue_len ;
queue = NULL ;
queue_len = 0 ;
}
2011-02-06 14:54:44 -02:00
break ;
case 1 :
/* filtered by max_lines */
+ + more ;
2011-02-05 15:37:31 -02:00
break ;
2011-02-06 14:54:44 -02:00
case - 1 :
default :
2011-02-08 15:29:25 -02:00
/*
* Filtered by min_pcnt or non IP lines when
* context ! = 0
*/
if ( ! context )
break ;
if ( queue_len = = context )
2017-10-11 17:01:46 +02:00
queue = list_entry ( queue - > node . next , typeof ( * queue ) , node ) ;
2011-02-08 15:29:25 -02:00
else
+ + queue_len ;
2011-02-06 14:54:44 -02:00
break ;
}
}
2012-09-08 09:06:50 -06:00
free ( filename ) ;
2011-02-06 14:54:44 -02:00
return more ;
}
2011-02-05 18:51:38 -02:00
2018-03-15 23:44:34 -03:00
/*
 * set_percent_color callback used by symbol__annotate_fprintf2().
 * Deliberately does nothing: every argument is ignored.
 */
static void FILE__set_percent_color(void *fp __maybe_unused,
				    double percent __maybe_unused,
				    bool current __maybe_unused)
{
	/* intentionally empty */
}
/*
 * set_jumps_percent_color callback used by symbol__annotate_fprintf2().
 * Ignores every argument and always reports 0 as the "previous" color.
 */
static int FILE__set_jumps_percent_color(void *fp __maybe_unused,
					 int nr __maybe_unused,
					 bool current __maybe_unused)
{
	const int previous_color = 0;

	return previous_color;
}
/*
 * set_color callback used by symbol__annotate_fprintf2().
 * Ignores both arguments and always returns 0.
 */
static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused)
{
	const int previous_color = 0;

	return previous_color;
}
/*
 * printf callback used by symbol__annotate_fprintf2(): @fp is handed to
 * vfprintf() as the output stream, together with @fmt and the variadic
 * arguments.
 */
static void FILE__printf(void *fp, const char *fmt, ...)
{
	FILE *out = fp;
	va_list ap;

	va_start(ap, fmt);
	vfprintf(out, fmt, ap);
	va_end(ap);
}
/*
 * write_graph callback used by symbol__annotate_fprintf2().
 *
 * Maps the four arrow constants (DARROW_CHAR, UARROW_CHAR, LARROW_CHAR,
 * RARROW_CHAR) to an arrow string, falls back to a question-mark string
 * for any other value, and writes the result to @fp with fputs().
 */
static void FILE__write_graph ( void * fp , int graph )
{
const char * s ;
switch ( graph ) {
case DARROW_CHAR : s = " ↓ " ; break ;
case UARROW_CHAR : s = " ↑ " ; break ;
case LARROW_CHAR : s = " ← " ; break ;
case RARROW_CHAR : s = " → " ; break ;
default : s = " ? " ; break ;
}
fputs ( s , fp ) ;
}
/*
 * Write the annotation of @sym to @fp, one annotation line per output line.
 *
 * Builds an annotation_write_ops whose callbacks are the FILE__* helpers
 * above, then walks notes->src->source: lines rejected by
 * annotation_line__filter() are skipped, the rest are rendered with
 * annotation_line__write() and followed by a newline.
 *
 * Always returns 0; results of the output calls are not checked.
 */
int symbol__annotate_fprintf2 ( struct symbol * sym , FILE * fp )
{
struct annotation * notes = symbol__annotation ( sym ) ;
struct annotation_write_ops ops = {
. first_line = true ,
. obj = fp ,
. set_color = FILE__set_color ,
. set_percent_color = FILE__set_percent_color ,
. set_jumps_percent_color = FILE__set_jumps_percent_color ,
. printf = FILE__printf ,
. write_graph = FILE__write_graph ,
} ;
struct annotation_line * al ;
list_for_each_entry ( al , & notes - > src - > source , node ) {
if ( annotation_line__filter ( al , notes ) )
continue ;
annotation_line__write ( al , notes , & ops ) ;
fputc ( ' \n ' , fp ) ;
/* first_line is only true for the first line that is actually written */
ops . first_line = false ;
}
return 0 ;
}
2011-02-06 14:54:44 -02:00
/*
 * Clear the histogram of @sym for event index @evidx by zeroing
 * notes->src->sizeof_sym_hist bytes starting at the histogram.
 */
void symbol__annotate_zero_histogram ( struct symbol * sym , int evidx )
{
struct annotation * notes = symbol__annotation ( sym ) ;
struct sym_hist * h = annotation__histogram ( notes , evidx ) ;
2011-02-08 13:27:39 -02:00
memset ( h , 0 , notes - > src - > sizeof_sym_hist ) ;
2011-02-06 14:54:44 -02:00
}
2011-02-08 13:27:39 -02:00
/*
 * Decay the histogram of @sym for event index @evidx.
 *
 * Every per-offset sample count is replaced by 7/8 of its value (integer
 * arithmetic), and h->nr_samples is rebuilt as the sum of the decayed
 * counts.
 */
void symbol__annotate_decay_histogram ( struct symbol * sym , int evidx )
2011-02-06 14:54:44 -02:00
{
struct annotation * notes = symbol__annotation ( sym ) ;
struct sym_hist * h = annotation__histogram ( notes , evidx ) ;
2012-04-19 10:57:06 -03:00
int len = symbol__size ( sym ) , offset ;
2011-02-06 14:54:44 -02:00
2017-07-20 06:36:51 +09:00
/* the total is recomputed below from the decayed per-offset counts */
h - > nr_samples = 0 ;
2012-04-05 16:15:59 -03:00
for ( offset = 0 ; offset < len ; + + offset ) {
2017-07-20 06:36:45 +09:00
h - > addr [ offset ] . nr_samples = h - > addr [ offset ] . nr_samples * 7 / 8 ;
2017-07-20 06:36:51 +09:00
h - > nr_samples + = h - > addr [ offset ] . nr_samples ;
2011-02-05 18:51:38 -02:00
}
}
2017-10-11 17:01:38 +02:00
/*
 * Empty the as->source list: each annotation_line is unlinked from the
 * list and the result of disasm_line(al) is passed to disasm_line__free().
 * The _safe iterator is used because entries are removed while walking.
 */
void annotated_source__purge ( struct annotated_source * as )
2011-02-05 18:51:38 -02:00
{
2017-10-11 17:01:38 +02:00
struct annotation_line * al , * n ;
2011-02-05 18:51:38 -02:00
2017-10-11 17:01:38 +02:00
list_for_each_entry_safe ( al , n , & as - > source , node ) {
list_del ( & al - > node ) ;
disasm_line__free ( disasm_line ( al ) ) ;
2011-02-05 18:51:38 -02:00
}
}
2012-04-15 15:52:18 -03:00
/*
 * Print one disassembly line to @fp and return the accumulated fprintf()
 * results.
 *
 * Lines whose offset is -1 are printed as their raw text only. Other lines
 * print the offset in hex followed by the instruction name, then the raw
 * operands when dl->ops.raw is not empty.
 *
 * NOTE(review): both the operand format and the final fprintf() contain a
 * newline, so a line with operands appears to be followed by two newlines;
 * confirm this is intended.
 * NOTE(review): fprintf() returns int, and a negative error result would be
 * folded into the size_t total; confirm callers do not rely on the total.
 */
static size_t disasm_line__fprintf ( struct disasm_line * dl , FILE * fp )
{
size_t printed ;
2017-10-11 17:01:26 +02:00
if ( dl - > al . offset = = - 1 )
return fprintf ( fp , " %s \n " , dl - > al . line ) ;
2012-04-15 15:52:18 -03:00
2017-10-11 17:01:26 +02:00
printed = fprintf ( fp , " %# " PRIx64 " %s " , dl - > al . offset , dl - > ins . name ) ;
2012-04-15 15:52:18 -03:00
2012-04-20 14:38:46 -03:00
if ( dl - > ops . raw [ 0 ] ! = ' \0 ' ) {
2012-04-15 15:52:18 -03:00
/* the precision argument pads the operands based on what was printed so far */
printed + = fprintf ( fp , " %.*s %s \n " , 6 - ( int ) printed , " " ,
2012-04-20 14:38:46 -03:00
dl - > ops . raw ) ;
2012-04-15 15:52:18 -03:00
}
return printed + fprintf ( fp , " \n " ) ;
}
/*
 * Print every disasm_line linked on @head (through al.node) to @fp using
 * disasm_line__fprintf() and return the sum of the per-line results.
 */
size_t disasm__fprintf ( struct list_head * head , FILE * fp )
{
struct disasm_line * pos ;
size_t printed = 0 ;
2017-10-11 17:01:25 +02:00
list_for_each_entry ( pos , head , al . node )
2012-04-15 15:52:18 -03:00
printed + = disasm_line__fprintf ( pos , fp ) ;
return printed ;
}
2018-03-15 15:31:56 -03:00
bool disasm_line__is_valid_jump ( struct disasm_line * dl , struct symbol * sym )
{
if ( ! dl | | ! dl - > ins . ops | | ! ins__is_jump ( & dl - > ins ) | |
! disasm_line__has_offset ( dl ) | | dl - > ops . target . offset < 0 | |
dl - > ops . target . offset > = ( s64 ) symbol__size ( sym ) )
return false ;
return true ;
}
/*
 * Count, for every line of @sym, how many jumps inside the symbol target it.
 *
 * For each byte offset in the symbol, the line recorded in notes->offsets[]
 * is checked with disasm_line__is_valid_jump(). For a valid jump the target
 * line's jump_sources is incremented, notes->max_jump_sources tracks the
 * largest such count and notes->nr_jumps the total number of jumps found.
 *
 * Symbols whose name contains the PLT marker string are skipped entirely.
 */
void annotation__mark_jump_targets ( struct annotation * notes , struct symbol * sym )
{
u64 offset , size = symbol__size ( sym ) ;
/* PLT symbols contain external offsets */
if ( strstr ( sym - > name , " @plt " ) )
return ;
for ( offset = 0 ; offset < size ; + + offset ) {
struct annotation_line * al = notes - > offsets [ offset ] ;
struct disasm_line * dl ;
/* NOTE(review): al may be NULL here; relies on disasm_line() and the !dl check in disasm_line__is_valid_jump() coping with that - confirm */
dl = disasm_line ( al ) ;
if ( ! disasm_line__is_valid_jump ( dl , sym ) )
continue ;
al = notes - > offsets [ dl - > ops . target . offset ] ;
/*
* FIXME : Oops , no jump target ? Buggy disassembler ? Or do we
* have to adjust to the previous offset ?
*/
if ( al = = NULL )
continue ;
if ( + + al - > jump_sources > notes - > max_jump_sources )
notes - > max_jump_sources = al - > jump_sources ;
+ + notes - > nr_jumps ;
}
}
2018-03-15 15:59:01 -03:00
/*
 * Index every line of the annotated source of @notes.
 *
 * For each line on notes->src->source:
 *  - notes->max_line_len is raised to the longest line text seen;
 *  - al->idx receives the line's position among all lines;
 *  - al->idx_asm receives its position among lines that carry an offset,
 *    or -1 for lines whose offset is -1;
 *  - lines with an offset below @size are recorded in notes->offsets[].
 *
 * All three running values (max_line_len, nr_entries, nr_asm_entries) are
 * recomputed from scratch, so they are reset before the walk. Previously
 * only max_line_len was reset, which made idx/idx_asm and the entry counts
 * keep growing when the same annotation was set up more than once.
 */
void annotation__set_offsets(struct annotation *notes, s64 size)
{
	struct annotation_line *al;

	notes->max_line_len = 0;
	notes->nr_entries = 0;
	notes->nr_asm_entries = 0;

	list_for_each_entry(al, &notes->src->source, node) {
		size_t line_len = strlen(al->line);

		if (notes->max_line_len < line_len)
			notes->max_line_len = line_len;

		al->idx = notes->nr_entries++;

		if (al->offset == -1) {
			al->idx_asm = -1;
			continue;
		}

		al->idx_asm = notes->nr_asm_entries++;
		/*
		 * FIXME: short term bandaid to cope with assembly
		 * routines that comes with labels in the same column
		 * as the address in objdump, sigh.
		 *
		 * E.g. copy_user_generic_unrolled
		 */
		if (al->offset < size)
			notes->offsets[al->offset] = al;
	}
}
2018-03-15 16:26:29 -03:00
/*
 * Column width used for a jump count of @n: 5 when n is at least 100,
 * 2 when n / 10 is non-zero, and 1 otherwise.
 */
static inline int width_jumps(int n)
{
	if (n >= 100)
		return 5;

	return (n / 10 != 0) ? 2 : 1;
}
void annotation__init_column_widths ( struct annotation * notes , struct symbol * sym )
{
notes - > widths . addr = notes - > widths . target =
notes - > widths . min_addr = hex_width ( symbol__size ( sym ) ) ;
notes - > widths . max_addr = hex_width ( sym - > end ) ;
notes - > widths . jumps = width_jumps ( notes - > max_jump_sources ) ;
}
2018-03-15 16:19:59 -03:00
void annotation__update_column_widths ( struct annotation * notes )
{
if ( notes - > options - > use_offset )
notes - > widths . target = notes - > widths . min_addr ;
else
notes - > widths . target = notes - > widths . max_addr ;
notes - > widths . addr = notes - > widths . target ;
if ( notes - > options - > show_nr_jumps )
notes - > widths . addr + = notes - > widths . jumps + 1 ;
}
2017-10-11 17:01:41 +02:00
/*
 * Collect the hot source lines of an annotation into @root.
 *
 * For every annotation line the largest percent among al->samples[] is
 * computed; lines whose maximum is 0.5 or less are skipped. The remaining
 * lines get al->path from get_srcline() for address @start + al->offset
 * and are inserted into a temporary rb tree, which is finally re-sorted
 * into @root by resort_source_line().
 */
static void annotation__calc_lines ( struct annotation * notes , struct map * map ,
struct rb_root * root , u64 start )
{
struct annotation_line * al ;
struct rb_root tmp_root = RB_ROOT ;
list_for_each_entry ( al , & notes - > src - > source , node ) {
double percent_max = 0.0 ;
int i ;
for ( i = 0 ; i < al - > samples_nr ; i + + ) {
struct annotation_data * sample ;
sample = & al - > samples [ i ] ;
if ( sample - > percent > percent_max )
percent_max = sample - > percent ;
}
/* only lines above the 0.5 threshold are reported */
if ( percent_max < = 0.5 )
continue ;
2017-12-30 00:26:52 +08:00
al - > path = get_srcline ( map - > dso , start + al - > offset , NULL ,
false , true , start + al - > offset ) ;
2017-10-11 17:01:41 +02:00
insert_source_line ( & tmp_root , al ) ;
}
resort_source_line ( root , & tmp_root ) ;
}
static void symbol__calc_lines ( struct symbol * sym , struct map * map ,
struct rb_root * root )
{
struct annotation * notes = symbol__annotation ( sym ) ;
u64 start = map__rip_2objdump ( map , sym - > start ) ;
annotation__calc_lines ( notes , map , root , start ) ;
}
2018-03-15 23:44:34 -03:00
/*
 * Annotate @sym and print the result to stdout using the --stdio2 writer.
 *
 * Steps: run symbol__annotate2() with a copy of the default annotation
 * options; when @print_lines is set, compute and print the source line
 * summary; print a header with the symbol name, the DSO long name and the
 * event name (the group description for group events); write the annotated
 * lines with symbol__annotate_fprintf2(); finally purge the annotated
 * source.
 *
 * Returns 0 on success, -1 when symbol__annotate2() fails.
 *
 * NOTE(review): symbol__annotate2() allocates notes->offsets and nothing in
 * this function releases it; confirm whether that is freed elsewhere.
 */
int symbol__tty_annotate2 ( struct symbol * sym , struct map * map ,
struct perf_evsel * evsel , bool print_lines ,
bool full_paths )
{
struct dso * dso = map - > dso ;
struct rb_root source_line = RB_ROOT ;
perf annotate: Use the default annotation options for --stdio2
With an empty '[annotate]' section in ~/.perfconfig:
# perf record -a --all-kernel -e '{cycles,instructions}:P' sleep 5
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.243 MB perf.data (5513 samples) ]
# perf annotate --stdio2 _raw_spin_lock | head -20
Disassembly of section .text:
ffffffff81868790 <_raw_spin_lock>:
_raw_spin_lock():
EXPORT_SYMBOL(_raw_spin_trylock_bh);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
→ callq __fentry__
atomic_cmpxchg():
return xadd(&v->counter, -i);
}
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
# perf annotate --stdio2 _raw_spin_lock | head -20
→ callq __fentry__
xor %eax,%eax
mov $0x1,%edx
87.50 100.00 lock cmpxchg %edx,(%rdi)
6.25 0.00 test %eax,%eax
↓ jne 16
6.25 0.00 repz retq
16: mov %eax,%esi
↑ jmpq ffffffff810e96b0 <queued_spin_lock_slowpath>
#
# cat ~/.perfconfig
[annotate]
hide_src_code = false
show_linenr = true
# perf annotate --stdio2 _raw_spin_lock | head -20
3 Disassembly of section .text:
5 ffffffff81868790 <_raw_spin_lock>:
6 _raw_spin_lock():
143 EXPORT_SYMBOL(_raw_spin_trylock_bh);
144 #endif
146 #ifndef CONFIG_INLINE_SPIN_LOCK
147 void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
148 {
→ callq __fentry__
150 atomic_cmpxchg():
187 return xadd(&v->counter, -i);
188 }
190 static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
191 {
#
# cat ~/.perfconfig
[annotate]
hide_src_code = true
show_total_period = true
# perf annotate --stdio2 _raw_spin_lock | head -20
→ callq __fentry__
xor %eax,%eax
mov $0x1,%edx
1411316 152339 lock cmpxchg %edx,(%rdi)
344694 0 test %eax,%eax
↓ jne 16
80806 0 repz retq
16: mov %eax,%esi
↑ jmpq ffffffff810e96b0 <queued_spin_lock_slowpath>
#
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-nu4rxg5zkdtgs1b2gc40p7v7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-03-16 14:37:33 -03:00
struct annotation_options opts = annotation__default_options ;
2018-03-16 15:17:23 -03:00
const char * ev_name = perf_evsel__name ( evsel ) ;
char buf [ 1024 ] ;
2018-03-15 23:44:34 -03:00
if ( symbol__annotate2 ( sym , map , evsel , & opts , NULL ) < 0 )
return - 1 ;
if ( print_lines ) {
srcline_full_filename = full_paths ;
symbol__calc_lines ( sym , map , & source_line ) ;
print_summary ( & source_line , dso - > long_name ) ;
}
2018-03-16 15:17:23 -03:00
/* group events are labelled with the whole group description */
if ( perf_evsel__is_group_event ( evsel ) ) {
perf_evsel__group_desc ( evsel , buf , sizeof ( buf ) ) ;
ev_name = buf ;
}
fprintf ( stdout , " %s() %s \n Event: %s \n \n " , sym - > name , dso - > long_name , ev_name ) ;
2018-03-15 23:44:34 -03:00
symbol__annotate_fprintf2 ( sym , stdout ) ;
annotated_source__purge ( symbol__annotation ( sym ) - > src ) ;
return 0 ;
}
2013-03-05 14:53:21 +09:00
/*
 * Annotate @sym and print the result with symbol__annotate_printf().
 *
 * Steps: run symbol__annotate(); compute per-line percentages; when
 * @print_lines is set, compute and print the source line summary; print
 * the annotation honouring @full_paths, @min_pcnt and @max_lines (the
 * context argument is passed as 0); finally purge the annotated source.
 *
 * Returns 0 on success, -1 when symbol__annotate() fails. The return value
 * of symbol__annotate_printf() is not used.
 */
int symbol__tty_annotate ( struct symbol * sym , struct map * map ,
struct perf_evsel * evsel , bool print_lines ,
bool full_paths , int min_pcnt , int max_lines )
2011-02-05 18:51:38 -02:00
{
struct dso * dso = map - > dso ;
struct rb_root source_line = RB_ROOT ;
2017-12-11 12:46:11 -03:00
if ( symbol__annotate ( sym , map , evsel , 0 , NULL ) < 0 )
2011-02-05 18:51:38 -02:00
return - 1 ;
2017-11-15 12:20:08 +01:00
symbol__calc_percent ( sym , evsel ) ;
2011-02-05 18:51:38 -02:00
if ( print_lines ) {
2015-11-09 16:33:31 +01:00
srcline_full_filename = full_paths ;
2017-10-11 17:01:41 +02:00
symbol__calc_lines ( sym , map , & source_line ) ;
2013-09-11 14:09:30 +09:00
print_summary ( & source_line , dso - > long_name ) ;
2011-02-04 09:45:46 -02:00
}
2013-03-05 14:53:21 +09:00
symbol__annotate_printf ( sym , map , evsel , full_paths ,
2011-02-08 15:29:25 -02:00
min_pcnt , max_lines , 0 ) ;
2011-02-04 09:45:46 -02:00
2017-10-11 17:01:38 +02:00
annotated_source__purge ( symbol__annotation ( sym ) - > src ) ;
2011-02-05 18:51:38 -02:00
2011-02-04 09:45:46 -02:00
return 0 ;
}
2013-12-18 17:10:15 -03:00
2014-02-20 10:32:53 +09:00
/*
 * True when use_browser equals 1 and perf_hpp_list.sym is set.
 */
bool ui__has_annotation ( void )
{
2016-05-03 13:54:44 +02:00
return use_browser = = 1 & & perf_hpp_list . sym ;
2014-02-20 10:32:53 +09:00
}
2018-03-15 16:54:11 -03:00
2018-03-15 17:04:53 -03:00
double annotation_line__max_percent ( struct annotation_line * al , struct annotation * notes )
{
double percent_max = 0.0 ;
int i ;
for ( i = 0 ; i < notes - > nr_events ; i + + ) {
if ( al - > samples [ i ] . percent > percent_max )
percent_max = al - > samples [ i ] . percent ;
}
return percent_max ;
}
2018-03-15 19:12:39 -03:00
/*
 * Emit the graph prefix for a disassembly line and format the line itself
 * into @bf.
 *
 * When the instruction has ops with an scnprintf method, an arrow is
 * written through @obj__write_graph: down or up for jumps (depending on
 * whether the target offset is beyond this line's offset), right for
 * calls, left for returns. In every other case only padding is printed
 * through @obj__printf.
 *
 * The instruction text is then formatted into @bf (at most @size bytes) by
 * disasm_line__scnprintf(); this function does not print @bf itself, that
 * is left to the caller.
 */
static void disasm_line__write ( struct disasm_line * dl , struct annotation * notes ,
void * obj , char * bf , size_t size ,
void ( * obj__printf ) ( void * obj , const char * fmt , . . . ) ,
void ( * obj__write_graph ) ( void * obj , int graph ) )
{
if ( dl - > ins . ops & & dl - > ins . ops - > scnprintf ) {
if ( ins__is_jump ( & dl - > ins ) ) {
/* forward jumps get a down arrow, backward ones an up arrow */
bool fwd = dl - > ops . target . offset > dl - > al . offset ;
obj__write_graph ( obj , fwd ? DARROW_CHAR : UARROW_CHAR ) ;
obj__printf ( obj , " " ) ;
} else if ( ins__is_call ( & dl - > ins ) ) {
obj__write_graph ( obj , RARROW_CHAR ) ;
obj__printf ( obj , " " ) ;
} else if ( ins__is_ret ( & dl - > ins ) ) {
obj__write_graph ( obj , LARROW_CHAR ) ;
obj__printf ( obj , " " ) ;
} else {
obj__printf ( obj , " " ) ;
}
} else {
obj__printf ( obj , " " ) ;
}
disasm_line__scnprintf ( dl , bf , size , ! notes - > options - > use_offset ) ;
}
2018-03-15 23:14:51 -03:00
/*
 * Render one annotation line through a set of output callbacks.
 *
 * The line is written left to right as:
 *  1. the percent/period/samples columns (one per event) when the line has
 *     an offset and a non-zero maximum percent; otherwise blank padding or,
 *     for the title line, the column title;
 *  2. when notes->have_cycles is set, the IPC and cycles columns (values,
 *     blanks, or their titles);
 *  3. the line body: blank padding for empty lines, an optional line number
 *     plus the source text for lines without an offset, or the
 *     address/jump-count prefix followed by the disassembled instruction.
 *
 * @first_line selects whether this line may act as the title line,
 * @current_entry is forwarded to the color callbacks, @change_color makes
 * the address prefix use HE_COLORSET_ADDR, and @width is the total width
 * the padding computations are based on.
 *
 * NOTE(review): obj__printf(obj, bf) is called three times with a buffer as
 * the format string. The buffers are built from numbers and padding here,
 * but passing them as "%s" arguments would be the safer form.
 */
static void __annotation_line__write ( struct annotation_line * al , struct annotation * notes ,
bool first_line , bool current_entry , bool change_color , int width ,
void * obj ,
int ( * obj__set_color ) ( void * obj , int color ) ,
void ( * obj__set_percent_color ) ( void * obj , double percent , bool current ) ,
int ( * obj__set_jumps_percent_color ) ( void * obj , int nr , bool current ) ,
void ( * obj__printf ) ( void * obj , const char * fmt , . . . ) ,
void ( * obj__write_graph ) ( void * obj , int graph ) )
2018-03-15 17:54:36 -03:00
{
double percent_max = annotation_line__max_percent ( al , notes ) ;
2018-03-15 19:12:39 -03:00
int pcnt_width = annotation__pcnt_width ( notes ) ,
cycles_width = annotation__cycles_width ( notes ) ;
2018-03-15 17:54:36 -03:00
bool show_title = false ;
2018-03-15 19:12:39 -03:00
char bf [ 256 ] ;
int printed ;
2018-03-15 17:54:36 -03:00
/* the first line shows column titles when it has no values of its own to show */
if ( first_line & & ( al - > offset = = - 1 | | percent_max = = 0.0 ) ) {
if ( notes - > have_cycles ) {
if ( al - > ipc = = 0.0 & & al - > cycles = = 0 )
show_title = true ;
} else
show_title = true ;
}
if ( al - > offset ! = - 1 & & percent_max ! = 0.0 ) {
int i ;
for ( i = 0 ; i < notes - > nr_events ; i + + ) {
obj__set_percent_color ( obj , al - > samples [ i ] . percent , current_entry ) ;
if ( notes - > options - > show_total_period ) {
obj__printf ( obj , " %11 " PRIu64 " " , al - > samples [ i ] . he . period ) ;
} else if ( notes - > options - > show_nr_samples ) {
obj__printf ( obj , " %6 " PRIu64 " " ,
al - > samples [ i ] . he . nr_samples ) ;
} else {
obj__printf ( obj , " %6.2f " ,
al - > samples [ i ] . percent ) ;
}
}
} else {
obj__set_percent_color ( obj , 0 , current_entry ) ;
if ( ! show_title )
2018-03-15 19:12:39 -03:00
obj__printf ( obj , " %-*s " , pcnt_width , " " ) ;
2018-03-15 17:54:36 -03:00
else {
2018-03-15 19:12:39 -03:00
obj__printf ( obj , " %-*s " , pcnt_width ,
2018-03-15 17:54:36 -03:00
notes - > options - > show_total_period ? " Period " :
notes - > options - > show_nr_samples ? " Samples " : " Percent " ) ;
}
}
if ( notes - > have_cycles ) {
if ( al - > ipc )
obj__printf ( obj , " %*.2f " , ANNOTATION__IPC_WIDTH - 1 , al - > ipc ) ;
else if ( ! show_title )
obj__printf ( obj , " %*s " , ANNOTATION__IPC_WIDTH , " " ) ;
else
obj__printf ( obj , " %*s " , ANNOTATION__IPC_WIDTH - 1 , " IPC " ) ;
if ( al - > cycles )
obj__printf ( obj , " %* " PRIu64 " " ,
ANNOTATION__CYCLES_WIDTH - 1 , al - > cycles ) ;
else if ( ! show_title )
obj__printf ( obj , " %*s " , ANNOTATION__CYCLES_WIDTH , " " ) ;
else
obj__printf ( obj , " %*s " , ANNOTATION__CYCLES_WIDTH - 1 , " Cycle " ) ;
}
obj__printf ( obj , " " ) ;
2018-03-15 19:12:39 -03:00
if ( ! * al - > line )
obj__printf ( obj , " %-*s " , width - pcnt_width - cycles_width , " " ) ;
else if ( al - > offset = = - 1 ) {
/* source line: optional line number column, then the source text */
if ( al - > line_nr & & notes - > options - > show_linenr )
printed = scnprintf ( bf , sizeof ( bf ) , " %-*d " , notes - > widths . addr + 1 , al - > line_nr ) ;
else
printed = scnprintf ( bf , sizeof ( bf ) , " %-*s " , notes - > widths . addr , " " ) ;
obj__printf ( obj , bf ) ;
obj__printf ( obj , " %-*s " , width - printed - pcnt_width - cycles_width + 1 , al - > line ) ;
} else {
u64 addr = al - > offset ;
int color = - 1 ;
/* absolute addresses are shown unless offsets were requested */
if ( ! notes - > options - > use_offset )
addr + = notes - > start ;
if ( ! notes - > options - > use_offset ) {
printed = scnprintf ( bf , sizeof ( bf ) , " % " PRIx64 " : " , addr ) ;
} else {
/* in offset mode only jump targets get an address label */
if ( al - > jump_sources ) {
if ( notes - > options - > show_nr_jumps ) {
int prev ;
printed = scnprintf ( bf , sizeof ( bf ) , " %*d " ,
notes - > widths . jumps ,
al - > jump_sources ) ;
prev = obj__set_jumps_percent_color ( obj , al - > jump_sources ,
current_entry ) ;
obj__printf ( obj , bf ) ;
obj__set_color ( obj , prev ) ;
}
printed = scnprintf ( bf , sizeof ( bf ) , " %* " PRIx64 " : " ,
notes - > widths . target , addr ) ;
} else {
printed = scnprintf ( bf , sizeof ( bf ) , " %-*s " ,
notes - > widths . addr , " " ) ;
}
}
if ( change_color )
color = obj__set_color ( obj , HE_COLORSET_ADDR ) ;
obj__printf ( obj , bf ) ;
if ( change_color )
obj__set_color ( obj , color ) ;
/* disasm_line__write() refills bf with the instruction text, printed just below */
disasm_line__write ( disasm_line ( al ) , notes , obj , bf , sizeof ( bf ) , obj__printf , obj__write_graph ) ;
obj__printf ( obj , " %-*s " , width - pcnt_width - cycles_width - 3 - printed , bf ) ;
}
2018-03-15 17:54:36 -03:00
}
2018-03-15 23:14:51 -03:00
void annotation_line__write ( struct annotation_line * al , struct annotation * notes ,
struct annotation_write_ops * ops )
{
__annotation_line__write ( al , notes , ops - > first_line , ops - > current_entry ,
ops - > change_color , ops - > width , ops - > obj ,
ops - > set_color , ops - > set_percent_color ,
ops - > set_jumps_percent_color , ops - > printf ,
ops - > write_graph ) ;
}
2018-03-15 16:54:11 -03:00
int symbol__annotate2 ( struct symbol * sym , struct map * map , struct perf_evsel * evsel ,
struct annotation_options * options , struct arch * * parch )
{
struct annotation * notes = symbol__annotation ( sym ) ;
size_t size = symbol__size ( sym ) ;
int nr_pcnt = 1 , err ;
notes - > offsets = zalloc ( size * sizeof ( struct annotation_line * ) ) ;
if ( notes - > offsets = = NULL )
return - 1 ;
if ( perf_evsel__is_group_event ( evsel ) )
nr_pcnt = evsel - > nr_members ;
err = symbol__annotate ( sym , map , evsel , 0 , parch ) ;
if ( err )
goto out_free_offsets ;
notes - > options = options ;
symbol__calc_percent ( sym , evsel ) ;
notes - > start = map__rip_2objdump ( map , sym - > start ) ;
annotation__set_offsets ( notes , size ) ;
annotation__mark_jump_targets ( notes , sym ) ;
annotation__compute_ipc ( notes , size ) ;
annotation__init_column_widths ( notes , sym ) ;
notes - > nr_events = nr_pcnt ;
annotation__update_column_widths ( notes ) ;
return 0 ;
out_free_offsets :
zfree ( & notes - > offsets ) ;
return - 1 ;
}
2018-03-16 14:33:38 -03:00
/*
 * ANNOTATION__CFG(n) builds one table entry: the stringified option name
 * paired with the address of the same-named bool field in
 * annotation__default_options.
 */
# define ANNOTATION__CFG(n) \
{ . name = # n , . value = & annotation__default_options . n , }
/*
* Keep the entries sorted , they are bsearch ' ed
*
* The lookup in annotation__config ( ) compares names with strcmp ( ) , so
* the order must be ascending by name .
*/
static struct annotation_config {
const char * name ;
bool * value ;
} annotation__configs [ ] = {
ANNOTATION__CFG ( hide_src_code ) ,
ANNOTATION__CFG ( jump_arrows ) ,
ANNOTATION__CFG ( show_linenr ) ,
ANNOTATION__CFG ( show_nr_jumps ) ,
ANNOTATION__CFG ( show_nr_samples ) ,
ANNOTATION__CFG ( show_total_period ) ,
ANNOTATION__CFG ( use_offset ) ,
} ;
# undef ANNOTATION__CFG
static int annotation_config__cmp ( const void * name , const void * cfgp )
{
const struct annotation_config * cfg = cfgp ;
return strcmp ( name , cfg - > name ) ;
}
/*
 * perf_config() callback that applies annotate section settings to
 * annotation__default_options.
 *
 * Variables that do not start with the annotate section prefix are ignored.
 * For the others, the part after the prefix is looked up in
 * annotation__configs[] with bsearch(); a match stores the boolean parsed
 * by perf_config_bool() into the matching option, an unknown name is only
 * reported through pr_debug().
 *
 * Always returns 0.
 */
static int annotation__config ( const char * var , const char * value ,
void * data __maybe_unused )
{
struct annotation_config * cfg ;
const char * name ;
if ( ! strstarts ( var , " annotate. " ) )
return 0 ;
/* skip the section prefix checked just above to get the bare option name */
name = var + 9 ;
cfg = bsearch ( name , annotation__configs , ARRAY_SIZE ( annotation__configs ) ,
sizeof ( struct annotation_config ) , annotation_config__cmp ) ;
if ( cfg = = NULL )
pr_debug ( " %s variable unknown, ignoring... " , var ) ;
else
* cfg - > value = perf_config_bool ( name , value ) ;
return 0 ;
}
void annotation_config__init ( void )
{
perf_config ( annotation__config , NULL ) ;
annotation__default_options . show_total_period = symbol_conf . show_total_period ;
annotation__default_options . show_nr_samples = symbol_conf . show_nr_samples ;
}