From 1dc27f63303db58ce1b1a6932d1825305f86d574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Li=C5=A1ka?= Date: Thu, 23 Aug 2018 14:29:34 +0200 Subject: [PATCH 01/90] perf annotate: Properly interpret indirect call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch changes the parsing of: callq *0x8(%rbx) from: 0.26 │ → callq *8 to: 0.26 │ → callq *0x8(%rbx) in this case an address is followed by a register, thus one can't parse only the address. Committer testing: 1) run 'perf record sleep 10' 2) before applying the patch, run: perf annotate --stdio2 > /tmp/before 3) after applying the patch, run: perf annotate --stdio2 > /tmp/after 4) diff /tmp/before /tmp/after: --- /tmp/before 2018-08-28 11:16:03.238384143 -0300 +++ /tmp/after 2018-08-28 11:15:39.335341042 -0300 @@ -13274,7 +13274,7 @@ ↓ jle 128 hash_value = hash_table->hash_func (key); mov 0x8(%rsp),%rdi - 0.91 → callq *30 + 0.91 → callq *0x30(%r12) mov $0x2,%r8d cmp $0x2,%eax node_hash = hash_table->hashes[node_index]; @@ -13848,7 +13848,7 @@ mov %r14,%rdi sub %rbx,%r13 mov %r13,%rdx - → callq *38 + → callq *0x38(%r15) cmp %rax,%r13 1.91 ↓ je 240 1b4: mov $0xffffffff,%r13d @@ -14026,7 +14026,7 @@ mov %rcx,-0x500(%rbp) mov %r15,%rsi mov %r14,%rdi - → callq *38 + → callq *0x38(%rax) mov -0x500(%rbp),%rcx cmp %rax,%rcx ↓ jne 9b0 Signed-off-by: Martin Liška Tested-by: Arnaldo Carvalho de Melo Tested-by: Kim Phillips Cc: Jiri Olsa Link: http://lkml.kernel.org/r/bd1f3932-be2b-85f9-7582-111ee0a43b07@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 20061cf42288..e62b69ea87cd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -246,8 +246,14 @@ find_target: indirect_call: tok = strchr(endptr, '*'); - if (tok != NULL) - ops->target.addr = strtoull(tok + 1, NULL, 16); + if (tok 
!= NULL) { + endptr++; + + /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). + * Do not parse such instruction. */ + if (strstr(endptr, "(%r") == NULL) + ops->target.addr = strtoull(endptr, NULL, 16); + } goto find_target; } From 9b3579fc6c6ac45502de1fa9a1fdf873805c2157 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Aug 2018 11:12:24 +0200 Subject: [PATCH 02/90] perf tests: Add breakpoint modify tests Adding to tests that aims on kernel breakpoint modification bugs. First test creates HW breakpoint, tries to change it and checks it was properly changed. It aims on kernel issue that prevents HW breakpoint to be changed via ptrace interface. The first test forks, the child sets itself as ptrace tracee and waits in signal for parent to trace it, then it calls bp_1 and quits. The parent does following steps: - creates a new breakpoint (id 0) for bp_2 function - changes that breakpoint to bp_1 function - waits for the breakpoint to hit and checks it has proper rip of bp_1 function This test aims on an issue in kernel preventing to change disabled breakpoints Second test mimics the first one except for few steps in the parent: - creates a new breakpoint (id 0) for bp_1 function - changes that breakpoint to bogus (-1) address - waits for the breakpoint to hit and checks it has proper rip of bp_1 function This test aims on an issue in kernel disabling enabled breakpoint after unsuccessful change. Committer testing: # uname -a Linux jouet 4.18.0-rc8-00002-g1236568ee3cb #12 SMP Tue Aug 7 14:08:26 -03 2018 x86_64 x86_64 x86_64 GNU/Linux # perf test -v "bp modify" 62: x86 bp modify : --- start --- test child forked, pid 25671 in bp_1 tracee exited prematurely 2 FAILED arch/x86/tests/bp-modify.c:209 modify test 1 failed test child finished with -1 ---- end ---- x86 bp modify: FAILED!
# Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Milind Chabbi Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180827091228.2878-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 6 + tools/perf/arch/x86/tests/bp-modify.c | 213 +++++++++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 tools/perf/arch/x86/tests/bp-modify.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c1bd979b957b..613709cfbbd0 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -9,6 +9,7 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__bp_modify(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 8e2c5a38c3b9..586849ff83a0 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,3 +5,4 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o +libperf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index cc1802ff5410..d47d3f8e3c8e 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,6 +23,12 @@ struct test arch_tests[] = { .desc = "x86 instruction decoder - new instructions", .func = test__insn_x86, }, +#endif +#if defined(__x86_64__) + { + .desc = "x86 bp modify", + .func = test__bp_modify, + }, #endif { .func = NULL, diff --git 
a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c new file mode 100644 index 000000000000..f53e4406709f --- /dev/null +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +static noinline int bp_1(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static noinline int bp_2(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static int spawn_child(void) +{ + int child = fork(); + + if (child == 0) { + /* + * The child sets itself for as tracee and + * waits in signal for parent to trace it, + * then it calls bp_1 and quits. + */ + int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + if (err) { + pr_debug("failed to PTRACE_TRACEME\n"); + exit(1); + } + + raise(SIGCONT); + bp_1(); + exit(0); + } + + return child; +} + +/* + * This tests creates HW breakpoint, tries to + * change it and checks it was properly changed. 
+ */ +static int bp_modify1(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_2 function + * - changes that breakponit to bp_1 function + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_2)) { + pr_debug("failed to set breakpoint, 1st time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint, 2nd time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; +} + +/* + * This tests creates HW breakpoint, tries to + * change it to bogus value and checks the original + * breakpoint is hit. 
+ */ +static int bp_modify2(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_1 function + * - tries to change that breakpoint to (-1) address + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (!ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) { + pr_debug("failed, breakpoint set to bogus address\n"); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? 
TEST_OK : TEST_FAIL; +} + +int test__bp_modify(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1()); + TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2()); + + return 0; +} From bd14406b78e6daa1ea3c1673bda1ffc9efdeead0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Aug 2018 11:12:25 +0200 Subject: [PATCH 03/90] perf/hw_breakpoint: Modify breakpoint even if the new attr has disabled set We need to change the breakpoint even if the attr with new fields has disabled set to true. Current code prevents following user code to change the breakpoint address: ptrace(PTRACE_POKEUSER, child, offsetof(struct user, u_debugreg[0]), addr_1) ptrace(PTRACE_POKEUSER, child, offsetof(struct user, u_debugreg[0]), addr_2) ptrace(PTRACE_POKEUSER, child, offsetof(struct user, u_debugreg[7]), dr7) The first PTRACE_POKEUSER creates the breakpoint with attr.disabled set to true: ptrace_set_breakpoint_addr(nr = 0) struct perf_event *bp = t->ptrace_bps[nr]; ptrace_register_breakpoint(..., disabled = true) ptrace_fill_bp_fields(..., disabled) register_user_hw_breakpoint So the second PTRACE_POKEUSER will be omitted: ptrace_set_breakpoint_addr(nr = 0) struct perf_event *bp = t->ptrace_bps[nr]; struct perf_event_attr attr = bp->attr; modify_user_hw_breakpoint(bp, &attr) if (!attr->disabled) modify_user_hw_breakpoint_check Reported-by: Milind Chabbi Signed-off-by: Jiri Olsa Acked-by: Frederic Weisbecker Acked-by: Oleg Nesterov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180827091228.2878-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/hw_breakpoint.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index b3814fce5ecb..fb229d9c7f3c 100644 --- a/kernel/events/hw_breakpoint.c +++ 
b/kernel/events/hw_breakpoint.c @@ -509,6 +509,8 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *a */ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { + int err; + /* * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it * will not be possible to raise IPIs that invoke __perf_event_disable. @@ -520,11 +522,11 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att else perf_event_disable(bp); - if (!attr->disabled) { - int err = modify_user_hw_breakpoint_check(bp, attr, false); + err = modify_user_hw_breakpoint_check(bp, attr, false); + if (err) + return err; - if (err) - return err; + if (!attr->disabled) { perf_event_enable(bp); bp->attr.disabled = 0; } From cb45302d7c5e20f0c0598cdbd7753fa44daceb2a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Aug 2018 11:12:26 +0200 Subject: [PATCH 04/90] perf/hw_breakpoint: Remove superfluous bp->attr.disabled = 0 Once the breakpoint was successfully modified, the attr->disabled value is in bp->attr.disabled. So there's no reason to set it again, removing that.
Signed-off-by: Jiri Olsa Acked-by: Frederic Weisbecker Acked-by: Oleg Nesterov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Milind Chabbi Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180827091228.2878-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/hw_breakpoint.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index fb229d9c7f3c..3e560d7609fd 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -526,10 +526,9 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att if (err) return err; - if (!attr->disabled) { + if (!attr->disabled) perf_event_enable(bp); - bp->attr.disabled = 0; - } + return 0; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); From 969558371bf926258241727ebb994f516f2e6f61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Aug 2018 11:12:27 +0200 Subject: [PATCH 05/90] perf/hw_breakpoint: Enable breakpoint in modify_user_hw_breakpoint Currently we enable the breakpoint back only if the breakpoint modification was successful. If it fails we can leave the breakpoint in disabled state with attr->disabled == 0. We can safely enable the breakpoint back for both the fail and success paths by checking the bp->attr.disabled, which either holds the new 'requested' disabled state or the original breakpoint state. 
Suggested-by: Oleg Nesterov Signed-off-by: Jiri Olsa Acked-by: Frederic Weisbecker Acked-by: Oleg Nesterov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Milind Chabbi Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180827091228.2878-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/hw_breakpoint.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 3e560d7609fd..d6b56180827c 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -523,13 +523,11 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att perf_event_disable(bp); err = modify_user_hw_breakpoint_check(bp, attr, false); - if (err) - return err; - if (!attr->disabled) + if (!bp->attr.disabled) perf_event_enable(bp); - return 0; + return err; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); From bf06278c3fdf8909c3a9283e2c270b0fc170fa90 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Aug 2018 11:12:28 +0200 Subject: [PATCH 06/90] perf/hw_breakpoint: Simplify breakpoint enable in perf_event_modify_breakpoint We can safely enable the breakpoint back for both the fail and success paths by checking only the bp->attr.disabled, which either holds the new 'requested' disabled state or the original breakpoint state. 
Committer testing: At the end of the series, the 'perf test' entry introduced as the first patch now runs to completion without finding the fixed issues: # perf test "bp modify" 62: x86 bp modify : Ok # In verbose mode: # perf test -v "bp modify" 62: x86 bp modify : --- start --- test child forked, pid 5161 rip 5950a0, bp_1 0x5950a0 in bp_1 rip 5950a0, bp_1 0x5950a0 in bp_1 test child finished with 0 ---- end ---- x86 bp modify: Ok Suggested-by: Oleg Nesterov Acked-by: Oleg Nesterov Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Milind Chabbi Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180827091228.2878-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f6ea33a9f904..22ede28ec07d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2867,16 +2867,11 @@ static int perf_event_modify_breakpoint(struct perf_event *bp, _perf_event_disable(bp); err = modify_user_hw_breakpoint_check(bp, attr, true); - if (err) { - if (!bp->attr.disabled) - _perf_event_enable(bp); - return err; - } - - if (!attr->disabled) + if (!bp->attr.disabled) _perf_event_enable(bp); - return 0; + + return err; } static int perf_event_modify_attr(struct perf_event *event, From 5ab1de932e2923f490645ad017a689c5b58dc433 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 6 Aug 2018 17:28:00 -0500 Subject: [PATCH 07/90] perf arm64: Fix include path for asm-generic/unistd.h The new syscall table support for arm64 mistakenly used the system's asm-generic/unistd.h file when processing the tools/arch/arm64/include/uapi/asm/unistd.h file's include directive: #include See "Committer notes" section of commit 2b5882435606 "perf arm64: Generate system call table from asm/unistd.h" for more details. 
This patch removes the committer's temporary workaround, and instructs the host compiler to search the build tree's include path for the right copy of the unistd.h file, instead of the one on the system's /usr/include path. It thus fixes the committer's test that cross-builds an arm64 perf on an x86 platform running Ubuntu 14.04.5 LTS with an old toolchain: $ tools/perf/arch/arm64/entry/syscalls/mksyscalltbl /gcc-linaro-5.4.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc gcc `pwd`/tools tools/arch/arm64/include/uapi/asm/unistd.h | grep bpf [280] = "bpf", Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Thomas Richter Fixes: 2b5882435606 ("perf arm64: Generate system call table from asm/unistd.h") Link: http://lkml.kernel.org/r/20180806172800.bbcec3cfcc51e2facc978bf2@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/Makefile | 5 +++-- tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index f013b115dc86..dbef716a1913 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -11,7 +11,8 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 out := $(OUTPUT)arch/arm64/include/generated/asm header := $(out)/syscalls.c -sysdef := $(srctree)/tools/include/uapi/asm-generic/unistd.h +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/ systbl := $(sysprf)/mksyscalltbl @@ -19,7 +20,7 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(sysdef) > $@ + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ clean:: $(call QUIET_CLEAN, arm64) 
$(RM) $(header) diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index 52e197317d3e..2dbb8cade048 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -11,7 +11,8 @@ gcc=$1 hostcc=$2 -input=$3 +incpath=$3 +input=$4 if ! test -r $input; then echo "Could not read input file" >&2 @@ -28,7 +29,6 @@ create_table_from_c() cat <<-_EoHEADER #include - #define __ARCH_WANT_RENAMEAT #include "$input" int main(int argc, char *argv[]) { @@ -42,7 +42,7 @@ create_table_from_c() printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);" printf "}\n" - } | $hostcc -o $create_table_exe -x c - + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - $create_table_exe From fd8d2702791a970c751f8b526a17d8e725a05b46 Mon Sep 17 00:00:00 2001 From: Hisao Tanabe Date: Sat, 25 Aug 2018 00:45:56 +0900 Subject: [PATCH 08/90] perf evsel: Fix potential null pointer dereference in perf_evsel__new_idx() If evsel is NULL, we should return NULL to avoid a NULL pointer dereference a bit later in the code. 
Signed-off-by: Hisao Tanabe Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Wang Nan Fixes: 03e0a7df3efd ("perf tools: Introduce bpf-output event") LPU-Reference: 20180824154556.23428-1-xtanabe@gmail.com Link: https://lkml.kernel.org/n/tip-e5plzjhx6595a5yjaf22jss3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c980bbff6353..1a61628a1c12 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -251,8 +251,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); - if (evsel != NULL) - perf_evsel__init(evsel, attr, idx); + if (!evsel) + return NULL; + perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | From dad2762aac17eac01ea97779e78a061ed1b83b86 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Aug 2018 17:31:52 -0300 Subject: [PATCH 09/90] perf tools: Streamline bpf examples and headers installation We were emitting 4 lines, two of them misleading: make: Entering directory '/home/acme/git/perf/tools/perf' INSTALL lib INSTALL include/bpf INSTALL lib INSTALL examples/bpf make: Leaving directory '/home/acme/git/perf/tools/perf' Make it more compact by showing just two lines: make: Entering directory '/home/acme/git/perf/tools/perf' INSTALL bpf-headers INSTALL bpf-examples make: Leaving directory '/home/acme/git/perf/tools/perf' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-0nvkyciqdkrgy829lony5925@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b3d1b12a5081..5224ade3d5af 
100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -777,14 +777,12 @@ endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBBPF - $(call QUIET_INSTALL, lib) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' - $(call QUIET_INSTALL, include/bpf) \ - $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' - $(call QUIET_INSTALL, lib) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' - $(call QUIET_INSTALL, examples/bpf) \ - $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' + $(call QUIET_INSTALL, bpf-headers) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ + $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' + $(call QUIET_INSTALL, bpf-examples) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ + $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' endif $(call QUIET_INSTALL, perf-archive) \ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' From a72f64261359b7451f8478f2a2bf357b4e6c757f Mon Sep 17 00:00:00 2001 From: Chris Phlipot Date: Tue, 28 Aug 2018 23:19:54 -0700 Subject: [PATCH 10/90] perf util: Fix bad memory access in trace info. In the write to the output_fd in the error condition of record_saved_cmdline(), we are writing 8 bytes from a memory location on the stack that contains a primitive that is only 4 bytes in size. Change the primitive to 8 bytes in size to match the size of the write in order to avoid reading unknown memory from the stack. 
Signed-off-by: Chris Phlipot Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180829061954.18871-1-cphlipot0@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index c85d0d1a65ed..7b0ca7cbb7de 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -377,7 +377,7 @@ out: static int record_saved_cmdline(void) { - unsigned int size; + unsigned long long size; char *path; struct stat st; int ret, err = 0; From c9f23d2bc21cb263ae931f3e264d003d746107bb Mon Sep 17 00:00:00 2001 From: Chris Phlipot Date: Wed, 29 Aug 2018 19:19:50 -0700 Subject: [PATCH 11/90] perf event-parse: Use fixed size string for comms Some implementations of libc do not support the 'm' width modifier as part of the scanf string format specifier. This can cause the parsing to fail. Since the parser never checks if the scanf parsing was successful, this can result in a crash. Change the comm string to be allocated as a fixed size instead of dynamically using 'm' scanf width modifier. This can be safely done since comm size is limited to 16 bytes by TASK_COMM_LEN within the kernel. This change prevents perf from crashing when linked against bionic as well as reduces the total number of heap allocations and frees invoked while accomplishing the same task.
Signed-off-by: Chris Phlipot Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830021950.15563-1-cphlipot0@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-parse.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 920b1d58a068..e76214f8d596 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -164,16 +164,15 @@ void parse_ftrace_printk(struct tep_handle *pevent, void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size __maybe_unused) { - char *comm; + char comm[17]; /* Max comm length in the kernel is 16. */ char *line; char *next = NULL; int pid; line = strtok_r(file, "\n", &next); while (line) { - sscanf(line, "%d %ms", &pid, &comm); - tep_register_comm(pevent, comm, pid); - free(comm); + if (sscanf(line, "%d %16s", &pid, comm) == 2) + tep_register_comm(pevent, comm, pid); line = strtok_r(NULL, "\n", &next); } } From fa694160cca6dbba17c57dc7efec5f93feaf8795 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 28 Aug 2018 14:38:48 +0530 Subject: [PATCH 12/90] perf probe powerpc: Ignore SyS symbols irrespective of endianness This makes sure that the SyS symbols are ignored for any powerpc system, not just the big endian ones. Reported-by: Naveen N. Rao Signed-off-by: Sandipan Das Reviewed-by: Kamalesh Babulal Acked-by: Naveen N. 
Rao Cc: Jiri Olsa Cc: Ravi Bangoria Fixes: fb6d59423115 ("perf probe ppc: Use the right prefix when ignoring SyS symbols on ppc") Link: http://lkml.kernel.org/r/20180828090848.1914-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 20e7d74d86cd..10a44e946f77 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) #endif -#if !defined(_CALL_ELF) || _CALL_ELF != 2 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { char *sym = syma->name; +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Skip over any initial dot */ if (*sym == '.') sym++; +#endif /* Avoid "SyS" kernel syscall aliases */ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) @@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma, return SYMBOL_A; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Allow matching against dot variants */ int arch__compare_symbol_names(const char *namea, const char *nameb) { From 4e67b2a5df5d3f341776d12ee575e00ca3ef92de Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 27 Aug 2018 12:53:40 -0500 Subject: [PATCH 13/90] perf annotate: Fix parsing aarch64 branch instructions after objdump update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with binutils 2.28, aarch64 objdump adds comments to the disassembly output to show the alternative names of a condition code [1]. It is assumed that commas in objdump comments could occur in other arches now or in the future, so this fix is arch-independent. 
The fix could have been done with arm64 specific jump__parse and jump__scnprintf functions, but the jump__scnprintf instruction would have to have its comment character be a literal, since the scnprintf functions cannot receive a struct arch easily. This inconvenience also applies to the generic jump__scnprintf, which is why we add a raw_comment pointer to struct ins_operands, so the __parse function assigns it to be re-used by its corresponding __scnprintf function. Example differences in 'perf annotate --stdio2' output on an aarch64 perf.data file: BEFORE: → b.cs ffff200008133d1c // b.hs, dffff7ecc47b AFTER : ↓ b.cs 18c BEFORE: → b.cc ffff200008d8d9cc // b.lo, b.ul, dffff727295b AFTER : ↓ b.cc 31c The branch target labels 18c and 31c also now appear in the output: BEFORE: add x26, x29, #0x80 AFTER : 18c: add x26, x29, #0x80 BEFORE: add x21, x21, #0x8 AFTER : 31c: add x21, x21, #0x8 The Fixes: tag below is added so stable branches will get the update; it doesn't necessarily mean that commit was broken at the time, rather it didn't withstand the aarch64 objdump update. Tested no difference in output for sample x86_64, power arch perf.data files. 
[1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=bb7eff5206e4795ac79c177a80fe9f4630aaf730 Signed-off-by: Kim Phillips Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Anton Blanchard Cc: Christian Borntraeger Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Robin Murphy Cc: Taeung Song Cc: linux-arm-kernel@lists.infradead.org Fixes: b13bbeee5ee6 ("perf annotate: Fix branch instruction with multiple operands") Link: http://lkml.kernel.org/r/20180827125340.a2f7e291901d17cea05daba4@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 +++++++++++++++++++++- tools/perf/util/annotate.h | 1 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e62b69ea87cd..28cd6a17491b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -282,7 +282,19 @@ bool ins__is_call(const struct ins *ins) return ins->ops == &call_ops || ins->ops == &s390_call_ops; } -static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms) +/* + * Prevents from matching commas in the comment section, e.g.: + * ffff200008446e70: b.cs ffff2000084470f4 // b.hs, b.nlast + */ +static inline const char *validate_comma(const char *c, struct ins_operands *ops) +{ + if (ops->raw_comment && c > ops->raw_comment) + return NULL; + + return c; +} + +static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -291,6 +303,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op }; const char *c = strchr(ops->raw, ','); u64 start, end; + + ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + c = validate_comma(c, ops); + /* * Examples of lines to parse for the _cpp_lex_token@@Base * function: @@ -310,6 +326,7 @@ static int jump__parse(struct arch 
*arch __maybe_unused, struct ins_operands *op ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) { c = strchr(c, ','); + c = validate_comma(c, ops); if (c++ != NULL) ops->target.addr = strtoull(c, NULL, 16); } @@ -367,9 +384,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); c = strchr(ops->raw, ','); + c = validate_comma(c, ops); + if (c != NULL) { const char *c2 = strchr(c + 1, ','); + c2 = validate_comma(c2, ops); /* check for 3-op insn */ if (c2 != NULL) c = c2; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 005a5fe8a8c6..5399ba2321bb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -22,6 +22,7 @@ struct ins { struct ins_operands { char *raw; + char *raw_comment; struct { char *raw; char *name; From 7a983a0fe2a29ec849f6748d6bd86904d6e88eea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 20 Aug 2018 17:58:17 -0300 Subject: [PATCH 14/90] perf trace: Pass augmented args to the arg formatters when available If the tracepoint payload is bigger than what a syscall expected from what is in its format file in tracefs, then that will be used as augmented args, i.e. the expansion of syscall arg pointers, with things like a filename, structs, etc. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-bsbqx7xi2ot4q9bf570f7tqs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 28 ++++++++++++++++++++++------ tools/perf/trace/beauty/beauty.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 22ab8e67c760..7deae6c8cb25 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -856,10 +856,12 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) /* * is_exit: is this "exit" or "exit_group"? * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. + * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc. */ struct syscall { struct event_format *tp_format; int nr_args; + int args_size; bool is_exit; bool is_open; struct format_field *args; @@ -1258,10 +1260,12 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) static int syscall__set_arg_fmts(struct syscall *sc) { - struct format_field *field; + struct format_field *field, *last_field = NULL; int idx = 0, len; for (field = sc->args; field; field = field->next, ++idx) { + last_field = field; + if (sc->fmt && sc->fmt->arg[idx].scnprintf) continue; @@ -1292,6 +1296,9 @@ static int syscall__set_arg_fmts(struct syscall *sc) } } + if (last_field) + sc->args_size = last_field->offset + last_field->size; + return 0; } @@ -1472,14 +1479,18 @@ static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, } static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned char *args, struct trace *trace, - struct thread *thread) + unsigned char *args, void *augmented_args, int augmented_args_size, + struct trace *trace, struct thread *thread) { size_t printed = 
0; unsigned long val; u8 bit = 1; struct syscall_arg arg = { .args = args, + .augmented = { + .size = augmented_args_size, + .args = augmented_args, + }, .idx = 0, .mask = 0, .trace = trace, @@ -1692,7 +1703,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, - args, trace, thread); + args, NULL, 0, trace, thread); if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { @@ -1723,7 +1734,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; struct syscall *sc = trace__syscall_info(trace, evsel, id); char msg[1024]; - void *args; + void *args, *augmented_args = NULL; + int augmented_args_size; if (sc == NULL) return -1; @@ -1738,7 +1750,11 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread); + augmented_args_size = sample->raw_size - sc->args_size; + if (augmented_args_size > 0) + augmented_args = sample->raw_data + sc->args_size; + + syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); fprintf(trace->output, "%s", msg); err = 0; out_put: diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 9615af5d412b..6ca044d3d851 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -30,9 +30,34 @@ struct thread; size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); +/** + * augmented_arg: extra payload for syscall pointer arguments + + * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts, + * 
then its the arguments contents, so that we can show more than just a + * pointer. This will be done initially with eBPF, the start of that is at the + * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but + * will eventually be done automagically caching the running kernel tracefs + * events data into an eBPF C script, that then gets compiled and its .o file + * cached for subsequent use. For char pointers like the ones for 'open' like + * syscalls its easy, for the rest we should use DWARF or better, BTF, much + * more compact. + * + * @size: 8 if all we need is an integer, otherwise all of the augmented arg. + * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen' + * @value: u64 aligned, for structs, pathnames + */ +struct augmented_arg { + int size; + int int_arg; + u64 value[]; +}; + /** * @val: value of syscall argument being formatted * @args: All the args, use syscall_args__val(arg, nth) to access one + * @augmented_args: Extra data that can be collected, for instance, with eBPF for expanding the pathname for open, etc + * @augmented_args_size: augmented_args total payload size * @thread: tid state (maps, pid, tid, etc) * @trace: 'perf trace' internals: all threads, etc * @parm: private area, may be an strarray, for instance @@ -43,6 +68,10 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_ struct syscall_arg { unsigned long val; unsigned char *args; + struct { + struct augmented_arg *args; + int size; + } augmented; struct thread *thread; struct trace *trace; void *parm; From 6ccc18a9a17a1189b8b157176ce4a58c458c9eee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 11:14:15 -0300 Subject: [PATCH 15/90] perf trace: Make the augmented_syscalls filter out the tracepoint event When we attach an eBPF object to a tracepoint, if we return 1, then that tracepoint will be stored in the perf's ring buffer. 
In the augmented_syscalls.c case we want to just attach and _override_ the tracepoint payload with an augmented, extended one. In this example, tools/perf/examples/bpf/augmented_syscalls.c, we are attaching to the 'openat' syscall, and adding, after the syscalls:sys_enter_openat usual payload as defined by /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/format, a snapshot of its sole pointer arg: # grep 'field:.*\*' /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/format field:const char * filename; offset:24; size:8; signed:0; # For now this is not being considered, the next csets will make use of it, but as this is overriding the syscall tracepoint enter, we don't want that event appearing on the ring buffer, just our synthesized one. Before: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: , flags: CLOEXEC 0.007 ( 0.004 ms): cat/24044 openat(dfd: CWD, filename: 0x216dda8, flags: CLOEXEC ) = 3 0.028 ( ): __augmented_syscalls__:dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC 0.030 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: , flags: CLOEXEC 0.031 ( 0.006 ms): cat/24044 openat(dfd: CWD, filename: 0x2375ce0, flags: CLOEXEC ) = 3 0.291 ( ): __augmented_syscalls__:dfd: CWD, filename: /etc/passwd 0.293 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0.294 ( 0.004 ms): cat/24044 openat(dfd: CWD, filename: 0x637db06b ) = 3 # After: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:dfd: CWD, filename: 0x9c6a1da8, flags: CLOEXEC 0.005 ( 0.015 ms): cat/27341 openat(dfd: CWD, filename: 0x9c6a1da8, flags: CLOEXEC ) = 3 0.040 ( ): __augmented_syscalls__:dfd: CWD, filename: 0x9c8a9ce0, flags: CLOEXEC 0.041 ( 0.006 ms): cat/27341 openat(dfd: CWD, 
filename: 0x9c8a9ce0, flags: CLOEXEC ) = 3 0.294 ( ): __augmented_syscalls__:dfd: CWD, filename: 0x482a706b 0.296 ( 0.067 ms): cat/27341 openat(dfd: CWD, filename: 0x482a706b ) = 3 # Now let's replace that __augmented_syscalls__ name with the syscall name, using: # grep 'field:.*syscall_nr' /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/format field:int __syscall_nr; offset:8; size:4; signed:1; # That the synthesized payload has exactly where the syscall enter tracepoint puts it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-og4r9k87mzp9hv7el046idmd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 69a31386d8cd..10e7997ab481 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -49,7 +49,7 @@ int syscall_enter(openat)(struct syscall_enter_openat_args *args) probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); - return 1; + return 0; } license(GPL); From 1cdf618f23867dd1dae58f10f1f82839f2bf73b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 11:44:23 -0300 Subject: [PATCH 16/90] perf trace: Print the syscall name for augmented_syscalls Since we copy all the payload for raw_syscalls:sys_enter plus add expanded pointers, we can use the syscall id to get its name, etc: # grep 'field:.* id' /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/format field:long id; offset:8; size:8; signed:1; # Before: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:dfd: CWD, 
filename: 0xec9f9da8, flags: CLOEXEC 0.006 ( 0.006 ms): cat/2395 openat(dfd: CWD, filename: 0xec9f9da8, flags: CLOEXEC) = 3 0.041 ( ): __augmented_syscalls__:dfd: CWD, filename: 0xecc01ce0, flags: CLOEXEC 0.042 ( 0.007 ms): cat/2395 openat(dfd: CWD, filename: 0xecc01ce0, flags: CLOEXEC) = 3 0.376 ( ): __augmented_syscalls__:dfd: CWD, filename: 0xac0a806b 0.379 ( 0.006 ms): cat/2395 openat(dfd: CWD, filename: 0xac0a806b) = 3 # After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): openat(dfd: CWD, filename: 0x31b6dda8, flags: CLOEXEC) 0.009 ( 0.009 ms): cat/3619 openat(dfd: CWD, filename: 0x31b6dda8, flags: CLOEXEC) = 3 0.051 ( ): openat(dfd: CWD, filename: 0x31d75ce0, flags: CLOEXEC) 0.054 ( 0.010 ms): cat/3619 openat(dfd: CWD, filename: 0x31d75ce0, flags: CLOEXEC) = 3 0.539 ( ): openat(dfd: CWD, filename: 0xca71506b) 0.543 ( 0.115 ms): cat/3619 openat(dfd: CWD, filename: 0xca71506b) = 3 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-epz6y9i0eavmerc5ha98t7gn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7deae6c8cb25..bcf882afd6d0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2055,13 +2055,33 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, if (trace->trace_syscalls) fprintf(trace->output, "( ): "); + if (evsel == trace->syscalls.events.augmented) { + int id = perf_evsel__sc_tp_uint(evsel, id, sample); + struct syscall *sc = trace__syscall_info(trace, evsel, id); + + if (sc) { + struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + + if (thread) { + fprintf(trace->output, "%s(", sc->name); + trace__fprintf_sys_enter(trace, evsel, sample); + 
fputc(')', trace->output); + thread__put(thread); + goto newline; + } + } + + /* + * XXX: Not having the associated syscall info or not finding/adding + * the thread should never happen, but if it does... + * fall thru and print it as a bpf_output event. + */ + } + fprintf(trace->output, "%s:", evsel->name); if (perf_evsel__is_bpf_output(evsel)) { - if (evsel == trace->syscalls.events.augmented) - trace__fprintf_sys_enter(trace, evsel, sample); - else - bpf_output__fprintf(trace, sample); + bpf_output__fprintf(trace, sample); } else if (evsel->tp_format) { if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || trace__fprintf_sys_enter(trace, evsel, sample)) { @@ -2071,6 +2091,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, } } +newline: fprintf(trace->output, "\n"); if (callchain_ret > 0) From 6dcbd212ff4988d8e6caa0b6700bd1c1d317dc02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 11:40:09 -0300 Subject: [PATCH 17/90] perf trace: Extract the comm/tid printing for syscall enter Will be used with augmented syscalls, where we haven't transitioned completely to combining sys_enter_FOO with sys_exit_FOO, so we'll go as far as having it similar to the end result, strace like, as possible. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-canomaoiybkswwnhj69u9ae4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bcf882afd6d0..92488edd00eb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1144,11 +1144,9 @@ static void sig_handler(int sig) interrupted = sig == SIGINT; } -static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) +static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp) { - size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, duration_calculated, fp); + size_t printed = 0; if (trace->multiple_threads) { if (trace->show_comm) @@ -1159,6 +1157,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre return printed; } +static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) +{ + size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); + printed += fprintf_duration(duration, duration_calculated, fp); + return printed + trace__fprintf_comm_tid(trace, thread, fp); +} + static int trace__process_event(struct trace *trace, struct machine *machine, union perf_event *event, struct perf_sample *sample) { From c96f4edcc3560a7dc910271d30d91c54a422d46c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 11:47:44 -0300 Subject: [PATCH 18/90] perf trace: Show comm/tid for augmented_syscalls To get us a bit more like the sys_enter + sys_exit combo: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): openat(dfd: CWD, 
filename: 0x31b6dda8, flags: CLOEXEC) 0.009 ( 0.009 ms): cat/3619 openat(dfd: CWD, filename: 0x31b6dda8, flags: CLOEXEC) = 3 0.051 ( ): openat(dfd: CWD, filename: 0x31d75ce0, flags: CLOEXEC) 0.054 ( 0.010 ms): cat/3619 openat(dfd: CWD, filename: 0x31d75ce0, flags: CLOEXEC) = 3 0.539 ( ): openat(dfd: CWD, filename: 0xca71506b) 0.543 ( 0.115 ms): cat/3619 openat(dfd: CWD, filename: 0xca71506b) = 3 # After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/4919 openat(dfd: CWD, filename: 0xc8358da8, flags: CLOEXEC) 0.007 ( 0.005 ms): cat/4919 openat(dfd: CWD, filename: 0xc8358da8, flags: CLOEXEC) = 3 0.032 ( ): cat/4919 openat(dfd: CWD, filename: 0xc8560ce0, flags: CLOEXEC) 0.033 ( 0.006 ms): cat/4919 openat(dfd: CWD, filename: 0xc8560ce0, flags: CLOEXEC) = 3 0.301 ( ): cat/4919 openat(dfd: CWD, filename: 0x91fa306b) 0.304 ( 0.004 ms): cat/4919 openat(dfd: CWD, filename: 0x91fa306b) = 3 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-6w8ytyo5y655a1hsyfpfily6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 92488edd00eb..e46ac9009172 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2069,6 +2069,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (thread) { + trace__fprintf_comm_tid(trace, thread, trace->output); fprintf(trace->output, "%s(", sc->name); trace__fprintf_sys_enter(trace, evsel, sample); fputc(')', trace->output); From 75d1e30681d006f4dc148c153395fb938acfc7c0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 12:00:39 -0300 Subject: [PATCH 19/90] perf trace: Use the augmented filename, expanding syscall enter 
pointers This is the final touch in showing how a syscall argument beautifier can access the augmented args put in place by the tools/perf/examples/bpf/augmented_syscalls.c eBPF script, right after the regular raw syscall args, i.e. the up to 6 long integer values in the syscall interface. With this we are able to show the 'openat' syscall arg, now with up to 64 bytes, but in time this will be configurable, just like with the 'strace -s strsize' argument, from 'strace''s man page: -s strsize Specify the maximum string size to print (the default is 32). This actually is the maximum string to _collect_ and store in the ring buffer, not just print. Before: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) 0.017 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) = 3 0.049 ( ): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) 0.051 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) = 3 0.377 ( ): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) 0.379 ( 0.005 ms): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) = 3 # After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/11966 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.006 ( 0.006 ms): cat/11966 openat(dfd: CWD, filename: 0x4bfdcda8, flags: CLOEXEC) = 3 0.034 ( ): cat/11966 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.036 ( 0.008 ms): cat/11966 openat(dfd: CWD, filename: 0x4c1e4ce0, flags: CLOEXEC) = 3 0.375 ( ): cat/11966 openat(dfd: CWD, filename: /etc/passwd) 0.377 ( 0.005 ms): cat/11966 openat(dfd: CWD, filename: 0xe87906b) = 3 # This cset should show all the aspects of establishing a protocol between an eBPF syscall arg augmenter program, tools/perf/examples/bpf/augmented_syscalls.c and a 'perf trace' beautifier, the one associated 
with all 'char *' pointer syscall args with names that can heuristically be associated with filenames. Now to wire up 'open' to show a second syscall using this scheme, all we have to do now is to change tools/perf/examples/bpf/augmented_syscalls.c, as 'perf trace' will notice that the perf_sample.raw_size is more than what is expected for a particular syscall payload as defined by its tracefs format file and will then use the augmented payload in the 'filename' syscall arg beautifier. The same protocol will be used for structs such as 'struct sockaddr *', 'struct pollfd', etc, with additions for handling arrays. This will all be done under the hood when 'perf trace' realizes the system has the necessary components, and also can be done by providing a precompiled augmented_syscalls.c eBPF ELF object. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-gj9kqb61wo7m3shtpzercbcr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 10 ++++++++++ tools/perf/examples/bpf/augmented_syscalls.c | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e46ac9009172..5d841114a745 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1097,11 +1097,21 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf, ttrace->filename.entry_str_pos = bf - ttrace->entry_str; } +static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) +{ + struct augmented_arg *augmented_arg = arg->augmented.args; + + return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value); +} + static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg) { unsigned long ptr = arg->val; + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_string(arg, bf, size); + if 
(!arg->trace->vfs_getname) return scnprintf(bf, size, "%#x", ptr); diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 10e7997ab481..93960e891478 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -27,6 +27,12 @@ struct bpf_map SEC("maps") __augmented_syscalls__ = { .max_entries = __NR_CPUS__, }; +struct augmented_filename { + int size; + int reserved; + char value[256]; +}; + struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; @@ -38,17 +44,20 @@ struct syscall_enter_openat_args { struct augmented_enter_openat_args { struct syscall_enter_openat_args args; - char filename[64]; + struct augmented_filename filename; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { - struct augmented_enter_openat_args augmented_args; + struct augmented_enter_openat_args augmented_args = { .filename.reserved = 0, }; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); - probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, - &augmented_args, sizeof(augmented_args)); + &augmented_args, + sizeof(augmented_args) - sizeof(augmented_args.filename.value) + augmented_args.filename.size); return 0; } From daa1284af3b9a8d73b1dadaa12bc4068a4ce22ab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 12:20:06 -0300 Subject: [PATCH 20/90] perf trace: Augment the 'open' syscall 'filename' arg As described in the previous cset, all we had to do was to touch the augmented_syscalls.c eBPF program, fire up 'perf trace' with that new eBPF script in system wide mode and wait for 'open' syscalls, in addition to 'openat' ones to see that it 
works: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.065 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.435 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|TRUNC|WRONLY, mode: IRUSR|IWUSR) 1.875 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 1227.260 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 1227.397 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 7227.619 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 7227.661 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10018.079 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 10018.514 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10018.568 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10022.409 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10090.044 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10090.351 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10090.407 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format) 10091.763 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10091.812 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10092.807 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10092.851 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10094.650 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10094.926 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10096.010 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 
10096.057 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10097.056 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10097.099 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13228.345 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 13232.734 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 15198.956 lighttpd/16748 open(filename: /proc/loadavg, mode: ISGID|IXOTH) ^C# It even catches 'perf' itself looking at the sys_enter_open and sys_enter_openat tracefs format dictionaries when it first finds them in the trace... :-) Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-upmogc57uatljr6el6u8537l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 93960e891478..154379463c95 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -61,4 +61,31 @@ int syscall_enter(openat)(struct syscall_enter_openat_args *args) return 0; } +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +struct augmented_enter_open_args { + struct syscall_enter_open_args args; + struct augmented_filename filename; +}; + +int syscall_enter(open)(struct syscall_enter_open_args *args) +{ + struct augmented_enter_open_args augmented_args = { .filename.reserved = 0, }; + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + args->filename_ptr); + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, + &augmented_args, + sizeof(augmented_args) - 
sizeof(augmented_args.filename.value) + augmented_args.filename.size); + return 0; +} + license(GPL); From 9779fc021410e7d8433d923a2202dac7e1384b05 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 13:21:36 -0300 Subject: [PATCH 21/90] perf trace: Augment inotify_add_watch pathname syscall arg Again, just changing tools/perf/examples/bpf/augmented_syscalls.c, that is starting to have too much boilerplate, some macro will come to the rescue. # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 gmain/2590 inotify_add_watch(fd: 3, pathname: /var/cache/app-info/yaml, mask: 16789454) 0.023 gmain/2590 inotify_add_watch(fd: 3, pathname: /var/lib/app-info/xmls, mask: 16789454) 0.028 gmain/2590 inotify_add_watch(fd: 3, pathname: /var/lib/app-info/yaml, mask: 16789454) 0.032 gmain/2590 inotify_add_watch(fd: 3, pathname: /usr/share/app-info/yaml, mask: 16789454) 0.039 gmain/2590 inotify_add_watch(fd: 3, pathname: /usr/local/share/app-info/xmls, mask: 16789454) 0.045 gmain/2590 inotify_add_watch(fd: 3, pathname: /usr/local/share/app-info/yaml, mask: 16789454) 0.049 gmain/2590 inotify_add_watch(fd: 3, pathname: /home/acme/.local/share/app-info/yaml, mask: 16789454) 0.056 gmain/2590 inotify_add_watch(fd: 3, pathname: , mask: 16789454) 0.010 gmain/2245 inotify_add_watch(fd: 7, pathname: /home/acme/~, mask: 16789454) 0.087 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_inotify_add) 0.436 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 56.042 gmain/2791 inotify_add_watch(fd: 4, pathname: /var/lib/fwupd/remotes.d/lvfs-testing, mask: 16789454) 113.986 gmain/1721 inotify_add_watch(fd: 3, pathname: /var/lib/gdm/~, mask: 16789454) 3777.265 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 3777.550 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) ^C[root@jouet perf]# Still not combining raw_syscalls:sys_enter + 
raw_syscalls:sys_exit, to get it strace-like, but that probably will come very naturally with some more wiring up... Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ol83juin2cht9vzquynec5hz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 154379463c95..6ec327850a15 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -88,4 +88,31 @@ int syscall_enter(open)(struct syscall_enter_open_args *args) return 0; } +struct syscall_enter_inotify_add_watch_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + char *pathname_ptr; + long mask; +}; + +struct augmented_enter_inotify_add_watch_args { + struct syscall_enter_inotify_add_watch_args args; + struct augmented_filename pathname; +}; + +int syscall_enter(inotify_add_watch)(struct syscall_enter_inotify_add_watch_args *args) +{ + struct augmented_enter_inotify_add_watch_args augmented_args = { .pathname.reserved = 0, }; + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + augmented_args.pathname.size = probe_read_str(&augmented_args.pathname.value, + sizeof(augmented_args.pathname.value), + args->pathname_ptr); + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, + &augmented_args, + sizeof(augmented_args) - sizeof(augmented_args.pathname.value) + augmented_args.pathname.size); + return 0; +} + license(GPL); From f6618ce6c024ec90b156700fc39eb313ec117881 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 13:44:49 -0300 Subject: [PATCH 22/90] perf trace: Introduce augmented_filename_syscall_enter() declarator Helping with tons of boilerplate for syscalls that only want to augment a filename. 
Now supporting one such syscall is just a matter of declaring its arguments struct + using: augmented_filename_syscall_enter(openat); Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ls7ojdseu8fxw7fvj77ejpao@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 78 ++++++-------------- 1 file changed, 23 insertions(+), 55 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 6ec327850a15..e8486e8597de 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -33,6 +33,25 @@ struct augmented_filename { char value[256]; }; +#define augmented_filename_syscall_enter(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ + return 0; \ +} + struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; @@ -42,24 +61,7 @@ struct syscall_enter_openat_args { long mode; }; -struct augmented_enter_openat_args { - struct syscall_enter_openat_args args; - struct augmented_filename filename; -}; - -int syscall_enter(openat)(struct syscall_enter_openat_args *args) -{ - struct augmented_enter_openat_args augmented_args = { .filename.reserved = 0, 
}; - - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, - sizeof(augmented_args.filename.value), - args->filename_ptr); - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, - &augmented_args, - sizeof(augmented_args) - sizeof(augmented_args.filename.value) + augmented_args.filename.size); - return 0; -} +augmented_filename_syscall_enter(openat); struct syscall_enter_open_args { unsigned long long common_tp_fields; @@ -69,50 +71,16 @@ struct syscall_enter_open_args { long mode; }; -struct augmented_enter_open_args { - struct syscall_enter_open_args args; - struct augmented_filename filename; -}; - -int syscall_enter(open)(struct syscall_enter_open_args *args) -{ - struct augmented_enter_open_args augmented_args = { .filename.reserved = 0, }; - - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, - sizeof(augmented_args.filename.value), - args->filename_ptr); - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, - &augmented_args, - sizeof(augmented_args) - sizeof(augmented_args.filename.value) + augmented_args.filename.size); - return 0; -} +augmented_filename_syscall_enter(open); struct syscall_enter_inotify_add_watch_args { unsigned long long common_tp_fields; long syscall_nr; long fd; - char *pathname_ptr; + char *filename_ptr; long mask; }; -struct augmented_enter_inotify_add_watch_args { - struct syscall_enter_inotify_add_watch_args args; - struct augmented_filename pathname; -}; - -int syscall_enter(inotify_add_watch)(struct syscall_enter_inotify_add_watch_args *args) -{ - struct augmented_enter_inotify_add_watch_args augmented_args = { .pathname.reserved = 0, }; - - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); - augmented_args.pathname.size = probe_read_str(&augmented_args.pathname.value, - 
sizeof(augmented_args.pathname.value), - args->pathname_ptr); - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, - &augmented_args, - sizeof(augmented_args) - sizeof(augmented_args.pathname.value) + augmented_args.pathname.size); - return 0; -} +augmented_filename_syscall_enter(inotify_add_watch); license(GPL); From 16cc63593f67477e9ca62f10182e74e949af1acb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 13:55:50 -0300 Subject: [PATCH 23/90] perf trace: Augment 'newstat' (aka 'stat') filename ptr This one will need some more work, that 'statbuf' pointer requires a beautifier in 'perf trace'. # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 weechat/3596 stat(filename: /etc/localtime, statbuf: 0x7ffd87d11f60) 0.186 perf/29818 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_stat/format) 0.279 perf/29818 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_newstat/for) 0.670 perf/29818 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 60.805 DNS Res~er #20/21308 stat(filename: /etc/resolv.conf, statbuf: 0x7ffa733fe4a0) 60.836 DNS Res~er #20/21308 open(filename: /etc/hosts, flags: CLOEXEC) 60.931 perf/29818 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format) 607.070 DNS Res~er #21/29812 stat(filename: /etc/resolv.conf, statbuf: 0x7ffa5e1fe3f0) 607.098 DNS Res~er #21/29812 open(filename: /etc/hosts, flags: CLOEXEC) 999.336 weechat/3596 stat(filename: /etc/localtime, statbuf: 0x7ffd87d11f60) ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-4lhabe7m4uzo76lnqpyfmnvk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c 
b/tools/perf/examples/bpf/augmented_syscalls.c index e8486e8597de..a9695c7f7aab 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -83,4 +83,15 @@ struct syscall_enter_inotify_add_watch_args { augmented_filename_syscall_enter(inotify_add_watch); +struct statbuf; + +struct syscall_enter_newstat_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + struct stat *statbuf; +}; + +augmented_filename_syscall_enter(newstat); + license(GPL); From 9ab5aadebeddc77fccfdf94a048259315ce95fe1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Aug 2018 15:02:09 -0300 Subject: [PATCH 24/90] perf trace: Add a etcsnoop.c augmented syscalls eBPF utility We need to put common stuff into a separate header in tools/perf/include/bpf/ for these augmented syscalls, but I couldn't resist adding an etcsnoop.c tool, combining augmented syscalls + filtering, that in the future will be passed from 'perf trace''s command line, to use in building the eBPF program to do that specific filtering at the source, inside the kernel: Running system wide: (hope there isn't any embarrassing stuff here...
;-) ) # perf trace -e tools/perf/examples/bpf/etcsnoop.c 0.000 sed/21878 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 1741.473 cat/21883 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 1741.892 cat/21883 openat(dfd: CWD, filename: /etc/passwd) 1748.948 sed/21886 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 1777.136 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1777.738 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.158 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.528 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.595 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.901 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.939 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.966 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1778.992 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.019 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.045 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.071 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.095 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.121 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.148 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.175 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.202 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.229 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.254 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.279 gvfs-udisks2-v/2302 openat(dfd: CWD, 
filename: /etc/fstab, flags: CLOEXEC) 1779.309 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.336 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.363 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.388 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.414 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.442 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.470 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.500 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.529 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.557 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.586 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.617 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.648 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.679 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.706 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.739 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.769 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.798 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.823 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.844 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.862 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.880 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.911 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.942 gvfs-udisks2-v/2302 
openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1779.972 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1780.004 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 1780.035 gvfs-udisks2-v/2302 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 13059.154 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13060.739 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13061.990 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13063.177 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13064.265 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13065.483 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13067.383 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13068.902 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13069.922 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13070.915 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13072.612 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13074.816 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13077.343 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13078.731 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13559.064 DNS Res~er #22/21054 open(filename: /etc/hosts, flags: CLOEXEC) 22419.522 sed/21896 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 24473.313 git/21900 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 24491.988 less/21901 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 24493.793 git/21901 openat(dfd: CWD, filename: /etc/sysless) 24565.772 sed/21924 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 25878.752 git/21928 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 26075.666 git/21928 open(filename: /etc/localtime, flags: CLOEXEC) 26075.565 less/21929 openat(dfd: CWD, filename: 
/etc/ld.so.cache, flags: CLOEXEC) 26076.060 less/21929 openat(dfd: CWD, filename: /etc/sysless) 26346.395 sed/21932 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 26483.583 sed/21938 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 26954.890 sed/21944 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 27016.165 gsd-color/1762 openat(dfd: CWD, filename: /etc/localtime) 27016.414 gsd-color/1762 openat(dfd: CWD, filename: /etc/localtime) 27712.313 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 27712.616 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 27829.035 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27829.368 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27829.584 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27829.800 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27830.107 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27830.521 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 27961.516 git/21948 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 27987.568 less/21949 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 27988.948 bash/21949 openat(dfd: CWD, filename: /etc/sysless) 28043.536 sed/21972 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 28736.008 sed/21978 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 34882.664 git/21991 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 34882.664 sort/21990 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 34884.441 uniq/21992 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 35593.098 git/21997 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 35638.839 git/21997 openat(dfd: CWD, filename: /etc/gitattributes) 35702.851 sed/22000 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 36076.039 sed/22006 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 37569.049 git/22014 
openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 37673.712 git/22014 open(filename: /etc/localtime, flags: CLOEXEC) 37781.710 vim/22040 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 37783.667 git/22040 openat(dfd: CWD, filename: /etc/vimrc) 37792.394 git/22040 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 37792.436 git/22040 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 37792.580 git/22040 open(filename: /etc/passwd, flags: CLOEXEC) 43893.625 DNS Res~er #23/21365 open(filename: /etc/hosts, flags: CLOEXEC) 48060.409 nm-dhcp-helper/22044 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48071.745 systemd/1 openat(dfd: CWD, filename: /etc/systemd/system/dbus-org.freedesktop.nm-dispatcher.service, flags: CLOEXEC|NOFOLLOW|NOCTTY) 48082.780 nm-dispatcher/22049 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48111.418 systemd/22049 open(filename: /etc/NetworkManager/dispatcher.d, flags: CLOEXEC|DIRECTORY|NONBLOCK) 48111.904 systemd/22049 open(filename: /etc/localtime, flags: CLOEXEC) 48118.357 00-netreport/22052 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48119.668 systemd/22052 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 48119.762 systemd/22052 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48119.887 systemd/22052 open(filename: /etc/passwd, flags: CLOEXEC) 48120.025 systemd/22052 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/00-netreport) 48124.144 hostname/22054 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48125.492 systemd/22052 openat(dfd: CWD, filename: /etc/init.d/functions) 48127.253 systemd/22052 openat(dfd: CWD, filename: /etc/profile.d/lang.sh) 48127.388 systemd/22052 openat(dfd: CWD, filename: /etc/locale.conf) 48137.749 cat/22056 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48143.519 04-iscsi/22058 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48144.438 04-iscsi/22058 open(filename: 
/etc/nsswitch.conf, flags: CLOEXEC) 48144.478 04-iscsi/22058 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48144.577 04-iscsi/22058 open(filename: /etc/passwd, flags: CLOEXEC) 48144.819 04-iscsi/22058 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/04-iscsi) 48145.620 10-ifcfg-rh-ro/22059 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48146.169 systemd/22059 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 48146.207 systemd/22059 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48146.287 systemd/22059 open(filename: /etc/passwd, flags: CLOEXEC) 48146.387 systemd/22059 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/10-ifcfg-rh-routes.sh) 48147.215 11-dhclient/22060 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48147.787 11-dhclient/22060 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 48147.813 11-dhclient/22060 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48147.929 11-dhclient/22060 open(filename: /etc/passwd, flags: CLOEXEC) 48148.016 11-dhclient/22060 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/11-dhclient) 48148.906 grep/22063 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48151.165 11-dhclient/22060 openat(dfd: CWD, filename: /etc/sysconfig/network) 48151.560 11-dhclient/22060 open(filename: /etc/dhcp/dhclient.d/, flags: CLOEXEC|DIRECTORY|NONBLOCK) 48151.704 11-dhclient/22060 openat(dfd: CWD, filename: /etc/dhcp/dhclient.d/chrony.sh) 48153.593 20-chrony/22065 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48154.695 20-chrony/22065 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 48154.756 20-chrony/22065 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48154.914 20-chrony/22065 open(filename: /etc/passwd, flags: CLOEXEC) 48155.067 20-chrony/22065 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/20-chrony) 48156.962 25-polipo/22066 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 
48157.824 systemd/22066 open(filename: /etc/nsswitch.conf, flags: CLOEXEC) 48157.866 systemd/22066 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 48157.981 systemd/22066 open(filename: /etc/passwd, flags: CLOEXEC) 48158.090 systemd/22066 openat(dfd: CWD, filename: /etc/NetworkManager/dispatcher.d/25-polipo) 48533.616 gsd-housekeepi/2412 openat(dfd: CWD, filename: /etc/fstab, flags: CLOEXEC) 87122.021 gsd-color/1762 openat(dfd: CWD, filename: /etc/localtime) 87122.146 gsd-color/1762 openat(dfd: CWD, filename: /etc/localtime) 87825.582 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 87825.844 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 87829.524 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 87830.531 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 87831.288 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 87832.011 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 87832.672 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) 87833.276 gnome-shell/2125 openat(dfd: CWD, filename: /etc/localtime) ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-0o770jvdcy04ee6vhv6v471m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/etcsnoop.c | 80 ++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tools/perf/examples/bpf/etcsnoop.c diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c new file mode 100644 index 000000000000..b59e8812ee8c --- /dev/null +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the filename syscalls with the contents of the filename pointer argument + * filtering out those that do not start with /etc/.
+ * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/etcsnoop.c cat /etc/passwd > /dev/null + * + * It'll catch some openat syscalls related to the dynamic linker and + * the last one should be the one for '/etc/passwd'. + * + * This matches what is marshalled into the raw_syscall:sys_enter payload + * expected by the 'perf trace' beautifiers, and can be used by them unmodified, + * which will be done as that feature is implemented in the next csets, for now + * it will appear in a dump done by the default tracepoint handler in 'perf trace', + * that uses bpf_output__fprintf() to just dump those contents, as done with + * the bpf-output event associated with the __bpf_output__ map declared in + * tools/perf/include/bpf/stdio.h. + */ + +#include <stdio.h> + +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct augmented_filename { + int size; + int reserved; + char value[64]; +}; + +#define augmented_filename_syscall_enter(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + char etc[6] = "/etc/"; \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ + return 0; \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ + return 0; \ +} + +struct syscall_enter_openat_args
{ + unsigned long long common_tp_fields; + long syscall_nr; + long dfd; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(openat); + +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(open); + +license(GPL); From d35b168c3dcdf103c2b2d694ad8513a669781703 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Aug 2018 23:53:22 -0300 Subject: [PATCH 25/90] perf bpf: Give precedence to bpf header dir I need to check the need for $KERNEL_INC_OPTIONS when building eBPF restricted C programs, for now just give precedence to $PERF_BPF_INC_OPTIONS so that we can get a linux/socket.h usable in eBPF programs. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-5z7qw529sdebrn9y1xxqw9hf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 19262f98cd4e..5b0b60f00275 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -19,7 +19,7 @@ #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \ + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" From 403f833d15a33bfd8e50dd79fa8e25fb4aa132f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Aug 2018 17:41:53 -0300 Subject: [PATCH 26/90] perf bpf: Add linux/socket.h to the headers accessible to bpf proggies So that we don't have to define sockaddr_storage in the 
augmented_syscalls.c bpf example when hooking into syscalls needing it, idea is to mimic the system headers. Eventually we probably need to have sys/socket.h, etc. Start by having at least linux/socket.h. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yhzarcvsjue8pgpvkjhqgioc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 +++- tools/perf/include/bpf/linux/socket.h | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 tools/perf/include/bpf/linux/socket.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5224ade3d5af..92514fb3689f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -779,7 +779,9 @@ endif ifndef NO_LIBBPF $(call QUIET_INSTALL, bpf-headers) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \ + $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ + $(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux' $(call QUIET_INSTALL, bpf-examples) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h new file mode 100644 index 000000000000..7f844568dab8 --- /dev/null +++ b/tools/perf/include/bpf/linux/socket.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_SOCKET_H +#define _UAPI_LINUX_SOCKET_H + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define _K_SS_MAXSIZE 128 /* Implementation specific max size */ +#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) + /* 
Implementation specific desired alignment */ + +typedef unsigned short __kernel_sa_family_t; + +struct __kernel_sockaddr_storage { + __kernel_sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ +} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ + +#define sockaddr_storage __kernel_sockaddr_storage + +#endif /* _UAPI_LINUX_SOCKET_H */ From d5a7e6613b00d46a4971e8b69e18e2cfd7b00df3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Aug 2018 16:24:44 -0300 Subject: [PATCH 27/90] perf trace augmented_syscalls: Augment connect's 'sockaddr' arg As the first example of augmenting something other than a 'filename', augment the 'struct sockaddr' argument for the 'connect' syscall: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ssh -6 fedorapeople.org 0.000 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 0.042 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.329 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.362 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.458 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.478 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.683 ssh/29669 connect(fd: 3, uservaddr: { .family: INET, port: 53, addr: 192.168.43.1 }, addrlen: 16) 4.710 ssh/29669 connect(fd: 3, uservaddr: { .family: INET6, port: 22, addr: 2610:28:3090:3001:5054:ff:fea7:9474 }, addrlen: 28) root@fedorapeople.org: Permission denied (publickey). 
# This is still just augmenting the syscalls:sys_enter_connect part, later we'll wire this up to augment the enter+exit combo, like in the traditional 'perf trace' and 'strace' outputs. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-s7l541cbiqb22ifio6z7dpf6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 12 +--- tools/perf/examples/bpf/augmented_syscalls.c | 34 +++++++++++- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 5 ++ tools/perf/trace/beauty/sockaddr.c | 58 ++++++++++++++++++++ 5 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 tools/perf/trace/beauty/sockaddr.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5d841114a745..ab2ed30b8dcc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -498,16 +498,6 @@ static const char *clockid[] = { }; static DEFINE_STRARRAY(clockid); -static const char *socket_families[] = { - "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", - "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", - "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", - "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", - "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", - "ALG", "NFC", "VSOCK", -}; -static DEFINE_STRARRAY(socket_families); - static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, struct syscall_arg *arg) { @@ -645,6 +635,8 @@ static struct syscall_fmt { [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, { .name = "close", .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, + { .name = "connect", + .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, }, { .name = "epoll_ctl", .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, { .name = "eventfd2", diff --git a/tools/perf/examples/bpf/augmented_syscalls.c
b/tools/perf/examples/bpf/augmented_syscalls.c index a9695c7f7aab..6dfead0be74e 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -19,6 +19,7 @@ */ #include <stdio.h> +#include <linux/socket.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, @@ -33,7 +34,7 @@ struct augmented_filename { char value[256]; }; -#define augmented_filename_syscall_enter(syscall) \ +#define augmented_filename_syscall_enter(syscall) \ struct augmented_enter_##syscall##_args { \ struct syscall_enter_##syscall##_args args; \ struct augmented_filename filename; \ @@ -94,4 +95,35 @@ struct syscall_enter_newstat_args { augmented_filename_syscall_enter(newstat); +struct sockaddr; + +struct syscall_enter_connect_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +struct augmented_enter_connect_args { + struct syscall_enter_connect_args args; + struct sockaddr_storage addr; +}; + +int syscall_enter(connect)(struct syscall_enter_connect_args *args) +{ + struct augmented_enter_connect_args augmented_args; + unsigned long addrlen = sizeof(augmented_args.addr); + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); +#ifdef FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK + if (addrlen > augmented_args.args.addrlen) + addrlen = augmented_args.args.addrlen; +#endif + probe_read(&augmented_args.addr, addrlen, args->addr_ptr); + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); + return 0; +} + license(GPL); diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index f528ba35e140..c3b0afd67760 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -7,5 +7,6 @@ endif libperf-y += kcmp.o libperf-y += pkey_alloc.o libperf-y += prctl.o +libperf-y +=
sockaddr.o libperf-y += socket.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 6ca044d3d851..2570152d3909 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -30,6 +30,8 @@ struct thread; size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); +extern struct strarray strarray__socket_families; + /** * augmented_arg: extra payload for syscall pointer arguments @@ -135,6 +137,9 @@ size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_a size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3 +size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_SOCKADDR syscall_arg__scnprintf_sockaddr + size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg); #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c new file mode 100644 index 000000000000..3944a7d54d3c --- /dev/null +++ b/tools/perf/trace/beauty/sockaddr.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo + +#include "trace/beauty/beauty.h" +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <arpa/inet.h> + +static const char *socket_families[] = { + "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", + "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", + "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", + "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", + "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", + "ALG", "NFC", "VSOCK", +}; +DEFINE_STRARRAY(socket_families); + +static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size) +{ +
struct sockaddr_in *sin = (struct sockaddr_in *)arg->augmented.args; + char family[32]; + size_t printed; + + strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sin->sin_family); + printed = scnprintf(bf, size, "{ .family: %s", family); + + if (sin->sin_family == AF_INET) { + char tmp[512]; + printed += scnprintf(bf + printed, size - printed, ", port: %d, addr: %s", ntohs(sin->sin_port), + inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp))); + } else if (sin->sin_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin; + u32 flowinfo = ntohl(sin6->sin6_flowinfo); + char tmp[512]; + + printed += scnprintf(bf + printed, size - printed, ", port: %d, addr: %s", ntohs(sin6->sin6_port), + inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp))); + if (flowinfo != 0) + printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo); + if (sin6->sin6_scope_id != 0) + printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id); + } else if (sin->sin_family == AF_LOCAL) { + struct sockaddr_un *sun = (struct sockaddr_un *)sin; + printed += scnprintf(bf + printed, size - printed, ", path: %s", sun->sun_path); + } + + return printed + scnprintf(bf + printed, size - printed, " }"); +} + +size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size); + + return scnprintf(bf, size, "%#x", arg->val); +} From 24a6c2cd1dbd85bd5624a6d0b05de891d0f07696 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Aug 2018 16:39:11 -0300 Subject: [PATCH 28/90] perf trace augmented_syscalls: Add augmented_sockaddr_syscall_enter() From the one for 'connect', so that we can use it with sendto and others that receive a 'struct sockaddr'. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-8bdqv1q0ndcjl1nqns5r5je2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 46 +++++++++++--------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 6dfead0be74e..5f417e528419 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -95,6 +95,31 @@ struct syscall_enter_newstat_args { augmented_filename_syscall_enter(newstat); +#ifndef _K_SS_MAXSIZE +#define _K_SS_MAXSIZE 128 +#endif + +#define augmented_sockaddr_syscall_enter(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct sockaddr_storage addr; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args; \ + unsigned long addrlen = sizeof(augmented_args.addr); \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ +/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ +/* if (addrlen > augmented_args.args.addrlen) */ \ +/* addrlen = augmented_args.args.addrlen; */ \ +/* */ \ + probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ + return 0; \ +} + struct sockaddr; struct syscall_enter_connect_args { @@ -105,25 +130,6 @@ struct syscall_enter_connect_args { unsigned long addrlen; }; -struct augmented_enter_connect_args { - struct syscall_enter_connect_args args; - struct sockaddr_storage addr; -}; - -int syscall_enter(connect)(struct syscall_enter_connect_args *args) -{ - struct augmented_enter_connect_args 
augmented_args; - unsigned long addrlen = sizeof(augmented_args.addr); - - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); -#ifdef FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK - if (addrlen > augmented_args.args.addrlen) - addrlen = augmented_args.args.addrlen; -#endif - probe_read(&augmented_args.addr, addrlen, args->addr_ptr); - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); - return 0; -} +augmented_sockaddr_syscall_enter(connect); license(GPL); From 02ef288420775542316e41dc610a6a88725aa83a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Aug 2018 17:03:53 -0300 Subject: [PATCH 29/90] perf trace augmented_syscalls: Augment bind's 'myaddr' sockaddr arg One more, to reuse the augmented_sockaddr_syscall_enter() macro introduced from the augmentation of connect's sockaddr arg, also to get a subset of the struct arg augmentations done using the manual method, before switching to something automatic, using tracefs's format file or, even better, BTF containing the syscall args structs. 
# perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 sshd/11479 bind(fd: 3, umyaddr: { .family: NETLINK }, addrlen: 12) 1.752 sshd/11479 bind(fd: 3, umyaddr: { .family: INET, port: 22, addr: 0.0.0.0 }, addrlen: 16) 1.924 sshd/11479 bind(fd: 4, umyaddr: { .family: INET6, port: 22, addr: :: }, addrlen: 28) ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-a2drqpahpmc7uwb3n3gj2plu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 ++ tools/perf/examples/bpf/augmented_syscalls.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ab2ed30b8dcc..537bb30895df 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -621,6 +621,8 @@ static struct syscall_fmt { } syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, + { .name = "bind", + .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, }, { .name = "bpf", .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, { .name = "brk", .hexret = true, diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 5f417e528419..71edb7ad8698 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -122,6 +122,16 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ struct sockaddr; +struct syscall_enter_bind_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +augmented_sockaddr_syscall_enter(bind); + struct syscall_enter_connect_args { unsigned long long common_tp_fields; long syscall_nr; From 6ebb686225a83200b94777cfc651c94f4e0f6f50 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Aug 2018 08:47:08 -0300 Subject: [PATCH 30/90] perf trace 
augmented_syscalls: Augment sendto's 'addr' arg It's a 'struct sockaddr' pointer, augment it with the same beautifier as for 'connect' and 'bind', that all receive from userspace that pointer. Doing it in the other direction remains to be done, hooking at the syscalls:sys_exit_{accept4?,recvmsg} tracepoints somehow. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-k2eu68lsphnm2fthc32gq76c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- tools/perf/examples/bpf/augmented_syscalls.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 537bb30895df..759d14e3fe6b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -795,7 +795,8 @@ static struct syscall_fmt { { .name = "sendmsg", .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "sendto", - .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, + [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .arg = { [0] = STRARRAY(which, itimers), }, }, diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 71edb7ad8698..be06d2c9e8c9 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -142,4 +142,17 @@ struct syscall_enter_connect_args { augmented_sockaddr_syscall_enter(connect); +struct syscall_enter_sendto_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + void *buff; + long len; + unsigned long flags; + struct sockaddr *addr_ptr; + long addr_len; +}; + +augmented_sockaddr_syscall_enter(sendto); + license(GPL); From b043cb524d2892be75c78bc348e83863829d50a0 Mon Sep 17 00:00:00 2001 From: Arnaldo 
Carvalho de Melo Date: Wed, 29 Aug 2018 17:11:33 -0300 Subject: [PATCH 31/90] perf trace beauty: Reorganize 'struct sockaddr *' beautifier Use an array to multiplex by sockaddr->sa_family, this way adding new families gets a bit easier and tidy. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-v3s85ra659tc40g1s1xaqoun@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/sockaddr.c | 60 +++++++++++++++++++----------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index 3944a7d54d3c..71a79f72d9d9 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -17,34 +17,52 @@ static const char *socket_families[] = { }; DEFINE_STRARRAY(socket_families); +static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + char tmp[16]; + return scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin->sin_port), + inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp))); +} + +static size_t af_inet6__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + u32 flowinfo = ntohl(sin6->sin6_flowinfo); + char tmp[512]; + size_t printed = scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin6->sin6_port), + inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp))); + if (flowinfo != 0) + printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo); + if (sin6->sin6_scope_id != 0) + printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id); + + return printed; +} + +static size_t af_local__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)sa; + return scnprintf(bf, size, ", path: %s", sun->sun_path); +} + +static size_t 
(*af_scnprintfs[])(struct sockaddr *sa, char *bf, size_t size) = { + [AF_LOCAL] = af_local__scnprintf, + [AF_INET] = af_inet__scnprintf, + [AF_INET6] = af_inet6__scnprintf, +}; + static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size) { - struct sockaddr_in *sin = (struct sockaddr_in *)arg->augmented.args; + struct sockaddr *sa = (struct sockaddr *)arg->augmented.args; char family[32]; size_t printed; - strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sin->sin_family); + strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sa->sa_family); printed = scnprintf(bf, size, "{ .family: %s", family); - if (sin->sin_family == AF_INET) { - char tmp[512]; - printed += scnprintf(bf + printed, size - printed, ", port: %d, addr: %s", ntohs(sin->sin_port), - inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp))); - } else if (sin->sin_family == AF_INET6) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin; - u32 flowinfo = ntohl(sin6->sin6_flowinfo); - char tmp[512]; - - printed += scnprintf(bf + printed, size - printed, ", port: %d, addr: %s", ntohs(sin6->sin6_port), - inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp))); - if (flowinfo != 0) - printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo); - if (sin6->sin6_scope_id != 0) - printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id); - } else if (sin->sin_family == AF_LOCAL) { - struct sockaddr_un *sun = (struct sockaddr_un *)sin; - printed += scnprintf(bf + printed, size - printed, ", path: %s", sun->sun_path); - } + if (sa->sa_family < ARRAY_SIZE(af_scnprintfs) && af_scnprintfs[sa->sa_family]) + printed += af_scnprintfs[sa->sa_family](sa, bf + printed, size - printed); return printed + scnprintf(bf + printed, size - printed, " }"); } From 766e0618e49490bf67a35542880bcecbcee2e5fa Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 
9 Aug 2018 06:56:50 +0200 Subject: [PATCH 32/90] perf report: Create auxiliary trace data files for s390 Create auxiliary trace data log files when invoked with option --itrace=d as in: [root@s35lp76 perf] perf report -i perf.data.aux1 --stdio --itrace=d perf report creates several data files in the current directory named aux.smp.## where ## is a 2 digit hex number with leading zeros representing the CPU number this trace data was recorded from. The file contents is binary and contains the CPU-Measurement Sampling Data Blocks (SDBs). The directory to save the auxiliary trace buffer can be changed using the perf config file and command. Specify section 'auxtrace' keyword 'dumpdir' and assign it a valid directory name. If the directory does not exist or has the wrong file type, the current directory is used. [root@p23lp27 perf]# perf config auxtrace.dumpdir=/tmp [root@p23lp27 perf]# perf config --user -l auxtrace.dumpdir=/tmp [root@p23lp27 perf]# perf report ... [root@p23lp27 perf]# ll /tmp/aux.smp.00 -rw-r--r-- 1 root root 204800 Aug 2 13:48 /tmp/aux.smp.00 [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180809045650.89197-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-cpumsf.c | 94 +++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index d2c78ffd9fee..a2eeebbfb25f 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -147,6 +147,9 @@ #include #include +#include +#include + #include "cpumap.h" #include "color.h" #include "evsel.h" @@ -159,6 +162,7 @@ #include "auxtrace.h" #include "s390-cpumsf.h" #include "s390-cpumsf-kernel.h" +#include "config.h" struct s390_cpumsf { struct auxtrace auxtrace; @@ -170,6 +174,8 @@ struct s390_cpumsf { u32 pmu_type; u16 machine_type; bool 
data_queued; + bool use_logfile; + char *logdir; }; struct s390_cpumsf_queue { @@ -177,6 +183,7 @@ struct s390_cpumsf_queue { unsigned int queue_nr; struct auxtrace_buffer *buffer; int cpu; + FILE *logfile; }; /* Display s390 CPU measurement facility basic-sampling data entry */ @@ -595,6 +602,12 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, buffer->use_size = buffer->size; buffer->use_data = buffer->data; } + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } } else buffer = sfq->buffer; @@ -606,6 +619,13 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, return -ENOMEM; buffer->use_size = buffer->size; buffer->use_data = buffer->data; + + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } } pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, @@ -640,6 +660,23 @@ s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) sfq->sf = sf; sfq->queue_nr = queue_nr; sfq->cpu = -1; + if (sf->use_logfile) { + char *name; + int rc; + + rc = (sf->logdir) + ? 
asprintf(&name, "%s/aux.smp.%02x", + sf->logdir, queue_nr) + : asprintf(&name, "aux.smp.%02x", queue_nr); + if (rc > 0) + sfq->logfile = fopen(name, "w"); + if (sfq->logfile == NULL) { + pr_err("Failed to open auxiliary log file %s," + "continue...\n", name); + sf->use_logfile = false; + } + free(name); + } return sfq; } @@ -850,8 +887,16 @@ static void s390_cpumsf_free_queues(struct perf_session *session) struct auxtrace_queues *queues = &sf->queues; unsigned int i; - for (i = 0; i < queues->nr_queues; i++) + for (i = 0; i < queues->nr_queues; i++) { + struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) + queues->queue_array[i].priv; + + if (sfq != NULL && sfq->logfile) { + fclose(sfq->logfile); + sfq->logfile = NULL; + } zfree(&queues->queue_array[i].priv); + } auxtrace_queues__free(queues); } @@ -864,6 +909,7 @@ static void s390_cpumsf_free(struct perf_session *session) auxtrace_heap__free(&sf->heap); s390_cpumsf_free_queues(session); session->auxtrace = NULL; + free(sf->logdir); free(sf); } @@ -877,17 +923,55 @@ static int s390_cpumsf_get_type(const char *cpuid) /* Check itrace options set on perf report command. * Return true, if none are set or all options specified can be - * handled on s390. + * handled on s390 (currently only option 'd' for logging). * Return false otherwise. */ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) { + bool ison = false; + if (!itops || !itops->set) return true; - pr_err("No --itrace options supported\n"); + ison = itops->inject || itops->instructions || itops->branches || + itops->transactions || itops->ptwrites || + itops->pwr_events || itops->errors || + itops->dont_decode || itops->calls || itops->returns || + itops->callchain || itops->thread_stack || + itops->last_branch; + if (!ison) + return true; + pr_err("Unsupported --itrace options specified\n"); return false; } +/* Check for AUXTRACE dump directory if it is needed. + * On failure print an error message but continue. 
+ * Return 0 on wrong keyword in config file and 1 otherwise. + */ +static int s390_cpumsf__config(const char *var, const char *value, void *cb) +{ + struct s390_cpumsf *sf = cb; + struct stat stbuf; + int rc; + + if (strcmp(var, "auxtrace.dumpdir")) + return 0; + sf->logdir = strdup(value); + if (sf->logdir == NULL) { + pr_err("Failed to find auxtrace log directory %s," + " continue with current directory...\n", value); + return 1; + } + rc = stat(sf->logdir, &stbuf); + if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { + pr_err("Missing auxtrace log directory %s," + " continue with current directory...\n", value); + free(sf->logdir); + sf->logdir = NULL; + } + return 1; +} + int s390_cpumsf_process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -906,6 +990,9 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, err = -EINVAL; goto err_free; } + sf->use_logfile = session->itrace_synth_opts->log; + if (sf->use_logfile) + perf_config(s390_cpumsf__config, sf); err = auxtrace_queues__init(&sf->queues); if (err) @@ -940,6 +1027,7 @@ err_free_queues: auxtrace_queues__free(&sf->queues); session->auxtrace = NULL; err_free: + free(sf->logdir); free(sf); return err; } From 266b851cc2874774a59f04a7b3b059ca0f26569b Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Tue, 28 Aug 2018 18:50:38 -0400 Subject: [PATCH 33/90] tools lib traceevent, perf tools: Split trace-seq related APIs in a separate header file In order to make libtraceevent into a proper library, all its APIs should be defined in corresponding header files. 
This patch splits trace-seq related APIs in a separate header file: trace-seq.h Signed-off-by: Tzvetomir Stoyanov (VMware) Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180828185038.2dcb2743@gandalf.local.home Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 1 + tools/lib/traceevent/event-parse.h | 46 +----------------- tools/lib/traceevent/event-plugin.c | 1 + tools/lib/traceevent/plugin_function.c | 1 + tools/lib/traceevent/plugin_hrtimer.c | 1 + tools/lib/traceevent/plugin_jbd2.c | 1 + tools/lib/traceevent/plugin_kmem.c | 1 + tools/lib/traceevent/plugin_kvm.c | 1 + tools/lib/traceevent/plugin_mac80211.c | 1 + tools/lib/traceevent/plugin_sched_switch.c | 1 + tools/lib/traceevent/plugin_scsi.c | 1 + tools/lib/traceevent/plugin_xen.c | 1 + tools/lib/traceevent/trace-seq.c | 2 + tools/lib/traceevent/trace-seq.h | 55 ++++++++++++++++++++++ tools/perf/util/trace-event.h | 1 + 15 files changed, 71 insertions(+), 44 deletions(-) create mode 100644 tools/lib/traceevent/trace-seq.h diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ce1e20227c64..70a42bec6931 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -24,6 +24,7 @@ #include #include "event-parse.h" #include "event-utils.h" +#include "trace-seq.h" static const char *input_buf; static unsigned long long input_buf_ptr; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 44b7c2d41f9f..fa665c66bfa4 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -26,17 +26,12 @@ #include #include +#include "trace-seq.h" + #ifndef __maybe_unused #define __maybe_unused __attribute__((unused)) #endif -/* ----------------------- trace_seq ----------------------- */ - - -#ifndef TRACE_SEQ_BUF_SIZE -#define TRACE_SEQ_BUF_SIZE 4096 -#endif - #ifndef DEBUG_RECORD #define DEBUG_RECORD 0 #endif @@ -59,43 
+54,6 @@ struct tep_record { #endif }; -enum trace_seq_fail { - TRACE_SEQ__GOOD, - TRACE_SEQ__BUFFER_POISONED, - TRACE_SEQ__MEM_ALLOC_FAILED, -}; - -/* - * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE). - */ - -struct trace_seq { - char *buffer; - unsigned int buffer_size; - unsigned int len; - unsigned int readpos; - enum trace_seq_fail state; -}; - -void trace_seq_init(struct trace_seq *s); -void trace_seq_reset(struct trace_seq *s); -void trace_seq_destroy(struct trace_seq *s); - -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) - __attribute__ ((format (printf, 2, 0))); - -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); - -extern void trace_seq_terminate(struct trace_seq *s); - -extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); -extern int trace_seq_do_printf(struct trace_seq *s); - - /* ----------------------- pevent ----------------------- */ struct tep_handle; diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index f17e25097e1e..ec16a103c0cc 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -15,6 +15,7 @@ #include #include "event-parse.h" #include "event-utils.h" +#include "trace-seq.h" #define LOCAL_PLUGIN_DIR ".traceevent/plugins" diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index 424747475d37..2919042e7dc2 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -23,6 +23,7 @@ #include "event-parse.h" #include "event-utils.h" +#include "trace-seq.h" static struct func_stack { int size; diff --git a/tools/lib/traceevent/plugin_hrtimer.c 
b/tools/lib/traceevent/plugin_hrtimer.c index b43bfec565d8..29b608076ea0 100644 --- a/tools/lib/traceevent/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -23,6 +23,7 @@ #include #include "event-parse.h" +#include "trace-seq.h" static int timer_expire_handler(struct trace_seq *s, struct tep_record *record, diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c index 45a9acd19640..a5e34135dd6a 100644 --- a/tools/lib/traceevent/plugin_jbd2.c +++ b/tools/lib/traceevent/plugin_jbd2.c @@ -22,6 +22,7 @@ #include #include "event-parse.h" +#include "trace-seq.h" #define MINORBITS 20 #define MINORMASK ((1U << MINORBITS) - 1) diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c index 73966b05abce..a7a162575e2c 100644 --- a/tools/lib/traceevent/plugin_kmem.c +++ b/tools/lib/traceevent/plugin_kmem.c @@ -22,6 +22,7 @@ #include #include "event-parse.h" +#include "trace-seq.h" static int call_site_handler(struct trace_seq *s, struct tep_record *record, struct event_format *event, void *context) diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index 1d0d15906225..a0dfd3d0f197 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -23,6 +23,7 @@ #include #include "event-parse.h" +#include "trace-seq.h" #ifdef HAVE_UDIS86 diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c index de50a5316203..0b7779444b63 100644 --- a/tools/lib/traceevent/plugin_mac80211.c +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -22,6 +22,7 @@ #include #include "event-parse.h" +#include "trace-seq.h" #define INDENT 65 diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index eecb4bd95c11..582d3be2849b 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -22,6 +22,7 @@ #include #include "event-parse.h" +#include 
"trace-seq.h" static void write_state(struct trace_seq *s, int val) { diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c index 5ec346f6b842..4eba25cc1431 100644 --- a/tools/lib/traceevent/plugin_scsi.c +++ b/tools/lib/traceevent/plugin_scsi.c @@ -3,6 +3,7 @@ #include #include #include "event-parse.h" +#include "trace-seq.h" typedef unsigned long sector_t; typedef uint64_t u64; diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c index b2acbd6e9c86..bc0496e4c296 100644 --- a/tools/lib/traceevent/plugin_xen.c +++ b/tools/lib/traceevent/plugin_xen.c @@ -3,6 +3,7 @@ #include #include #include "event-parse.h" +#include "trace-seq.h" #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index e3bac4543d3b..8ff1d55954d1 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -3,6 +3,8 @@ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt * */ +#include "trace-seq.h" + #include #include #include diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h new file mode 100644 index 000000000000..d68ec69f8d1a --- /dev/null +++ b/tools/lib/traceevent/trace-seq.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt + * + */ + +#ifndef _TRACE_SEQ_H +#define _TRACE_SEQ_H + +#include +#include + +/* ----------------------- trace_seq ----------------------- */ + +#ifndef TRACE_SEQ_BUF_SIZE +#define TRACE_SEQ_BUF_SIZE 4096 +#endif + +enum trace_seq_fail { + TRACE_SEQ__GOOD, + TRACE_SEQ__BUFFER_POISONED, + TRACE_SEQ__MEM_ALLOC_FAILED, +}; + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE). 
+ */ + +struct trace_seq { + char *buffer; + unsigned int buffer_size; + unsigned int len; + unsigned int readpos; + enum trace_seq_fail state; +}; + +void trace_seq_init(struct trace_seq *s); +void trace_seq_reset(struct trace_seq *s); +void trace_seq_destroy(struct trace_seq *s); + +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); + +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); + +extern void trace_seq_terminate(struct trace_seq *s); + +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); +extern int trace_seq_do_printf(struct trace_seq *s); + +#endif /* _TRACE_SEQ_H */ diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 40204ec3a7a2..c69d77d7cf55 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -3,6 +3,7 @@ #define _PERF_UTIL_TRACE_EVENT_H #include +#include #include "parse-events.h" struct machine; From 664b6a95d771a6dcd7069996c825a03be411ef99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Aug 2018 08:48:44 -0300 Subject: [PATCH 34/90] perf bpf: Add syscall_exit() helper So that we can hook to the syscalls:sys_exit_SYSCALL tracepoints in addition to the syscalls:sys_enter_SYSCALL we hook using the syscall_enter() helper. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-6qh8aph1jklyvdu7w89c0izc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/include/bpf/bpf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h index 47897d65e799..52b6d87fe822 100644 --- a/tools/perf/include/bpf/bpf.h +++ b/tools/perf/include/bpf/bpf.h @@ -26,6 +26,9 @@ struct bpf_map { #define syscall_enter(name) \ SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name +#define syscall_exit(name) \ + SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name + #define license(name) \ char _license[] SEC("license") = #name; \ int _version SEC("version") = LINUX_VERSION_CODE; From 5e2d8a5acc99e7b9df9be216f0a73855e865f9a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Aug 2018 10:02:23 -0300 Subject: [PATCH 35/90] perf augmented_syscalls: Update the header comments Reflecting the fact that it now augments more than syscalls:sys_enter_SYSCALL tracepoints that have filename strings as args. Also mention how the extra data is handled by the by now modified 'perf trace' beautifiers, that will use special "augmented" beautifiers when extra data is found after the expected syscall enter/exit tracepoints. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ybskanehmdilj5fs7080nz1g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index be06d2c9e8c9..bfa28eaf27b3 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Augment the openat syscall with the contents of the filename pointer argument. + * Augment syscalls with the contents of the pointer arguments. * * Test it with: * @@ -10,12 +10,10 @@ * the last one should be the one for '/etc/passwd'. * * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. + * expected by the 'perf trace' beautifiers, and can be used by them, that will + * check if perf_sample->raw_data is more than what is expected for each + * syscalls:sys_{enter,exit}_SYSCALL tracepoint, using the extra data as the + * contents of pointer arguments. */ #include From 4c8f0a726ef808244788a237e52ce3c612bff8c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Aug 2018 11:50:21 -0300 Subject: [PATCH 36/90] perf trace augmented_syscalls: Rename augmented_*_syscall__enter to just *_syscall As we'll also hook into the syscalls:sys_exit_SYSCALL for which there are enter hooks. 
This way we'll be able to iterate the ELF file for the eBPF program, find the syscalls that have hooks and filter them out from the general raw_syscalls:sys_{enter,exit} tracepoint for not-yet-augmented (the ones with pointer arguments not yet being attached to the usual syscalls tracepoint payload) and non augmentable syscalls (syscalls without pointer arguments). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-cl1xyghwb1usp500354mv37h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index bfa28eaf27b3..253b3ccbd17d 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -32,7 +32,7 @@ struct augmented_filename { char value[256]; }; -#define augmented_filename_syscall_enter(syscall) \ +#define augmented_filename_syscall(syscall) \ struct augmented_enter_##syscall##_args { \ struct syscall_enter_##syscall##_args args; \ struct augmented_filename filename; \ @@ -60,7 +60,7 @@ struct syscall_enter_openat_args { long mode; }; -augmented_filename_syscall_enter(openat); +augmented_filename_syscall(openat); struct syscall_enter_open_args { unsigned long long common_tp_fields; @@ -70,7 +70,7 @@ struct syscall_enter_open_args { long mode; }; -augmented_filename_syscall_enter(open); +augmented_filename_syscall(open); struct syscall_enter_inotify_add_watch_args { unsigned long long common_tp_fields; @@ -80,7 +80,7 @@ struct syscall_enter_inotify_add_watch_args { long mask; }; -augmented_filename_syscall_enter(inotify_add_watch); +augmented_filename_syscall(inotify_add_watch); struct statbuf; @@ -91,13 +91,13 @@ struct syscall_enter_newstat_args { struct stat *statbuf; }; -augmented_filename_syscall_enter(newstat); 
+augmented_filename_syscall(newstat); #ifndef _K_SS_MAXSIZE #define _K_SS_MAXSIZE 128 #endif -#define augmented_sockaddr_syscall_enter(syscall) \ +#define augmented_sockaddr_syscall(syscall) \ struct augmented_enter_##syscall##_args { \ struct syscall_enter_##syscall##_args args; \ struct sockaddr_storage addr; \ @@ -128,7 +128,7 @@ struct syscall_enter_bind_args { unsigned long addrlen; }; -augmented_sockaddr_syscall_enter(bind); +augmented_sockaddr_syscall(bind); struct syscall_enter_connect_args { unsigned long long common_tp_fields; @@ -138,7 +138,7 @@ struct syscall_enter_connect_args { unsigned long addrlen; }; -augmented_sockaddr_syscall_enter(connect); +augmented_sockaddr_syscall(connect); struct syscall_enter_sendto_args { unsigned long long common_tp_fields; @@ -151,6 +151,6 @@ struct syscall_enter_sendto_args { long addr_len; }; -augmented_sockaddr_syscall_enter(sendto); +augmented_sockaddr_syscall(sendto); license(GPL); From f5b076dc01e77fa016de8439f8ac21d1c310c5be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Aug 2018 12:32:35 -0300 Subject: [PATCH 37/90] perf trace augmented_syscalls: Hook into syscalls:sys_exit_SYSCALL too Hook the pair enter/exit when using augmented_{filename,sockaddr,etc}_syscall(), this way we'll be able to see what entries are in the ELF sections generated from augmented_syscalls.c and filter them out from the main raw_syscalls:* tracepoints used by 'perf trace'. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-cyav42qj5yylolw4attcw99z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 253b3ccbd17d..1419a9186937 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -26,6 +26,12 @@ struct bpf_map SEC("maps") __augmented_syscalls__ = { .max_entries = __NR_CPUS__, }; +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + struct augmented_filename { int size; int reserved; @@ -49,6 +55,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ augmented_args.filename.size)); \ return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ } struct syscall_enter_openat_args { @@ -116,6 +126,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ &augmented_args, \ sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ } struct sockaddr; From c4191e55b8741f72d44c7c1435c340681ae1ea4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Aug 2018 13:37:28 -0300 Subject: [PATCH 38/90] perf trace: Show comm and tid for tracepoint events So that all events have that info, improving reading by having information better aligned, etc. 
Before: # echo 1 > /proc/sys/vm/drop_caches # perf trace -e block:*,ext4:*,tools/perf/examples/bpf/augmented_syscalls.c,close cat tools/perf/examples/bpf/hello.c 0.000 ( ): #include int syscall_enter(openat)(void *args) { puts("Hello, world\n"); return 0; } license(GPL); cat/2731 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.025 ( ): syscalls:sys_exit_openat:0x3 0.063 ( 0.022 ms): cat/2731 close(fd: 3) = 0 0.110 ( ): cat/2731 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.123 ( ): syscalls:sys_exit_openat:0x3 0.243 ( 0.008 ms): cat/2731 close(fd: 3) = 0 0.485 ( ): cat/2731 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.500 ( ): syscalls:sys_exit_open:0x3 0.531 ( 0.017 ms): cat/2731 close(fd: 3) = 0 0.587 ( ): cat/2731 openat(dfd: CWD, filename: tools/perf/examples/bpf/hello.c) 0.601 ( ): syscalls:sys_exit_openat:0x3 0.631 ( ): ext4:ext4_es_lookup_extent_enter:dev 253,2 ino 1311399 lblk 0 0.639 ( ): ext4:ext4_es_lookup_extent_exit:dev 253,2 ino 1311399 found 1 [0/1) 5276651 W0x10 0.654 ( ): block:block_bio_queue:253,2 R 42213208 + 8 [cat] 0.663 ( ): block:block_bio_remap:8,0 R 58206040 + 8 <- (253,2) 42213208 0.671 ( ): block:block_bio_remap:8,0 R 175570776 + 8 <- (8,6) 58206040 0.678 ( ): block:block_bio_queue:8,0 R 175570776 + 8 [cat] 0.692 ( ): block:block_getrq:8,0 R 175570776 + 8 [cat] 0.700 ( ): block:block_plug:[cat] 0.708 ( ): block:block_rq_insert:8,0 R 4096 () 175570776 + 8 [cat] 0.713 ( ): block:block_unplug:[cat] 1 0.716 ( ): block:block_rq_issue:8,0 R 4096 () 175570776 + 8 [cat] 0.949 ( 0.007 ms): cat/2731 close(fd: 3) = 0 0.969 ( 0.006 ms): cat/2731 close(fd: 1) = 0 0.982 ( 0.006 ms): cat/2731 close(fd: 2) = 0 # After: # echo 1 > /proc/sys/vm/drop_caches # perf trace -e block:*,ext4:*,tools/perf/examples/bpf/augmented_syscalls.c,close cat tools/perf/examples/bpf/hello.c 0.000 ( ): cat/1380 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC)#include int syscall_enter(openat)(void *args) { 
puts("Hello, world\n"); return 0; } license(GPL); 0.024 ( ): cat/1380 syscalls:sys_exit_openat:0x3 0.063 ( 0.024 ms): cat/1380 close(fd: 3) = 0 0.114 ( ): cat/1380 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.127 ( ): cat/1380 syscalls:sys_exit_openat:0x3 0.247 ( 0.009 ms): cat/1380 close(fd: 3) = 0 0.484 ( ): cat/1380 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.499 ( ): cat/1380 syscalls:sys_exit_open:0x3 0.613 ( 0.010 ms): cat/1380 close(fd: 3) = 0 0.662 ( ): cat/1380 openat(dfd: CWD, filename: tools/perf/examples/bpf/hello.c) 0.678 ( ): cat/1380 syscalls:sys_exit_openat:0x3 0.712 ( ): cat/1380 ext4:ext4_es_lookup_extent_enter:dev 253,2 ino 1311399 lblk 0 0.721 ( ): cat/1380 ext4:ext4_es_lookup_extent_exit:dev 253,2 ino 1311399 found 1 [0/1) 5276651 W0x10 0.734 ( ): cat/1380 block:block_bio_queue:253,2 R 42213208 + 8 [cat] 0.745 ( ): cat/1380 block:block_bio_remap:8,0 R 58206040 + 8 <- (253,2) 42213208 0.754 ( ): cat/1380 block:block_bio_remap:8,0 R 175570776 + 8 <- (8,6) 58206040 0.761 ( ): cat/1380 block:block_bio_queue:8,0 R 175570776 + 8 [cat] 0.780 ( ): cat/1380 block:block_getrq:8,0 R 175570776 + 8 [cat] 0.791 ( ): cat/1380 block:block_plug:[cat] 0.802 ( ): cat/1380 block:block_rq_insert:8,0 R 4096 () 175570776 + 8 [cat] 0.806 ( ): cat/1380 block:block_unplug:[cat] 1 0.810 ( ): cat/1380 block:block_rq_issue:8,0 R 4096 () 175570776 + 8 [cat] 1.005 ( 0.011 ms): cat/1380 close(fd: 3) = 0 1.031 ( 0.008 ms): cat/1380 close(fd: 1) = 0 1.048 ( 0.008 ms): cat/1380 close(fd: 2) = 0 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-us1mwsupxffs4jlm3uqm5dvj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 759d14e3fe6b..97ace635bed8 100644 --- a/tools/perf/builtin-trace.c +++ 
b/tools/perf/builtin-trace.c @@ -2049,6 +2049,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { + struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); int callchain_ret = 0; if (sample->callchain) { @@ -2066,21 +2067,18 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, if (trace->trace_syscalls) fprintf(trace->output, "( ): "); + if (thread) + trace__fprintf_comm_tid(trace, thread, trace->output); + if (evsel == trace->syscalls.events.augmented) { int id = perf_evsel__sc_tp_uint(evsel, id, sample); struct syscall *sc = trace__syscall_info(trace, evsel, id); if (sc) { - struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - - if (thread) { - trace__fprintf_comm_tid(trace, thread, trace->output); - fprintf(trace->output, "%s(", sc->name); - trace__fprintf_sys_enter(trace, evsel, sample); - fputc(')', trace->output); - thread__put(thread); - goto newline; - } + fprintf(trace->output, "%s(", sc->name); + trace__fprintf_sys_enter(trace, evsel, sample); + fputc(')', trace->output); + goto newline; } /* @@ -2110,6 +2108,7 @@ newline: trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + thread__put(thread); out: return 0; } From d50ed0ce820414dbe249a6ad5c9830e29cc46fcc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:10 +0200 Subject: [PATCH 39/90] perf stat: Use evsel->threads in create_perf_stat_counter() Get rid of the evsel_list dependency, here we can use the evsel->threads copy of the struct thread_map. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d097b5b47eb8..d389ed623715 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -293,7 +293,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); - return perf_evsel__open_per_thread(evsel, evsel_list->threads); + return perf_evsel__open_per_thread(evsel, evsel->threads); } static int process_synthesized_event(struct perf_tool *tool __maybe_unused, From 728c0ee0a896fcb0957b496afdb78bf195397645 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:11 +0200 Subject: [PATCH 40/90] perf stat: Move 'initial_delay' to 'struct perf_stat_config' Move the static 'initial_delay' variable to 'struct perf_stat_config', so it can be passed around and used outside the 'perf stat' command. Add 'struct perf_stat_config' argument to create_perf_stat_counter() and use its 'initial_delay' member instead of the static one. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 ++++++++-------- tools/perf/util/stat.h | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d389ed623715..719abdd3b7de 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -164,7 +164,6 @@ static bool group = false; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; -static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; @@ -236,7 +235,8 @@ static void perf_stat__reset_stats(void) perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } -static int create_perf_stat_counter(struct perf_evsel *evsel) +static int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config) { struct perf_event_attr *attr = &evsel->attr; struct perf_evsel *leader = evsel->leader; @@ -286,7 +286,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) * In case of initial_delay we enable tracee * events manually. 
*/ - if (target__none(&target) && !initial_delay) + if (target__none(&target) && !config->initial_delay) attr->enable_on_exec = 1; } @@ -428,15 +428,15 @@ static void process_interval(void) static void enable_counters(void) { - if (initial_delay) - usleep(initial_delay * USEC_PER_MSEC); + if (stat_config.initial_delay) + usleep(stat_config.initial_delay * USEC_PER_MSEC); /* * We need to enable counters only if: * - we don't have tracee (attaching to task or cpu) * - we have initial delay configured */ - if (!target__none(&target) || initial_delay) + if (!target__none(&target) || stat_config.initial_delay) perf_evlist__enable(evsel_list); } @@ -609,7 +609,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) evlist__for_each_entry(evsel_list, counter) { try_again: - if (create_perf_stat_counter(counter) < 0) { + if (create_perf_stat_counter(counter, &stat_config) < 0) { /* Weak group failed. Reset the group. */ if ((errno == EINVAL || errno == EBADF) && @@ -2027,7 +2027,7 @@ static const struct option stat_options[] = { "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, "aggregate counts per thread", AGGR_THREAD), - OPT_UINTEGER('D', "delay", &initial_delay, + OPT_UINTEGER('D', "delay", &stat_config.initial_delay, "ms to wait before starting measurement after program start"), OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, "Only print computed metrics. 
No raw values", enable_metric_only), diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 36efb986f7fc..91e6609fce6e 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -6,6 +6,7 @@ #include #include "xyarray.h" #include "rblist.h" +#include "perf.h" struct stats { double n, mean, M2; @@ -85,14 +86,15 @@ struct runtime_stat { }; struct perf_stat_config { - enum aggr_mode aggr_mode; - bool scale; - FILE *output; - unsigned int interval; - unsigned int timeout; - int times; - struct runtime_stat *stats; - int stats_num; + enum aggr_mode aggr_mode; + bool scale; + FILE *output; + unsigned int interval; + unsigned int timeout; + unsigned int initial_delay; + int times; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); From 5698f26b46e4f47f2371418eb92732048fa4fa66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:12 +0200 Subject: [PATCH 41/90] perf stat: Move 'no_inherit' to 'struct perf_stat_config' Move the static 'no_inherit' variable to 'struct perf_stat_config', so it can be passed around and used outside the 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++--- tools/perf/util/stat.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 719abdd3b7de..84dbac526925 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -148,7 +148,6 @@ typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); #define METRIC_ONLY_LEN 20 static int run_count = 1; -static bool no_inherit = false; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -254,7 +253,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, if (leader->nr_members > 1) attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - attr->inherit = !no_inherit; + attr->inherit = !config->no_inherit; /* * Some events get initialized with sample_(period/type) set, @@ -1969,7 +1968,7 @@ static const struct option stat_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &evsel_list, "filter", "event filter", parse_filter), - OPT_BOOLEAN('i', "no-inherit", &no_inherit, + OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit, "child tasks do not inherit counters"), OPT_STRING('p', "pid", &target.pid, "pid", "stat events on existing process id"), diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 91e6609fce6e..53b2415ba3f3 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -88,6 +88,7 @@ struct runtime_stat { struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; + bool no_inherit; FILE *output; unsigned int interval; unsigned int timeout; From 35386233fcf78f20cb8a51199518da9f81eca280 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:13 +0200 Subject: [PATCH 42/90] perf stat: Use local config arg for 
scale in create_perf_stat_counter() Use the local 'scale' member in the 'struct perf_stat_config' argument instead of the global 'stat_config' variable, to make the function independent. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 84dbac526925..47789558899a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -240,7 +240,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct perf_evsel *leader = evsel->leader; - if (stat_config.scale) { + if (config->scale) { attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; } From 7d9ad16afe2bfc73b8967cc2aa2dc30f0170a8e2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:14 +0200 Subject: [PATCH 43/90] perf stat: Add 'identifier' flag to 'struct perf_stat_config' Add 'identifier' flag to 'struct perf_stat_config' to carry the info whether to use PERF_SAMPLE_IDENTIFIER for events. This makes create_perf_stat_counter() independent. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 ++++++++++++---------- tools/perf/util/stat.h | 1 + 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 47789558899a..48c88f568fe1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -261,16 +261,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, */ attr->sample_period = 0; - /* - * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless - * while avoiding that older tools show confusing messages. - * - * However for pipe sessions we need to keep it zero, - * because script's perf_evsel__check_attr is triggered - * by attr->sample_type != 0, and we can't run it on - * stat sessions. - */ - if (!(STAT_RECORD && perf_stat.data.is_pipe)) + if (config->identifier) attr->sample_type = PERF_SAMPLE_IDENTIFIER; /* @@ -3064,6 +3055,17 @@ int cmd_stat(int argc, const char **argv) if (perf_stat_init_aggr_mode()) goto out; + /* + * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless + * while avoiding that older tools show confusing messages. + * + * However for pipe sessions we need to keep it zero, + * because script's perf_evsel__check_attr is triggered + * by attr->sample_type != 0, and we can't run it on + * stat sessions. + */ + stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe); + /* * We dont want to block the signals - that would cause * child tasks to inherit that and Ctrl-C would not work. 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 53b2415ba3f3..918cde064cdc 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -89,6 +89,7 @@ struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; bool no_inherit; + bool identifier; FILE *output; unsigned int interval; unsigned int timeout; From 318ec1841a3f26799fe663d8f979a57623c0c470 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:15 +0200 Subject: [PATCH 44/90] perf tools: Switch 'session' argument to 'evlist' in perf_event__synthesize_attrs() To be able to pass in other than session's evlist. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/util/header.c | 6 +++--- tools/perf/util/header.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22ebeb92ac51..9853552bcf16 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -758,7 +758,7 @@ static int record__synthesize(struct record *rec, bool tail) * We need to synthesize events first, because some * features works on top of them (on report side). 
*/ - err = perf_event__synthesize_attrs(tool, session, + err = perf_event__synthesize_attrs(tool, rec->evlist, process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 48c88f568fe1..8291f503d0cc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -459,7 +459,7 @@ static int perf_stat_synthesize_config(bool is_pipe) int err; if (is_pipe) { - err = perf_event__synthesize_attrs(NULL, perf_stat.session, + err = perf_event__synthesize_attrs(NULL, evsel_list, process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3cadc252dd89..91e6d9cfd906 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3637,13 +3637,13 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) } int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, - perf_event__handler_t process) + struct perf_evlist *evlist, + perf_event__handler_t process) { struct perf_evsel *evsel; int err = 0; - evlist__for_each_entry(session->evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, evsel->id, process); if (err) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 6d7fe44aadc0..ff2a1263fb9b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -124,7 +124,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, + struct perf_evlist *evlist, perf_event__handler_t process); int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct perf_evsel *evsel, From 650d622046024623e71fea1f28acf1edb7e61a81 Mon Sep 17 00:00:00 2001 From: Jiri 
Olsa Date: Thu, 30 Aug 2018 08:32:16 +0200 Subject: [PATCH 45/90] perf evsel: Introduce perf_evsel__store_ids() Add perf_evsel__store_ids() from stat's store_counter_ids() code to the evsel class, so that it can be used globally. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 33 +-------------------------------- tools/perf/util/evsel.c | 29 +++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8291f503d0cc..45bbd156d496 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -497,37 +497,6 @@ static int perf_stat_synthesize_config(bool is_pipe) return 0; } -#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) - -static int __store_counter_ids(struct perf_evsel *counter) -{ - int cpu, thread; - - for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { - for (thread = 0; thread < xyarray__max_y(counter->fd); - thread++) { - int fd = FD(counter, cpu, thread); - - if (perf_evlist__id_add_fd(evsel_list, counter, - cpu, thread, fd) < 0) - return -1; - } - } - - return 0; -} - -static int store_counter_ids(struct perf_evsel *counter) -{ - struct cpu_map *cpus = counter->cpus; - struct thread_map *threads = counter->threads; - - if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) - return -ENOMEM; - - return __store_counter_ids(counter); -} - static bool perf_evsel__should_store_id(struct perf_evsel *counter) { return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; @@ -658,7 +627,7 @@ try_again: unit_width = l; if (perf_evsel__should_store_id(counter) && - store_counter_ids(counter)) + perf_evsel__store_ids(counter, evsel_list)) return -1; } diff --git a/tools/perf/util/evsel.c 
b/tools/perf/util/evsel.c index 1a61628a1c12..4ec909d57e9c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2940,3 +2940,32 @@ struct perf_env *perf_evsel__env(struct perf_evsel *evsel) return evsel->evlist->env; return NULL; } + +static int store_evsel_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + int cpu, thread; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); + thread++) { + int fd = FD(evsel, cpu, thread); + + if (perf_evlist__id_add_fd(evlist, evsel, + cpu, thread, fd) < 0) + return -1; + } + } + + return 0; +} + +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + struct cpu_map *cpus = evsel->cpus; + struct thread_map *threads = evsel->threads; + + if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr)) + return -ENOMEM; + + return store_evsel_ids(evsel, evlist); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 163c960614d3..4f8430a85531 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -481,4 +481,5 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, struct perf_env *perf_evsel__env(struct perf_evsel *evsel); +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist); #endif /* __PERF_EVSEL_H */ From d09cefd2ef9945b4b767bb67f473a0eb2066374f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:17 +0200 Subject: [PATCH 46/90] perf stat: Move create_perf_stat_counter() to stat.c Move create_perf_stat_counter() to the 'stat' class, so that we can use it globally. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 54 +-------------------------------------- tools/perf/util/stat.c | 53 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 4 +++ 3 files changed, 58 insertions(+), 53 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 45bbd156d496..142cff8eb12b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -234,58 +234,6 @@ static void perf_stat__reset_stats(void) perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } -static int create_perf_stat_counter(struct perf_evsel *evsel, - struct perf_stat_config *config) -{ - struct perf_event_attr *attr = &evsel->attr; - struct perf_evsel *leader = evsel->leader; - - if (config->scale) { - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - } - - /* - * The event is part of non trivial group, let's enable - * the group read (for leader) and ID retrieval for all - * members. - */ - if (leader->nr_members > 1) - attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - - attr->inherit = !config->no_inherit; - - /* - * Some events get initialized with sample_(period/type) set, - * like tracepoints. Clear it up for counting. - */ - attr->sample_period = 0; - - if (config->identifier) - attr->sample_type = PERF_SAMPLE_IDENTIFIER; - - /* - * Disabling all counters initially, they will be enabled - * either manually by us or by kernel via enable_on_exec - * set later. - */ - if (perf_evsel__is_group_leader(evsel)) { - attr->disabled = 1; - - /* - * In case of initial_delay we enable tracee - * events manually. 
- */ - if (target__none(&target) && !config->initial_delay) - attr->enable_on_exec = 1; - } - - if (target__has_cpu(&target) && !target__has_per_thread(&target)) - return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); - - return perf_evsel__open_per_thread(evsel, evsel->threads); -} - static int process_synthesized_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -568,7 +516,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) evlist__for_each_entry(evsel_list, counter) { try_again: - if (create_perf_stat_counter(counter, &stat_config) < 0) { + if (create_perf_stat_counter(counter, &stat_config, &target) < 0) { /* Weak group failed. Reset the group. */ if ((errno == EINVAL || errno == EBADF) && diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index a0061e0b0fad..3bd24255376a 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -435,3 +435,56 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) return ret; } + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target) +{ + struct perf_event_attr *attr = &evsel->attr; + struct perf_evsel *leader = evsel->leader; + + if (config->scale) { + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* + * The event is part of non trivial group, let's enable + * the group read (for leader) and ID retrieval for all + * members. + */ + if (leader->nr_members > 1) + attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; + + attr->inherit = !config->no_inherit; + + /* + * Some events get initialized with sample_(period/type) set, + * like tracepoints. Clear it up for counting. 
+ */ + attr->sample_period = 0; + + if (config->identifier) + attr->sample_type = PERF_SAMPLE_IDENTIFIER; + + /* + * Disabling all counters initially, they will be enabled + * either manually by us or by kernel via enable_on_exec + * set later. + */ + if (perf_evsel__is_group_leader(evsel)) { + attr->disabled = 1; + + /* + * In case of initial_delay we enable tracee + * events manually. + */ + if (target__none(target) && !config->initial_delay) + attr->enable_on_exec = 1; + } + + if (target__has_cpu(target) && !target__has_per_thread(target)) + return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); + + return perf_evsel__open_per_thread(evsel, evsel->threads); +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 918cde064cdc..8fb596641545 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -175,4 +175,8 @@ int perf_event__process_stat_event(struct perf_tool *tool, size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target); #endif From 491073a6126644d3c60b677b777006deb3c0e16b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:18 +0200 Subject: [PATCH 47/90] perf stat: Rename 'is_pipe' argument to 'attrs' in perf_stat_synthesize_config() The attrs name makes more sense. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 142cff8eb12b..0a358c2e1a93 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -402,11 +402,11 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static int perf_stat_synthesize_config(bool is_pipe) +static int perf_stat_synthesize_config(bool attrs) { int err; - if (is_pipe) { + if (attrs) { err = perf_event__synthesize_attrs(NULL, evsel_list, process_synthesized_event); if (err < 0) { @@ -418,7 +418,7 @@ static int perf_stat_synthesize_config(bool is_pipe) err = perf_event__synthesize_extra_attr(NULL, evsel_list, process_synthesized_event, - is_pipe); + attrs); err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, process_synthesized_event, From 73d586c3917d5109bb547c16d90d0eb97203986a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:19 +0200 Subject: [PATCH 48/90] perf stat: Add 'struct perf_stat_config' argument to perf_stat_synthesize_config() Add a 'struct perf_stat_config' argument to perf_stat_synthesize_config(), so we could synthesize arbitrary config. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0a358c2e1a93..d0d19a5ffa85 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -402,7 +402,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static int perf_stat_synthesize_config(bool attrs) +static int perf_stat_synthesize_config(struct perf_stat_config *config, + bool attrs) { int err; @@ -435,7 +436,7 @@ static int perf_stat_synthesize_config(bool attrs) return err; } - err = perf_event__synthesize_stat_config(NULL, &stat_config, + err = perf_event__synthesize_stat_config(NULL, config, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize config.\n"); @@ -606,7 +607,7 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(is_pipe); + err = perf_stat_synthesize_config(&stat_config, is_pipe); if (err < 0) return err; } From 1821f4eb480bdd3c7c2a1863431ba539c7b0c1f8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:20 +0200 Subject: [PATCH 49/90] perf stat: Add 'struct perf_tool' argument to perf_stat_synthesize_config() So that we can use the function outside the 'perf stat' command with standard synthesize functions, that take 'struct perf_tool *' argument. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d0d19a5ffa85..ae5029875e87 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -403,12 +403,13 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf } static int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, bool attrs) { int err; if (attrs) { - err = perf_event__synthesize_attrs(NULL, evsel_list, + err = perf_event__synthesize_attrs(tool, evsel_list, process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); @@ -416,12 +417,12 @@ static int perf_stat_synthesize_config(struct perf_stat_config *config, } } - err = perf_event__synthesize_extra_attr(NULL, + err = perf_event__synthesize_extra_attr(tool, evsel_list, process_synthesized_event, attrs); - err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, + err = perf_event__synthesize_thread_map2(tool, evsel_list->threads, process_synthesized_event, NULL); if (err < 0) { @@ -429,14 +430,14 @@ static int perf_stat_synthesize_config(struct perf_stat_config *config, return err; } - err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, + err = perf_event__synthesize_cpu_map(tool, evsel_list->cpus, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize thread map.\n"); return err; } - err = perf_event__synthesize_stat_config(NULL, config, + err = perf_event__synthesize_stat_config(tool, config, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize config.\n"); @@ -607,7 +608,7 @@ try_again: if (err < 0) return err; - err = 
perf_stat_synthesize_config(&stat_config, is_pipe); + err = perf_stat_synthesize_config(&stat_config, NULL, is_pipe); if (err < 0) return err; } From 1c21e9899d6a9ea72d4d678faa7b0ec22bcf59a9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:21 +0200 Subject: [PATCH 50/90] perf stat: Add 'struct perf_evlist' argument to perf_stat_synthesize_config() Get rid of the 'evsel_list' global variable dependency, here in perf_stat_synthesize_config() we are adding the 'evlist' arg. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ae5029875e87..cb36344c25b7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -404,12 +404,13 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf static int perf_stat_synthesize_config(struct perf_stat_config *config, struct perf_tool *tool, + struct perf_evlist *evlist, bool attrs) { int err; if (attrs) { - err = perf_event__synthesize_attrs(tool, evsel_list, + err = perf_event__synthesize_attrs(tool, evlist, process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); @@ -417,12 +418,11 @@ static int perf_stat_synthesize_config(struct perf_stat_config *config, } } - err = perf_event__synthesize_extra_attr(tool, - evsel_list, + err = perf_event__synthesize_extra_attr(tool, evlist, process_synthesized_event, attrs); - err = perf_event__synthesize_thread_map2(tool, evsel_list->threads, + err = perf_event__synthesize_thread_map2(tool, evlist->threads, process_synthesized_event, NULL); if (err < 0) { @@ -430,7 +430,7 @@ static int perf_stat_synthesize_config(struct perf_stat_config *config, return err; } - err = 
perf_event__synthesize_cpu_map(tool, evsel_list->cpus, + err = perf_event__synthesize_cpu_map(tool, evlist->cpus, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize thread map.\n"); @@ -608,7 +608,8 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(&stat_config, NULL, is_pipe); + err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, + is_pipe); if (err < 0) return err; } From c2c247f2dd87706961fa16d033f9dbf173145e70 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:22 +0200 Subject: [PATCH 51/90] perf stat: Add 'perf_event__handler_t' argument to perf_stat_synthesize_config() So that it's completely independent and can be used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cb36344c25b7..1171d4e00276 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -405,40 +405,37 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf static int perf_stat_synthesize_config(struct perf_stat_config *config, struct perf_tool *tool, struct perf_evlist *evlist, + perf_event__handler_t process, bool attrs) { int err; if (attrs) { - err = perf_event__synthesize_attrs(tool, evlist, - process_synthesized_event); + err = perf_event__synthesize_attrs(tool, evlist, process); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); return err; } } - err = perf_event__synthesize_extra_attr(tool, evlist, - process_synthesized_event, + err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs); err = perf_event__synthesize_thread_map2(tool, evlist->threads, - 
process_synthesized_event, - NULL); + process, NULL); if (err < 0) { pr_err("Couldn't synthesize thread map.\n"); return err; } err = perf_event__synthesize_cpu_map(tool, evlist->cpus, - process_synthesized_event, NULL); + process, NULL); if (err < 0) { pr_err("Couldn't synthesize thread map.\n"); return err; } - err = perf_event__synthesize_stat_config(tool, config, - process_synthesized_event, NULL); + err = perf_event__synthesize_stat_config(tool, config, process, NULL); if (err < 0) { pr_err("Couldn't synthesize config.\n"); return err; @@ -609,7 +606,7 @@ try_again: return err; err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, - is_pipe); + process_synthesized_event, is_pipe); if (err < 0) return err; } From 0a4e64d391a2c771ae33e648cf84d4492369560c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:23 +0200 Subject: [PATCH 52/90] perf stat: Move perf_stat_synthesize_config() to stat.c So that it can be used globally. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 42 --------------------------------------- tools/perf/util/stat.c | 42 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 6 ++++++ 3 files changed, 48 insertions(+), 42 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1171d4e00276..54768ec15dbc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -402,48 +402,6 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static int perf_stat_synthesize_config(struct perf_stat_config *config, - struct perf_tool *tool, - struct perf_evlist *evlist, - perf_event__handler_t process, - bool attrs) -{ - int err; - - if (attrs) { - err = perf_event__synthesize_attrs(tool, 
evlist, process); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - return err; - } - } - - err = perf_event__synthesize_extra_attr(tool, evlist, process, - attrs); - - err = perf_event__synthesize_thread_map2(tool, evlist->threads, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_cpu_map(tool, evlist->cpus, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_stat_config(tool, config, process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize config.\n"); - return err; - } - - return 0; -} - static bool perf_evsel__should_store_id(struct perf_evsel *counter) { return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 3bd24255376a..5d3172bcc4ae 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -488,3 +488,45 @@ int create_perf_stat_counter(struct perf_evsel *evsel, return perf_evsel__open_per_thread(evsel, evsel->threads); } + +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs) +{ + int err; + + if (attrs) { + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + err = perf_event__synthesize_extra_attr(tool, evlist, process, + attrs); + + err = perf_event__synthesize_thread_map2(tool, evlist->threads, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(tool, evlist->cpus, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(tool, config, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + 
return err; + } + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 8fb596641545..da6a706daecc 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,6 +7,7 @@ #include "xyarray.h" #include "rblist.h" #include "perf.h" +#include "event.h" struct stats { double n, mean, M2; @@ -179,4 +180,9 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_stat_config *config, struct target *target); +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs); #endif From a5a9eac1a018ad3bfcf9a3ec11eae99fd35f466b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:24 +0200 Subject: [PATCH 53/90] perf stat: Introduce perf_evlist__print_counters() To be in charge of printing out the stat output. It will be moved out of the 'perf stat' command in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 54768ec15dbc..db11832bbdbc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1692,7 +1692,10 @@ static void print_footer(void) "the same PMU. 
Try reorganizing the group.\n"); } -static void print_counters(struct timespec *ts, int argc, const char **argv) +static void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct timespec *ts, + int argc, const char **argv) { int interval = stat_config.interval; struct perf_evsel *counter; @@ -1724,14 +1727,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) print_aggr(prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; print_aggr_thread(counter, prefix); } break; case AGGR_GLOBAL: - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; print_counter_aggr(counter, prefix); @@ -1743,7 +1746,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) print_no_aggr_metric(prefix); else { - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; print_counter(counter, prefix); @@ -1761,6 +1764,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) fflush(stat_config.output); } +static void print_counters(struct timespec *ts, int argc, const char **argv) +{ + perf_evlist__print_counters(evsel_list, ts, argc, argv); +} + static volatile int signr = -1; static void skip_signal(int signo) From 0174820a8ba108f2e72dac5caaea3500c8ca6323 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:25 +0200 Subject: [PATCH 54/90] perf stat: Move STAT_RECORD out of perf_evlist__print_counters() It's stat related and should stay in the 'perf stat' command. The perf_evlist__print_counters function will be moved out in the following patches. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index db11832bbdbc..4ffbb6594eb6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1701,10 +1701,6 @@ perf_evlist__print_counters(struct perf_evlist *evlist, struct perf_evsel *counter; char buf[64], *prefix = NULL; - /* Do not print anything if we record to the pipe. */ - if (STAT_RECORD && perf_stat.data.is_pipe) - return; - if (interval) print_interval(prefix = buf, ts); else @@ -1766,6 +1762,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist, static void print_counters(struct timespec *ts, int argc, const char **argv) { + /* Do not print anything if we record to the pipe. */ + if (STAT_RECORD && perf_stat.data.is_pipe) + return; + perf_evlist__print_counters(evsel_list, ts, argc, argv); } From b64df7f33743cd6095b4a007f5f15ff4432fbcf5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:26 +0200 Subject: [PATCH 55/90] perf stat: Add 'struct perf_stat_config' argument to perf_evlist__print_counters() Add a 'struct perf_stat_config' argument to perf_evlist__print_counters(), so that it can be moved out of the 'perf stat' command to generic object in the following patches. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4ffbb6594eb6..f340641fe63a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1694,10 +1694,11 @@ static void print_footer(void) static void perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, struct timespec *ts, int argc, const char **argv) { - int interval = stat_config.interval; + int interval = config->interval; struct perf_evsel *counter; char buf[64], *prefix = NULL; @@ -1713,11 +1714,11 @@ perf_evlist__print_counters(struct perf_evlist *evlist, print_metric_headers(prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; - if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) - fprintf(stat_config.output, "%s", prefix); + if (config->aggr_mode == AGGR_GLOBAL && prefix) + fprintf(config->output, "%s", prefix); } - switch (stat_config.aggr_mode) { + switch (config->aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: print_aggr(prefix); @@ -1736,7 +1737,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, print_counter_aggr(counter, prefix); } if (metric_only) - fputc('\n', stat_config.output); + fputc('\n', config->output); break; case AGGR_NONE: if (metric_only) @@ -1757,7 +1758,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, if (!interval && !csv_output) print_footer(); - fflush(stat_config.output); + fflush(config->output); } static void print_counters(struct timespec *ts, int argc, const char **argv) @@ -1766,7 +1767,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (STAT_RECORD && perf_stat.data.is_pipe) return; - 
perf_evlist__print_counters(evsel_list, ts, argc, argv); + perf_evlist__print_counters(evsel_list, &stat_config, + ts, argc, argv); } static volatile int signr = -1; From f3ca50e61ff4aebfbefc666be2e064d277ad524c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:27 +0200 Subject: [PATCH 56/90] perf stat: Pass 'struct perf_stat_config' argument to local print functions Add 'struct perf_stat_config' argument to print functions, so that those functions can be moved out of the 'perf stat' command to a generic class in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 149 ++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f340641fe63a..f56da22abccc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -660,30 +660,33 @@ static int run_perf_stat(int argc, const char **argv, int run_idx) return ret; } -static void print_running(u64 run, u64 ena) +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) { if (csv_output) { - fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", + fprintf(config->output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, ena ? 
100.0 * run / ena : 100.0); } else if (run != ena) { - fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); } } -static void print_noise_pct(double total, double avg) +static void print_noise_pct(struct perf_stat_config *config, + double total, double avg) { double pct = rel_stddev_stats(total, avg); if (csv_output) - fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); + fprintf(config->output, "%s%.2f%%", csv_sep, pct); else if (pct) - fprintf(stat_config.output, " ( +-%6.2f%% )", pct); + fprintf(config->output, " ( +-%6.2f%% )", pct); } -static void print_noise(struct perf_evsel *evsel, double avg) +static void print_noise(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg) { struct perf_stat_evsel *ps; @@ -691,7 +694,7 @@ static void print_noise(struct perf_evsel *evsel, double avg) return; ps = evsel->stats; - print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); + print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); } static void aggr_printout(struct perf_evsel *evsel, int id, int nr) @@ -987,13 +990,14 @@ static bool is_mixed_hw_group(struct perf_evsel *counter) return false; } -static void printout(int id, int nr, struct perf_evsel *counter, double uval, +static void printout(struct perf_stat_config *config, int id, int nr, + struct perf_evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { - .fh = stat_config.output, + .fh = config->output, .prefix = prefix ? 
prefix : "", .id = id, .nr = nr, @@ -1023,7 +1027,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, pm = print_metric_csv; nl = new_line_csv; os.nfields = 3; - os.nfields += aggr_fields[stat_config.aggr_mode]; + os.nfields += aggr_fields[config->aggr_mode]; if (counter->cgrp) os.nfields++; } @@ -1034,7 +1038,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, } aggr_printout(counter, id, nr); - fprintf(stat_config.output, "%*s%s", + fprintf(config->output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); @@ -1045,22 +1049,22 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, print_mixed_hw_group_error = 1; } - fprintf(stat_config.output, "%-*s%s", + fprintf(config->output, "%-*s%s", csv_output ? 0 : unit_width, counter->unit, csv_sep); - fprintf(stat_config.output, "%*s", + fprintf(config->output, "%*s", csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) - fprintf(stat_config.output, "%s%s", + fprintf(config->output, "%s%s", csv_sep, counter->cgrp->name); if (!csv_output) pm(&os, NULL, NULL, "", 0); - print_noise(counter, noise); - print_running(run, ena); + print_noise(config, counter, noise); + print_running(config, run, ena); if (csv_output) pm(&os, NULL, NULL, "", 0); return; @@ -1075,16 +1079,16 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.force_header = false; if (csv_output && !metric_only) { - print_noise(counter, noise); - print_running(run, ena); + print_noise(config, counter, noise); + print_running(config, run, ena); } perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), &out, &metric_events, st); if (!csv_output && !metric_only) { - print_noise(counter, noise); - print_running(run, ena); + print_noise(config, counter, noise); + print_running(config, run, ena); } } @@ -1211,9 +1215,10 @@ static void aggr_cb(struct perf_evsel 
*counter, void *data, bool first) } } -static void print_aggr(char *prefix) +static void print_aggr(struct perf_stat_config *config, + char *prefix) { - FILE *output = stat_config.output; + FILE *output = config->output; struct perf_evsel *counter; int s, id, nr; double uval; @@ -1256,8 +1261,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1320,9 +1325,10 @@ static struct perf_aggr_thread_value *sort_aggr_thread( return buf; } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static void print_aggr_thread(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) { - FILE *output = stat_config.output; + FILE *output = config->output; int nthreads = thread_map__nr(counter->threads); int ncpus = cpu_map__nr(counter->cpus); int thread, sorted_threads, id; @@ -1339,12 +1345,12 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); id = buf[thread].id; - if (stat_config.stats) - printout(id, 0, buf[thread].counter, buf[thread].uval, + if (config->stats) + printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &stat_config.stats[id]); + &config->stats[id]); else - printout(id, 0, buf[thread].counter, buf[thread].uval, + printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, &rt_stat); fputc('\n', output); @@ -1372,9 +1378,10 @@ static void counter_aggr_cb(struct perf_evsel *counter, void *data, * Print out the results of a single counter: * aggregated counts in system-wide mode */ -static void print_counter_aggr(struct perf_evsel *counter, char *prefix) +static void print_counter_aggr(struct perf_stat_config *config, + struct 
perf_evsel *counter, char *prefix) { - FILE *output = stat_config.output; + FILE *output = config->output; double uval; struct caggr_data cd = { .avg = 0.0 }; @@ -1385,7 +1392,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); @@ -1405,9 +1412,10 @@ static void counter_cb(struct perf_evsel *counter, void *data, * Print out the results of a single counter: * does not use aggregated count in system-wide */ -static void print_counter(struct perf_evsel *counter, char *prefix) +static void print_counter(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) { - FILE *output = stat_config.output; + FILE *output = config->output; u64 ena, run, val; double uval; int cpu; @@ -1425,14 +1433,15 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); fputc('\n', output); } } -static void print_no_aggr_metric(char *prefix) +static void print_no_aggr_metric(struct perf_stat_config *config, + char *prefix) { int cpu; int nrcpus = 0; @@ -1445,7 +1454,7 @@ static void print_no_aggr_metric(char *prefix) bool first = true; if (prefix) - fputs(prefix, stat_config.output); + fputs(prefix, config->output); evlist__for_each_entry(evsel_list, counter) { if (is_duration_time(counter)) continue; @@ -1458,10 +1467,10 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, 
&rt_stat); } - fputc('\n', stat_config.output); + fputc('\n', config->output); } } @@ -1481,25 +1490,25 @@ static const char *aggr_header_csv[] = { [AGGR_GLOBAL] = "" }; -static void print_metric_headers(const char *prefix, bool no_indent) +static void print_metric_headers(struct perf_stat_config *config, + const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; struct perf_evsel *counter; struct outstate os = { - .fh = stat_config.output + .fh = config->output }; if (prefix) - fprintf(stat_config.output, "%s", prefix); + fprintf(config->output, "%s", prefix); if (!csv_output && !no_indent) - fprintf(stat_config.output, "%*s", - aggr_header_lens[stat_config.aggr_mode], ""); + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); if (csv_output) { - if (stat_config.interval) - fputs("time,", stat_config.output); - fputs(aggr_header_csv[stat_config.aggr_mode], - stat_config.output); + if (config->interval) + fputs("time,", config->output); + fputs(aggr_header_csv[config->aggr_mode], config->output); } /* Print metrics headers only */ @@ -1518,12 +1527,13 @@ static void print_metric_headers(const char *prefix, bool no_indent) &metric_events, &rt_stat); } - fputc('\n', stat_config.output); + fputc('\n', config->output); } -static void print_interval(char *prefix, struct timespec *ts) +static void print_interval(struct perf_stat_config *config, + char *prefix, struct timespec *ts) { - FILE *output = stat_config.output; + FILE *output = config->output; static int num_print_interval; if (interval_clear) @@ -1532,7 +1542,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); if ((num_print_interval == 0 && !csv_output) || interval_clear) { - switch (stat_config.aggr_mode) { + switch (config->aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus"); if (!metric_only) @@ -1564,14 +1574,15 @@ static void print_interval(char *prefix, struct timespec 
*ts) } if ((num_print_interval == 0 || interval_clear) && metric_only) - print_metric_headers(" ", true); + print_metric_headers(config, " ", true); if (++num_print_interval == 25) num_print_interval = 0; } -static void print_header(int argc, const char **argv) +static void print_header(struct perf_stat_config *config, + int argc, const char **argv) { - FILE *output = stat_config.output; + FILE *output = config->output; int i; fflush(stdout); @@ -1639,10 +1650,10 @@ static double timeval2double(struct timeval *t) return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; } -static void print_footer(void) +static void print_footer(struct perf_stat_config *config) { double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; - FILE *output = stat_config.output; + FILE *output = config->output; int n; if (!null_run) @@ -1673,7 +1684,7 @@ static void print_footer(void) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(sd, avg); + print_noise_pct(config, sd, avg); } fprintf(output, "\n\n"); @@ -1703,15 +1714,15 @@ perf_evlist__print_counters(struct perf_evlist *evlist, char buf[64], *prefix = NULL; if (interval) - print_interval(prefix = buf, ts); + print_interval(config, prefix = buf, ts); else - print_header(argc, argv); + print_header(config, argc, argv); if (metric_only) { static int num_print_iv; if (num_print_iv == 0 && !interval) - print_metric_headers(prefix, false); + print_metric_headers(config, prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix) @@ -1721,32 +1732,32 @@ perf_evlist__print_counters(struct perf_evlist *evlist, switch (config->aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(prefix); + print_aggr(config, prefix); break; case AGGR_THREAD: evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; - print_aggr_thread(counter, prefix); + print_aggr_thread(config, counter, prefix); } break; case AGGR_GLOBAL: 
evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; - print_counter_aggr(counter, prefix); + print_counter_aggr(config, counter, prefix); } if (metric_only) fputc('\n', config->output); break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(prefix); + print_no_aggr_metric(config, prefix); else { evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; - print_counter(counter, prefix); + print_counter(config, counter, prefix); } } break; @@ -1756,7 +1767,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, } if (!interval && !csv_output) - print_footer(); + print_footer(config); fflush(config->output); } From 6ca9a082b1908ff7f8adedf08166043b83b266f6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:28 +0200 Subject: [PATCH 57/90] perf stat: Pass a 'struct perf_stat_config' argument to global print functions Add 'struct perf_stat_config' argument to the global print functions, so that these functions can be used out of the 'perf stat' command code. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 +- tools/perf/builtin-stat.c | 74 +++++++++-------- tools/perf/util/stat-shadow.c | 147 ++++++++++++++++++---------------- tools/perf/util/stat.h | 8 +- 4 files changed, 131 insertions(+), 106 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ba481d73f910..6176bae177c2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1544,7 +1544,8 @@ struct metric_ctx { FILE *fp; }; -static void script_print_metric(void *ctx, const char *color, +static void script_print_metric(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color, const char *fmt, const char *unit, double val) { @@ -1562,7 +1563,8 @@ static void script_print_metric(void *ctx, const char *color, fprintf(mctx->fp, " %s\n", unit); } -static void script_new_line(void *ctx) +static void script_new_line(struct perf_stat_config *config __maybe_unused, + void *ctx) { struct metric_ctx *mctx = ctx; @@ -1608,7 +1610,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { - perf_stat__print_shadow_stats(ev2, + perf_stat__print_shadow_stats(&stat_config, ev2, evsel_script(ev2)->val, sample->cpu, &ctx, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f56da22abccc..7a3361308e61 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -697,11 +697,12 @@ static void print_noise(struct perf_stat_config *config, print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); } -static void aggr_printout(struct perf_evsel *evsel, int id, int nr) +static void aggr_printout(struct 
perf_stat_config *config, + struct perf_evsel *evsel, int id, int nr) { - switch (stat_config.aggr_mode) { + switch (config->aggr_mode) { case AGGR_CORE: - fprintf(stat_config.output, "S%d-C%*d%s%*d%s", + fprintf(config->output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), csv_output ? 0 : -8, cpu_map__id_to_cpu(id), @@ -711,7 +712,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_SOCKET: - fprintf(stat_config.output, "S%*d%s%*d%s", + fprintf(config->output, "S%*d%s%*d%s", csv_output ? 0 : -5, id, csv_sep, @@ -720,12 +721,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_NONE: - fprintf(stat_config.output, "CPU%*d%s", + fprintf(config->output, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; case AGGR_THREAD: - fprintf(stat_config.output, "%*s-%*d%s", + fprintf(config->output, "%*s-%*d%s", csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), csv_output ? 
0 : -8, @@ -750,24 +751,27 @@ struct outstate { #define METRIC_LEN 35 -static void new_line_std(void *ctx) +static void new_line_std(struct perf_stat_config *config __maybe_unused, + void *ctx) { struct outstate *os = ctx; os->newline = true; } -static void do_new_line_std(struct outstate *os) +static void do_new_line_std(struct perf_stat_config *config, + struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); - aggr_printout(os->evsel, os->id, os->nr); - if (stat_config.aggr_mode == AGGR_NONE) + aggr_printout(config, os->evsel, os->id, os->nr); + if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); fprintf(os->fh, " "); } -static void print_metric_std(void *ctx, const char *color, const char *fmt, +static void print_metric_std(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -783,7 +787,7 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt, } if (newline) - do_new_line_std(os); + do_new_line_std(config, os); n = fprintf(out, " # "); if (color) @@ -793,7 +797,7 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt, fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); } -static void new_line_csv(void *ctx) +static void new_line_csv(struct perf_stat_config *config, void *ctx) { struct outstate *os = ctx; int i; @@ -801,12 +805,13 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); - aggr_printout(os->evsel, os->id, os->nr); + aggr_printout(config, os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } -static void print_metric_csv(void *ctx, +static void print_metric_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, const char *fmt, const char *unit, double val) { @@ -853,7 +858,8 @@ static const char *fixunit(char *buf, struct perf_evsel *evsel, return unit; 
} -static void print_metric_only(void *ctx, const char *color, const char *fmt, +static void print_metric_only(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -874,7 +880,8 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt, fprintf(out, "%*s ", mlen, str); } -static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, +static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, const char *fmt, const char *unit, double val) { @@ -894,11 +901,13 @@ static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, fprintf(out, "%s%s", vals, csv_sep); } -static void new_line_metric(void *ctx __maybe_unused) +static void new_line_metric(struct perf_stat_config *config __maybe_unused, + void *ctx __maybe_unused) { } -static void print_metric_header(void *ctx, const char *color __maybe_unused, +static void print_metric_header(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { @@ -936,9 +945,10 @@ static int first_shadow_cpu(struct perf_evsel *evsel, int id) return 0; } -static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) +static void abs_printout(struct perf_stat_config *config, + int id, int nr, struct perf_evsel *evsel, double avg) { - FILE *output = stat_config.output; + FILE *output = config->output; double sc = evsel->scale; const char *fmt; @@ -951,7 +961,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fmt = floor(sc) != sc ? 
"%18.2f%s" : "%18.0f%s"; } - aggr_printout(evsel, id, nr); + aggr_printout(config, evsel, id, nr); fprintf(output, fmt, avg, csv_sep); @@ -1004,7 +1014,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, .evsel = counter, }; print_metric_t pm = print_metric_std; - void (*nl)(void *); + new_line_t nl; if (metric_only) { nl = new_line_metric; @@ -1033,10 +1043,10 @@ static void printout(struct perf_stat_config *config, int id, int nr, } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (metric_only) { - pm(&os, NULL, "", "", 0); + pm(config, &os, NULL, "", "", 0); return; } - aggr_printout(counter, id, nr); + aggr_printout(config, counter, id, nr); fprintf(config->output, "%*s%s", csv_output ? 0 : 18, @@ -1062,16 +1072,16 @@ static void printout(struct perf_stat_config *config, int id, int nr, csv_sep, counter->cgrp->name); if (!csv_output) - pm(&os, NULL, NULL, "", 0); + pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); print_running(config, run, ena); if (csv_output) - pm(&os, NULL, NULL, "", 0); + pm(config, &os, NULL, NULL, "", 0); return; } if (!metric_only) - abs_printout(id, nr, counter, uval); + abs_printout(config, id, nr, counter, uval); out.print_metric = pm; out.new_line = nl; @@ -1083,7 +1093,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, print_running(config, run, ena); } - perf_stat__print_shadow_stats(counter, uval, + perf_stat__print_shadow_stats(config, counter, uval, first_shadow_cpu(counter, id), &out, &metric_events, st); if (!csv_output && !metric_only) { @@ -1255,7 +1265,7 @@ static void print_aggr(struct perf_stat_config *config, val = ad.val; if (first && metric_only) { first = false; - aggr_printout(counter, id, nr); + aggr_printout(config, counter, id, nr); } if (prefix && !metric_only) fprintf(output, "%s", prefix); @@ -1459,7 +1469,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (is_duration_time(counter)) continue; if (first) 
{ - aggr_printout(counter, cpu, 0); + aggr_printout(config, counter, cpu, 0); first = false; } val = perf_counts(counter->counts, cpu, 0)->val; @@ -1521,7 +1531,7 @@ static void print_metric_headers(struct perf_stat_config *config, out.new_line = new_line_metric; out.force_header = true; os.evsel = counter; - perf_stat__print_shadow_stats(counter, 0, + perf_stat__print_shadow_stats(config, counter, 0, 0, &out, &metric_events, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99990f5f2512..8ad32763cfff 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -410,7 +410,8 @@ static double runtime_stat_n(struct runtime_stat *st, return v->stats.n; } -static void print_stalled_cycles_frontend(int cpu, +static void print_stalled_cycles_frontend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -427,13 +428,14 @@ static void print_stalled_cycles_frontend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); if (ratio) - out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", ratio); else - out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); + out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(int cpu, +static void print_stalled_cycles_backend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -449,10 +451,11 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(int cpu, +static void 
print_branch_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -469,10 +472,11 @@ static void print_branch_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(int cpu, +static void print_l1_dcache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -490,10 +494,11 @@ static void print_l1_dcache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(int cpu, +static void print_l1_icache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -510,10 +515,11 @@ static void print_l1_icache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(int cpu, +static void print_dtlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -529,10 +535,11 @@ static void print_dtlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(int cpu, +static void 
print_itlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -548,10 +555,11 @@ static void print_itlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(int cpu, +static void print_ll_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -567,7 +575,7 @@ static void print_ll_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } /* @@ -674,7 +682,8 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) return sanitize_val(1.0 - sum); } -static void print_smi_cost(int cpu, struct perf_evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -694,11 +703,12 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, if (cost > 10) color = PERF_COLOR_RED; - out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); - out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); + out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); } -static void generic_metric(const char *metric_expr, +static void generic_metric(struct perf_stat_config *config, + const char *metric_expr, struct perf_evsel **metric_events, char *name, const char *metric_name, @@ -737,20 +747,21 @@ static void generic_metric(const char 
*metric_expr, const char *p = metric_expr; if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", + print_metric(config, ctxp, NULL, "%8.1f", metric_name ? metric_name : out->force_header ? name : "", ratio); else - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, out->force_header ? (metric_name ? metric_name : name) : "", 0); } else - print_metric(ctxp, NULL, NULL, "", 0); + print_metric(config, ctxp, NULL, NULL, "", 0); } -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, @@ -769,10 +780,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", ratio); } else { - print_metric(ctxp, NULL, NULL, "insn per cycle", 0); + print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, @@ -783,20 +794,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ctx, cpu)); if (total && avg) { - out->new_line(ctxp); + out->new_line(config, ctxp); ratio = total / avg; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); } else if (have_frontend_stalled) { - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(cpu, evsel, avg, out, st); + print_branch_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all branches", 0); + print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && 
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | @@ -804,9 +815,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(cpu, evsel, avg, out, st); + print_l1_dcache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | @@ -814,9 +825,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(cpu, evsel, avg, out, st); + print_l1_icache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | @@ -824,9 +835,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out, st); + print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | @@ -834,9 +845,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(cpu, evsel, avg, out, st); + print_itlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, 
"of all iTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | @@ -844,9 +855,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(cpu, evsel, avg, out, st); + print_ll_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); @@ -854,32 +865,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = avg * 100 / total; if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.3f %%", + print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else - print_metric(ctxp, NULL, NULL, "of all cache refs", 0); + print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); + print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); } else { - print_metric(ctxp, NULL, NULL, "Ghz", 0); + print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = 
runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) - print_metric(ctxp, NULL, + print_metric(config, ctxp, NULL, "%7.2f%%", "transactional cycles", 100.0 * (avg / total)); else - print_metric(ctxp, NULL, NULL, "transactional cycles", + print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); @@ -888,10 +899,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total2 < avg) total2 = avg; if (total) - print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", + print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); else - print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); @@ -900,10 +911,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.0f", + print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else - print_metric(ctxp, NULL, NULL, "cycles / transaction", + print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, @@ -912,33 +923,33 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (avg) ratio = total / avg; - print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); + print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", avg / (ratio * evsel->scale)); else - print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); + 
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "frontend bound", + print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; - print_metric(ctxp, color, "%8.1f%%", "retiring", + print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "bad speculation", + print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { double be_bound = td_be_bound(ctx, cpu, st); @@ -955,12 +966,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; if (td_total_slots(ctx, cpu, st) > 0) - print_metric(ctxp, color, "%8.1f%%", name, + print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else - print_metric(ctxp, NULL, NULL, name, 0); + print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, evsel->metric_name, avg, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; @@ -975,9 +986,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, unit = 'K'; } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if 
(perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out, st); + print_smi_cost(config, cpu, evsel, out, st); } else { num = 0; } @@ -987,12 +998,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, list_for_each_entry (mexp, &me->head, nd) { if (num++ > 0) - out->new_line(ctxp); - generic_metric(mexp->metric_expr, mexp->metric_events, + out->new_line(config, ctxp); + generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, avg, cpu, out, st); } } if (num == 0) - print_metric(ctxp, NULL, NULL, NULL, 0); + print_metric(config, ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index da6a706daecc..dffcf2110706 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -135,9 +135,10 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; -typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, +typedef void (*print_metric_t)(struct perf_stat_config *config, + void *ctx, const char *color, const char *unit, const char *fmt, double val); -typedef void (*new_line_t )(void *ctx); +typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); @@ -153,7 +154,8 @@ struct perf_stat_output_ctx { bool force_header; }; -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, From fa7070a38676d660c0a71ab6981bfdca3b340ccd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:29 +0200 Subject: [PATCH 58/90] perf stat: Move csv_* to 'struct perf_stat_config' Move the static csv_* variables to 'struct perf_stat_config', so that it can be passed around and used 
outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 114 +++++++++++++++++++------------------- tools/perf/util/stat.h | 2 + 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7a3361308e61..4c29e5065e02 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -157,8 +157,6 @@ static bool smi_cost = false; static bool smi_reset = false; static bool big_num = true; static int big_num_opt = -1; -static const char *csv_sep = NULL; -static bool csv_output = false; static bool group = false; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; @@ -663,11 +661,11 @@ static int run_perf_stat(int argc, const char **argv, int run_idx) static void print_running(struct perf_stat_config *config, u64 run, u64 ena) { - if (csv_output) { + if (config->csv_output) { fprintf(config->output, "%s%" PRIu64 "%s%.2f", - csv_sep, + config->csv_sep, run, - csv_sep, + config->csv_sep, ena ? 100.0 * run / ena : 100.0); } else if (run != ena) { fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); @@ -679,8 +677,8 @@ static void print_noise_pct(struct perf_stat_config *config, { double pct = rel_stddev_stats(total, avg); - if (csv_output) - fprintf(config->output, "%s%.2f%%", csv_sep, pct); + if (config->csv_output) + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); else if (pct) fprintf(config->output, " ( +-%6.2f%% )", pct); } @@ -704,34 +702,34 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_CORE: fprintf(config->output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), - csv_output ? 0 : -8, + config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id), - csv_sep, - csv_output ? 
0 : 4, + config->csv_sep, + config->csv_output ? 0 : 4, nr, - csv_sep); + config->csv_sep); break; case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", - csv_output ? 0 : -5, + config->csv_output ? 0 : -5, id, - csv_sep, - csv_output ? 0 : 4, + config->csv_sep, + config->csv_output ? 0 : 4, nr, - csv_sep); + config->csv_sep); break; case AGGR_NONE: fprintf(config->output, "CPU%*d%s", - csv_output ? 0 : -4, - perf_evsel__cpus(evsel)->map[id], csv_sep); + config->csv_output ? 0 : -4, + perf_evsel__cpus(evsel)->map[id], config->csv_sep); break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", - csv_output ? 0 : 16, + config->csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), - csv_output ? 0 : -8, + config->csv_output ? 0 : -8, thread_map__pid(evsel->threads, id), - csv_sep); + config->csv_sep); break; case AGGR_GLOBAL: case AGGR_UNSET: @@ -804,10 +802,10 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) fputc('\n', os->fh); if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, csv_sep); + fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); aggr_printout(config, os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) - fputs(csv_sep, os->fh); + fputs(config->csv_sep, os->fh); } static void print_metric_csv(struct perf_stat_config *config __maybe_unused, @@ -820,7 +818,7 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, char buf[64], *vals, *ends; if (unit == NULL || fmt == NULL) { - fprintf(out, "%s%s", csv_sep, csv_sep); + fprintf(out, "%s%s", config->csv_sep, config->csv_sep); return; } snprintf(buf, sizeof(buf), fmt, val); @@ -830,7 +828,7 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, *ends = 0; while (isspace(*unit)) unit++; - fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); } /* Filter out some columns that don't work well in metrics only mode */ @@ -898,7 +896,7 @@ static 
void print_metric_only_csv(struct perf_stat_config *config __maybe_unused while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - fprintf(out, "%s%s", vals, csv_sep); + fprintf(out, "%s%s", vals, config->csv_sep); } static void new_line_metric(struct perf_stat_config *config __maybe_unused, @@ -917,8 +915,8 @@ static void print_metric_header(struct perf_stat_config *config __maybe_unused, if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); - if (csv_output) - fprintf(os->fh, "%s%s", unit, csv_sep); + if (config->csv_output) + fprintf(os->fh, "%s%s", unit, config->csv_sep); else fprintf(os->fh, "%*s ", metric_only_len, unit); } @@ -952,7 +950,7 @@ static void abs_printout(struct perf_stat_config *config, double sc = evsel->scale; const char *fmt; - if (csv_output) { + if (config->csv_output) { fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; } else { if (big_num) @@ -963,17 +961,17 @@ static void abs_printout(struct perf_stat_config *config, aggr_printout(config, evsel, id, nr); - fprintf(output, fmt, avg, csv_sep); + fprintf(output, fmt, avg, config->csv_sep); if (evsel->unit) fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - evsel->unit, csv_sep); + config->csv_output ? 0 : unit_width, + evsel->unit, config->csv_sep); - fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); + fprintf(output, "%-*s", config->csv_output ? 
0 : 25, perf_evsel__name(evsel)); if (evsel->cgrp) - fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); + fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); } static bool is_mixed_hw_group(struct perf_evsel *counter) @@ -1018,14 +1016,14 @@ static void printout(struct perf_stat_config *config, int id, int nr, if (metric_only) { nl = new_line_metric; - if (csv_output) + if (config->csv_output) pm = print_metric_only_csv; else pm = print_metric_only; } else nl = new_line_std; - if (csv_output && !metric_only) { + if (config->csv_output && !metric_only) { static int aggr_fields[] = { [AGGR_GLOBAL] = 0, [AGGR_THREAD] = 1, @@ -1049,9 +1047,9 @@ static void printout(struct perf_stat_config *config, int id, int nr, aggr_printout(config, counter, id, nr); fprintf(config->output, "%*s%s", - csv_output ? 0 : 18, + config->csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); + config->csv_sep); if (counter->supported) { print_free_counters_hint = 1; @@ -1060,22 +1058,22 @@ static void printout(struct perf_stat_config *config, int id, int nr, } fprintf(config->output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); + config->csv_output ? 0 : unit_width, + counter->unit, config->csv_sep); fprintf(config->output, "%*s", - csv_output ? 0 : -25, + config->csv_output ? 
0 : -25, perf_evsel__name(counter)); if (counter->cgrp) fprintf(config->output, "%s%s", - csv_sep, counter->cgrp->name); + config->csv_sep, counter->cgrp->name); - if (!csv_output) + if (!config->csv_output) pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); print_running(config, run, ena); - if (csv_output) + if (config->csv_output) pm(config, &os, NULL, NULL, "", 0); return; } @@ -1088,7 +1086,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, out.ctx = &os; out.force_header = false; - if (csv_output && !metric_only) { + if (config->csv_output && !metric_only) { print_noise(config, counter, noise); print_running(config, run, ena); } @@ -1096,7 +1094,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, perf_stat__print_shadow_stats(config, counter, uval, first_shadow_cpu(counter, id), &out, &metric_events, st); - if (!csv_output && !metric_only) { + if (!config->csv_output && !metric_only) { print_noise(config, counter, noise); print_running(config, run, ena); } @@ -1512,10 +1510,10 @@ static void print_metric_headers(struct perf_stat_config *config, if (prefix) fprintf(config->output, "%s", prefix); - if (!csv_output && !no_indent) + if (!config->csv_output && !no_indent) fprintf(config->output, "%*s", aggr_header_lens[config->aggr_mode], ""); - if (csv_output) { + if (config->csv_output) { if (config->interval) fputs("time,", config->output); fputs(aggr_header_csv[config->aggr_mode], config->output); @@ -1549,9 +1547,9 @@ static void print_interval(struct perf_stat_config *config, if (interval_clear) puts(CONSOLE_CLEAR); - sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); - if ((num_print_interval == 0 && !csv_output) || interval_clear) { + if ((num_print_interval == 0 && !config->csv_output) || interval_clear) { switch (config->aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus"); @@ 
-1597,7 +1595,7 @@ static void print_header(struct perf_stat_config *config, fflush(stdout); - if (!csv_output) { + if (!config->csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); if (target.system_wide) @@ -1776,7 +1774,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, break; } - if (!interval && !csv_output) + if (!interval && !config->csv_output) print_footer(config); fflush(config->output); @@ -1896,7 +1894,7 @@ static const struct option stat_options[] = { OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), - OPT_STRING('x', "field-separator", &csv_sep, "separator", + OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", "monitor event in cgroup name only", parse_cgroups), @@ -2749,12 +2747,12 @@ int cmd_stat(int argc, const char **argv) perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); - if (csv_sep) { - csv_output = true; - if (!strcmp(csv_sep, "\\t")) - csv_sep = "\t"; + if (stat_config.csv_sep) { + stat_config.csv_output = true; + if (!strcmp(stat_config.csv_sep, "\\t")) + stat_config.csv_sep = "\t"; } else - csv_sep = DEFAULT_SEPARATOR; + stat_config.csv_sep = DEFAULT_SEPARATOR; if (argc && !strncmp(argv[0], "rec", 3)) { argc = __cmd_record(argc, argv); @@ -2827,7 +2825,7 @@ int cmd_stat(int argc, const char **argv) /* * let the spreadsheet do the pretty-printing */ - if (csv_output) { + if (stat_config.csv_output) { /* User explicitly passed -B? 
*/ if (big_num_opt == 1) { fprintf(stderr, "-B option not supported with -x\n"); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index dffcf2110706..18546d8b0279 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -91,6 +91,7 @@ struct perf_stat_config { bool scale; bool no_inherit; bool identifier; + bool csv_output; FILE *output; unsigned int interval; unsigned int timeout; @@ -98,6 +99,7 @@ struct perf_stat_config { int times; struct runtime_stat *stats; int stats_num; + const char *csv_sep; }; void update_stats(struct stats *stats, u64 val); From 132c6ba3c440fd21a45ff7f9d7a1f53813f4d0e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:30 +0200 Subject: [PATCH 59/90] perf stat: Move 'interval_clear' to 'struct perf_stat_config' Move the static 'interval_clear' variable to 'struct perf_stat_config', so it can be passed around and used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 ++++----- tools/perf/util/stat.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4c29e5065e02..89297ab77d21 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -172,7 +172,6 @@ static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; static bool interval_count; -static bool interval_clear; static const char *output_name; static int output_fd; static int print_free_counters_hint; @@ -1544,12 +1543,12 @@ static void print_interval(struct perf_stat_config *config, FILE *output = config->output; static int num_print_interval; - if (interval_clear) + if (config->interval_clear) puts(CONSOLE_CLEAR); sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, 
config->csv_sep); - if ((num_print_interval == 0 && !config->csv_output) || interval_clear) { + if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { switch (config->aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus"); @@ -1581,7 +1580,7 @@ static void print_interval(struct perf_stat_config *config, } } - if ((num_print_interval == 0 || interval_clear) && metric_only) + if ((num_print_interval == 0 || config->interval_clear) && metric_only) print_metric_headers(config, " ", true); if (++num_print_interval == 25) num_print_interval = 0; @@ -1911,7 +1910,7 @@ static const struct option stat_options[] = { "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), - OPT_BOOLEAN(0, "interval-clear", &interval_clear, + OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear, "clear screen in between new interval"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, "stop workload and print counts after a timeout period in ms (>= 10ms)"), diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 18546d8b0279..470ab37601be 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -92,6 +92,7 @@ struct perf_stat_config { bool no_inherit; bool identifier; bool csv_output; + bool interval_clear; FILE *output; unsigned int interval; unsigned int timeout; From 0ce5aa0266604c77ee64882b70c980e843629177 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:31 +0200 Subject: [PATCH 60/90] perf stat: Move 'metric_only' to 'struct perf_stat_config' Move the static 'metric_only' variable to 'struct perf_stat_config', so it can be passed around and used outside the 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 29 ++++++++++++++++------------- tools/perf/util/stat.h | 1 + 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 89297ab77d21..397cb4f28d7f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -163,7 +163,6 @@ static const char *post_cmd = NULL; static bool sync_run = false; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; -static bool metric_only = false; static bool force_metric_only = false; static bool no_merge = false; static bool walltime_run_table = false; @@ -1013,7 +1012,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, print_metric_t pm = print_metric_std; new_line_t nl; - if (metric_only) { + if (config->metric_only) { nl = new_line_metric; if (config->csv_output) pm = print_metric_only_csv; @@ -1022,7 +1021,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, } else nl = new_line_std; - if (config->csv_output && !metric_only) { + if (config->csv_output && !config->metric_only) { static int aggr_fields[] = { [AGGR_GLOBAL] = 0, [AGGR_THREAD] = 1, @@ -1039,7 +1038,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, os.nfields++; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { - if (metric_only) { + if (config->metric_only) { pm(config, &os, NULL, "", "", 0); return; } @@ -1077,7 +1076,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, return; } - if (!metric_only) + if (!config->metric_only) abs_printout(config, id, nr, counter, uval); out.print_metric = pm; @@ -1085,7 +1084,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, out.ctx = &os; 
out.force_header = false; - if (config->csv_output && !metric_only) { + if (config->csv_output && !config->metric_only) { print_noise(config, counter, noise); print_running(config, run, ena); } @@ -1093,7 +1092,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, perf_stat__print_shadow_stats(config, counter, uval, first_shadow_cpu(counter, id), &out, &metric_events, st); - if (!config->csv_output && !metric_only) { + if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); print_running(config, run, ena); } @@ -1225,6 +1224,7 @@ static void aggr_cb(struct perf_evsel *counter, void *data, bool first) static void print_aggr(struct perf_stat_config *config, char *prefix) { + bool metric_only = config->metric_only; FILE *output = config->output; struct perf_evsel *counter; int s, id, nr; @@ -1388,6 +1388,7 @@ static void counter_aggr_cb(struct perf_evsel *counter, void *data, static void print_counter_aggr(struct perf_stat_config *config, struct perf_evsel *counter, char *prefix) { + bool metric_only = config->metric_only; FILE *output = config->output; double uval; struct caggr_data cd = { .avg = 0.0 }; @@ -1540,6 +1541,7 @@ static void print_metric_headers(struct perf_stat_config *config, static void print_interval(struct perf_stat_config *config, char *prefix, struct timespec *ts) { + bool metric_only = config->metric_only; FILE *output = config->output; static int num_print_interval; @@ -1716,6 +1718,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, struct timespec *ts, int argc, const char **argv) { + bool metric_only = config->metric_only; int interval = config->interval; struct perf_evsel *counter; char buf[64], *prefix = NULL; @@ -1843,7 +1846,7 @@ static int enable_metric_only(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { force_metric_only = true; - metric_only = !unset; + stat_config.metric_only = !unset; return 0; } @@ -1922,7 +1925,7 @@ static const struct 
option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &stat_config.initial_delay, "ms to wait before starting measurement after program start"), - OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure topdown level 1 statistics"), @@ -2345,7 +2348,7 @@ static int add_default_attributes(void) if (pmu_have_event("msr", "aperf") && pmu_have_event("msr", "smi")) { if (!force_metric_only) - metric_only = true; + stat_config.metric_only = true; err = parse_events(evsel_list, smi_cost_attrs, &errinfo); } else { fprintf(stderr, "To measure SMI cost, it needs " @@ -2376,7 +2379,7 @@ static int add_default_attributes(void) } if (!force_metric_only) - metric_only = true; + stat_config.metric_only = true; if (topdown_filter_events(topdown_attrs, &str, arch_topdown_check_group(&warn)) < 0) { pr_err("Out of memory\n"); @@ -2776,12 +2779,12 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) { fprintf(stderr, "--metric-only is not supported with --per-thread\n"); goto out; } - if (metric_only && run_count > 1) { + if (stat_config.metric_only && run_count > 1) { fprintf(stderr, "--metric-only is not supported with -r\n"); goto out; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 470ab37601be..da838182b99c 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -93,6 +93,7 @@ struct perf_stat_config { bool identifier; bool csv_output; bool interval_clear; + bool metric_only; FILE *output; unsigned int interval; unsigned int timeout; From df4f7b4d4b1e61e6b16ac2e3760be46bac86e4f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:32 +0200 Subject: [PATCH 61/90] perf 
stat: Move 'unit_width' to 'struct perf_stat_config' Move the static 'unit_width' variable to 'struct perf_stat_config', so it can be passed around and used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 11 ++++++----- tools/perf/util/stat.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 397cb4f28d7f..24171aa6c41f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -161,7 +161,6 @@ static bool group = false; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; -static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool force_metric_only = false; static bool no_merge = false; @@ -200,6 +199,7 @@ static volatile int done = 0; static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, .scale = true, + .unit_width = 4, /* strlen("unit") */ }; static bool is_duration_time(struct perf_evsel *evsel) @@ -524,8 +524,8 @@ try_again: counter->supported = true; l = strlen(counter->unit); - if (l > unit_width) - unit_width = l; + if (l > stat_config.unit_width) + stat_config.unit_width = l; if (perf_evsel__should_store_id(counter) && perf_evsel__store_ids(counter, evsel_list)) @@ -963,7 +963,7 @@ static void abs_printout(struct perf_stat_config *config, if (evsel->unit) fprintf(output, "%-*s%s", - config->csv_output ? 0 : unit_width, + config->csv_output ? 0 : config->unit_width, evsel->unit, config->csv_sep); fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); @@ -1056,7 +1056,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, } fprintf(config->output, "%-*s%s", - config->csv_output ? 
0 : unit_width, + config->csv_output ? 0 : config->unit_width, counter->unit, config->csv_sep); fprintf(config->output, "%*s", @@ -1542,6 +1542,7 @@ static void print_interval(struct perf_stat_config *config, char *prefix, struct timespec *ts) { bool metric_only = config->metric_only; + unsigned int unit_width = config->unit_width; FILE *output = config->output; static int num_print_interval; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index da838182b99c..2dc66e0ba4b8 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -98,6 +98,7 @@ struct perf_stat_config { unsigned int interval; unsigned int timeout; unsigned int initial_delay; + unsigned int unit_width; int times; struct runtime_stat *stats; int stats_num; From c512e0eae4c63be8c5964c3942b2e9c04dcd459e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:33 +0200 Subject: [PATCH 62/90] perf stat: Add 'target' argument to perf_evlist__print_counters() Add 'struct target' argument to perf_evlist__print_counters(), so the function does not depend on the 'perf stat' command object local target and can be moved out. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24171aa6c41f..528f85146b59 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1590,6 +1590,7 @@ static void print_interval(struct perf_stat_config *config, } static void print_header(struct perf_stat_config *config, + struct target *_target, int argc, const char **argv) { FILE *output = config->output; @@ -1600,18 +1601,18 @@ static void print_header(struct perf_stat_config *config, if (!config->csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); - if (target.system_wide) + if (_target->system_wide) fprintf(output, "\'system wide"); - else if (target.cpu_list) - fprintf(output, "\'CPU(s) %s", target.cpu_list); - else if (!target__has_task(&target)) { + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); for (i = 1; argv && (i < argc); i++) fprintf(output, " %s", argv[i]); - } else if (target.pid) - fprintf(output, "process id \'%s", target.pid); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); else - fprintf(output, "thread id \'%s", target.tid); + fprintf(output, "thread id \'%s", _target->tid); fprintf(output, "\'"); if (run_count > 1) @@ -1716,6 +1717,7 @@ static void print_footer(struct perf_stat_config *config) static void perf_evlist__print_counters(struct perf_evlist *evlist, struct perf_stat_config *config, + struct target *_target, struct timespec *ts, int argc, const char **argv) { @@ -1727,7 +1729,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, if (interval) print_interval(config, prefix = buf, ts); else - print_header(config, argc, argv); + print_header(config, _target, argc, argv); if (metric_only) { static int num_print_iv; @@ -1789,7 +1791,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (STAT_RECORD && perf_stat.data.is_pipe) return; - perf_evlist__print_counters(evsel_list, &stat_config, + perf_evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } From bc0bcda201e87d62f0922fa664376355b0fc77ff Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:34 +0200 Subject: [PATCH 63/90] perf stat: Pass 'evlist' argument to print functions Add 'evlist' argument to print functions to get rid of the global 'evsel_list' variable dependency. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 528f85146b59..46181ac492c5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1222,6 +1222,7 @@ static void aggr_cb(struct perf_evsel *counter, void *data, bool first) } static void print_aggr(struct perf_stat_config *config, + struct perf_evlist *evlist, char *prefix) { bool metric_only = config->metric_only; @@ -1248,7 +1249,7 @@ static void print_aggr(struct perf_stat_config *config, ad.id = id = aggr_map->map[s]; first = true; - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; @@ -1449,6 +1450,7 @@ static void print_counter(struct perf_stat_config *config, } static void print_no_aggr_metric(struct perf_stat_config *config, + struct perf_evlist *evlist, char *prefix) { int cpu; @@ -1457,13 +1459,13 @@ static void print_no_aggr_metric(struct perf_stat_config *config, u64 ena, run, val; double uval; - nrcpus = evsel_list->cpus->nr; + nrcpus = evlist->cpus->nr; for (cpu = 0; cpu < nrcpus; cpu++) { bool first = true; if (prefix) fputs(prefix, config->output); - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; if (first) { @@ -1499,6 +1501,7 @@ static const char *aggr_header_csv[] = { }; static void print_metric_headers(struct perf_stat_config *config, + struct perf_evlist *evlist, const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; @@ -1520,7 +1523,7 @@ static void print_metric_headers(struct perf_stat_config *config, } /* Print metrics headers only */ - 
evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; os.evsel = counter; @@ -1539,6 +1542,7 @@ static void print_metric_headers(struct perf_stat_config *config, } static void print_interval(struct perf_stat_config *config, + struct perf_evlist *evlist, char *prefix, struct timespec *ts) { bool metric_only = config->metric_only; @@ -1584,7 +1588,7 @@ static void print_interval(struct perf_stat_config *config, } if ((num_print_interval == 0 || config->interval_clear) && metric_only) - print_metric_headers(config, " ", true); + print_metric_headers(config, evlist, " ", true); if (++num_print_interval == 25) num_print_interval = 0; } @@ -1727,7 +1731,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, char buf[64], *prefix = NULL; if (interval) - print_interval(config, prefix = buf, ts); + print_interval(config, evlist, prefix = buf, ts); else print_header(config, _target, argc, argv); @@ -1735,7 +1739,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, static int num_print_iv; if (num_print_iv == 0 && !interval) - print_metric_headers(config, prefix, false); + print_metric_headers(config, evlist, prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix) @@ -1745,7 +1749,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, switch (config->aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(config, prefix); + print_aggr(config, evlist, prefix); break; case AGGR_THREAD: evlist__for_each_entry(evlist, counter) { @@ -1765,7 +1769,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(config, prefix); + print_no_aggr_metric(config, evlist, prefix); else { evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) From 0c538a9462953dfcde2fe961aca2cf75c2747040 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:35 
+0200 Subject: [PATCH 64/90] perf stat: Use 'evsel->evlist' instead of 'evsel_list' in collect_all_aliases() Use 'evsel->evlist' instead of 'evsel_list' in collect_all_aliases(), to get rid of the global 'evsel_list' variable dependency. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 46181ac492c5..11741f3a8342 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1154,10 +1154,11 @@ static void collect_all_aliases(struct perf_evsel *counter, bool first), void *data) { + struct perf_evlist *evlist = counter->evlist; struct perf_evsel *alias; - alias = list_prepare_entry(counter, &(evsel_list->entries), node); - list_for_each_entry_continue (alias, &evsel_list->entries, node) { + alias = list_prepare_entry(counter, &(evlist->entries), node); + list_for_each_entry_continue (alias, &evlist->entries, node) { if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || alias->scale != counter->scale || alias->cgrp != counter->cgrp || From d97ae04b3d5263be242a6a9f9eef6422fc86326a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:36 +0200 Subject: [PATCH 65/90] perf stat: Move 'run_count' to 'struct perf_stat_config' Move the static 'run_count' variable to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-28-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 39 ++++++++++++++++++++------------------- tools/perf/util/stat.h | 1 + 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 11741f3a8342..1276596840c9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -147,7 +147,6 @@ typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); #define METRIC_ONLY_LEN 20 -static int run_count = 1; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -200,6 +199,7 @@ static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, .scale = true, .unit_width = 4, /* strlen("unit") */ + .run_count = 1, }; static bool is_duration_time(struct perf_evsel *evsel) @@ -686,7 +686,7 @@ static void print_noise(struct perf_stat_config *config, { struct perf_stat_evsel *ps; - if (run_count == 1) + if (config->run_count == 1) return; ps = evsel->stats; @@ -1620,8 +1620,8 @@ static void print_header(struct perf_stat_config *config, fprintf(output, "thread id \'%s", _target->tid); fprintf(output, "\'"); - if (run_count > 1) - fprintf(output, " (%d runs)", run_count); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); fprintf(output, ":\n\n"); } } @@ -1634,7 +1634,8 @@ static int get_precision(double num) return lround(ceil(-log10(num))); } -static void print_table(FILE *output, int precision, double avg) +static void print_table(struct perf_stat_config *config, + FILE *output, int precision, double avg) { char tmp[64]; int idx, indent = 0; @@ -1645,7 +1646,7 @@ static void print_table(FILE *output, int precision, double avg) fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); - for (idx = 0; idx < 
run_count; idx++) { + for (idx = 0; idx < config->run_count; idx++) { double run = (double) walltime_run[idx] / NSEC_PER_SEC; int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); @@ -1675,7 +1676,7 @@ static void print_footer(struct perf_stat_config *config) if (!null_run) fprintf(output, "\n"); - if (run_count == 1) { + if (config->run_count == 1) { fprintf(output, " %17.9f seconds time elapsed", avg); if (ru_display) { @@ -1695,7 +1696,7 @@ static void print_footer(struct perf_stat_config *config) int precision = get_precision(sd) + 2; if (walltime_run_table) - print_table(output, precision, avg); + print_table(config, output, precision, avg); fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); @@ -1886,7 +1887,7 @@ static const struct option stat_options[] = { OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_INTEGER('r', "repeat", &run_count, + OPT_INTEGER('r', "repeat", &stat_config.run_count, "repeat command and print average + stddev (max: 100, forever: 0)"), OPT_BOOLEAN(0, "table", &walltime_run_table, "display details about each run (only with -r option)"), @@ -2484,7 +2485,7 @@ static int __cmd_record(int argc, const char **argv) if (output_name) data->file.path = output_name; - if (run_count != 1 || forever) { + if (stat_config.run_count != 1 || forever) { pr_err("Cannot use -r option with perf stat record.\n"); return -1; } @@ -2792,12 +2793,12 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (stat_config.metric_only && run_count > 1) { + if (stat_config.metric_only && stat_config.run_count > 1) { fprintf(stderr, "--metric-only is not supported with -r\n"); goto out; } - if (walltime_run_table && run_count <= 1) { + if (walltime_run_table && stat_config.run_count <= 1) { fprintf(stderr, "--table is only supported with -r\n"); parse_options_usage(stat_usage, stat_options, "r", 1); 
parse_options_usage(NULL, stat_options, "table", 0); @@ -2853,20 +2854,20 @@ int cmd_stat(int argc, const char **argv) * Display user/system times only for single * run and when there's specified tracee. */ - if ((run_count == 1) && target__none(&target)) + if ((stat_config.run_count == 1) && target__none(&target)) ru_display = true; - if (run_count < 0) { + if (stat_config.run_count < 0) { pr_err("Run count must be a positive number\n"); parse_options_usage(stat_usage, stat_options, "r", 1); goto out; - } else if (run_count == 0) { + } else if (stat_config.run_count == 0) { forever = true; - run_count = 1; + stat_config.run_count = 1; } if (walltime_run_table) { - walltime_run = zalloc(run_count * sizeof(walltime_run[0])); + walltime_run = zalloc(stat_config.run_count * sizeof(walltime_run[0])); if (!walltime_run) { pr_err("failed to setup -r option"); goto out; @@ -2994,8 +2995,8 @@ int cmd_stat(int argc, const char **argv) signal(SIGABRT, skip_signal); status = 0; - for (run_idx = 0; forever || run_idx < run_count; run_idx++) { - if (run_count != 1 && verbose > 0) + for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { + if (stat_config.run_count != 1 && verbose > 0) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2dc66e0ba4b8..351a36f5f5b0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -100,6 +100,7 @@ struct perf_stat_config { unsigned int initial_delay; unsigned int unit_width; int times; + int run_count; struct runtime_stat *stats; int stats_num; const char *csv_sep; From ee1760e2cf623bc6834fc3e4e89c6ad030a25dfd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:37 +0200 Subject: [PATCH 66/90] perf stat: Move 'metric_only_len' to 'struct perf_stat_config' Move the static 'metric_only_len' variable to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-29-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 +++++----- tools/perf/util/stat.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1276596840c9..e96cef526ef0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -176,7 +176,6 @@ static int print_mixed_hw_group_error; static u64 *walltime_run; static bool ru_display = false; static struct rusage ru_data; -static unsigned int metric_only_len = METRIC_ONLY_LEN; struct perf_stat { bool record; @@ -200,6 +199,7 @@ static struct perf_stat_config stat_config = { .scale = true, .unit_width = 4, /* strlen("unit") */ .run_count = 1, + .metric_only_len = METRIC_ONLY_LEN, }; static bool is_duration_time(struct perf_evsel *evsel) @@ -854,14 +854,14 @@ static const char *fixunit(char *buf, struct perf_evsel *evsel, return unit; } -static void print_metric_only(struct perf_stat_config *config __maybe_unused, +static void print_metric_only(struct perf_stat_config *config, void *ctx, const char *color, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[1024], str[1024]; - unsigned mlen = metric_only_len; + unsigned mlen = config->metric_only_len; if (!valid_only_metric(unit)) return; @@ -902,7 +902,7 @@ static void new_line_metric(struct perf_stat_config *config __maybe_unused, { } -static void print_metric_header(struct perf_stat_config *config __maybe_unused, +static void print_metric_header(struct perf_stat_config *config, void *ctx, const char *color __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) @@ -916,7 +916,7 @@ static void print_metric_header(struct perf_stat_config *config __maybe_unused, if (config->csv_output) 
fprintf(os->fh, "%s%s", unit, config->csv_sep); else - fprintf(os->fh, "%*s ", metric_only_len, unit); + fprintf(os->fh, "%*s ", config->metric_only_len, unit); } static int first_shadow_cpu(struct perf_evsel *evsel, int id) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 351a36f5f5b0..e70e6d93ee1b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -99,6 +99,7 @@ struct perf_stat_config { unsigned int timeout; unsigned int initial_delay; unsigned int unit_width; + unsigned int metric_only_len; int times; int run_count; struct runtime_stat *stats; From ae2d7da554f0dda837b6639d247665f6df90e41f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:38 +0200 Subject: [PATCH 67/90] perf stat: Pass 'struct perf_stat_config' to first_shadow_cpu() Pass a 'struct perf_stat_config' arg to first_shadow_cpu(), so that the function does not depend on the 'perf stat' command object local 'stat_config' variable and can then be moved out. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-30-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e96cef526ef0..f5ac6545af34 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -919,17 +919,18 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu(struct perf_evsel *evsel, int id) +static int first_shadow_cpu(struct perf_stat_config *config, + struct perf_evsel *evsel, int id) { int i; if (!aggr_get_id) return 0; - if (stat_config.aggr_mode == AGGR_NONE) + if (config->aggr_mode == AGGR_NONE) return id; - if (stat_config.aggr_mode == AGGR_GLOBAL) + if (config->aggr_mode == 
AGGR_GLOBAL) return 0; for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { @@ -1090,7 +1091,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(counter, id), + first_shadow_cpu(config, counter, id), &out, &metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -1098,7 +1099,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, } } -static void aggr_update_shadow(void) +static void aggr_update_shadow(struct perf_stat_config *config) { int cpu, s2, id, s; u64 val; @@ -1115,7 +1116,7 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id), + first_shadow_cpu(config, counter, id), &rt_stat); } } @@ -1237,7 +1238,7 @@ static void print_aggr(struct perf_stat_config *config, if (!(aggr_map || aggr_get_id)) return; - aggr_update_shadow(); + aggr_update_shadow(config); /* * With metric_only everything is on a single line. From 77e0faf8552c9329d58a4de460928e03252647c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:39 +0200 Subject: [PATCH 68/90] perf stat: Pass 'evlist' to aggr_update_shadow() Pass a 'evlist' argument to aggr_update_shadow(), to get rid of the global 'evsel_list' variable dependency. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-31-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5ac6545af34..2711d8f6e4b8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1099,7 +1099,8 @@ static void printout(struct perf_stat_config *config, int id, int nr, } } -static void aggr_update_shadow(struct perf_stat_config *config) +static void aggr_update_shadow(struct perf_stat_config *config, + struct perf_evlist *evlist) { int cpu, s2, id, s; u64 val; @@ -1107,7 +1108,7 @@ static void aggr_update_shadow(struct perf_stat_config *config) for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; - evlist__for_each_entry(evsel_list, counter) { + evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { s2 = aggr_get_id(evsel_list->cpus, cpu); @@ -1238,7 +1239,7 @@ static void print_aggr(struct perf_stat_config *config, if (!(aggr_map || aggr_get_id)) return; - aggr_update_shadow(config); + aggr_update_shadow(config, evlist); /* * With metric_only everything is on a single line. From 26893a6018f88779c0aded934e99e0ebb6859a58 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:40 +0200 Subject: [PATCH 69/90] perf stat: Add 'walltime_nsecs_stats' pointer to 'struct perf_stat_config' Add 'walltime_nsecs_stats' pointer to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. It's initialized to point to stat's walltime_nsecs_stats value. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-32-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 15 ++++++++------- tools/perf/util/stat.h | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2711d8f6e4b8..381549a989b4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -195,11 +195,12 @@ static struct perf_stat perf_stat; static volatile int done = 0; static struct perf_stat_config stat_config = { - .aggr_mode = AGGR_GLOBAL, - .scale = true, - .unit_width = 4, /* strlen("unit") */ - .run_count = 1, - .metric_only_len = METRIC_ONLY_LEN, + .aggr_mode = AGGR_GLOBAL, + .scale = true, + .unit_width = 4, /* strlen("unit") */ + .run_count = 1, + .metric_only_len = METRIC_ONLY_LEN, + .walltime_nsecs_stats = &walltime_nsecs_stats, }; static bool is_duration_time(struct perf_evsel *evsel) @@ -1671,7 +1672,7 @@ static double timeval2double(struct timeval *t) static void print_footer(struct perf_stat_config *config) { - double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; int n; @@ -1690,7 +1691,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.9f seconds sys\n", ru_stime); } } else { - double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; + double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; /* * Display at most 2 more significant * digits than the stddev inaccuracy. 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index e70e6d93ee1b..35550e3efd81 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -105,6 +105,7 @@ struct perf_stat_config { struct runtime_stat *stats; int stats_num; const char *csv_sep; + struct stats *walltime_nsecs_stats; }; void update_stats(struct stats *stats, u64 val); From aea0dca1629c72128cf7174d1d3f3807f7297005 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:41 +0200 Subject: [PATCH 70/90] perf stat: Move 'null_run' to 'struct perf_stat_config' Move the static 'null_run' variable to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-33-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 +++---- tools/perf/util/stat.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 381549a989b4..844c755564e3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -148,7 +148,6 @@ typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); #define METRIC_ONLY_LEN 20 static volatile pid_t child_pid = -1; -static bool null_run = false; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; @@ -1676,7 +1675,7 @@ static void print_footer(struct perf_stat_config *config) FILE *output = config->output; int n; - if (!null_run) + if (!config->null_run) fprintf(output, "\n"); if (config->run_count == 1) { @@ -1894,7 +1893,7 @@ static const struct option stat_options[] = { "repeat command and print average + stddev (max: 100, forever: 0)"), OPT_BOOLEAN(0, "table", &walltime_run_table, "display details about each run (only with -r option)"), - OPT_BOOLEAN('n', "null", &null_run, + OPT_BOOLEAN('n', 
"null", &stat_config.null_run, "null run - dont start any counters"), OPT_INCR('d', "detailed", &detailed_run, "detailed run - start a lot of events"), @@ -2309,7 +2308,7 @@ static int add_default_attributes(void) struct parse_events_error errinfo; /* Set attrs if no event is selected and !null_run: */ - if (null_run) + if (stat_config.null_run) return 0; if (transaction_run) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 35550e3efd81..c198926c0e27 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -94,6 +94,7 @@ struct perf_stat_config { bool csv_output; bool interval_clear; bool metric_only; + bool null_run; FILE *output; unsigned int interval; unsigned int timeout; From 31084123c1962dd4235655c1839a50c9cf6c709b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:42 +0200 Subject: [PATCH 71/90] perf stat: Move 'print_free_counters_hint' to 'struct perf_stat_config' Move the 'print_free_counters_hint' variable to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++--- tools/perf/util/stat.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 844c755564e3..8748bb5e5403 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -170,7 +170,6 @@ static bool append_file; static bool interval_count; static const char *output_name; static int output_fd; -static int print_free_counters_hint; static int print_mixed_hw_group_error; static u64 *walltime_run; static bool ru_display = false; @@ -1051,7 +1050,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, config->csv_sep); if (counter->supported) { - print_free_counters_hint = 1; + config->print_free_counters_hint = 1; if (is_mixed_hw_group(counter)) print_mixed_hw_group_error = 1; } @@ -1707,7 +1706,7 @@ static void print_footer(struct perf_stat_config *config) } fprintf(output, "\n\n"); - if (print_free_counters_hint && + if (config->print_free_counters_hint && sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && n > 0) fprintf(output, diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c198926c0e27..8cad17363e90 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -103,6 +103,7 @@ struct perf_stat_config { unsigned int metric_only_len; int times; int run_count; + int print_free_counters_hint; struct runtime_stat *stats; int stats_num; const char *csv_sep; From 3b3cd9a41c2b5d97309099ba6eae69b2d3177226 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:43 +0200 Subject: [PATCH 72/90] perf stat: Move 'print_mixed_hw_group_error' to 'struct perf_stat_config' Move the 'print_mixed_hw_group_error' global variable to 'struct perf_stat_config', so that it can be passed 
around and used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-35-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++--- tools/perf/util/stat.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8748bb5e5403..f61c2dc8c7e8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -170,7 +170,6 @@ static bool append_file; static bool interval_count; static const char *output_name; static int output_fd; -static int print_mixed_hw_group_error; static u64 *walltime_run; static bool ru_display = false; static struct rusage ru_data; @@ -1052,7 +1051,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, if (counter->supported) { config->print_free_counters_hint = 1; if (is_mixed_hw_group(counter)) - print_mixed_hw_group_error = 1; + config->print_mixed_hw_group_error = 1; } fprintf(config->output, "%-*s%s", @@ -1715,7 +1714,7 @@ static void print_footer(struct perf_stat_config *config) " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - if (print_mixed_hw_group_error) + if (config->print_mixed_hw_group_error) fprintf(output, "The events in group usually have to be from " "the same PMU. 
Try reorganizing the group.\n"); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 8cad17363e90..6fb4dac26ccc 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -104,6 +104,7 @@ struct perf_stat_config { int times; int run_count; int print_free_counters_hint; + int print_mixed_hw_group_error; struct runtime_stat *stats; int stats_num; const char *csv_sep; From 8897a8916efb29fa8bbe9e5f6e5d56362aedf64e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:44 +0200 Subject: [PATCH 73/90] perf stat: Move ru_* data to 'struct perf_stat_config' Move the 'ru_*' global variables to 'struct perf_stat_config', so that it can be passed around and used outside the 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-36-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 12 +++++------- tools/perf/util/stat.h | 6 ++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f61c2dc8c7e8..942ebfd8ef2e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -171,8 +171,6 @@ static bool interval_count; static const char *output_name; static int output_fd; static u64 *walltime_run; -static bool ru_display = false; -static struct rusage ru_data; struct perf_stat { bool record; @@ -583,7 +581,7 @@ try_again: break; } } - wait4(child_pid, &status, 0, &ru_data); + wait4(child_pid, &status, 0, &stat_config.ru_data); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); @@ -1679,9 +1677,9 @@ static void print_footer(struct perf_stat_config *config) if (config->run_count == 1) { fprintf(output, " %17.9f seconds time elapsed", avg); - if (ru_display) { - double ru_utime = timeval2double(&ru_data.ru_utime); - double ru_stime = 
timeval2double(&ru_data.ru_stime); + if (config->ru_display) { + double ru_utime = timeval2double(&config->ru_data.ru_utime); + double ru_stime = timeval2double(&config->ru_data.ru_stime); fprintf(output, "\n\n"); fprintf(output, " %17.9f seconds user\n", ru_utime); @@ -2855,7 +2853,7 @@ int cmd_stat(int argc, const char **argv) * run and when there's specified tracee. */ if ((stat_config.run_count == 1) && target__none(&target)) - ru_display = true; + stat_config.ru_display = true; if (stat_config.run_count < 0) { pr_err("Run count must be a positive number\n"); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 6fb4dac26ccc..1fd0b7e196c6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -4,6 +4,10 @@ #include #include +#include +#include +#include +#include #include "xyarray.h" #include "rblist.h" #include "perf.h" @@ -95,6 +99,7 @@ struct perf_stat_config { bool interval_clear; bool metric_only; bool null_run; + bool ru_display; FILE *output; unsigned int interval; unsigned int timeout; @@ -109,6 +114,7 @@ struct perf_stat_config { int stats_num; const char *csv_sep; struct stats *walltime_nsecs_stats; + struct rusage ru_data; }; void update_stats(struct stats *stats, u64 val); From 6f6b6594b5f380b0a972b66b275caa6c54bb1fea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:45 +0200 Subject: [PATCH 74/90] perf stat: Move *_aggr_* data to 'struct perf_stat_config' Move the *_aggr_* global variables to 'struct perf_stat_config', so that it can be passed around and used outside 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-37-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 112 ++++++++++++++++++++------------------ tools/perf/util/stat.h | 6 ++ 2 files changed, 64 insertions(+), 54 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 942ebfd8ef2e..2f606f76b66a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -143,8 +143,6 @@ static struct target target = { .uid = UINT_MAX, }; -typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); - #define METRIC_ONLY_LEN 20 static volatile pid_t child_pid = -1; @@ -164,8 +162,6 @@ static bool force_metric_only = false; static bool no_merge = false; static bool walltime_run_table = false; static struct timespec ref_time; -static struct cpu_map *aggr_map; -static aggr_get_id_t aggr_get_id; static bool append_file; static bool interval_count; static const char *output_name; @@ -920,7 +916,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, { int i; - if (!aggr_get_id) + if (!config->aggr_get_id) return 0; if (config->aggr_mode == AGGR_NONE) @@ -932,7 +928,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { int cpu2 = perf_evsel__cpus(evsel)->map[i]; - if (aggr_get_id(evsel_list->cpus, cpu2) == id) + if (config->aggr_get_id(config, evsel_list->cpus, cpu2) == id) return cpu2; } return 0; @@ -1102,12 +1098,12 @@ static void aggr_update_shadow(struct perf_stat_config *config, u64 val; struct perf_evsel *counter; - for (s = 0; s < aggr_map->nr; s++) { - id = aggr_map->map[s]; + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(evsel_list->cpus, cpu); + s2 = 
config->aggr_get_id(config, evsel_list->cpus, cpu); if (s2 != id) continue; val += perf_counts(counter->counts, cpu, 0)->val; @@ -1147,8 +1143,8 @@ static void uniquify_event_name(struct perf_evsel *counter) counter->uniquified_name = true; } -static void collect_all_aliases(struct perf_evsel *counter, - void (*cb)(struct perf_evsel *counter, void *data, +static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, bool first), void *data) { @@ -1164,22 +1160,22 @@ static void collect_all_aliases(struct perf_evsel *counter, perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) break; alias->merged_stat = true; - cb(alias, data, false); + cb(config, alias, data, false); } } -static bool collect_data(struct perf_evsel *counter, - void (*cb)(struct perf_evsel *counter, void *data, +static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, bool first), void *data) { if (counter->merged_stat) return false; - cb(counter, data, true); + cb(config, counter, data, true); if (no_merge) uniquify_event_name(counter); else if (counter->auto_merge_stats) - collect_all_aliases(counter, cb, data); + collect_all_aliases(config, counter, cb, data); return true; } @@ -1190,7 +1186,8 @@ struct aggr_data { int cpu; }; -static void aggr_cb(struct perf_evsel *counter, void *data, bool first) +static void aggr_cb(struct perf_stat_config *config, + struct perf_evsel *counter, void *data, bool first) { struct aggr_data *ad = data; int cpu, s2; @@ -1198,7 +1195,7 @@ static void aggr_cb(struct perf_evsel *counter, void *data, bool first) for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); + s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu); if (s2 != 
ad->id) continue; if (first) @@ -1232,7 +1229,7 @@ static void print_aggr(struct perf_stat_config *config, u64 ena, run, val; bool first; - if (!(aggr_map || aggr_get_id)) + if (!(config->aggr_map || config->aggr_get_id)) return; aggr_update_shadow(config, evlist); @@ -1241,12 +1238,12 @@ static void print_aggr(struct perf_stat_config *config, * With metric_only everything is on a single line. * Without each counter has its own line. */ - for (s = 0; s < aggr_map->nr; s++) { + for (s = 0; s < config->aggr_map->nr; s++) { struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - ad.id = id = aggr_map->map[s]; + ad.id = id = config->aggr_map->map[s]; first = true; evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) @@ -1254,7 +1251,7 @@ static void print_aggr(struct perf_stat_config *config, ad.val = ad.ena = ad.run = 0; ad.nr = 0; - if (!collect_data(counter, aggr_cb, &ad)) + if (!collect_data(config, counter, aggr_cb, &ad)) continue; nr = ad.nr; ena = ad.ena; @@ -1370,7 +1367,8 @@ struct caggr_data { double avg, avg_enabled, avg_running; }; -static void counter_aggr_cb(struct perf_evsel *counter, void *data, +static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, bool first __maybe_unused) { struct caggr_data *cd = data; @@ -1393,7 +1391,7 @@ static void print_counter_aggr(struct perf_stat_config *config, double uval; struct caggr_data cd = { .avg = 0.0 }; - if (!collect_data(counter, counter_aggr_cb, &cd)) + if (!collect_data(config, counter, counter_aggr_cb, &cd)) return; if (prefix && !metric_only) @@ -1406,7 +1404,8 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "\n"); } -static void counter_cb(struct perf_evsel *counter, void *data, +static void counter_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, bool first __maybe_unused) { struct aggr_data *ad = data; @@ -1431,7 +1430,7 
@@ static void print_counter(struct perf_stat_config *config, for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; - if (!collect_data(counter, counter_cb, &ad)) + if (!collect_data(config, counter, counter_cb, &ad)) return; val = ad.val; ena = ad.ena; @@ -1944,12 +1943,14 @@ static const struct option stat_options[] = { OPT_END() }; -static int perf_stat__get_socket(struct cpu_map *map, int cpu) +static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } -static int perf_stat__get_core(struct cpu_map *map, int cpu) +static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } @@ -1966,9 +1967,8 @@ static int cpu_map__get_max(struct cpu_map *map) return max; } -static struct cpu_map *cpus_aggr_map; - -static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) +static int perf_stat__get_aggr(struct perf_stat_config *config, + aggr_get_id_t get_id, struct cpu_map *map, int idx) { int cpu; @@ -1977,20 +1977,22 @@ static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int id cpu = map->map[idx]; - if (cpus_aggr_map->map[cpu] == -1) - cpus_aggr_map->map[cpu] = get_id(map, idx); + if (config->cpus_aggr_map->map[cpu] == -1) + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - return cpus_aggr_map->map[cpu]; + return config->cpus_aggr_map->map[cpu]; } -static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) +static int perf_stat__get_socket_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) { - return perf_stat__get_aggr(perf_stat__get_socket, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } -static int perf_stat__get_core_cached(struct cpu_map *map, int idx) +static int perf_stat__get_core_cached(struct 
perf_stat_config *config, + struct cpu_map *map, int idx) { - return perf_stat__get_aggr(perf_stat__get_core, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } static int perf_stat_init_aggr_mode(void) @@ -1999,18 +2001,18 @@ static int perf_stat_init_aggr_mode(void) switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { + if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = perf_stat__get_socket_cached; + stat_config.aggr_get_id = perf_stat__get_socket_cached; break; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { + if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = perf_stat__get_core_cached; + stat_config.aggr_get_id = perf_stat__get_core_cached; break; case AGGR_NONE: case AGGR_GLOBAL: @@ -2026,16 +2028,16 @@ static int perf_stat_init_aggr_mode(void) * the aggregation translate cpumap. */ nr = cpu_map__get_max(evsel_list->cpus); - cpus_aggr_map = cpu_map__empty_new(nr + 1); - return cpus_aggr_map ? 0 : -ENOMEM; + stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1); + return stat_config.cpus_aggr_map ? 
0 : -ENOMEM; } static void perf_stat__exit_aggr_mode(void) { - cpu_map__put(aggr_map); - cpu_map__put(cpus_aggr_map); - aggr_map = NULL; - cpus_aggr_map = NULL; + cpu_map__put(stat_config.aggr_map); + cpu_map__put(stat_config.cpus_aggr_map); + stat_config.aggr_map = NULL; + stat_config.cpus_aggr_map = NULL; } static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) @@ -2093,12 +2095,14 @@ static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } -static int perf_stat__get_socket_file(struct cpu_map *map, int idx) +static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_core_file(struct cpu_map *map, int idx) +static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } @@ -2109,18 +2113,18 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { + if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = perf_stat__get_socket_file; + stat_config.aggr_get_id = perf_stat__get_socket_file; break; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { + if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = perf_stat__get_core_file; + stat_config.aggr_get_id = perf_stat__get_core_file; break; case AGGR_NONE: case AGGR_GLOBAL: diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1fd0b7e196c6..be202b066e62 100644 --- 
a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,9 @@ struct runtime_stat { struct rblist value_list; }; +typedef int (*aggr_get_id_t)(struct perf_stat_config *config, + struct cpu_map *m, int cpu); + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; @@ -115,6 +118,9 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; + struct cpu_map *aggr_map; + aggr_get_id_t aggr_get_id; + struct cpu_map *cpus_aggr_map; }; void update_stats(struct stats *stats, u64 val); From a138af663500a07742bb27793302625135a0f6c4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:46 +0200 Subject: [PATCH 75/90] perf stat: Do not use the global 'evsel_list' in print functions Get rid of the 'evsel_list' global variable dependency, here we can use the 'evlist' pointer from the evsel. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-38-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2f606f76b66a..445673f688de 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -914,6 +914,7 @@ static void print_metric_header(struct perf_stat_config *config, static int first_shadow_cpu(struct perf_stat_config *config, struct perf_evsel *evsel, int id) { + struct perf_evlist *evlist = evsel->evlist; int i; if (!config->aggr_get_id) @@ -928,7 +929,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { int cpu2 = perf_evsel__cpus(evsel)->map[i]; - if (config->aggr_get_id(config, evsel_list->cpus, cpu2) == id) + if (config->aggr_get_id(config, evlist->cpus, cpu2) == id) return cpu2; } return 0; @@ -1103,7 +1104,7 @@ static void
aggr_update_shadow(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evsel_list->cpus, cpu); + s2 = config->aggr_get_id(config, evlist->cpus, cpu); if (s2 != id) continue; val += perf_counts(counter->counts, cpu, 0)->val; From 34ff0866d46ae206de884f54e7235f57096e5588 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:47 +0200 Subject: [PATCH 76/90] perf stat: Move 'big_num' data to 'struct perf_stat_config' Move the static variable 'big_num' to 'struct perf_stat_config', so that it can be passed around and used outside 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-39-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++---- tools/perf/util/stat.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 445673f688de..8ca85017a973 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -151,7 +151,6 @@ static bool transaction_run; static bool topdown_run = false; static bool smi_cost = false; static bool smi_reset = false; -static bool big_num = true; static int big_num_opt = -1; static bool group = false; static const char *pre_cmd = NULL; @@ -192,6 +191,7 @@ static struct perf_stat_config stat_config = { .run_count = 1, .metric_only_len = METRIC_ONLY_LEN, .walltime_nsecs_stats = &walltime_nsecs_stats, + .big_num = true, }; static bool is_duration_time(struct perf_evsel *evsel) @@ -945,7 +945,7 @@ static void abs_printout(struct perf_stat_config *config, if (config->csv_output) { fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; } else { - if (big_num) + if (config->big_num) fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; else fmt = floor(sc) != sc ? 
"%18.2f%s" : "%18.0f%s"; @@ -2847,9 +2847,9 @@ int cmd_stat(int argc, const char **argv) parse_options_usage(NULL, stat_options, "x", 1); goto out; } else /* Nope, so disable big number formatting */ - big_num = false; + stat_config.big_num = false; } else if (big_num_opt == 0) /* User passed --no-big-num */ - big_num = false; + stat_config.big_num = false; setup_system_wide(argc); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index be202b066e62..843672214cca 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -103,6 +103,7 @@ struct perf_stat_config { bool metric_only; bool null_run; bool ru_display; + bool big_num; FILE *output; unsigned int interval; unsigned int timeout; From fdee335b00b0807e8d65b8e3d81214556f07bed3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:48 +0200 Subject: [PATCH 77/90] perf stat: Move 'no_merge' data to 'struct perf_stat_config' Move the static variable 'no_merge' to 'struct perf_stat_config', so that it can be passed around and used outside 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-40-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++--- tools/perf/util/stat.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8ca85017a973..c4df076f1e54 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -158,7 +158,6 @@ static const char *post_cmd = NULL; static bool sync_run = false; static bool forever = false; static bool force_metric_only = false; -static bool no_merge = false; static bool walltime_run_table = false; static struct timespec ref_time; static bool append_file; @@ -1173,7 +1172,7 @@ static bool collect_data(struct perf_stat_config *config, struct perf_evsel *cou if (counter->merged_stat) return false; cb(config, counter, data, true); - if (no_merge) + if (config->no_merge) uniquify_event_name(counter); else if (counter->auto_merge_stats) collect_all_aliases(config, counter, cb, data); @@ -1902,7 +1901,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), - OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), + OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 843672214cca..0758107fe56f 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -104,6 +104,7 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; + bool no_merge; FILE *output; unsigned int interval; unsigned int timeout; From 
be54d59325314be9d4d53852cbfbeeaebc3b9239 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:49 +0200 Subject: [PATCH 78/90] perf stat: Propagate 'struct target' arg to sort_aggr_thread() Propagate the 'struct target' arg to sort_aggr_thread() so that the function does not depend on the 'perf stat' command object local variable 'target' and can be moved out. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-41-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c4df076f1e54..486b0cf7818d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1284,7 +1284,8 @@ static int cmp_val(const void *a, const void *b) static struct perf_aggr_thread_value *sort_aggr_thread( struct perf_evsel *counter, int nthreads, int ncpus, - int *ret) + int *ret, + struct target *_target) { int cpu, thread, i = 0; double uval; @@ -1309,7 +1310,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( * Skip value 0 when enabling --per-thread globally, * otherwise too many 0 output. 
*/ - if (uval == 0.0 && target__has_per_thread(&target)) + if (uval == 0.0 && target__has_per_thread(_target)) continue; buf[i].counter = counter; @@ -1330,6 +1331,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( } static void print_aggr_thread(struct perf_stat_config *config, + struct target *_target, struct perf_evsel *counter, char *prefix) { FILE *output = config->output; @@ -1338,7 +1340,7 @@ static void print_aggr_thread(struct perf_stat_config *config, int thread, sorted_threads, id; struct perf_aggr_thread_value *buf; - buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); if (!buf) { perror("cannot sort aggr thread"); return; @@ -1754,7 +1756,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, evlist__for_each_entry(evlist, counter) { if (is_duration_time(counter)) continue; - print_aggr_thread(config, counter, prefix); + print_aggr_thread(config, _target, counter, prefix); } break; case AGGR_GLOBAL: From 54ac0b1bd25cbdeda226b32a0459e09de46157b3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:50 +0200 Subject: [PATCH 79/90] perf stat: Move 'walltime_*' data to 'struct perf_stat_config' Move the static variables 'walltime_*' to 'struct perf_stat_config', so that it can be passed around and used outside 'perf stat' command. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-42-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 ++++++++++------------ tools/perf/util/stat.h | 2 ++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 486b0cf7818d..8a4979748cbb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -158,13 +158,11 @@ static const char *post_cmd = NULL; static bool sync_run = false; static bool forever = false; static bool force_metric_only = false; -static bool walltime_run_table = false; static struct timespec ref_time; static bool append_file; static bool interval_count; static const char *output_name; static int output_fd; -static u64 *walltime_run; struct perf_stat { bool record; @@ -604,8 +602,8 @@ try_again: t1 = rdclock(); - if (walltime_run_table) - walltime_run[run_idx] = t1 - t0; + if (stat_config.walltime_run_table) + stat_config.walltime_run[run_idx] = t1 - t0; update_stats(&walltime_nsecs_stats, t1 - t0); @@ -1646,7 +1644,7 @@ static void print_table(struct perf_stat_config *config, fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); for (idx = 0; idx < config->run_count; idx++) { - double run = (double) walltime_run[idx] / NSEC_PER_SEC; + double run = (double) config->walltime_run[idx] / NSEC_PER_SEC; int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); fprintf(output, " %17.*f (%+.*f) ", @@ -1694,7 +1692,7 @@ static void print_footer(struct perf_stat_config *config) */ int precision = get_precision(sd) + 2; - if (walltime_run_table) + if (config->walltime_run_table) print_table(config, output, precision, avg); fprintf(output, " %17.*f +- %.*f seconds time elapsed", @@ -1888,7 +1886,7 @@ static const struct option stat_options[] = { "be more verbose (show counter open errors, etc)"), 
OPT_INTEGER('r', "repeat", &stat_config.run_count, "repeat command and print average + stddev (max: 100, forever: 0)"), - OPT_BOOLEAN(0, "table", &walltime_run_table, + OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table, "display details about each run (only with -r option)"), OPT_BOOLEAN('n', "null", &stat_config.null_run, "null run - dont start any counters"), @@ -2802,7 +2800,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (walltime_run_table && stat_config.run_count <= 1) { + if (stat_config.walltime_run_table && stat_config.run_count <= 1) { fprintf(stderr, "--table is only supported with -r\n"); parse_options_usage(stat_usage, stat_options, "r", 1); parse_options_usage(NULL, stat_options, "table", 0); @@ -2870,9 +2868,9 @@ int cmd_stat(int argc, const char **argv) stat_config.run_count = 1; } - if (walltime_run_table) { - walltime_run = zalloc(stat_config.run_count * sizeof(walltime_run[0])); - if (!walltime_run) { + if (stat_config.walltime_run_table) { + stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0])); + if (!stat_config.walltime_run) { pr_err("failed to setup -r option"); goto out; } @@ -3052,7 +3050,7 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: - free(walltime_run); + free(stat_config.walltime_run); if (smi_cost && smi_reset) sysfs__write_int(FREEZE_ON_SMI_PATH, 0); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0758107fe56f..5193cbf6e4c6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -105,6 +105,7 @@ struct perf_stat_config { bool ru_display; bool big_num; bool no_merge; + bool walltime_run_table; FILE *output; unsigned int interval; unsigned int timeout; @@ -123,6 +124,7 @@ struct perf_stat_config { struct cpu_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_map *cpus_aggr_map; + u64 *walltime_run; }; void update_stats(struct stats *stats, u64 val); From 
d0192fdba09a8901db133fe5a1fcd22d40fcf545 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:51 +0200 Subject: [PATCH 80/90] perf stat: Move 'metric_events' to 'struct perf_stat_config' Move the static variable 'metric_events' to 'struct perf_stat_config', so that it can be passed around and used outside 'perf stat' command. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-43-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ++++------ tools/perf/util/stat.h | 1 + 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8a4979748cbb..f4ddedc8619b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -137,8 +137,6 @@ static const char *smi_cost_attrs = { static struct perf_evlist *evsel_list; -static struct rblist metric_events; - static struct target target = { .uid = UINT_MAX, }; @@ -1082,7 +1080,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, perf_stat__print_shadow_stats(config, counter, uval, first_shadow_cpu(config, counter, id), - &out, &metric_events, st); + &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); print_running(config, run, ena); @@ -1533,7 +1531,7 @@ static void print_metric_headers(struct perf_stat_config *config, perf_stat__print_shadow_stats(config, counter, 0, 0, &out, - &metric_events, + &config->metric_events, &rt_stat); } fputc('\n', config->output); @@ -1860,7 +1858,7 @@ static int parse_metric_groups(const struct option *opt, const char *str, int unset __maybe_unused) { - return metricgroup__parse_groups(opt, str, &metric_events); + return metricgroup__parse_groups(opt, str, &stat_config.metric_events); } static const struct option stat_options[] = { @@ -2321,7 +2319,7 @@ static int 
add_default_attributes(void) struct option opt = { .value = &evsel_list }; return metricgroup__parse_groups(&opt, "transaction", - &metric_events); + &stat_config.metric_events); } if (pmu_have_event("cpu", "cycles-ct") && diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5193cbf6e4c6..8d3354e21e19 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -125,6 +125,7 @@ struct perf_stat_config { aggr_get_id_t aggr_get_id; struct cpu_map *cpus_aggr_map; u64 *walltime_run; + struct rblist metric_events; }; void update_stats(struct stats *stats, u64 val); From 088519f318be3a41d1afe8d628c4a1eb5a50b4c0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Aug 2018 08:32:52 +0200 Subject: [PATCH 81/90] perf stat: Move the display functions to stat-display.c Move perf_evlist__print_counters() with all its dependency functions to the stat-display.c object. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180830063252.23729-44-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1150 ------------------------------- tools/perf/util/Build | 1 + tools/perf/util/stat-display.c | 1166 ++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 6 + 4 files changed, 1173 insertions(+), 1150 deletions(-) create mode 100644 tools/perf/util/stat-display.c diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f4ddedc8619b..0b0e3961d511 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -88,8 +88,6 @@ #include "sane_ctype.h" #define DEFAULT_SEPARATOR " " -#define CNTR_NOT_SUPPORTED "" -#define CNTR_NOT_COUNTED "" #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" static void print_counters(struct timespec *ts, int argc, const char **argv); @@ -189,11 +187,6 @@ static struct perf_stat_config stat_config = { .big_num = true, }; -static bool is_duration_time(struct perf_evsel *evsel) 
-{ - return !strcmp(evsel->name, "duration_time"); -} - static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -643,1149 +636,6 @@ static int run_perf_stat(int argc, const char **argv, int run_idx) return ret; } -static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) -{ - if (config->csv_output) { - fprintf(config->output, "%s%" PRIu64 "%s%.2f", - config->csv_sep, - run, - config->csv_sep, - ena ? 100.0 * run / ena : 100.0); - } else if (run != ena) { - fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); - } -} - -static void print_noise_pct(struct perf_stat_config *config, - double total, double avg) -{ - double pct = rel_stddev_stats(total, avg); - - if (config->csv_output) - fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); - else if (pct) - fprintf(config->output, " ( +-%6.2f%% )", pct); -} - -static void print_noise(struct perf_stat_config *config, - struct perf_evsel *evsel, double avg) -{ - struct perf_stat_evsel *ps; - - if (config->run_count == 1) - return; - - ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); -} - -static void aggr_printout(struct perf_stat_config *config, - struct perf_evsel *evsel, int id, int nr) -{ - switch (config->aggr_mode) { - case AGGR_CORE: - fprintf(config->output, "S%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id), - config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id), - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - break; - case AGGR_SOCKET: - fprintf(config->output, "S%*d%s%*d%s", - config->csv_output ? 0 : -5, - id, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - break; - case AGGR_NONE: - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 0 : -4, - perf_evsel__cpus(evsel)->map[id], config->csv_sep); - break; - case AGGR_THREAD: - fprintf(config->output, "%*s-%*d%s", - config->csv_output ? 
0 : 16, - thread_map__comm(evsel->threads, id), - config->csv_output ? 0 : -8, - thread_map__pid(evsel->threads, id), - config->csv_sep); - break; - case AGGR_GLOBAL: - case AGGR_UNSET: - default: - break; - } -} - -struct outstate { - FILE *fh; - bool newline; - const char *prefix; - int nfields; - int id, nr; - struct perf_evsel *evsel; -}; - -#define METRIC_LEN 35 - -static void new_line_std(struct perf_stat_config *config __maybe_unused, - void *ctx) -{ - struct outstate *os = ctx; - - os->newline = true; -} - -static void do_new_line_std(struct perf_stat_config *config, - struct outstate *os) -{ - fputc('\n', os->fh); - fputs(os->prefix, os->fh); - aggr_printout(config, os->evsel, os->id, os->nr); - if (config->aggr_mode == AGGR_NONE) - fprintf(os->fh, " "); - fprintf(os->fh, " "); -} - -static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - int n; - bool newline = os->newline; - - os->newline = false; - - if (unit == NULL || fmt == NULL) { - fprintf(out, "%-*s", METRIC_LEN, ""); - return; - } - - if (newline) - do_new_line_std(config, os); - - n = fprintf(out, " # "); - if (color) - n += color_fprintf(out, color, fmt, val); - else - n += fprintf(out, fmt, val); - fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); -} - -static void new_line_csv(struct perf_stat_config *config, void *ctx) -{ - struct outstate *os = ctx; - int i; - - fputc('\n', os->fh); - if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); - aggr_printout(config, os->evsel, os->id, os->nr); - for (i = 0; i < os->nfields; i++) - fputs(config->csv_sep, os->fh); -} - -static void print_metric_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, - const char *color __maybe_unused, - const char *fmt, const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - - if (unit == 
NULL || fmt == NULL) { - fprintf(out, "%s%s", config->csv_sep, config->csv_sep); - return; - } - snprintf(buf, sizeof(buf), fmt, val); - ends = vals = ltrim(buf); - while (isdigit(*ends) || *ends == '.') - ends++; - *ends = 0; - while (isspace(*unit)) - unit++; - fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); -} - -/* Filter out some columns that don't work well in metrics only mode */ - -static bool valid_only_metric(const char *unit) -{ - if (!unit) - return false; - if (strstr(unit, "/sec") || - strstr(unit, "hz") || - strstr(unit, "Hz") || - strstr(unit, "CPUs utilized")) - return false; - return true; -} - -static const char *fixunit(char *buf, struct perf_evsel *evsel, - const char *unit) -{ - if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), - unit); - return buf; - } - return unit; -} - -static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - char buf[1024], str[1024]; - unsigned mlen = config->metric_only_len; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(buf, os->evsel, unit); - if (mlen < strlen(unit)) - mlen = strlen(unit) + 1; - - if (color) - mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - - color_snprintf(str, sizeof(str), color ?: "", fmt, val); - fprintf(out, "%*s ", mlen, str); -} - -static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, - const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - char tbuf[1024]; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(tbuf, os->evsel, unit); - snprintf(buf, sizeof buf, fmt, val); - ends = vals = ltrim(buf); - while (isdigit(*ends) || *ends == '.') - ends++; - *ends = 0; - fprintf(out, "%s%s", vals, config->csv_sep); 
-} - -static void new_line_metric(struct perf_stat_config *config __maybe_unused, - void *ctx __maybe_unused) -{ -} - -static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, - const char *fmt __maybe_unused, - const char *unit, double val __maybe_unused) -{ - struct outstate *os = ctx; - char tbuf[1024]; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(tbuf, os->evsel, unit); - if (config->csv_output) - fprintf(os->fh, "%s%s", unit, config->csv_sep); - else - fprintf(os->fh, "%*s ", config->metric_only_len, unit); -} - -static int first_shadow_cpu(struct perf_stat_config *config, - struct perf_evsel *evsel, int id) -{ - struct perf_evlist *evlist = evsel->evlist; - int i; - - if (!config->aggr_get_id) - return 0; - - if (config->aggr_mode == AGGR_NONE) - return id; - - if (config->aggr_mode == AGGR_GLOBAL) - return 0; - - for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { - int cpu2 = perf_evsel__cpus(evsel)->map[i]; - - if (config->aggr_get_id(config, evlist->cpus, cpu2) == id) - return cpu2; - } - return 0; -} - -static void abs_printout(struct perf_stat_config *config, - int id, int nr, struct perf_evsel *evsel, double avg) -{ - FILE *output = config->output; - double sc = evsel->scale; - const char *fmt; - - if (config->csv_output) { - fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; - } else { - if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; - } - - aggr_printout(config, evsel, id, nr); - - fprintf(output, fmt, avg, config->csv_sep); - - if (evsel->unit) - fprintf(output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - evsel->unit, config->csv_sep); - - fprintf(output, "%-*s", config->csv_output ? 
0 : 25, perf_evsel__name(evsel)); - - if (evsel->cgrp) - fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); -} - -static bool is_mixed_hw_group(struct perf_evsel *counter) -{ - struct perf_evlist *evlist = counter->evlist; - u32 pmu_type = counter->attr.type; - struct perf_evsel *pos; - - if (counter->nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->attr.type; - continue; - } - if (pmu_type != pos->attr.type) - return true; - } - - return false; -} - -static void printout(struct perf_stat_config *config, int id, int nr, - struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise, - struct runtime_stat *st) -{ - struct perf_stat_output_ctx out; - struct outstate os = { - .fh = config->output, - .prefix = prefix ? prefix : "", - .id = id, - .nr = nr, - .evsel = counter, - }; - print_metric_t pm = print_metric_std; - new_line_t nl; - - if (config->metric_only) { - nl = new_line_metric; - if (config->csv_output) - pm = print_metric_only_csv; - else - pm = print_metric_only; - } else - nl = new_line_std; - - if (config->csv_output && !config->metric_only) { - static int aggr_fields[] = { - [AGGR_GLOBAL] = 0, - [AGGR_THREAD] = 1, - [AGGR_NONE] = 1, - [AGGR_SOCKET] = 2, - [AGGR_CORE] = 2, - }; - - pm = print_metric_csv; - nl = new_line_csv; - os.nfields = 3; - os.nfields += aggr_fields[config->aggr_mode]; - if (counter->cgrp) - os.nfields++; - } - if (run == 0 || ena == 0 || counter->counts->scaled == -1) { - if (config->metric_only) { - pm(config, &os, NULL, "", "", 0); - return; - } - aggr_printout(config, counter, id, nr); - - fprintf(config->output, "%*s%s", - config->csv_output ? 0 : 18, - counter->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - config->csv_sep); - - if (counter->supported) { - config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; - } - - fprintf(config->output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - counter->unit, config->csv_sep); - - fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(config->output, "%s%s", - config->csv_sep, counter->cgrp->name); - - if (!config->csv_output) - pm(config, &os, NULL, NULL, "", 0); - print_noise(config, counter, noise); - print_running(config, run, ena); - if (config->csv_output) - pm(config, &os, NULL, NULL, "", 0); - return; - } - - if (!config->metric_only) - abs_printout(config, id, nr, counter, uval); - - out.print_metric = pm; - out.new_line = nl; - out.ctx = &os; - out.force_header = false; - - if (config->csv_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } - - perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(config, counter, id), - &out, &config->metric_events, st); - if (!config->csv_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } -} - -static void aggr_update_shadow(struct perf_stat_config *config, - struct perf_evlist *evlist) -{ - int cpu, s2, id, s; - u64 val; - struct perf_evsel *counter; - - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - evlist__for_each_entry(evlist, counter) { - val = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evlist->cpus, cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - } - perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(config, counter, id), - &rt_stat); - } - } -} - -static void uniquify_event_name(struct perf_evsel *counter) -{ - char 
*new_name; - char *config; - - if (counter->uniquified_name || - !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, - strlen(counter->pmu_name))) - return; - - config = strchr(counter->name, '/'); - if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (asprintf(&new_name, - "%s [%s]", counter->name, counter->pmu_name) > 0) { - free(counter->name); - counter->name = new_name; - } - } - - counter->uniquified_name = true; -} - -static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter, - void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, - bool first), - void *data) -{ - struct perf_evlist *evlist = counter->evlist; - struct perf_evsel *alias; - - alias = list_prepare_entry(counter, &(evlist->entries), node); - list_for_each_entry_continue (alias, &evlist->entries, node) { - if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || - alias->scale != counter->scale || - alias->cgrp != counter->cgrp || - strcmp(alias->unit, counter->unit) || - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) - break; - alias->merged_stat = true; - cb(config, alias, data, false); - } -} - -static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter, - void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, - bool first), - void *data) -{ - if (counter->merged_stat) - return false; - cb(config, counter, data, true); - if (config->no_merge) - uniquify_event_name(counter); - else if (counter->auto_merge_stats) - collect_all_aliases(config, counter, cb, data); - return true; -} - -struct aggr_data { - u64 ena, run, val; - int id; - int nr; - int cpu; -}; - -static void aggr_cb(struct perf_stat_config *config, - struct perf_evsel *counter, void *data, bool first) -{ - struct aggr_data *ad = data; - int cpu, s2; - - for (cpu = 0; cpu < 
perf_evsel__nr_cpus(counter); cpu++) { - struct perf_counts_values *counts; - - s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu); - if (s2 != ad->id) - continue; - if (first) - ad->nr++; - counts = perf_counts(counter->counts, cpu, 0); - /* - * When any result is bad, make them all to give - * consistent output in interval mode. - */ - if (counts->ena == 0 || counts->run == 0 || - counter->counts->scaled == -1) { - ad->ena = 0; - ad->run = 0; - break; - } - ad->val += counts->val; - ad->ena += counts->ena; - ad->run += counts->run; - } -} - -static void print_aggr(struct perf_stat_config *config, - struct perf_evlist *evlist, - char *prefix) -{ - bool metric_only = config->metric_only; - FILE *output = config->output; - struct perf_evsel *counter; - int s, id, nr; - double uval; - u64 ena, run, val; - bool first; - - if (!(config->aggr_map || config->aggr_get_id)) - return; - - aggr_update_shadow(config, evlist); - - /* - * With metric_only everything is on a single line. - * Without each counter has its own line. 
- */ - for (s = 0; s < config->aggr_map->nr; s++) { - struct aggr_data ad; - if (prefix && metric_only) - fprintf(output, "%s", prefix); - - ad.id = id = config->aggr_map->map[s]; - first = true; - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) - continue; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; - if (first && metric_only) { - first = false; - aggr_printout(config, counter, id, nr); - } - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); - if (!metric_only) - fputc('\n', output); - } - if (metric_only) - fputc('\n', output); - } -} - -static int cmp_val(const void *a, const void *b) -{ - return ((struct perf_aggr_thread_value *)b)->val - - ((struct perf_aggr_thread_value *)a)->val; -} - -static struct perf_aggr_thread_value *sort_aggr_thread( - struct perf_evsel *counter, - int nthreads, int ncpus, - int *ret, - struct target *_target) -{ - int cpu, thread, i = 0; - double uval; - struct perf_aggr_thread_value *buf; - - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); - if (!buf) - return NULL; - - for (thread = 0; thread < nthreads; thread++) { - u64 ena = 0, run = 0, val = 0; - - for (cpu = 0; cpu < ncpus; cpu++) { - val += perf_counts(counter->counts, cpu, thread)->val; - ena += perf_counts(counter->counts, cpu, thread)->ena; - run += perf_counts(counter->counts, cpu, thread)->run; - } - - uval = val * counter->scale; - - /* - * Skip value 0 when enabling --per-thread globally, - * otherwise too many 0 output. 
- */ - if (uval == 0.0 && target__has_per_thread(_target)) - continue; - - buf[i].counter = counter; - buf[i].id = thread; - buf[i].uval = uval; - buf[i].val = val; - buf[i].run = run; - buf[i].ena = ena; - i++; - } - - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); - - if (ret) - *ret = i; - - return buf; -} - -static void print_aggr_thread(struct perf_stat_config *config, - struct target *_target, - struct perf_evsel *counter, char *prefix) -{ - FILE *output = config->output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int thread, sorted_threads, id; - struct perf_aggr_thread_value *buf; - - buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); - if (!buf) { - perror("cannot sort aggr thread"); - return; - } - - for (thread = 0; thread < sorted_threads; thread++) { - if (prefix) - fprintf(output, "%s", prefix); - - id = buf[thread].id; - if (config->stats) - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id]); - else - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); - fputc('\n', output); - } - - free(buf); -} - -struct caggr_data { - double avg, avg_enabled, avg_running; -}; - -static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, - struct perf_evsel *counter, void *data, - bool first __maybe_unused) -{ - struct caggr_data *cd = data; - struct perf_stat_evsel *ps = counter->stats; - - cd->avg += avg_stats(&ps->res_stats[0]); - cd->avg_enabled += avg_stats(&ps->res_stats[1]); - cd->avg_running += avg_stats(&ps->res_stats[2]); -} - -/* - * Print out the results of a single counter: - * aggregated counts in system-wide mode - */ -static void print_counter_aggr(struct perf_stat_config *config, - struct perf_evsel *counter, char *prefix) -{ - bool metric_only = config->metric_only; - FILE *output = 
config->output; - double uval; - struct caggr_data cd = { .avg = 0.0 }; - - if (!collect_data(config, counter, counter_aggr_cb, &cd)) - return; - - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = cd.avg * counter->scale; - printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, - cd.avg, &rt_stat); - if (!metric_only) - fprintf(output, "\n"); -} - -static void counter_cb(struct perf_stat_config *config __maybe_unused, - struct perf_evsel *counter, void *data, - bool first __maybe_unused) -{ - struct aggr_data *ad = data; - - ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; -} - -/* - * Print out the results of a single counter: - * does not use aggregated count in system-wide - */ -static void print_counter(struct perf_stat_config *config, - struct perf_evsel *counter, char *prefix) -{ - FILE *output = config->output; - u64 ena, run, val; - double uval; - int cpu; - - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu = cpu }; - - if (!collect_data(config, counter, counter_cb, &ad)) - return; - val = ad.val; - ena = ad.ena; - run = ad.run; - - if (prefix) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); - - fputc('\n', output); - } -} - -static void print_no_aggr_metric(struct perf_stat_config *config, - struct perf_evlist *evlist, - char *prefix) -{ - int cpu; - int nrcpus = 0; - struct perf_evsel *counter; - u64 ena, run, val; - double uval; - - nrcpus = evlist->cpus->nr; - for (cpu = 0; cpu < nrcpus; cpu++) { - bool first = true; - - if (prefix) - fputs(prefix, config->output); - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - if (first) { - aggr_printout(config, counter, cpu, 0); - first = false; - } - val = 
perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; - - uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); - } - fputc('\n', config->output); - } -} - -static int aggr_header_lens[] = { - [AGGR_CORE] = 18, - [AGGR_SOCKET] = 12, - [AGGR_NONE] = 6, - [AGGR_THREAD] = 24, - [AGGR_GLOBAL] = 0, -}; - -static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_SOCKET] = "socket,cpus", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_GLOBAL] = "" -}; - -static void print_metric_headers(struct perf_stat_config *config, - struct perf_evlist *evlist, - const char *prefix, bool no_indent) -{ - struct perf_stat_output_ctx out; - struct perf_evsel *counter; - struct outstate os = { - .fh = config->output - }; - - if (prefix) - fprintf(config->output, "%s", prefix); - - if (!config->csv_output && !no_indent) - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); - if (config->csv_output) { - if (config->interval) - fputs("time,", config->output); - fputs(aggr_header_csv[config->aggr_mode], config->output); - } - - /* Print metrics headers only */ - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - os.evsel = counter; - out.ctx = &os; - out.print_metric = print_metric_header; - out.new_line = new_line_metric; - out.force_header = true; - os.evsel = counter; - perf_stat__print_shadow_stats(config, counter, 0, - 0, - &out, - &config->metric_events, - &rt_stat); - } - fputc('\n', config->output); -} - -static void print_interval(struct perf_stat_config *config, - struct perf_evlist *evlist, - char *prefix, struct timespec *ts) -{ - bool metric_only = config->metric_only; - unsigned int unit_width = config->unit_width; - FILE *output = config->output; - static int num_print_interval; - - if (config->interval_clear) - puts(CONSOLE_CLEAR); - - 
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); - - if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { - switch (config->aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU "); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_THREAD: - fprintf(output, "# time comm-pid"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - case AGGR_UNSET: - break; - } - } - - if ((num_print_interval == 0 || config->interval_clear) && metric_only) - print_metric_headers(config, evlist, " ", true); - if (++num_print_interval == 25) - num_print_interval = 0; -} - -static void print_header(struct perf_stat_config *config, - struct target *_target, - int argc, const char **argv) -{ - FILE *output = config->output; - int i; - - fflush(stdout); - - if (!config->csv_output) { - fprintf(output, "\n"); - fprintf(output, " Performance counter stats for "); - if (_target->system_wide) - fprintf(output, "\'system wide"); - else if (_target->cpu_list) - fprintf(output, "\'CPU(s) %s", _target->cpu_list); - else if (!target__has_task(_target)) { - fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); - for (i = 1; argv && (i < argc); i++) - fprintf(output, " %s", argv[i]); - } else if (_target->pid) - fprintf(output, "process id \'%s", _target->pid); - else - fprintf(output, "thread id \'%s", _target->tid); - - fprintf(output, "\'"); - if (config->run_count > 1) - fprintf(output, " (%d runs)", config->run_count); - fprintf(output, ":\n\n"); - } -} - -static int get_precision(double num) -{ - if (num > 1) - return 0; - - return lround(ceil(-log10(num))); -} - -static void print_table(struct perf_stat_config *config, - FILE *output, int precision, double avg) -{ - char tmp[64]; - int idx, indent = 0; - - scnprintf(tmp, 64, " %17.*f", precision, avg); - while (tmp[indent] == ' ') - indent++; - - fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); - - for (idx = 0; idx < config->run_count; idx++) { - double run = (double) config->walltime_run[idx] / NSEC_PER_SEC; - int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); - - fprintf(output, " %17.*f (%+.*f) ", - precision, run, precision, run - avg); - - for (h = 0; h < n; h++) - fprintf(output, "#"); - - fprintf(output, "\n"); - } - - fprintf(output, "\n%*s# Final result:\n", indent, ""); -} - -static double timeval2double(struct timeval *t) -{ - return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; -} - -static void print_footer(struct perf_stat_config *config) -{ - double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; - FILE *output = config->output; - int n; - - if (!config->null_run) - fprintf(output, "\n"); - - if (config->run_count == 1) { - fprintf(output, " %17.9f seconds time elapsed", avg); - - if (config->ru_display) { - double ru_utime = timeval2double(&config->ru_data.ru_utime); - double ru_stime = timeval2double(&config->ru_data.ru_stime); - - fprintf(output, "\n\n"); - fprintf(output, " %17.9f seconds user\n", ru_utime); - fprintf(output, " %17.9f seconds sys\n", ru_stime); - } - } else { - double sd = stddev_stats(config->walltime_nsecs_stats) / 
NSEC_PER_SEC; - /* - * Display at most 2 more significant - * digits than the stddev inaccuracy. - */ - int precision = get_precision(sd) + 2; - - if (config->walltime_run_table) - print_table(config, output, precision, avg); - - fprintf(output, " %17.*f +- %.*f seconds time elapsed", - precision, avg, precision, sd); - - print_noise_pct(config, sd, avg); - } - fprintf(output, "\n\n"); - - if (config->print_free_counters_hint && - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && - n > 0) - fprintf(output, -"Some events weren't counted. Try disabling the NMI watchdog:\n" -" echo 0 > /proc/sys/kernel/nmi_watchdog\n" -" perf stat ...\n" -" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (config->print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. Try reorganizing the group.\n"); -} - -static void -perf_evlist__print_counters(struct perf_evlist *evlist, - struct perf_stat_config *config, - struct target *_target, - struct timespec *ts, - int argc, const char **argv) -{ - bool metric_only = config->metric_only; - int interval = config->interval; - struct perf_evsel *counter; - char buf[64], *prefix = NULL; - - if (interval) - print_interval(config, evlist, prefix = buf, ts); - else - print_header(config, _target, argc, argv); - - if (metric_only) { - static int num_print_iv; - - if (num_print_iv == 0 && !interval) - print_metric_headers(config, evlist, prefix, false); - if (num_print_iv++ == 25) - num_print_iv = 0; - if (config->aggr_mode == AGGR_GLOBAL && prefix) - fprintf(config->output, "%s", prefix); - } - - switch (config->aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(config, evlist, prefix); - break; - case AGGR_THREAD: - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - print_aggr_thread(config, _target, counter, prefix); - } - break; - case AGGR_GLOBAL: - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; 
- print_counter_aggr(config, counter, prefix); - } - if (metric_only) - fputc('\n', config->output); - break; - case AGGR_NONE: - if (metric_only) - print_no_aggr_metric(config, evlist, prefix); - else { - evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - print_counter(config, counter, prefix); - } - } - break; - case AGGR_UNSET: - default: - break; - } - - if (!interval && !config->csv_output) - print_footer(config); - - fflush(config->output); -} - static void print_counters(struct timespec *ts, int argc, const char **argv) { /* Do not print anything if we record to the pipe. */ diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7efe15b9618d..ecd9f9ceda77 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -73,6 +73,7 @@ libperf-y += vdso.o libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o +libperf-y += stat-display.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c new file mode 100644 index 000000000000..e7b4c44ebb62 --- /dev/null +++ b/tools/perf/util/stat-display.c @@ -0,0 +1,1166 @@ +#include +#include +#include +#include +#include "evlist.h" +#include "evsel.h" +#include "stat.h" +#include "top.h" +#include "thread_map.h" +#include "cpumap.h" +#include "string2.h" +#include "sane_ctype.h" +#include "cgroup.h" +#include +#include + +#define CNTR_NOT_SUPPORTED "" +#define CNTR_NOT_COUNTED "" + +static bool is_duration_time(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "duration_time"); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) +{ + if (config->csv_output) { + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, + run, + config->csv_sep, + ena ? 
100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + } +} + +static void print_noise_pct(struct perf_stat_config *config, + double total, double avg) +{ + double pct = rel_stddev_stats(total, avg); + + if (config->csv_output) + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); + else if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg) +{ + struct perf_stat_evsel *ps; + + if (config->run_count == 1) + return; + + ps = evsel->stats; + print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); +} + +static void aggr_printout(struct perf_stat_config *config, + struct perf_evsel *evsel, int id, int nr) +{ + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(config->output, "S%d-C%*d%s%*d%s", + cpu_map__id_to_socket(id), + config->csv_output ? 0 : -8, + cpu_map__id_to_cpu(id), + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_SOCKET: + fprintf(config->output, "S%*d%s%*d%s", + config->csv_output ? 0 : -5, + id, + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_NONE: + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -4, + perf_evsel__cpus(evsel)->map[id], config->csv_sep); + break; + case AGGR_THREAD: + fprintf(config->output, "%*s-%*d%s", + config->csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + config->csv_output ? 
0 : -8, + thread_map__pid(evsel->threads, id), + config->csv_sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + default: + break; + } +} + +struct outstate { + FILE *fh; + bool newline; + const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; +}; + +#define METRIC_LEN 35 + +static void new_line_std(struct perf_stat_config *config __maybe_unused, + void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct perf_stat_config *config, + struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + aggr_printout(config, os->evsel, os->id, os->nr); + if (config->aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(config, os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + +static void new_line_csv(struct perf_stat_config *config, void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + aggr_printout(config, os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(config->csv_sep, os->fh); +} + +static void print_metric_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s", config->csv_sep, 
config->csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); +} + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[1024], str[1024]; + unsigned mlen = config->metric_only_len; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + + if (color) + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; + + color_snprintf(str, sizeof(str), color ?: "", fmt, val); + fprintf(out, "%*s ", mlen, str); +} + +static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, config->csv_sep); +} + +static void new_line_metric(struct perf_stat_config 
*config __maybe_unused, + void *ctx __maybe_unused) +{ +} + +static void print_metric_header(struct perf_stat_config *config, + void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (config->csv_output) + fprintf(os->fh, "%s%s", unit, config->csv_sep); + else + fprintf(os->fh, "%*s ", config->metric_only_len, unit); +} + +static int first_shadow_cpu(struct perf_stat_config *config, + struct perf_evsel *evsel, int id) +{ + struct perf_evlist *evlist = evsel->evlist; + int i; + + if (!config->aggr_get_id) + return 0; + + if (config->aggr_mode == AGGR_NONE) + return id; + + if (config->aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (config->aggr_get_id(config, evlist->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + +static void abs_printout(struct perf_stat_config *config, + int id, int nr, struct perf_evsel *evsel, double avg) +{ + FILE *output = config->output; + double sc = evsel->scale; + const char *fmt; + + if (config->csv_output) { + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + } else { + if (config->big_num) + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; + } + + aggr_printout(config, evsel, id, nr); + + fprintf(output, fmt, avg, config->csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + evsel->unit, config->csv_sep); + + fprintf(output, "%-*s", config->csv_output ? 
0 : 25, perf_evsel__name(evsel)); + + if (evsel->cgrp) + fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); +} + +static bool is_mixed_hw_group(struct perf_evsel *counter) +{ + struct perf_evlist *evlist = counter->evlist; + u32 pmu_type = counter->attr.type; + struct perf_evsel *pos; + + if (counter->nr_members < 2) + return false; + + evlist__for_each_entry(evlist, pos) { + /* software events can be part of any hardware group */ + if (pos->attr.type == PERF_TYPE_SOFTWARE) + continue; + if (pmu_type == PERF_TYPE_SOFTWARE) { + pmu_type = pos->attr.type; + continue; + } + if (pmu_type != pos->attr.type) + return true; + } + + return false; +} + +static void printout(struct perf_stat_config *config, int id, int nr, + struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) +{ + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = config->output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, + }; + print_metric_t pm = print_metric_std; + new_line_t nl; + + if (config->metric_only) { + nl = new_line_metric; + if (config->csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; + + if (config->csv_output && !config->metric_only) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[config->aggr_mode]; + if (counter->cgrp) + os.nfields++; + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (config->metric_only) { + pm(config, &os, NULL, "", "", 0); + return; + } + aggr_printout(config, counter, id, nr); + + fprintf(config->output, "%*s%s", + config->csv_output ? 0 : 18, + counter->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + config->csv_sep); + + if (counter->supported) { + config->print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + config->print_mixed_hw_group_error = 1; + } + + fprintf(config->output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + counter->unit, config->csv_sep); + + fprintf(config->output, "%*s", + config->csv_output ? 0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(config->output, "%s%s", + config->csv_sep, counter->cgrp->name); + + if (!config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + print_noise(config, counter, noise); + print_running(config, run, ena); + if (config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + return; + } + + if (!config->metric_only) + abs_printout(config, id, nr, counter, uval); + + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + out.force_header = false; + + if (config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } + + perf_stat__print_shadow_stats(config, counter, uval, + first_shadow_cpu(config, counter, id), + &out, &config->metric_events, st); + if (!config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } +} + +static void aggr_update_shadow(struct perf_stat_config *config, + struct perf_evlist *evlist) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + evlist__for_each_entry(evlist, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = config->aggr_get_id(config, evlist->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + perf_stat__update_shadow_stats(counter, val, + first_shadow_cpu(config, counter, id), + &rt_stat); + } + } +} + +static void uniquify_event_name(struct perf_evsel *counter) +{ + char 
*new_name; + char *config; + + if (counter->uniquified_name || + !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + strlen(counter->pmu_name))) + return; + + config = strchr(counter->name, '/'); + if (config) { + if (asprintf(&new_name, + "%s%s", counter->pmu_name, config) > 0) { + free(counter->name); + counter->name = new_name; + } + } else { + if (asprintf(&new_name, + "%s [%s]", counter->name, counter->pmu_name) > 0) { + free(counter->name); + counter->name = new_name; + } + } + + counter->uniquified_name = true; +} + +static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evlist *evlist = counter->evlist; + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evlist->entries), node); + list_for_each_entry_continue (alias, &evlist->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) + break; + alias->merged_stat = true; + cb(config, alias, data, false); + } +} + +static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; + cb(config, counter, data, true); + if (config->no_merge) + uniquify_event_name(counter); + else if (counter->auto_merge_stats) + collect_all_aliases(config, counter, cb, data); + return true; +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_stat_config *config, + struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < 
perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + +static void print_aggr(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + struct perf_evsel *counter; + int s, id, nr; + double uval; + u64 ena, run, val; + bool first; + + if (!(config->aggr_map || config->aggr_get_id)) + return; + + aggr_update_shadow(config, evlist); + + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. 
+ */ + for (s = 0; s < config->aggr_map->nr; s++) { + struct aggr_data ad; + if (prefix && metric_only) + fprintf(output, "%s", prefix); + + ad.id = id = config->aggr_map->map[s]; + first = true; + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(config, counter, aggr_cb, &ad)) + continue; + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; + if (first && metric_only) { + first = false; + aggr_printout(config, counter, id, nr); + } + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); + if (!metric_only) + fputc('\n', output); + } + if (metric_only) + fputc('\n', output); + } +} + +static int cmp_val(const void *a, const void *b) +{ + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret, + struct target *_target) +{ + int cpu, thread, i = 0; + double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. 
+ */ + if (uval == 0.0 && target__has_per_thread(_target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_stat_config *config, + struct target *_target, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { + if (prefix) + fprintf(output, "%s", prefix); + + id = buf[thread].id; + if (config->stats) + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &config->stats[id]); + else + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); + fputc('\n', output); + } + + free(buf); +} + +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->stats; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + +/* + * Print out the results of a single counter: + * aggregated counts in system-wide mode + */ +static void print_counter_aggr(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = 
config->output; + double uval; + struct caggr_data cd = { .avg = 0.0 }; + + if (!collect_data(config, counter, counter_aggr_cb, &cd)) + return; + + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = cd.avg * counter->scale; + printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); + if (!metric_only) + fprintf(output, "\n"); +} + +static void counter_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + +/* + * Print out the results of a single counter: + * does not use aggregated count in system-wide + */ +static void print_counter(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + u64 ena, run, val; + double uval; + int cpu; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct aggr_data ad = { .cpu = cpu }; + + if (!collect_data(config, counter, counter_cb, &ad)) + return; + val = ad.val; + ena = ad.ena; + run = ad.run; + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + + fputc('\n', output); + } +} + +static void print_no_aggr_metric(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; + + nrcpus = evlist->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; + + if (prefix) + fputs(prefix, config->output); + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + if (first) { + aggr_printout(config, counter, cpu, 0); + first = false; + } + val = 
perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + } + fputc('\n', config->output); + } +} + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + +static void print_metric_headers(struct perf_stat_config *config, + struct perf_evlist *evlist, + const char *prefix, bool no_indent) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = config->output + }; + + if (prefix) + fprintf(config->output, "%s", prefix); + + if (!config->csv_output && !no_indent) + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); + if (config->csv_output) { + if (config->interval) + fputs("time,", config->output); + fputs(aggr_header_csv[config->aggr_mode], config->output); + } + + /* Print metrics headers only */ + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + out.force_header = true; + os.evsel = counter; + perf_stat__print_shadow_stats(config, counter, 0, + 0, + &out, + &config->metric_events, + &rt_stat); + } + fputc('\n', config->output); +} + +static void print_interval(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix, struct timespec *ts) +{ + bool metric_only = config->metric_only; + unsigned int unit_width = config->unit_width; + FILE *output = config->output; + static int num_print_interval; + + if (config->interval_clear) + puts(CONSOLE_CLEAR); + + 
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + + if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { + switch (config->aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU "); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + case AGGR_UNSET: + break; + } + } + + if ((num_print_interval == 0 || config->interval_clear) && metric_only) + print_metric_headers(config, evlist, " ", true); + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(struct perf_stat_config *config, + struct target *_target, + int argc, const char **argv) +{ + FILE *output = config->output; + int i; + + fflush(stdout); + + if (!config->csv_output) { + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); + + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + } +} + +static int get_precision(double num) +{ + if (num > 1) + return 0; + + return lround(ceil(-log10(num))); +} + +static void print_table(struct perf_stat_config *config, + FILE *output, int precision, double avg) +{ + char tmp[64]; + int idx, indent = 0; + + scnprintf(tmp, 64, " %17.*f", precision, avg); + while (tmp[indent] == ' ') + indent++; + + fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); + + for (idx = 0; idx < config->run_count; idx++) { + double run = (double) config->walltime_run[idx] / NSEC_PER_SEC; + int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); + + fprintf(output, " %17.*f (%+.*f) ", + precision, run, precision, run - avg); + + for (h = 0; h < n; h++) + fprintf(output, "#"); + + fprintf(output, "\n"); + } + + fprintf(output, "\n%*s# Final result:\n", indent, ""); +} + +static double timeval2double(struct timeval *t) +{ + return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; +} + +static void print_footer(struct perf_stat_config *config) +{ + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; + FILE *output = config->output; + int n; + + if (!config->null_run) + fprintf(output, "\n"); + + if (config->run_count == 1) { + fprintf(output, " %17.9f seconds time elapsed", avg); + + if (config->ru_display) { + double ru_utime = timeval2double(&config->ru_data.ru_utime); + double ru_stime = timeval2double(&config->ru_data.ru_stime); + + fprintf(output, "\n\n"); + fprintf(output, " %17.9f seconds user\n", ru_utime); + fprintf(output, " %17.9f seconds sys\n", ru_stime); + } + } else { + double sd = stddev_stats(config->walltime_nsecs_stats) / 
NSEC_PER_SEC; + /* + * Display at most 2 more significant + * digits than the stddev inaccuracy. + */ + int precision = get_precision(sd) + 2; + + if (config->walltime_run_table) + print_table(config, output, precision, avg); + + fprintf(output, " %17.*f +- %.*f seconds time elapsed", + precision, avg, precision, sd); + + print_noise_pct(config, sd, avg); + } + fprintf(output, "\n\n"); + + if (config->print_free_counters_hint && + sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && + n > 0) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + + if (config->print_mixed_hw_group_error) + fprintf(output, + "The events in group usually have to be from " + "the same PMU. Try reorganizing the group.\n"); +} + +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv) +{ + bool metric_only = config->metric_only; + int interval = config->interval; + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(config, evlist, prefix = buf, ts); + else + print_header(config, _target, argc, argv); + + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0 && !interval) + print_metric_headers(config, evlist, prefix, false); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (config->aggr_mode == AGGR_GLOBAL && prefix) + fprintf(config->output, "%s", prefix); + } + + switch (config->aggr_mode) { + case AGGR_CORE: + case AGGR_SOCKET: + print_aggr(config, evlist, prefix); + break; + case AGGR_THREAD: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_aggr_thread(config, _target, counter, prefix); + } + break; + case AGGR_GLOBAL: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + 
print_counter_aggr(config, counter, prefix); + } + if (metric_only) + fputc('\n', config->output); + break; + case AGGR_NONE: + if (metric_only) + print_no_aggr_metric(config, evlist, prefix); + else { + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_counter(config, counter, prefix); + } + } + break; + case AGGR_UNSET: + default: + break; + } + + if (!interval && !config->csv_output) + print_footer(config); + + fflush(config->output); +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 8d3354e21e19..3a13a6dc5a62 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -215,4 +215,10 @@ int perf_stat_synthesize_config(struct perf_stat_config *config, struct perf_evlist *evlist, perf_event__handler_t process, bool attrs); +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv); #endif From 3de3e8bbf302545ef9acebb9f900939ac5c3820f Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 27 Aug 2018 20:53:44 -0700 Subject: [PATCH 82/90] perf trace beauty: Alias 'umount' to 'umount2' Before: # perf trace -e *mount* umount /dev/mapper/fedora-home /s 11.576 ( 0.004 ms) umount/3138 umount2(arg0: 94501956754656, arg1: 0, arg2: 1, arg3: 140051050083104, arg4: 4, arg5: 94501956755136) = -1 EINVAL Invalid argument # After: # perf trace -e *mount* umount /s 0.000 ( 9.241 ms): umount/5251 umount2(name: 0x55f74a986480) = 0 Signed-off-by: Benjamin Peterson Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180828035344.31500-1-benjamin@python.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 97ace635bed8..c106189f4066 100644 --- 
a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -825,6 +825,7 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, + { .name = "umount2", .alias = "umount", }, { .name = "uname", .alias = "newuname", }, { .name = "unlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, From 58094c48f4079cfc784f53a73caaa446db436389 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 27 Aug 2018 15:08:07 -0500 Subject: [PATCH 83/90] perf annotate: Handle arm64 move instructions Add default handler for non-jump instructions. This really only has an effect on instructions that compute a PC-relative address, such as 'adrp,' as seen in these couple of examples: BEFORE: adrp x0, ffff20000aa11000 AFTER: adrp x0, kallsyms_token_index+0xce000 BEFORE: adrp x23, ffff20000ae94000 <__per_cpu_load> AFTER: adrp x23, __per_cpu_load The implementation is identical to that of s390, but with a slight adjustment for objdump whitespace propagation (arm64 objdump puts spaces after commas, whereas s390's presumably doesn't). The mov__scnprintf() declaration is moved from s390's to arm64's instructions.c because arm64's gets included before s390's. Committer testing: Ran 'perf annotate --stdio2 > /tmp/{before,after}' no diff. 
Signed-off-by: Kim Phillips Tested-by: Arnaldo Carvalho de Melo Tested-by: Thomas Richter Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20180827150807.304110d2e9919a17c832ca48@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/annotate/instructions.c | 59 ++++++++++++++++++- tools/perf/arch/s390/annotate/instructions.c | 2 - 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 6688977e4ac7..76c6345a57d5 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -8,6 +8,63 @@ struct arm64_annotate { jump_insn; }; +static int arm64_mov__parse(struct arch *arch __maybe_unused, + struct ins_operands *ops, + struct map_symbol *ms __maybe_unused) +{ + char *s = strchr(ops->raw, ','), *target, *endptr; + + if (s == NULL) + return -1; + + *s = '\0'; + ops->source.raw = strdup(ops->raw); + *s = ','; + + if (ops->source.raw == NULL) + return -1; + + target = ++s; + ops->target.raw = strdup(target); + if (ops->target.raw == NULL) + goto out_free_source; + + ops->target.addr = strtoull(target, &endptr, 16); + if (endptr == target) + goto out_free_target; + + s = strchr(endptr, '<'); + if (s == NULL) + goto out_free_target; + endptr = strchr(s + 1, '>'); + if (endptr == NULL) + goto out_free_target; + + *endptr = '\0'; + *s = ' '; + ops->target.name = strdup(s); + *s = '<'; + *endptr = '>'; + if (ops->target.name == NULL) + goto out_free_target; + + return 0; + +out_free_target: + zfree(&ops->target.raw); +out_free_source: + zfree(&ops->source.raw); + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops); + +static struct ins_ops arm64_mov_ops = { + .parse = arm64_mov__parse, 
+ .scnprintf = mov__scnprintf, +}; + static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name) { struct arm64_annotate *arm = arch->priv; @@ -21,7 +78,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const else if (!strcmp(name, "ret")) ops = &ret_ops; else - return NULL; + ops = &arm64_mov_ops; arch__associate_ins_ops(arch, name, ops); return ops; diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index cee4e2f7c057..de0dd66dbb48 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -100,8 +100,6 @@ out_free_source: return -1; } -static int mov__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops); static struct ins_ops s390_mov_ops = { .parse = s390_mov__parse, From 21d7eb9a24739cdc5ea19c90a79e5a585866ba35 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Sep 2018 15:02:22 -0300 Subject: [PATCH 84/90] perf augmented_syscalls: Check probe_read_str() return separately Using a value returned from probe_read_str() to tell how many bytes to copy using perf_event_output() has issues in some older kernels, like 4.17.17-100.fc27.x86_64, so separate the bounds checking done on how many bytes to copy to a separate variable, so that the next patch has only what is being done to make the test pass on older BPF validators. 
For reference, see the discussion in this thread: https://www.spinics.net/lists/netdev/msg480099.html Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-jtsapwibyxrnv1xjfsgzp0fj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 1419a9186937..0decbcfa8b90 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -33,9 +33,9 @@ struct syscall_exit_args { }; struct augmented_filename { - int size; - int reserved; - char value[256]; + unsigned int size; + int reserved; + char value[256]; }; #define augmented_filename_syscall(syscall) \ @@ -46,14 +46,15 @@ struct augmented_enter_##syscall##_args { \ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ { \ struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + unsigned int len = sizeof(augmented_args); \ probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ sizeof(augmented_args.filename.value), \ args->filename_ptr); \ + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) \ + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ + &augmented_args, len); \ return 0; \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ From 7538d16397dfc72d8b61a99c32c592a75ae7f157 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Sep 2018 
15:18:37 -0300 Subject: [PATCH 85/90] perf augmented_syscalls: Avoid optimization to pass older BPF validators See https://www.spinics.net/lists/netdev/msg480099.html for the whole discussion, but to make the augmented_syscalls.c BPF program to get built and loaded successfully in a greater range of kernels, add an extra check. Related patch: a60dd35d2e39 ("bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO") That is in the kernel since v4.15, I couldn't figure out why this is hitting me with 4.17.17, but adding the workaround discussed there makes this work with this fedora kernel and with 4.18.recent. Before: # uname -a Linux seventh 4.17.17-100.fc27.x86_64 #1 SMP Mon Aug 20 15:53:11 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (bf) r6 = r1 1: (b7) r1 = 0 2: (7b) *(u64 *)(r10 -8) = r1 3: (7b) *(u64 *)(r10 -16) = r1 4: (7b) *(u64 *)(r10 -24) = r1 5: (7b) *(u64 *)(r10 -32) = r1 6: (7b) *(u64 *)(r10 -40) = r1 7: (7b) *(u64 *)(r10 -48) = r1 8: (7b) *(u64 *)(r10 -56) = r1 9: (7b) *(u64 *)(r10 -64) = r1 10: (7b) *(u64 *)(r10 -72) = r1 11: (7b) *(u64 *)(r10 -80) = r1 12: (7b) *(u64 *)(r10 -88) = r1 13: (7b) *(u64 *)(r10 -96) = r1 14: (7b) *(u64 *)(r10 -104) = r1 15: (7b) *(u64 *)(r10 -112) = r1 16: (7b) *(u64 *)(r10 -120) = r1 17: (7b) *(u64 *)(r10 -128) = r1 18: (7b) *(u64 *)(r10 -136) = r1 19: (7b) *(u64 *)(r10 -144) = r1 20: (7b) *(u64 *)(r10 -152) = r1 21: (7b) *(u64 *)(r10 -160) = r1 22: (7b) *(u64 *)(r10 -168) = r1 23: (7b) *(u64 *)(r10 -176) = r1 24: (7b) *(u64 *)(r10 -184) = r1 25: (7b) *(u64 *)(r10 -192) = r1 26: (7b) *(u64 *)(r10 -200) = r1 27: (7b) *(u64 *)(r10 -208) = r1 28: (7b) *(u64 *)(r10 -216) = r1 29: (7b) *(u64 *)(r10 -224) = r1 30: (7b) *(u64 *)(r10 -232) = r1 31: (7b) *(u64 *)(r10 -240) = r1 32: (7b) *(u64 *)(r10 -248) = r1 33: (7b) *(u64 *)(r10 -256) = r1 34: 
(7b) *(u64 *)(r10 -264) = r1 35: (7b) *(u64 *)(r10 -272) = r1 36: (7b) *(u64 *)(r10 -280) = r1 37: (7b) *(u64 *)(r10 -288) = r1 38: (7b) *(u64 *)(r10 -296) = r1 39: (7b) *(u64 *)(r10 -304) = r1 40: (7b) *(u64 *)(r10 -312) = r1 41: (bf) r7 = r10 42: (07) r7 += -312 43: (bf) r1 = r7 44: (b7) r2 = 48 45: (bf) r3 = r6 46: (85) call bpf_probe_read#4 47: (79) r3 = *(u64 *)(r6 +24) 48: (bf) r1 = r10 49: (07) r1 += -256 50: (b7) r8 = 256 51: (b7) r2 = 256 52: (85) call bpf_probe_read_str#45 53: (bf) r1 = r0 54: (67) r1 <<= 32 55: (77) r1 >>= 32 56: (bf) r5 = r0 57: (07) r5 += 56 58: (2d) if r8 > r1 goto pc+1 R0=inv(id=0) R1=inv(id=0,umin_value=256,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 59: (b7) r5 = 312 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 70: (b7) r0 = 0 71: (95) exit from 58 to 60: R0=inv(id=0) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 R5 unbounded memory access, use 'var &= const' or 'if (var < const)' libbpf: -- END LOG -- libbpf: failed to load program 'syscalls:sys_enter_openat' libbpf: failed to load object 'tools/perf/examples/bpf/augmented_syscalls.c' bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading) event syntax error: 'tools/perf/examples/bpf/augmented_syscalls.c' \___ Kernel verifier blocks program loading After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/29249 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 
0.008 cat/29249 syscalls:sys_exit_openat:0x3 0.021 cat/29249 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.025 cat/29249 syscalls:sys_exit_openat:0x3 0.180 cat/29249 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.185 cat/29249 syscalls:sys_exit_open:0x3 0.242 cat/29249 openat(dfd: CWD, filename: /etc/passwd) 0.245 cat/29249 syscalls:sys_exit_openat:0x3 # It also works with a more recent kernel: # uname -a Linux jouet 4.18.0-00014-g4e67b2a5df5d #6 SMP Thu Aug 30 17:34:17 -03 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/26451 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.020 cat/26451 syscalls:sys_exit_openat:0x3 0.039 cat/26451 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.044 cat/26451 syscalls:sys_exit_openat:0x3 0.231 cat/26451 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.238 cat/26451 syscalls:sys_exit_open:0x3 0.278 cat/26451 openat(dfd: CWD, filename: /etc/passwd) 0.282 cat/26451 syscalls:sys_exit_openat:0x3 # Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: Gianluca Borello Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-wkpsivs1a9afwldbul46btbv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 0decbcfa8b90..2ae44813ef2d 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -51,8 +51,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ sizeof(augmented_args.filename.value), \ args->filename_ptr); \ - if (augmented_args.filename.size < 
sizeof(augmented_args.filename.value)) \ + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ + len &= sizeof(augmented_args.filename.value) - 1; \ + } \ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ &augmented_args, len); \ return 0; \ From 8a041f86a83f9783ba23a423a2d5a51b48136850 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Sep 2018 16:07:53 -0300 Subject: [PATCH 86/90] perf trace: Introduce syscall__augmented_args() method That will be used by trace__sys_enter when we start combining the augmented syscalls:sys_enter_FOO + syscalls:sys_exit_FOO. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-iiseo3s0qbf9i3rzn8k597bv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c106189f4066..12356deb6046 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1677,6 +1677,17 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, return printed; } +static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size) +{ + void *augmented_args = NULL; + + *augmented_args_size = sample->raw_size - sc->args_size; + if (*augmented_args_size > 0) + augmented_args = sample->raw_data + sc->args_size; + + return augmented_args; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1762,10 +1773,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - augmented_args_size = sample->raw_size - sc->args_size; - if 
(augmented_args_size > 0) - augmented_args = sample->raw_data + sc->args_size; - + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); fprintf(trace->output, "%s", msg); err = 0; From db2da3f85cd6314321b6a9441a5af8841c93394d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Sep 2018 16:24:09 -0300 Subject: [PATCH 87/90] perf trace: Setup augmented_args in the raw_syscalls:sys_enter handler Without using something to augment the raw_syscalls:sys_enter tracepoint payload with the pointer contents, this will work just like before, i.e. the augmented_args arg will be NULL and the augmented_args_size will be 0. This just paves the way for the next cset where we will associate the trace__sys_enter tracepoint handler with the augmented "bpf-output" event named "__augmented_args__". Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-p8uvt2a6ug3uwlhja3cno4la@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 12356deb6046..2b99a02355cf 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1697,6 +1697,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, size_t printed = 0; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; + int augmented_args_size = 0; + void *augmented_args = NULL; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1720,13 +1722,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) trace__printf_interrupted_entry(trace); - + /* + * If this is 
raw_syscalls.sys_enter, then it always comes with the 6 possible + * arguments, even if the syscall being handled, say "openat", uses only 4 arguments + * this breaks syscall__augmented_args() check for augmented args, as we calculate + * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file, + * so when handling, say the openat syscall, we end up getting 6 args for the + * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly + * thinking that the extra 2 u64 args are the augmented filename, so just check + * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. + */ + if (evsel != trace->syscalls.events.sys_enter) + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, - args, NULL, 0, trace, thread); + args, augmented_args, augmented_args_size, trace, thread); if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { From b1a9e2535e20cdd6cd14eec8128278bc5d97843c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Sep 2018 16:29:39 -0300 Subject: [PATCH 88/90] perf trace: Use the raw_syscalls:sys_enter for the augmented syscalls Now we combine what comes from the "bpf-output" event, i.e. what is added in the augmented_syscalls.c BPF program via the __augmented_syscalls__ BPF map, i.e. the payload we get with raw_syscalls:sys_enter tracepoints plus the pointer contents, right after that payload, with the raw_syscall:sys_exit also added, without augmentation, in the augmented_syscalls.c program. 
The end result is that for the hooked syscalls, we get strace like output with pointer expansion, something that wasn't possible before with just raw_syscalls:sys_enter + raw_syscalls:sys_exit. E.g.: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ping -c 2 ::1 0.000 ( 0.008 ms): ping/19573 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 0.036 ( 0.006 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libcap.so.2, flags: CLOEXEC) = 3 0.070 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libidn.so.11, flags: CLOEXEC) = 3 0.095 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libcrypto.so.1.1, flags: CLOEXEC) = 3 0.127 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libresolv.so.2, flags: CLOEXEC) = 3 0.156 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libm.so.6, flags: CLOEXEC) = 3 0.181 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) = 3 0.212 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libz.so.1, flags: CLOEXEC) = 3 0.242 ( 0.004 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libdl.so.2, flags: CLOEXEC) = 3 0.266 ( 0.003 ms): ping/19573 openat(dfd: CWD, filename: /lib64/libpthread.so.0, flags: CLOEXEC) = 3 0.709 ( 0.006 ms): ping/19573 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 PING ::1(::1) 56 data bytes 1.133 ( 0.011 ms): ping/19573 connect(fd: 5, uservaddr: { .family: INET6, port: 1025, addr: ::1 }, addrlen: 28) = 0 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.033 ms 1.234 ( 0.036 ms): ping/19573 sendto(fd: 4, buff: 0x555e5b975720, len: 64, addr: { .family: INET6, port: 58, addr: ::1 }, addr_len: 28) = 64 64 bytes from ::1: icmp_seq=2 ttl=64 time=0.120 ms --- ::1 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1000ms rtt min/avg/max/mdev = 0.033/0.076/0.120/0.044 ms 1002.060 ( 0.129 ms): ping/19573 sendto(fd: 4, buff: 0x555e5b975720, len: 64, flags: CONFIRM, addr: { .family: INET6, port: 
58, addr: ::1 }, addr_len: 28) = 64 # # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat tools/perf/examples/bpf/hello.c #include <stdio.h> int syscall_enter(openat)(void *args) { puts("Hello, world\n"); return 0; } license(GPL); 0.000 ( 0.008 ms): cat/20054 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 0.020 ( 0.005 ms): cat/20054 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) = 3 0.176 ( 0.011 ms): cat/20054 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 0.243 ( 0.006 ms): cat/20054 openat(dfd: CWD, filename: tools/perf/examples/bpf/hello.c) = 3 # Now to think how to hook on all syscalls, falling back to the non-augmented raw_syscalls:sys_enter payload. Probably the best way is to use a BPF_MAP_TYPE_PROG_ARRAY just like samples/bpf/tracex5_kern.c does. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-nlt60y69o26xi59z5vtpdrj5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 41 +++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2b99a02355cf..7ce277d22a91 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -288,6 +288,13 @@ static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel) return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)); } +static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel) +{ + struct syscall_tp *sc = evsel->priv; + + return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap); +} + static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler) { evsel->priv = malloc(sizeof(struct syscall_tp)); @@ -3346,12 +3353,8 @@ int cmd_trace(int argc, const char **argv) goto out; } - if (evsel) { - if (perf_evsel__init_augmented_syscall_tp(evsel) || 
- perf_evsel__init_augmented_syscall_tp_args(evsel)) - goto out; + if (evsel) trace.syscalls.events.augmented = evsel; - } err = bpf__setup_stdout(trace.evlist); if (err) { @@ -3396,6 +3399,34 @@ int cmd_trace(int argc, const char **argv) } } + /* + * If we are augmenting syscalls, then combine what we put in the + * __augmented_syscalls__ BPF map with what is in the + * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF, + * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit. + * + * We'll switch to look at two BPF maps, one for sys_enter and the + * other for sys_exit when we start augmenting the sys_exit paths with + * buffers that are being copied from kernel to userspace, think 'read' + * syscall. + */ + if (trace.syscalls.events.augmented) { + evsel = trace.syscalls.events.augmented; + + if (perf_evsel__init_augmented_syscall_tp(evsel) || + perf_evsel__init_augmented_syscall_tp_args(evsel)) + goto out; + evsel->handler = trace__sys_enter; + + evlist__for_each_entry(trace.evlist, evsel) { + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { + perf_evsel__init_augmented_syscall_tp(evsel); + perf_evsel__init_augmented_syscall_tp_ret(evsel); + evsel->handler = trace__sys_exit; + } + } + } + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); From d8e75a110df7e3318990c9fb207ae0aa7812895a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Sep 2018 10:43:07 -0300 Subject: [PATCH 89/90] perf map: Turn some pr_warning() to pr_debug() These warnings are annoying when using --stdio/--stdio2, so just turn them into debug messages; we can still get them using -v. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-t3684lkugnf1w4lwcmpj9ivm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 36d0763311ef..3f07a587c8e6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -320,12 +320,11 @@ int map__load(struct map *map) build_id__sprintf(map->dso->build_id, sizeof(map->dso->build_id), sbuild_id); - pr_warning("%s with build id %s not found", - name, sbuild_id); + pr_debug("%s with build id %s not found", name, sbuild_id); } else - pr_warning("Failed to open %s", name); + pr_debug("Failed to open %s", name); - pr_warning(", continuing without symbols\n"); + pr_debug(", continuing without symbols\n"); return -1; } else if (nr == 0) { #ifdef HAVE_LIBELF_SUPPORT @@ -334,12 +333,11 @@ int map__load(struct map *map) if (len > sizeof(DSO__DELETED) && strcmp(name + real_len + 1, DSO__DELETED) == 0) { - pr_warning("%.*s was updated (is prelink enabled?). " + pr_debug("%.*s was updated (is prelink enabled?). 
" "Restart the long running apps that use it!\n", (int)real_len, name); } else { - pr_warning("no symbols found in %s, maybe install " - "a debug package?\n", name); + pr_debug("no symbols found in %s, maybe install a debug package?\n", name); } #endif return -1; @@ -701,8 +699,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp if (verbose >= 2) { if (use_browser) { - pr_warning("overlapping maps in %s " - "(disable tui for more info)\n", + pr_debug("overlapping maps in %s (disable tui for more info)\n", map->dso->name); } else { fputs("overlapping maps:\n", fp); From 1632936480a53d85ef3012cd9f290e247251cbb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Sep 2018 10:47:56 -0300 Subject: [PATCH 90/90] perf tests: Fix record+probe_libc_inet_pton.sh without ping's debuginfo When we don't have the iputils-debuginfo package installed, i.e. when we don't have the DWARF information needed to resolve ping's samples, we end up failing this 'perf test' entry: # perf test ping 62: probe libc's inet_pton & backtrace it with ping : Ok # rpm -e iputils-debuginfo # perf test ping 62: probe libc's inet_pton & backtrace it with ping : FAILED! # Fix it to accept "[unknown]" where the symbol + offset, when resolved, is expected. I think this will fail in the other arches as well, but since I can't test now, I'm leaving s390x and ppc cases as-is. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kim Phillips Cc: Michael Petlan Cc: Namhyung Kim Cc: Naveen N. 
Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sukadev Bhattiprolu Cc: Thomas Richter Cc: Wang Nan Fixes: 7903a7086723 ("perf script: Show symbol offsets by default") Link: https://lkml.kernel.org/n/tip-hnizqwqrs03vcq1b74yao0f6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 3013ac8f83d0..cab7b0aea6ea 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -48,7 +48,7 @@ trace_libc_inet_pton_backtrace() { *) eventattr='max-stack=3' echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected - echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; esac