Since commit ffd3d18c20b8 ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support") was merged, perf can output raw SPE trace data with the "--dump-raw-trace" option. However, it lacks support for synthetic events, so it cannot output any statistical information. This patch improves the "perf report" support for ARM SPE by adding four types of synthetic events: first level cache synthetic events, including L1 data cache access and miss events; last level cache synthetic events, including last level cache access and miss events; TLB synthetic events, including TLB access and miss events; and remote access events, which account for load/store operations that access another socket. Example usage: $ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000 $ perf report --stdio # Samples: 59 of event 'l1d-miss' # Event count (approx.): 59 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. .................................. # 23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135 20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages 5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap 5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg 5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range 3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge 3.39% 3.39% dd [kernel.kallsyms] [k] release_pages 3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c 1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd [...] # Samples: 3K of event 'l1d-access' # Event count (approx.): 3980 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ...................................... 
# 26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user 10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify 7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read 4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read 4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write 3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light 3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area 3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission 2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent 2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write 2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero 2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero 1.81% 1.81% dd dd [.] 0x0000000000002960 1.78% 1.78% dd dd [.] 0x0000000000002980 [...] # Samples: 35 of event 'llc-miss' # Event count (approx.): 35 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ........................... # 34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages 8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg 8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range 5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge 5.71% 5.71% dd [kernel.kallsyms] [k] release_pages 5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c 2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work 2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup 2.86% 2.86% dd [kernel.kallsyms] [k] copy_page [...] # Samples: 2 of event 'llc-access' # Event count (approx.): 2 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ............. # 50.00% 50.00% dd [kernel.kallsyms] [k] copy_page 50.00% 50.00% dd libc-2.28.so [.] _dl_addr # Samples: 48 of event 'tlb-miss' # Event count (approx.): 48 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. .................................. 
# 20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135 12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user 10.42% 10.42% dd [kernel.kallsyms] [k] clear_page 4.17% 4.17% dd [kernel.kallsyms] [k] copy_page 4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages 2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd 2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70 2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work 2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock 2.08% 2.08% dd [kernel.kallsyms] [k] d_path 2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode 2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open [...] # Samples: 9K of event 'tlb-access' # Event count (approx.): 9573 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ...................................... # 25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user 11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user 8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify 4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read 3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2 3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent 2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write 2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read 2.52% 2.52% dd libc-2.28.so [.] write 2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission 2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write 1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area 1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero [...] # Samples: 9 of event 'branch-miss' # Event count (approx.): 9 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ......................... # 22.22% 22.22% dd libc-2.28.so [.] 
_dl_addr 11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user 11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user 11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill 11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy 11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c 11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980 11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340 # Samples: 29 of event 'remote-access' # Event count (approx.): 29 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ........................... # 41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages 10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg 10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range 6.90% 6.90% dd [kernel.kallsyms] [k] release_pages 3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge 3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work 3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap 3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge 3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap 3.45% 3.45% dd [kernel.kallsyms] [k] xas_start 3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c 3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c 3.45% 3.45% dd ld-2.28.so [.] 
0x00000000000093cc Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com> Tested-by: James Clark <james.clark@arm.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Al Grant <al.grant@arm.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Will Deacon <will@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org Signed-off-by: James Clark <james.clark@arm.com> Signed-off-by: Leo Yan <leo.yan@linaro.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
220 lines
4.6 KiB
C
220 lines
4.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* arm_spe_decoder.c: ARM SPE support
|
|
*/
|
|
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
#include <errno.h>
|
|
#include <inttypes.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/zalloc.h>
|
|
|
|
#include "../auxtrace.h"
|
|
#include "../debug.h"
|
|
#include "../util.h"
|
|
|
|
#include "arm-spe-decoder.h"
|
|
|
|
/* Fallback single-bit mask helper, used only when no header provided BIT(). */
#if !defined(BIT)
#define BIT(n)		(1UL << (n))
#endif
|
|
|
|
/*
 * Fix up the most significant byte (bits [63:56]) of an address packet
 * payload so that the result can be used as a plain 64-bit address.
 *
 * @index:   address packet header index (one of SPE_ADDR_PKT_HDR_INDEX_*)
 * @payload: raw 64-bit payload of the address packet
 *
 * Returns the payload with its top byte either filled with 0xff or cleared,
 * depending on @index.  For an unsupported @index an error is logged and the
 * payload is returned unmodified.
 *
 * The top byte is read and written with shifts and masks rather than through
 * a byte pointer into @payload, so the result does not depend on the byte
 * order of the host running the tool.
 */
static u64 arm_spe_calc_ip(int index, u64 payload)
{
	const u64 top_byte_mask = (u64)0xff << 56;
	const u8 byte7 = (payload >> 56) & 0xff;
	const u8 byte6 = (payload >> 48) & 0xff;
	int ns, el;

	/* Instruction virtual address or Branch target address */
	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
		ns = byte7 & SPE_ADDR_PKT_NS;
		el = (byte7 & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;

		/* Fill highest byte for EL1 or EL2 (VHE) mode */
		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
			payload |= top_byte_mask;
		/* Clean highest byte for other cases */
		else
			payload &= ~top_byte_mask;

	/* Data access virtual address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {

		/* Fill highest byte if bits [48..55] is 0xff */
		if (byte6 == 0xff)
			payload |= top_byte_mask;
		/* Otherwise, cleanup tags */
		else
			payload &= ~top_byte_mask;

	/* Data access physical address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
		/* Cleanup byte 7 */
		payload &= ~top_byte_mask;
	} else {
		pr_err("unsupported address packet index: 0x%x\n", index);
	}

	return payload;
}
|
|
|
|
struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
|
|
{
|
|
struct arm_spe_decoder *decoder;
|
|
|
|
if (!params->get_trace)
|
|
return NULL;
|
|
|
|
decoder = zalloc(sizeof(struct arm_spe_decoder));
|
|
if (!decoder)
|
|
return NULL;
|
|
|
|
decoder->get_trace = params->get_trace;
|
|
decoder->data = params->data;
|
|
|
|
return decoder;
|
|
}
|
|
|
|
/*
 * Release a decoder obtained from arm_spe_decoder_new().
 */
void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
{
	/* free(NULL) is a no-op, so no guard is needed. */
	free(decoder);
}
|
|
|
|
static int arm_spe_get_data(struct arm_spe_decoder *decoder)
|
|
{
|
|
struct arm_spe_buffer buffer = { .buf = 0, };
|
|
int ret;
|
|
|
|
pr_debug("Getting more data\n");
|
|
ret = decoder->get_trace(&buffer, decoder->data);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
decoder->buf = buffer.buf;
|
|
decoder->len = buffer.len;
|
|
|
|
if (!decoder->len)
|
|
pr_debug("No more data\n");
|
|
|
|
return decoder->len;
|
|
}
|
|
|
|
static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
|
|
{
|
|
int ret;
|
|
|
|
do {
|
|
if (!decoder->len) {
|
|
ret = arm_spe_get_data(decoder);
|
|
|
|
/* Failed to read out trace data */
|
|
if (ret <= 0)
|
|
return ret;
|
|
}
|
|
|
|
ret = arm_spe_get_packet(decoder->buf, decoder->len,
|
|
&decoder->packet);
|
|
if (ret <= 0) {
|
|
/* Move forward for 1 byte */
|
|
decoder->buf += 1;
|
|
decoder->len -= 1;
|
|
return -EBADMSG;
|
|
}
|
|
|
|
decoder->buf += ret;
|
|
decoder->len -= ret;
|
|
} while (decoder->packet.type == ARM_SPE_PAD);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int arm_spe_read_record(struct arm_spe_decoder *decoder)
|
|
{
|
|
int err;
|
|
int idx;
|
|
u64 payload, ip;
|
|
|
|
memset(&decoder->record, 0x0, sizeof(decoder->record));
|
|
|
|
while (1) {
|
|
err = arm_spe_get_next_packet(decoder);
|
|
if (err <= 0)
|
|
return err;
|
|
|
|
idx = decoder->packet.index;
|
|
payload = decoder->packet.payload;
|
|
|
|
switch (decoder->packet.type) {
|
|
case ARM_SPE_TIMESTAMP:
|
|
decoder->record.timestamp = payload;
|
|
return 1;
|
|
case ARM_SPE_END:
|
|
return 1;
|
|
case ARM_SPE_ADDRESS:
|
|
ip = arm_spe_calc_ip(idx, payload);
|
|
if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
|
|
decoder->record.from_ip = ip;
|
|
else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
|
|
decoder->record.to_ip = ip;
|
|
break;
|
|
case ARM_SPE_COUNTER:
|
|
break;
|
|
case ARM_SPE_CONTEXT:
|
|
break;
|
|
case ARM_SPE_OP_TYPE:
|
|
break;
|
|
case ARM_SPE_EVENTS:
|
|
if (payload & BIT(EV_L1D_REFILL))
|
|
decoder->record.type |= ARM_SPE_L1D_MISS;
|
|
|
|
if (payload & BIT(EV_L1D_ACCESS))
|
|
decoder->record.type |= ARM_SPE_L1D_ACCESS;
|
|
|
|
if (payload & BIT(EV_TLB_WALK))
|
|
decoder->record.type |= ARM_SPE_TLB_MISS;
|
|
|
|
if (payload & BIT(EV_TLB_ACCESS))
|
|
decoder->record.type |= ARM_SPE_TLB_ACCESS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_LLC_MISS)))
|
|
decoder->record.type |= ARM_SPE_LLC_MISS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_LLC_ACCESS)))
|
|
decoder->record.type |= ARM_SPE_LLC_ACCESS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_REMOTE_ACCESS)))
|
|
decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
|
|
|
|
if (payload & BIT(EV_MISPRED))
|
|
decoder->record.type |= ARM_SPE_BRANCH_MISS;
|
|
|
|
break;
|
|
case ARM_SPE_DATA_SOURCE:
|
|
break;
|
|
case ARM_SPE_BAD:
|
|
break;
|
|
case ARM_SPE_PAD:
|
|
break;
|
|
default:
|
|
pr_err("Get packet error!\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Public entry point: decode the next record into decoder->record.
 *
 * Returns whatever arm_spe_read_record() returns.
 */
int arm_spe_decode(struct arm_spe_decoder *decoder)
{
	int ret = arm_spe_read_record(decoder);

	return ret;
}
|