perf/core improvements and fixes:

- Fix various per event 'max-stack' and 'call-graph=dwarf' issues,
   mostly in 'perf trace', allowing to use 'perf trace --call-graph' with
   'dwarf' and 'fp' to setup the callgraph details for the syscall events
   and make that apply to other events, while allowing to override that on
   a per-event basis, using '-e sched:*switch/call-graph=dwarf/' for
   instance (Arnaldo Carvalho de Melo)
 
 - Improve the --time percent support in record/report/script (Jin Yao)
 
 - Fix copyfile_offset update of output offset (Jiri Olsa)
 
 - Add python script to profile and resolve physical mem type (Kan Liang)
 
 - Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips)
 
 - Remove trailing semicolon in the evlist code (Luis de Bethencourt)
 
 - Fix incorrect handling of type _TERM_DRV_CFG (Mathieu Poirier)
 
 - Use asprintf when possible in libtraceevent (Federico Vaga)
 
 - Fix bad force_token escape sequence in libtraceevent (Michael Sartain)
 
 - Add UL suffix to MISSING_EVENTS in libtraceevent (Michael Sartain)
 
 - Print value of unknown symbolic fields in libtraceevent (Jan Kiszka)
 
 - libtraceevent updates: (Steven Rostedt)
   o Show value of flags that have not been parsed
   o Simplify pointer print logic and fix %pF
   o Handle new pointer processing of bprint strings
   o Show contents (in hex) of data of unrecognized type records
   o Fix get_field_str() for dynamic strings
 
 - Add missing break in FALSE case of pevent_filter_clear_trivial() (Taeung Song)
 
 - Fix failed memory allocation for get_cpuid_str (Thomas Richter)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEELb9bqkb7Te0zijNb1lAW81NSqkAFAlpfcqoACgkQ1lAW81NS
 qkBDxQ/8CdJGxeHyookAaEYAUvezRBQKi7fwkZN+wYwWE5YpsgLI4MONT9boQGBE
 siPCrRcPjGU7455Xka2/wC067lZkfn7aTEHrVODUld3uLrnNtsa0zuKogaF9+D8E
 4MxW5OsMyfn+hx9JGxQcH59euHeeJcMY5Rj+067x6KgMX9znl6wFJfBTkLC8tXhx
 lRhpC3sbENHorLg+u00MCtlAIIHc9iqsUR32CCsGfFJTLQc2Asy1Xb0he/GRYLrX
 vdrflQ7TB7I/c+yQayBPpKSLQvhj/F0ltHtWBzH3haCdp97NWQ+gaYxeO6MZ3j7Z
 6zTrUh9ICG65Nw4SnOHHiz9Y0u0prMSkHdcfCl03d4AJf+iHfXoZwWiEMZhzBmdQ
 rGDNEdXigORkjOLGG7oFlQYny3GQzH4cY7+r138dpicZmys6z4wh9yU1I1ex8wcL
 EfGxpzGi7uKoACu3Rlxv+ByeLTsiCnkUUcmeQg9qq1M99Bw3qP19VXiFlaCE2NPT
 gL8iKo392oVyyJzMZgFAxzDQ+2YlvcsjBZ6kW7YG6Jl8H3AqRPDoSLXrs890PQzR
 BJTkRUEajddVv/1lzSXy67RypQdcGADqNC2F5/4akyfAF2jCBJj4VVnxlvky7gxj
 r6u9h8zpi0pV5Rk4qSicu4IvidfNLpK6gWqfoJIEoJDA2/CYZP8=
 =dEKk
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-4.16-20180117' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Fix various per event 'max-stack' and 'call-graph=dwarf' issues,
  mostly in 'perf trace', allowing to use 'perf trace --call-graph' with
  'dwarf' and 'fp' to setup the callgraph details for the syscall events
  and make that apply to other events, while allowing to override that on
  a per-event basis, using '-e sched:*switch/call-graph=dwarf/' for
  instance (Arnaldo Carvalho de Melo)

- Improve the --time percent support in record/report/script (Jin Yao)

- Fix copyfile_offset update of output offset (Jiri Olsa)

- Add python script to profile and resolve physical mem type (Kan Liang)

- Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips)

- Remove trailing semicolon in the evlist code (Luis de Bethencourt)

- Fix incorrect handling of type _TERM_DRV_CFG (Mathieu Poirier)

- Use asprintf when possible in libtraceevent (Federico Vaga)

- Fix bad force_token escape sequence in libtraceevent (Michael Sartain)

- Add UL suffix to MISSING_EVENTS in libtraceevent (Michael Sartain)

- Print value of unknown symbolic fields in libtraceevent (Jan Kiszka)

- libtraceevent updates: (Steven Rostedt)
  o Show value of flags that have not been parsed
  o Simplify pointer print logic and fix %pF
  o Handle new pointer processing of bprint strings
  o Show contents (in hex) of data of unrecognized type records
  o Fix get_field_str() for dynamic strings

- Add missing break in FALSE case of pevent_filter_clear_trivial() (Taeung Song)

- Fix failed memory allocation for get_cpuid_str (Thomas Richter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2018-01-17 17:20:24 +01:00
commit a72594ca5c
35 changed files with 1465 additions and 130 deletions

View File

@ -1094,7 +1094,7 @@ static enum event_type __read_token(char **tok)
if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
free(*tok); free(*tok);
*tok = NULL; *tok = NULL;
return force_token("\"\%s\" ", tok); return force_token("\"%s\" ", tok);
} else if (strcmp(*tok, "STA_PR_FMT") == 0) { } else if (strcmp(*tok, "STA_PR_FMT") == 0) {
free(*tok); free(*tok);
*tok = NULL; *tok = NULL;
@ -3970,6 +3970,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
val &= ~fval; val &= ~fval;
} }
} }
if (val) {
if (print && arg->flags.delim)
trace_seq_puts(s, arg->flags.delim);
trace_seq_printf(s, "0x%llx", val);
}
break; break;
case PRINT_SYMBOL: case PRINT_SYMBOL:
val = eval_num_arg(data, size, event, arg->symbol.field); val = eval_num_arg(data, size, event, arg->symbol.field);
@ -3980,6 +3985,8 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
break; break;
} }
} }
if (!flag)
trace_seq_printf(s, "0x%llx", val);
break; break;
case PRINT_HEX: case PRINT_HEX:
case PRINT_HEX_STR: case PRINT_HEX_STR:
@ -4293,6 +4300,26 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
goto process_again; goto process_again;
case 'p': case 'p':
ls = 1; ls = 1;
if (isalnum(ptr[1])) {
ptr++;
/* Check for special pointers */
switch (*ptr) {
case 's':
case 'S':
case 'f':
case 'F':
break;
default:
/*
* Older kernels do not process
* dereferenced pointers.
* Only process if the pointer
* value is a printable.
*/
if (isprint(*(char *)bptr))
goto process_string;
}
}
/* fall through */ /* fall through */
case 'd': case 'd':
case 'u': case 'u':
@ -4345,6 +4372,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
break; break;
case 's': case 's':
process_string:
arg = alloc_arg(); arg = alloc_arg();
if (!arg) { if (!arg) {
do_warning_event(event, "%s(%d): not enough memory!", do_warning_event(event, "%s(%d): not enough memory!",
@ -4949,21 +4977,27 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
else else
ls = 2; ls = 2;
if (*(ptr+1) == 'F' || *(ptr+1) == 'f' || if (isalnum(ptr[1]))
*(ptr+1) == 'S' || *(ptr+1) == 's') {
ptr++; ptr++;
if (arg->type == PRINT_BSTRING) {
trace_seq_puts(s, arg->string.string);
break;
}
if (*ptr == 'F' || *ptr == 'f' ||
*ptr == 'S' || *ptr == 's') {
show_func = *ptr; show_func = *ptr;
} else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { } else if (*ptr == 'M' || *ptr == 'm') {
print_mac_arg(s, *(ptr+1), data, size, event, arg); print_mac_arg(s, *ptr, data, size, event, arg);
ptr++;
arg = arg->next; arg = arg->next;
break; break;
} else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { } else if (*ptr == 'I' || *ptr == 'i') {
int n; int n;
n = print_ip_arg(s, ptr+1, data, size, event, arg); n = print_ip_arg(s, ptr, data, size, event, arg);
if (n > 0) { if (n > 0) {
ptr += n; ptr += n - 1;
arg = arg->next; arg = arg->next;
break; break;
} }
@ -5532,8 +5566,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
event = pevent_find_event_by_record(pevent, record); event = pevent_find_event_by_record(pevent, record);
if (!event) { if (!event) {
do_warning("ug! no event found for type %d", int i;
trace_parse_common_type(pevent, record->data)); int type = trace_parse_common_type(pevent, record->data);
do_warning("ug! no event found for type %d", type);
trace_seq_printf(s, "[UNKNOWN TYPE %d]", type);
for (i = 0; i < record->size; i++)
trace_seq_printf(s, " %02x",
((unsigned char *)record->data)[i]);
return; return;
} }

View File

@ -120,12 +120,12 @@ char **traceevent_plugin_list_options(void)
for (op = reg->options; op->name; op++) { for (op = reg->options; op->name; op++) {
char *alias = op->plugin_alias ? op->plugin_alias : op->file; char *alias = op->plugin_alias ? op->plugin_alias : op->file;
char **temp = list; char **temp = list;
int ret;
name = malloc(strlen(op->name) + strlen(alias) + 2); ret = asprintf(&name, "%s:%s", alias, op->name);
if (!name) if (ret < 0)
goto err; goto err;
sprintf(name, "%s:%s", alias, op->name);
list = realloc(list, count + 2); list = realloc(list, count + 2);
if (!list) { if (!list) {
list = temp; list = temp;
@ -290,17 +290,14 @@ load_plugin(struct pevent *pevent, const char *path,
const char *alias; const char *alias;
char *plugin; char *plugin;
void *handle; void *handle;
int ret;
plugin = malloc(strlen(path) + strlen(file) + 2); ret = asprintf(&plugin, "%s/%s", path, file);
if (!plugin) { if (ret < 0) {
warning("could not allocate plugin memory\n"); warning("could not allocate plugin memory\n");
return; return;
} }
strcpy(plugin, path);
strcat(plugin, "/");
strcat(plugin, file);
handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
if (!handle) { if (!handle) {
warning("could not load plugin '%s'\n%s\n", warning("could not load plugin '%s'\n%s\n",
@ -391,6 +388,7 @@ load_plugins(struct pevent *pevent, const char *suffix,
char *home; char *home;
char *path; char *path;
char *envdir; char *envdir;
int ret;
if (pevent->flags & PEVENT_DISABLE_PLUGINS) if (pevent->flags & PEVENT_DISABLE_PLUGINS)
return; return;
@ -421,16 +419,12 @@ load_plugins(struct pevent *pevent, const char *suffix,
if (!home) if (!home)
return; return;
path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2); ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
if (!path) { if (ret < 0) {
warning("could not allocate plugin memory\n"); warning("could not allocate plugin memory\n");
return; return;
} }
strcpy(path, home);
strcat(path, "/");
strcat(path, LOCAL_PLUGIN_DIR);
load_plugins_dir(pevent, suffix, path, load_plugin, data); load_plugins_dir(pevent, suffix, path, load_plugin, data);
free(path); free(path);

View File

@ -24,8 +24,8 @@
#include "kbuffer.h" #include "kbuffer.h"
#define MISSING_EVENTS (1 << 31) #define MISSING_EVENTS (1UL << 31)
#define MISSING_STORED (1 << 30) #define MISSING_STORED (1UL << 30)
#define COMMIT_MASK ((1 << 27) - 1) #define COMMIT_MASK ((1 << 27) - 1)

View File

@ -287,12 +287,10 @@ find_event(struct pevent *pevent, struct event_list **events,
sys_name = NULL; sys_name = NULL;
} }
reg = malloc(strlen(event_name) + 3); ret = asprintf(&reg, "^%s$", event_name);
if (reg == NULL) if (ret < 0)
return PEVENT_ERRNO__MEM_ALLOC_FAILED; return PEVENT_ERRNO__MEM_ALLOC_FAILED;
sprintf(reg, "^%s$", event_name);
ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
free(reg); free(reg);
@ -300,13 +298,12 @@ find_event(struct pevent *pevent, struct event_list **events,
return PEVENT_ERRNO__INVALID_EVENT_NAME; return PEVENT_ERRNO__INVALID_EVENT_NAME;
if (sys_name) { if (sys_name) {
reg = malloc(strlen(sys_name) + 3); ret = asprintf(&reg, "^%s$", sys_name);
if (reg == NULL) { if (ret < 0) {
regfree(&ereg); regfree(&ereg);
return PEVENT_ERRNO__MEM_ALLOC_FAILED; return PEVENT_ERRNO__MEM_ALLOC_FAILED;
} }
sprintf(reg, "^%s$", sys_name);
ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
free(reg); free(reg);
if (ret) { if (ret) {
@ -1634,6 +1631,7 @@ int pevent_filter_clear_trivial(struct event_filter *filter,
case FILTER_TRIVIAL_FALSE: case FILTER_TRIVIAL_FALSE:
if (filter_type->filter->boolean.value) if (filter_type->filter->boolean.value)
continue; continue;
break;
case FILTER_TRIVIAL_TRUE: case FILTER_TRIVIAL_TRUE:
if (!filter_type->filter->boolean.value) if (!filter_type->filter->boolean.value)
continue; continue;
@ -1879,17 +1877,25 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r
struct pevent *pevent; struct pevent *pevent;
unsigned long long addr; unsigned long long addr;
const char *val = NULL; const char *val = NULL;
unsigned int size;
char hex[64]; char hex[64];
/* If the field is not a string convert it */ /* If the field is not a string convert it */
if (arg->str.field->flags & FIELD_IS_STRING) { if (arg->str.field->flags & FIELD_IS_STRING) {
val = record->data + arg->str.field->offset; val = record->data + arg->str.field->offset;
size = arg->str.field->size;
if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
addr = *(unsigned int *)val;
val = record->data + (addr & 0xffff);
size = addr >> 16;
}
/* /*
* We need to copy the data since we can't be sure the field * We need to copy the data since we can't be sure the field
* is null terminated. * is null terminated.
*/ */
if (*(val + arg->str.field->size - 1)) { if (*(val + size - 1)) {
/* copy it */ /* copy it */
memcpy(arg->str.buffer, val, arg->str.field->size); memcpy(arg->str.buffer, val, arg->str.field->size);
/* the buffer is already NULL terminated */ /* the buffer is already NULL terminated */

View File

@ -403,7 +403,7 @@ OPTIONS
to end of file. to end of file.
Also support time percent with multiple time range. Time string is Also support time percent with multiple time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example: For example:
Select the second 10% time slice: Select the second 10% time slice:

View File

@ -351,19 +351,19 @@ include::itrace.txt[]
to end of file. to end of file.
Also support time percent with multipe time range. Time string is Also support time percent with multipe time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example: For example:
Select the second 10% time slice Select the second 10% time slice:
perf script --time 10%/2 perf script --time 10%/2
Select from 0% to 10% time slice Select from 0% to 10% time slice:
perf script --time 0%-10% perf script --time 0%-10%
Select the first and second 10% time slices Select the first and second 10% time slices:
perf script --time 10%/1,10%/2 perf script --time 10%/1,10%/2
Select from 0% to 10% and 30% to 40% slices Select from 0% to 10% and 30% to 40% slices:
perf script --time 0%-10%,30%-40% perf script --time 0%-10%,30%-40%
--max-blocks:: --max-blocks::

View File

@ -22,6 +22,42 @@
#include "../../util/evlist.h" #include "../../util/evlist.h"
#include "../../util/pmu.h" #include "../../util/pmu.h"
#include "cs-etm.h" #include "cs-etm.h"
#include "arm-spe.h"
/*
 * Look up every "<ARM_SPE_PMU_NAME><n>" PMU, for n in [0, nr_cpus).
 *
 * @nr_spes: in/out counter; incremented once per PMU found, so on success
 *           it holds the number of valid leading entries of the result.
 * @err:     set to -ENOMEM on failure, otherwise left untouched.
 *
 * Returns a zalloc()ed array of nr_cpus slots that the caller owns, or NULL
 * on failure.  On failure nothing is leaked and *nr_spes is reset to 0, so a
 * caller that does not check *err cannot index a NULL array with a stale
 * count.
 */
static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
{
	struct perf_pmu **arm_spe_pmus = NULL;
	int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	/* arm_spe_xxxxxxxxx\0 */
	char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];

	arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
	if (!arm_spe_pmus) {
		pr_err("spes alloc failed\n");
		*err = -ENOMEM;
		return NULL;
	}

	for (i = 0; i < nr_cpus; i++) {
		/* Bounded formatting: treat truncation like a formatting error. */
		ret = snprintf(arm_spe_pmu_name, sizeof(arm_spe_pmu_name),
			       "%s%d", ARM_SPE_PMU_NAME, i);
		if (ret < 0 || (size_t)ret >= sizeof(arm_spe_pmu_name)) {
			pr_err("sprintf failed\n");
			*err = -ENOMEM;
			/* Do not leak the array or leave a count without an array. */
			free(arm_spe_pmus);
			*nr_spes = 0;
			return NULL;
		}

		/* Found PMUs are packed at the front; misses leave the slot NULL. */
		arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
		if (arm_spe_pmus[*nr_spes]) {
			pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
				  __func__, __LINE__, *nr_spes,
				  arm_spe_pmus[*nr_spes]->type,
				  arm_spe_pmus[*nr_spes]->name);
			(*nr_spes)++;
		}
	}

	return arm_spe_pmus;
}
struct auxtrace_record struct auxtrace_record
*auxtrace_record__init(struct perf_evlist *evlist, int *err) *auxtrace_record__init(struct perf_evlist *evlist, int *err)
@ -29,22 +65,51 @@ struct auxtrace_record
struct perf_pmu *cs_etm_pmu; struct perf_pmu *cs_etm_pmu;
struct perf_evsel *evsel; struct perf_evsel *evsel;
bool found_etm = false; bool found_etm = false;
bool found_spe = false;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
int i;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
if (evlist) { if (!arm_spe_pmus)
evlist__for_each_entry(evlist, evsel) { arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
if (cs_etm_pmu &&
evsel->attr.type == cs_etm_pmu->type) evlist__for_each_entry(evlist, evsel) {
found_etm = true; if (cs_etm_pmu &&
evsel->attr.type == cs_etm_pmu->type)
found_etm = true;
if (!nr_spes)
continue;
for (i = 0; i < nr_spes; i++) {
if (evsel->attr.type == arm_spe_pmus[i]->type) {
found_spe = true;
break;
}
} }
} }
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
*err = -EOPNOTSUPP;
return NULL;
}
if (found_etm) if (found_etm)
return cs_etm_record_init(err); return cs_etm_record_init(err);
#if defined(__aarch64__)
if (found_spe)
return arm_spe_recording_init(err, arm_spe_pmus[i]);
#endif
/* /*
* Clear 'err' even if we haven't found a cs_etm event - that way perf * Clear 'err' even if we haven't found an event - that way perf
* record can still be used even if tracers aren't present. The NULL * record can still be used even if tracers aren't present. The NULL
* return value will take care of telling the infrastructure HW tracing * return value will take care of telling the infrastructure HW tracing
* isn't available. * isn't available.

View File

@ -20,6 +20,7 @@
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include "cs-etm.h" #include "cs-etm.h"
#include "arm-spe.h"
#include "../../util/pmu.h" #include "../../util/pmu.h"
struct perf_event_attr struct perf_event_attr
@ -30,7 +31,12 @@ struct perf_event_attr
/* add ETM default config here */ /* add ETM default config here */
pmu->selectable = true; pmu->selectable = true;
pmu->set_drv_config = cs_etm_set_drv_config; pmu->set_drv_config = cs_etm_set_drv_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
return arm_spe_pmu_default_config(pmu);
#endif
} }
#endif #endif
return NULL; return NULL;
} }

View File

@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \ ../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o ../../arm/util/cs-etm.o \
arm-spe.o

View File

@ -0,0 +1,225 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <time.h>
#include "../../util/cpumap.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
#include "../../util/session.h"
#include "../../util/util.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/arm-spe.h"
/* Convert a count of kibibytes (KiB) or mebibytes (MiB) into bytes. */
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
/*
 * Recording state for an Arm SPE session.  The generic auxtrace_record is
 * embedded so the callbacks in this file can recover the enclosing object
 * with container_of().
 */
struct arm_spe_recording {
struct auxtrace_record itr; /* callback table filled in by arm_spe_recording_init() */
struct perf_pmu *arm_spe_pmu; /* PMU handed to arm_spe_recording_init() */
struct perf_evlist *evlist; /* stored by arm_spe_recording_options(), read by arm_spe_read_finish() */
};
/*
 * info_priv_size callback: the private area size is the fixed
 * ARM_SPE_AUXTRACE_PRIV_SIZE, independent of both arguments.
 */
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
return ARM_SPE_AUXTRACE_PRIV_SIZE;
}
/*
 * info_fill callback: tag the auxtrace info event as Arm SPE and store the
 * SPE PMU type in its private area.
 *
 * Returns 0 on success, or -EINVAL when priv_size is not
 * ARM_SPE_AUXTRACE_PRIV_SIZE or the session's evlist has no mmaps.
 */
static int arm_spe_info_fill(struct auxtrace_record *itr,
			     struct perf_session *session,
			     struct auxtrace_info_event *auxtrace_info,
			     size_t priv_size)
{
	struct arm_spe_recording *rec;

	/* The size is checked first, then the mmap count. */
	if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE ||
	    !session->evlist->nr_mmaps)
		return -EINVAL;

	rec = container_of(itr, struct arm_spe_recording, itr);

	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
	auxtrace_info->priv[ARM_SPE_PMU_TYPE] = rec->arm_spe_pmu->type;

	return 0;
}
/*
 * recording_options callback: configure the evlist and record options for
 * an Arm SPE session.
 *
 * Steps, in order:
 *  - find the single SPE event (more than one is rejected with -EINVAL),
 *    force it to a fixed sample period of 1 and enable full auxtrace mode;
 *  - return 0 early when full auxtrace mode is not enabled;
 *  - pick default AUX (and possibly data) mmap sizes when none were given;
 *  - validate the AUX mmap size (at least 8KiB and a power of 2);
 *  - move the SPE event to the front, request CPU/TIME/TID samples on it,
 *    and append a "dummy:u" tracking event.
 *
 * Returns 0 on success, -EINVAL on a bad configuration, or the error from
 * parse_events().
 */
static int arm_spe_recording_options(struct auxtrace_record *itr,
				     struct perf_evlist *evlist,
				     struct record_opts *opts)
{
	struct arm_spe_recording *rec =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *spe_pmu = rec->arm_spe_pmu;
	struct perf_evsel *pos, *spe_evsel = NULL;
	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
	struct perf_evsel *tracker;
	int ret;

	rec->evlist = evlist;

	evlist__for_each_entry(evlist, pos) {
		if (pos->attr.type != spe_pmu->type)
			continue;

		if (spe_evsel) {
			pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
			return -EINVAL;
		}

		pos->attr.freq = 0;
		pos->attr.sample_period = 1;
		spe_evsel = pos;
		opts->full_auxtrace = true;
	}

	if (!opts->full_auxtrace)
		return 0;

	/* Full trace mode, but '-m,xyz' wasn't specified: choose defaults. */
	if (!opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t min_sz = KiB(8);
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	/*
	 * The auxtrace event has to be first in the list so that the
	 * auxtrace buffer file descriptor can be obtained.
	 */
	perf_evlist__to_front(evlist, spe_evsel);

	perf_evsel__set_sample_bit(spe_evsel, CPU);
	perf_evsel__set_sample_bit(spe_evsel, TIME);
	perf_evsel__set_sample_bit(spe_evsel, TID);

	/* Add dummy event to keep tracking */
	ret = parse_events(evlist, "dummy:u", NULL);
	if (ret)
		return ret;

	tracker = perf_evlist__last(evlist);
	perf_evlist__set_tracking_event(evlist, tracker);

	tracker->attr.freq = 0;
	tracker->attr.sample_period = 1;
	perf_evsel__set_sample_bit(tracker, TIME);
	perf_evsel__set_sample_bit(tracker, CPU);
	perf_evsel__reset_sample_bit(tracker, BRANCH_STACK);

	return 0;
}
/*
 * reference callback: derive a value from the raw monotonic clock by
 * XOR-ing its seconds and nanoseconds fields.
 *
 * Returns 0 if the clock cannot be read, instead of mixing an
 * uninitialised timespec.
 */
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
	struct timespec ts = { 0 };

	if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0)
		return 0;

	return ts.tv_sec ^ ts.tv_nsec;
}
/*
 * free callback: @itr is embedded in a struct arm_spe_recording, so release
 * the enclosing allocation rather than @itr itself.
 */
static void arm_spe_recording_free(struct auxtrace_record *itr)
{
	free(container_of(itr, struct arm_spe_recording, itr));
}
/*
 * read_finish callback: re-enable, for mmap index @idx, the first event in
 * the recorded evlist whose type matches the SPE PMU.
 *
 * Returns the result of perf_evlist__enable_event_idx(), or -EINVAL when no
 * event of the SPE PMU type is in the list.
 */
static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
{
	struct arm_spe_recording *rec =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_evsel *pos;

	evlist__for_each_entry(rec->evlist, pos) {
		if (pos->attr.type != rec->arm_spe_pmu->type)
			continue;

		return perf_evlist__enable_event_idx(rec->evlist, pos, idx);
	}

	return -EINVAL;
}
/*
 * Allocate an Arm SPE recording object bound to @arm_spe_pmu and hook up
 * its auxtrace callbacks.
 *
 * Returns the embedded auxtrace_record on success.  On failure returns NULL
 * and sets *err to -ENODEV (no PMU given) or -ENOMEM (allocation failed).
 * The object is released through its ->free callback.
 */
struct auxtrace_record *arm_spe_recording_init(int *err,
					       struct perf_pmu *arm_spe_pmu)
{
	struct arm_spe_recording *rec;

	if (arm_spe_pmu == NULL) {
		*err = -ENODEV;
		return NULL;
	}

	rec = zalloc(sizeof(*rec));
	if (rec == NULL) {
		*err = -ENOMEM;
		return NULL;
	}

	rec->arm_spe_pmu = arm_spe_pmu;

	rec->itr.alignment = 0;
	rec->itr.recording_options = arm_spe_recording_options;
	rec->itr.info_priv_size = arm_spe_info_priv_size;
	rec->itr.info_fill = arm_spe_info_fill;
	rec->itr.read_finish = arm_spe_read_finish;
	rec->itr.reference = arm_spe_reference;
	rec->itr.free = arm_spe_recording_free;

	return &rec->itr;
}
/*
 * Build the default perf_event_attr for an Arm SPE PMU.
 *
 * The sample period is read from the PMU's "caps/min_interval" file, and
 * falls back to 4096 when that read does not yield exactly one value.  The
 * PMU is also marked selectable and not uncore.
 *
 * Returns a zalloc()ed attr, or NULL when the allocation fails.
 */
struct perf_event_attr
*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
{
	struct perf_event_attr *attr = zalloc(sizeof(*attr));
	int nr_read;

	if (attr == NULL) {
		pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
		return NULL;
	}

	/*
	 * If kernel driver doesn't advertise a minimum,
	 * use max allowable by PMSIDR_EL1.INTERVAL
	 */
	nr_read = perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
				      &attr->sample_period);
	if (nr_read != 1) {
		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
		attr->sample_period = 4096;
	}

	arm_spe_pmu->selectable = true;
	arm_spe_pmu->is_uncore = false;

	return attr;
}

View File

@ -70,7 +70,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{ {
char *buf = malloc(128); char *buf = malloc(128);
if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) { if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
free(buf); free(buf);
return NULL; return NULL;
} }

View File

@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
enum perf_call_graph_mode mode = CALLCHAIN_NONE; enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) && if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) (sample_type & PERF_SAMPLE_STACK_USER)) {
mode = CALLCHAIN_DWARF; mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK) dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
mode = CALLCHAIN_LBR; mode = CALLCHAIN_LBR;
else if (sample_type & PERF_SAMPLE_CALLCHAIN) else if (sample_type & PERF_SAMPLE_CALLCHAIN)
mode = CALLCHAIN_FP; mode = CALLCHAIN_FP;

View File

@ -54,8 +54,6 @@
#include <unistd.h> #include <unistd.h>
#include <linux/mman.h> #include <linux/mman.h>
#define PTIME_RANGE_MAX 10
struct report { struct report {
struct perf_tool tool; struct perf_tool tool;
struct perf_session *session; struct perf_session *session;
@ -76,7 +74,8 @@ struct report {
const char *cpu_list; const char *cpu_list;
const char *symbol_filter_str; const char *symbol_filter_str;
const char *time_str; const char *time_str;
struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; struct perf_time_interval *ptime_range;
int range_size;
int range_num; int range_num;
float min_percent; float min_percent;
u64 nr_entries; u64 nr_entries;
@ -338,9 +337,10 @@ static int report__setup_sample_type(struct report *rep)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) && if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF; callchain_param.record_mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK) dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR; callchain_param.record_mode = CALLCHAIN_LBR;
else else
callchain_param.record_mode = CALLCHAIN_FP; callchain_param.record_mode = CALLCHAIN_FP;
@ -403,6 +403,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (evname != NULL) if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname); ret += fprintf(fp, " of event '%s'", evname);
if (rep->time_str)
ret += fprintf(fp, " (time slices: %s)", rep->time_str);
if (symbol_conf.show_ref_callgraph && if (symbol_conf.show_ref_callgraph &&
strstr(evname, "call-graph=no")) { strstr(evname, "call-graph=no")) {
ret += fprintf(fp, ", show reference callgraph"); ret += fprintf(fp, ", show reference callgraph");
@ -1296,22 +1299,33 @@ repeat:
if (symbol__init(&session->header.env) < 0) if (symbol__init(&session->header.env) < 0)
goto error; goto error;
report.ptime_range = perf_time__range_alloc(report.time_str,
&report.range_size);
if (!report.ptime_range) {
ret = -ENOMEM;
goto error;
}
if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) {
if (session->evlist->first_sample_time == 0 && if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) { session->evlist->last_sample_time == 0) {
pr_err("No first/last sample time in perf data\n"); pr_err("HINT: no first/last sample time found in perf data.\n"
return -EINVAL; "Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
ret = -EINVAL;
goto error;
} }
report.range_num = perf_time__percent_parse_str( report.range_num = perf_time__percent_parse_str(
report.ptime_range, PTIME_RANGE_MAX, report.ptime_range, report.range_size,
report.time_str, report.time_str,
session->evlist->first_sample_time, session->evlist->first_sample_time,
session->evlist->last_sample_time); session->evlist->last_sample_time);
if (report.range_num < 0) { if (report.range_num < 0) {
pr_err("Invalid time string\n"); pr_err("Invalid time string\n");
return -EINVAL; ret = -EINVAL;
goto error;
} }
} else { } else {
report.range_num = 1; report.range_num = 1;
@ -1327,6 +1341,8 @@ repeat:
ret = 0; ret = 0;
error: error:
zfree(&report.ptime_range);
perf_session__delete(session); perf_session__delete(session);
return ret; return ret;
} }

View File

@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return 0; return 0;
} }
#define PTIME_RANGE_MAX 10
struct perf_script { struct perf_script {
struct perf_tool tool; struct perf_tool tool;
struct perf_session *session; struct perf_session *session;
@ -1496,7 +1494,8 @@ struct perf_script {
struct thread_map *threads; struct thread_map *threads;
int name_width; int name_width;
const char *time_str; const char *time_str;
struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; struct perf_time_interval *ptime_range;
int range_size;
int range_num; int range_num;
}; };
@ -2919,9 +2918,10 @@ static void script__setup_sample_type(struct perf_script *script)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) && if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) (sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF; callchain_param.record_mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK) dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR; callchain_param.record_mode = CALLCHAIN_LBR;
else else
callchain_param.record_mode = CALLCHAIN_FP; callchain_param.record_mode = CALLCHAIN_FP;
@ -3444,17 +3444,26 @@ int cmd_script(int argc, const char **argv)
if (err < 0) if (err < 0)
goto out_delete; goto out_delete;
script.ptime_range = perf_time__range_alloc(script.time_str,
&script.range_size);
if (!script.ptime_range) {
err = -ENOMEM;
goto out_delete;
}
/* needs to be parsed after looking up reference time */ /* needs to be parsed after looking up reference time */
if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
if (session->evlist->first_sample_time == 0 && if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) { session->evlist->last_sample_time == 0) {
pr_err("No first/last sample time in perf data\n"); pr_err("HINT: no first/last sample time found in perf data.\n"
"Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
err = -EINVAL; err = -EINVAL;
goto out_delete; goto out_delete;
} }
script.range_num = perf_time__percent_parse_str( script.range_num = perf_time__percent_parse_str(
script.ptime_range, PTIME_RANGE_MAX, script.ptime_range, script.range_size,
script.time_str, script.time_str,
session->evlist->first_sample_time, session->evlist->first_sample_time,
session->evlist->last_sample_time); session->evlist->last_sample_time);
@ -3473,6 +3482,8 @@ int cmd_script(int argc, const char **argv)
flush_scripting(); flush_scripting();
out_delete: out_delete:
zfree(&script.ptime_range);
perf_evlist__free_stats(session->evlist); perf_evlist__free_stats(session->evlist);
perf_session__delete(session); perf_session__delete(session);

View File

@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
struct addr_location al; struct addr_location al;
if (machine__resolve(trace->host, &al, sample) < 0 || if (machine__resolve(trace->host, &al, sample) < 0 ||
thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
return -1; return -1;
return 0; return 0;
@ -2222,6 +2222,9 @@ static int trace__add_syscall_newtp(struct trace *trace)
if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
goto out_delete_sys_exit; goto out_delete_sys_exit;
perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
perf_evlist__add(evlist, sys_enter); perf_evlist__add(evlist, sys_enter);
perf_evlist__add(evlist, sys_exit); perf_evlist__add(evlist, sys_exit);
@ -2318,6 +2321,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL) if (pgfault_maj == NULL)
goto out_error_mem; goto out_error_mem;
perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
perf_evlist__add(evlist, pgfault_maj); perf_evlist__add(evlist, pgfault_maj);
} }
@ -2325,6 +2329,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL) if (pgfault_min == NULL)
goto out_error_mem; goto out_error_mem;
perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
perf_evlist__add(evlist, pgfault_min); perf_evlist__add(evlist, pgfault_min);
} }
@ -2345,45 +2350,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist; goto out_delete_evlist;
} }
perf_evlist__config(evlist, &trace->opts, NULL); perf_evlist__config(evlist, &trace->opts, &callchain_param);
if (callchain_param.enabled) {
bool use_identifier = false;
if (trace->syscalls.events.sys_exit) {
perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
&trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_maj) {
perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_min) {
perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
use_identifier = true;
}
if (use_identifier) {
/*
* Now we have evsels with different sample_ids, use
* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
* from a fixed position in each ring buffer record.
*
* As of this the changeset introducing this comment, this
* isn't strictly needed, as the fields that can come before
* PERF_SAMPLE_ID are all used, but we'll probably disable
* some of those for things like copying the payload of
* pointer syscall arguments, and for vfs_getname we don't
* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
*/
perf_evlist__set_sample_bit(evlist, IDENTIFIER);
perf_evlist__reset_sample_bit(evlist, ID);
}
}
signal(SIGCHLD, sig_handler); signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler); signal(SIGINT, sig_handler);
@ -2456,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 || evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit; perf_evlist__first(evlist)->attr.inherit;
/*
* Now that we already used evsel->attr to ask the kernel to setup the
* events, lets reuse evsel->attr.sample_max_stack as the limit in
* trace__resolve_callchain(), allowing per-event max-stack settings
* to override an explicitly set --max-stack global setting.
*/
evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
evsel->attr.sample_max_stack == 0)
evsel->attr.sample_max_stack = trace->max_stack;
}
again: again:
before = trace->nr_events; before = trace->nr_events;
@ -3098,8 +3077,9 @@ int cmd_trace(int argc, const char **argv)
} }
#ifdef HAVE_DWARF_UNWIND_SUPPORT #ifdef HAVE_DWARF_UNWIND_SUPPORT
if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls) if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
}
#endif #endif
if (callchain_param.enabled) { if (callchain_param.enabled) {

View File

@ -0,0 +1,19 @@
#!/bin/bash
#
# Profiling physical memory by all retired load instructions/uops event
# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS
#

# Pick whichever of the two load events 'perf list' advertises.
load=`perf list | grep mem_inst_retired.all_loads`
if [ -z "$load" ]; then
	load=`perf list | grep mem_uops_retired.all_loads`
fi
if [ -z "$load" ]; then
	echo "There is no event to count all retired load instructions/uops."
	exit 1
fi

# $load is deliberately unquoted here: word splitting collapses the
# surrounding whitespace before 'tr' removes the remaining blanks.
arg=$(echo $load | tr -d ' ')
arg="$arg:P"

# Quote "$@" so that user arguments containing spaces or glob characters
# reach 'perf record' exactly as they were given.
perf record --phys-data -e "$arg" "$@"

View File

@ -0,0 +1,3 @@
#!/bin/bash
# description: resolve physical address samples
# "$@" is quoted so user arguments are forwarded without re-splitting.
perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py

View File

@ -0,0 +1,95 @@
# mem-phys-addr.py: Resolve physical address samples
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2018, Intel Corporation.
from __future__ import division
import os
import sys
import struct
import re
import bisect
import collections
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
#physical address ranges for System RAM
system_ram = []
#physical address ranges for Persistent Memory
pmem = []
#file object for proc iomem
f = None
#Count for each type of memory
load_mem_type_cnt = collections.Counter()
#perf event name
event_name = None
def parse_iomem():
    """Load the System RAM and Persistent Memory ranges from /proc/iomem.

    Every matching line contributes its start and end address, appended in
    file order, so system_ram and pmem become flat
    [start0, end0, start1, end1, ...] lists.  The file object is left open
    in the global f because find_memory_type() re-reads it.
    """
    global f
    f = open('/proc/iomem', 'r')
    for line in f:
        # "start-end : name" -> [start, end, name]
        m = re.split('-|:', line, 2)
        kind = m[2].strip()
        # int() parses arbitrarily large values on both Python 2 and 3;
        # long() only exists on Python 2.
        if kind == 'System RAM':
            system_ram.append(int(m[0], 16))
            system_ram.append(int(m[1], 16))
        if kind == 'Persistent Memory':
            pmem.append(int(m[0], 16))
            pmem.append(int(m[1], 16))
def print_memory_type():
    """Print the per-memory-type sample counts as a table.

    Rows are ordered by descending count, ties broken by descending name.
    Written so that it parses under both Python 2 and Python 3: no print
    statement with a trailing comma and no tuple-parameter lambda.
    """
    print("Event: %s" % (event_name))
    # The format strings carry their own newline, so write them verbatim.
    sys.stdout.write("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"))
    sys.stdout.write("%-40s %10s %10s\n" % ("----------------------------------------",
                                            "-----------", "-----------"))
    total = sum(load_mem_type_cnt.values())
    for mem_type, count in sorted(load_mem_type_cnt.most_common(),
                                  key=lambda kv: (kv[1], kv[0]), reverse=True):
        sys.stdout.write("%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total))
def trace_begin():
    """Hook that loads the /proc/iomem ranges via parse_iomem()."""
    parse_iomem()
def trace_end():
    """Hook that prints the summary table and closes the /proc/iomem file."""
    print_memory_type()
    f.close()
def is_system_ram(phys_addr):
    """Return True when phys_addr falls inside a recorded System RAM range."""
    # system_ram is a flat start/end list (/proc/iomem is sorted), so an odd
    # insertion point means the address sits between a start and its end.
    return bisect.bisect(system_ram, phys_addr) % 2 != 0
def is_persistent_mem(phys_addr):
    """Return True when phys_addr falls inside a recorded Persistent Memory range."""
    # Same odd/even insertion-point test as for System RAM, on the pmem list.
    return bisect.bisect(pmem, phys_addr) % 2 != 0
def find_memory_type(phys_addr):
    """Return the /proc/iomem resource name covering phys_addr, or "N/A".

    System RAM and Persistent Memory are answered from the pre-parsed
    lists; anything else falls back to a linear scan of /proc/iomem.
    """
    if phys_addr == 0:
        return "N/A"
    if is_system_ram(phys_addr):
        return "System RAM"
    if is_persistent_mem(phys_addr):
        return "Persistent Memory"

    #slow path, search all
    f.seek(0, 0)
    for line in f:
        m = re.split('-|:', line, 2)
        # int() instead of the Python-2-only long().
        if int(m[0], 16) <= phys_addr <= int(m[1], 16):
            # Strip the leading blank and trailing newline so the name is a
            # clean counter key and prints on a single table row.
            return m[2].strip()
    return "N/A"
def process_event(param_dict):
    """Per-sample hook: count the sample under the memory type of its phys_addr."""
    global event_name

    name = param_dict["ev_name"]
    phys_addr = param_dict["sample"]["phys_addr"]

    # Remember the first event name seen; it heads the final report.
    if event_name is None:
        event_name = name

    load_mem_type_cnt[find_memory_type(phys_addr)] += 1

View File

@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
} }
callchain_param.record_mode = CALLCHAIN_DWARF; callchain_param.record_mode = CALLCHAIN_DWARF;
dwarf_callchain_users = true;
if (init_live_machine(machine)) { if (init_live_machine(machine)) {
pr_err("Could not init machine\n"); pr_err("Could not init machine\n");

View File

@ -86,6 +86,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-$(CONFIG_AUXTRACE) += arm-spe.o
libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
libperf-y += parse-branch-options.o libperf-y += parse-branch-options.o
libperf-y += dump-insn.o libperf-y += dump-insn.o
libperf-y += parse-regs-options.o libperf-y += parse-regs-options.o

View File

@ -0,0 +1,462 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <stdio.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>
#include "arm-spe-pkt-decoder.h"
#define BIT(n)		(1ULL << (n))

/* Flag bits tested on address packet payloads by arm_spe_pkt_desc() */
#define NS_FLAG		BIT(63)
#define EL_FLAG		(BIT(62) | BIT(61))

/* Values (and masks) compared against the first header byte */
#define SPE_HEADER0_PAD			0x0
#define SPE_HEADER0_END			0x1
#define SPE_HEADER0_ADDRESS		0x30 /* address packet (short) */
#define SPE_HEADER0_ADDRESS_MASK	0x38
#define SPE_HEADER0_COUNTER		0x18 /* counter packet (short) */
#define SPE_HEADER0_COUNTER_MASK	0x38
#define SPE_HEADER0_TIMESTAMP		0x71
#define SPE_HEADER0_EVENTS		0x2
#define SPE_HEADER0_EVENTS_MASK		0xf
#define SPE_HEADER0_SOURCE		0x3
#define SPE_HEADER0_SOURCE_MASK		0xf
#define SPE_HEADER0_CONTEXT		0x24
#define SPE_HEADER0_CONTEXT_MASK	0x3c
#define SPE_HEADER0_OP_TYPE		0x8
#define SPE_HEADER0_OP_TYPE_MASK	0x3c

/* Values (and masks) compared against the second byte of a 16-bit header */
#define SPE_HEADER1_ALIGNMENT		0x0
#define SPE_HEADER1_ADDRESS		0xb0 /* address packet (extended) */
#define SPE_HEADER1_ADDRESS_MASK	0xf8
#define SPE_HEADER1_COUNTER		0x98 /* counter packet (extended) */
#define SPE_HEADER1_COUNTER_MASK	0xf8
/*
 * Little-endian to host conversion helpers.
 * On a big-endian host they byte-swap; otherwise the leXX_to_cpu names
 * expand to nothing (so "le16_to_cpu(x)" is just "(x)") and memcpy_le64
 * is plain memcpy.
 */
#if __BYTE_ORDER == __BIG_ENDIAN
#define le16_to_cpu bswap_16
#define le32_to_cpu bswap_32
#define le64_to_cpu bswap_64
/* Copy n bytes from s to d, then byte-swap *d in place as a 64-bit value. */
#define memcpy_le64(d, s, n) do { \
	memcpy((d), (s), (n)); \
	*(d) = le64_to_cpu(*(d)); \
} while (0)
#else
#define le16_to_cpu
#define le32_to_cpu
#define le64_to_cpu
#define memcpy_le64 memcpy
#endif
static const char * const arm_spe_packet_name[] = {
[ARM_SPE_PAD] = "PAD",
[ARM_SPE_END] = "END",
[ARM_SPE_TIMESTAMP] = "TS",
[ARM_SPE_ADDRESS] = "ADDR",
[ARM_SPE_COUNTER] = "LAT",
[ARM_SPE_CONTEXT] = "CONTEXT",
[ARM_SPE_OP_TYPE] = "OP-TYPE",
[ARM_SPE_EVENTS] = "EVENTS",
[ARM_SPE_DATA_SOURCE] = "DATA-SOURCE",
};
const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
{
return arm_spe_packet_name[type];
}
/*
 * Decode the payload size in bytes from a header byte.
 * Bits 5:4 hold log2 of the size:
 *	00 -> 1 (byte), 01 -> 2 (halfword), 10 -> 4 (word), 11 -> 8 (doubleword)
 */
static int payloadlen(unsigned char byte)
{
	const unsigned int size_log2 = (byte >> 4) & 0x3;

	return 1 << size_log2;
}
/*
 * Read the little-endian payload that follows the one-byte header at @buf
 * into packet->payload.  The payload size comes from the header itself.
 *
 * Returns the packet length (header + payload), or
 * ARM_SPE_NEED_MORE_BYTES when @len does not cover the whole packet.
 */
static int arm_spe_get_payload(const unsigned char *buf, size_t len,
			       struct arm_spe_pkt *packet)
{
	size_t payload_len = payloadlen(buf[0]);
	uint16_t v16;
	uint32_t v32;
	uint64_t v64;

	if (len < 1 + payload_len)
		return ARM_SPE_NEED_MORE_BYTES;

	buf++;

	/*
	 * The payload sits at an arbitrary offset in the trace buffer, so
	 * copy it out instead of dereferencing a cast pointer (which would
	 * be an unaligned, aliasing-violating access).
	 */
	switch (payload_len) {
	case 1:
		packet->payload = buf[0];
		break;
	case 2:
		memcpy(&v16, buf, sizeof(v16));
		packet->payload = le16_to_cpu(v16);
		break;
	case 4:
		memcpy(&v32, buf, sizeof(v32));
		packet->payload = le32_to_cpu(v32);
		break;
	case 8:
		memcpy(&v64, buf, sizeof(v64));
		packet->payload = le64_to_cpu(v64);
		break;
	default:
		return ARM_SPE_BAD_PACKET;
	}

	return 1 + payload_len;
}
/* Mark @packet as padding; a PAD packet is the single header byte. */
static int arm_spe_get_pad(struct arm_spe_pkt *packet)
{
	packet->type = ARM_SPE_PAD;
	return 1;
}
/*
 * Handle an alignment packet: it is reported as padding that runs from
 * @buf up to the next address that is a multiple of the encoded boundary.
 * Returns that distance, or ARM_SPE_NEED_MORE_BYTES when fewer than
 * "boundary" bytes are available.
 */
static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
				 struct arm_spe_pkt *packet)
{
	/* The low nibble of the first header byte encodes log2(boundary) - 1. */
	const unsigned int boundary = 1 << ((buf[0] & 0xf) + 1);
	const uintptr_t misalign = (uintptr_t)buf & (boundary - 1);

	if (len < boundary)
		return ARM_SPE_NEED_MORE_BYTES;

	packet->type = ARM_SPE_PAD;
	return boundary - misalign;
}
/* Mark @packet as an END packet; it is the single header byte. */
static int arm_spe_get_end(struct arm_spe_pkt *packet)
{
	packet->type = ARM_SPE_END;
	return 1;
}
/* Decode a timestamp packet; returns whatever arm_spe_get_payload() returns. */
static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
				 struct arm_spe_pkt *packet)
{
	const int consumed = arm_spe_get_payload(buf, len, packet);

	packet->type = ARM_SPE_TIMESTAMP;
	return consumed;
}
/* Decode an events packet; returns whatever arm_spe_get_payload() returns. */
static int arm_spe_get_events(const unsigned char *buf, size_t len,
			      struct arm_spe_pkt *packet)
{
	const int consumed = arm_spe_get_payload(buf, len, packet);

	packet->type = ARM_SPE_EVENTS;

	/*
	 * Stash the payload size (packet length minus the header byte) in
	 * index.  arm_spe_pkt_desc() then needs a single "index > 1" test to
	 * know whether the LLC-ACCESS, LLC-REFILL and REMOTE-ACCESS bits are
	 * present.
	 */
	packet->index = consumed - 1;

	return consumed;
}
/* Decode a data source packet; returns whatever arm_spe_get_payload() returns. */
static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
				   struct arm_spe_pkt *packet)
{
	const int consumed = arm_spe_get_payload(buf, len, packet);

	packet->type = ARM_SPE_DATA_SOURCE;
	return consumed;
}
/*
 * Decode a context packet.  The two low header bits become the index,
 * which arm_spe_pkt_desc() prints as "el<index + 1>".
 */
static int arm_spe_get_context(const unsigned char *buf, size_t len,
			       struct arm_spe_pkt *packet)
{
	const unsigned char hdr = buf[0];

	packet->index = hdr & 0x3;
	packet->type = ARM_SPE_CONTEXT;

	return arm_spe_get_payload(buf, len, packet);
}
/*
 * Decode an operation type packet.  The two low header bits become the
 * index, which selects the operation class in arm_spe_pkt_desc().
 */
static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
			       struct arm_spe_pkt *packet)
{
	const unsigned char hdr = buf[0];

	packet->index = hdr & 0x3;
	packet->type = ARM_SPE_OP_TYPE;

	return arm_spe_get_payload(buf, len, packet);
}
/*
 * Decode a counter packet: a one-byte header (two bytes when @ext_hdr is 1)
 * followed by a 16-bit little-endian count.
 *
 * Returns the packet length, or ARM_SPE_NEED_MORE_BYTES when @len does not
 * cover the header(s) plus the payload.
 */
static int arm_spe_get_counter(const unsigned char *buf, size_t len,
			       const unsigned char ext_hdr, struct arm_spe_pkt *packet)
{
	const size_t hdr_len = 1 + ext_hdr;
	uint16_t raw;

	/* Everything read below must lie inside the buffer. */
	if (len < hdr_len + sizeof(raw))
		return ARM_SPE_NEED_MORE_BYTES;

	packet->type = ARM_SPE_COUNTER;
	if (ext_hdr)
		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
	else
		packet->index = buf[0] & 0x7;

	/*
	 * The payload starts after the header byte(s), i.e. past the second
	 * header byte for the extended format.  Copy it out rather than
	 * dereferencing a possibly unaligned cast pointer.
	 */
	memcpy(&raw, buf + hdr_len, sizeof(raw));
	packet->payload = le16_to_cpu(raw);

	return (int)(hdr_len + sizeof(raw));
}
/*
 * Decode an address packet: a one-byte header (two bytes when @ext_hdr is 1)
 * followed by a 64-bit little-endian address.
 *
 * Returns the packet length, or ARM_SPE_NEED_MORE_BYTES when @len does not
 * cover the header(s) plus the payload.
 */
static int arm_spe_get_addr(const unsigned char *buf, size_t len,
			    const unsigned char ext_hdr, struct arm_spe_pkt *packet)
{
	const size_t hdr_len = 1 + ext_hdr;
	const size_t payload_len = 8;

	/* Everything read below must lie inside the buffer. */
	if (len < hdr_len + payload_len)
		return ARM_SPE_NEED_MORE_BYTES;

	packet->type = ARM_SPE_ADDRESS;
	if (ext_hdr)
		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
	else
		packet->index = buf[0] & 0x7;

	/* The payload starts after the header byte(s), extended or not. */
	memcpy_le64(&packet->payload, buf + hdr_len, payload_len);

	return (int)(hdr_len + payload_len);
}
/*
 * Decode the single packet starting at @buf into @packet.
 *
 * @packet is zeroed first, so its type stays ARM_SPE_BAD when nothing
 * matches.  Returns the packet length in bytes, ARM_SPE_NEED_MORE_BYTES
 * when @len is too short, or ARM_SPE_BAD_PACKET for an unrecognised header.
 */
static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
				 struct arm_spe_pkt *packet)
{
	unsigned int byte;

	memset(packet, 0, sizeof(struct arm_spe_pkt));

	if (!len)
		return ARM_SPE_NEED_MORE_BYTES;

	byte = buf[0];
	if (byte == SPE_HEADER0_PAD)
		return arm_spe_get_pad(packet);
	if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
		return arm_spe_get_end(packet);

	if (byte & 0xc0) {
		/* at least one of the two top header bits is set */
		if (byte & 0x80) {
			if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
				return arm_spe_get_addr(buf, len, 0, packet);
			if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
				return arm_spe_get_counter(buf, len, 0, packet);
		} else {
			if (byte == SPE_HEADER0_TIMESTAMP)
				return arm_spe_get_timestamp(buf, len, packet);
			if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
				return arm_spe_get_events(buf, len, packet);
			if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
				return arm_spe_get_data_source(buf, len, packet);
			if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
				return arm_spe_get_context(buf, len, packet);
			if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
				return arm_spe_get_op_type(buf, len, packet);
		}
	} else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
		/* 16-bit header: the second byte must exist before it is read */
		if (len < 2)
			return ARM_SPE_NEED_MORE_BYTES;

		byte = buf[1];
		if (byte == SPE_HEADER1_ALIGNMENT)
			return arm_spe_get_alignment(buf, len, packet);
		if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
			return arm_spe_get_addr(buf, len, 1, packet);
		if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
			return arm_spe_get_counter(buf, len, 1, packet);
	}

	return ARM_SPE_BAD_PACKET;
}
/*
 * Decode the packet at @buf.  Same return values as
 * arm_spe_do_get_packet(), except that a run of PAD bytes is reported as
 * one packet.
 */
int arm_spe_get_packet(const unsigned char *buf, size_t len,
		       struct arm_spe_pkt *packet)
{
	int consumed = arm_spe_do_get_packet(buf, len, packet);

	if (consumed <= 0 || packet->type != ARM_SPE_PAD)
		return consumed;

	/*
	 * Put multiple consecutive PADs on the same line, up to the
	 * fixed-width output format of 16 bytes per line.
	 */
	while (consumed < 16 && (size_t)consumed < len && buf[consumed] == 0)
		consumed++;

	return consumed;
}
/*
 * Account for a snprintf()-style result @ret produced at the cursor *@buf:
 * advance the cursor by what was actually stored and shrink the remaining
 * space *@blen.  Returns 0, or the negative @ret unchanged on output error.
 */
static int arm_spe_desc_advance(char **buf, size_t *blen, int ret)
{
	if (ret < 0)
		return ret;

	/* snprintf() reports the untruncated length; only *blen - 1 chars fit. */
	if ((size_t)ret >= *blen)
		ret = *blen ? (int)(*blen - 1) : 0;

	*buf += ret;
	*blen -= ret;
	return 0;
}

/* Append the string @str to the description being built at *@buf. */
static int arm_spe_desc_append(char **buf, size_t *blen, const char *str)
{
	return arm_spe_desc_advance(buf, blen,
				    snprintf(*buf, *blen, "%s", str));
}

/*
 * Format a human readable description of @packet into @buf, which holds
 * @buf_len bytes.
 *
 * Every piece is written with the space that is really left, so the output
 * is truncated instead of running past the end of @buf.
 *
 * Returns a negative value on output error, 0 when there is nothing to
 * describe, the number of characters stored for descriptions assembled from
 * several pieces, and snprintf()'s result for single-piece descriptions.
 */
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
		     size_t buf_len)
{
	int ret, ns, el, idx = packet->index;
	unsigned long long payload = packet->payload;
	const char *name = arm_spe_pkt_name(packet->type);
	size_t blen = buf_len;

	/* The name table may have no entry for ARM_SPE_BAD. */
	if (!name)
		name = "BAD";

	switch (packet->type) {
	case ARM_SPE_BAD:
	case ARM_SPE_PAD:
	case ARM_SPE_END:
		return snprintf(buf, buf_len, "%s", name);
	case ARM_SPE_EVENTS: {
		/* Suffix for each event bit, indexed by bit position. */
		static const char * const event_names[] = {
			" EXCEPTION-GEN", " RETIRED", " L1D-ACCESS",
			" L1D-REFILL", " TLB-ACCESS", " TLB-REFILL",
			" NOT-TAKEN", " MISPRED",
			" LLC-ACCESS", " LLC-REFILL", " REMOTE-ACCESS",
		};
		/* Bits 8-10 are only reported when index > 1. */
		const unsigned int nr_bits = idx > 1 ? 11 : 8;
		unsigned int bit;

		ret = arm_spe_desc_append(&buf, &blen, "EV");
		for (bit = 0; !ret && bit < nr_bits; bit++) {
			if (payload & (1ULL << bit))
				ret = arm_spe_desc_append(&buf, &blen,
							  event_names[bit]);
		}
		if (ret < 0)
			return ret;
		return (int)(buf_len - blen);
	}
	case ARM_SPE_OP_TYPE:
		switch (idx) {
		case 0:
			return snprintf(buf, buf_len, "%s", payload & 0x1 ?
					"COND-SELECT" : "INSN-OTHER");
		case 1:
			ret = arm_spe_desc_append(&buf, &blen,
						  payload & 0x1 ? "ST" : "LD");
			if (payload & 0x2) {
				if (!ret && (payload & 0x4))
					ret = arm_spe_desc_append(&buf, &blen, " AT");
				if (!ret && (payload & 0x8))
					ret = arm_spe_desc_append(&buf, &blen, " EXCL");
				if (!ret && (payload & 0x10))
					ret = arm_spe_desc_append(&buf, &blen, " AR");
			} else if (!ret && (payload & 0x4)) {
				ret = arm_spe_desc_append(&buf, &blen, " SIMD-FP");
			}
			if (ret < 0)
				return ret;
			return (int)(buf_len - blen);
		case 2:
			ret = arm_spe_desc_append(&buf, &blen, "B");
			if (!ret && (payload & 0x1))
				ret = arm_spe_desc_append(&buf, &blen, " COND");
			if (!ret && (payload & 0x2))
				ret = arm_spe_desc_append(&buf, &blen, " IND");
			if (ret < 0)
				return ret;
			return (int)(buf_len - blen);
		default:
			return 0;
		}
	case ARM_SPE_DATA_SOURCE:
	case ARM_SPE_TIMESTAMP:
		/* payload is unsigned, so print it as such */
		return snprintf(buf, buf_len, "%s %llu", name, payload);
	case ARM_SPE_ADDRESS:
		switch (idx) {
		case 0:
		case 1:
			ns = !!(packet->payload & NS_FLAG);
			el = (packet->payload & EL_FLAG) >> 61;
			/* the top byte carries the flags, not address bits */
			payload &= ~(0xffULL << 56);
			return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
					(idx == 1) ? "TGT" : "PC", payload, el, ns);
		case 2:
			return snprintf(buf, buf_len, "VA 0x%llx", payload);
		case 3:
			ns = !!(packet->payload & NS_FLAG);
			payload &= ~(0xffULL << 56);
			return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
					payload, ns);
		default:
			return 0;
		}
	case ARM_SPE_CONTEXT:
		return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
				(unsigned long)payload, idx + 1);
	case ARM_SPE_COUNTER:
		ret = arm_spe_desc_advance(&buf, &blen,
					   snprintf(buf, blen, "%s %d ", name,
						    (unsigned short)payload));
		if (ret < 0)
			return ret;
		switch (idx) {
		case 0:
			ret = arm_spe_desc_append(&buf, &blen, "TOT");
			break;
		case 1:
			ret = arm_spe_desc_append(&buf, &blen, "ISSUE");
			break;
		case 2:
			ret = arm_spe_desc_append(&buf, &blen, "XLAT");
			break;
		default:
			ret = 0;
		}
		if (ret < 0)
			return ret;
		return (int)(buf_len - blen);
	default:
		break;
	}

	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
			name, payload, packet->index);
}

View File

@ -0,0 +1,43 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
#define INCLUDE__ARM_SPE_PKT_DECODER_H__

#include <stddef.h>
#include <stdint.h>

/* Size of the description buffer callers hand to arm_spe_pkt_desc() */
#define ARM_SPE_PKT_DESC_MAX		256

/*
 * Negative results of arm_spe_get_packet().  Parenthesized so the values
 * stay intact wherever the macros are expanded inside an expression.
 */
#define ARM_SPE_NEED_MORE_BYTES		(-1)
#define ARM_SPE_BAD_PACKET		(-2)

/* Packet types produced by the decoder; ARM_SPE_BAD is the zero value. */
enum arm_spe_pkt_type {
	ARM_SPE_BAD,
	ARM_SPE_PAD,
	ARM_SPE_END,
	ARM_SPE_TIMESTAMP,
	ARM_SPE_ADDRESS,
	ARM_SPE_COUNTER,
	ARM_SPE_CONTEXT,
	ARM_SPE_OP_TYPE,
	ARM_SPE_EVENTS,
	ARM_SPE_DATA_SOURCE,
};

/* One decoded packet. */
struct arm_spe_pkt {
	enum arm_spe_pkt_type	type;
	unsigned char		index;		/* sub-type taken from the header; meaning depends on type */
	uint64_t		payload;	/* payload value, converted to host byte order */
};

/* Printable name of a packet type. */
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);

/*
 * Decode the packet at buf (len bytes available) into *packet.
 * Returns the packet length, or one of the negative codes above.
 */
int arm_spe_get_packet(const unsigned char *buf, size_t len,
		       struct arm_spe_pkt *packet);

/* Format a description of *packet into buf, which holds len bytes. */
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);

#endif

231
tools/perf/util/arm-spe.c Normal file
View File

@ -0,0 +1,231 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "debug.h"
#include "auxtrace.h"
#include "arm-spe.h"
#include "arm-spe-pkt-decoder.h"
/* Per-session SPE state; reached from session->auxtrace via container_of(). */
struct arm_spe {
	struct auxtrace			auxtrace;	/* callbacks installed by arm_spe_process_auxtrace_info() */
	struct auxtrace_queues		queues;		/* receives every auxtrace event */
	struct auxtrace_heap		heap;		/* released in arm_spe_free() */
	u32				auxtrace_type;	/* copied from the auxtrace info event */
	struct perf_session		*session;
	struct machine			*machine;	/* host machine of the session */
	u32				pmu_type;	/* priv[ARM_SPE_PMU_TYPE] of the info event */
};
/*
 * Private data hung off an auxtrace queue (queue_array[i].priv).
 * NOTE(review): only freed in this file so far; the fields are presumably
 * for later per-queue decoding — confirm once users are added.
 */
struct arm_spe_queue {
	struct arm_spe		*spe;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
};
/*
 * Hex-dump @len bytes of raw SPE trace at @buf to stdout, one decoded
 * packet per line: offset, raw bytes (padded to 16 columns) and the
 * packet description.  Undecodable bytes are shown one at a time as
 * "Bad packet!".
 */
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;

		/*
		 * Never step past the end of the buffer, whatever length the
		 * decoder reported: "len -= pkt_len" must not wrap around.
		 */
		if ((size_t)pkt_len > len)
			pkt_len = len;

		printf(".");
		/* pos is a size_t, so it needs the 'z' length modifier */
		color_fprintf(stdout, color, " %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		/* pad with the same three columns a " %02x" cell takes */
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
			else
				/* no description: still terminate the line */
				color_fprintf(stdout, color, "\n");
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}
/* Print a "." separator line, then dump the buffer with arm_spe_dump(). */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}
/* auxtrace process_event callback: does nothing and always returns 0. */
static int arm_spe_process_event(struct perf_session *session __maybe_unused,
				 union perf_event *event __maybe_unused,
				 struct perf_sample *sample __maybe_unused,
				 struct perf_tool *tool __maybe_unused)
{
	return 0;
}
/*
 * auxtrace process_auxtrace_event callback: queue the event's trace data
 * and, when dump_trace is set, print it.
 *
 * Returns 0 on success, -errno when the file offset cannot be read, or the
 * error from auxtrace_queues__add_event().
 */
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int fd = perf_data__fd(session->data);
	struct auxtrace_buffer *buffer;
	off_t data_offset = 0;
	int err;

	/* For a file, the data starts at the current offset; a pipe uses 0. */
	if (!perf_data__is_pipe(session->data)) {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&spe->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	if (!dump_trace)
		return 0;

	/* Dump here now we have copied a piped trace out of the pipe */
	if (auxtrace_buffer__get_data(buffer, fd)) {
		arm_spe_dump_event(spe, buffer->data, buffer->size);
		auxtrace_buffer__put_data(buffer);
	}

	return 0;
}
/* auxtrace flush_events callback: does nothing and always returns 0. */
static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	return 0;
}
/* Release one queue's private data; @priv may be NULL. */
static void arm_spe_free_queue(void *priv)
{
	/* free() accepts NULL, so no separate guard is needed. */
	free(priv);
}
/*
 * auxtrace free_events callback: free every queue's private data, then
 * the queues themselves.
 */
static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int nr = 0;

	while (nr < queues->nr_queues) {
		arm_spe_free_queue(queues->queue_array[nr].priv);
		/* clear the slot so nothing is left pointing at freed memory */
		queues->queue_array[nr].priv = NULL;
		nr++;
	}

	auxtrace_queues__free(queues);
}
/*
 * auxtrace free callback: release the heap, the queued events and the
 * arm_spe object itself, and detach it from the session.
 */
static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe;

	spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	auxtrace_heap__free(&spe->heap);
	/* still reads session->auxtrace, so run it before the pointer is cleared */
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}
/* printf format for each auxtrace info priv[] entry that gets printed. */
static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= " PMU Type %"PRId64"\n",
};

/* Print the PMU type from the info array @arr, only when dump_trace is set. */
static void arm_spe_print_info(u64 *arr)
{
	if (dump_trace)
		fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE],
			arr[ARM_SPE_PMU_TYPE]);
}
/*
 * Set up SPE decoding for @session from an auxtrace info event: allocate
 * the arm_spe state, initialise its queues and install the auxtrace
 * callbacks.
 *
 * Returns 0 on success, -EINVAL when the event is too small to hold the
 * PMU type, -ENOMEM on allocation failure, or the error from
 * auxtrace_queues__init().
 */
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	/*
	 * priv[ARM_SPE_PMU_TYPE] is read below, so the event must hold at
	 * least ARM_SPE_PMU_TYPE + 1 entries.  Multiplying by the index
	 * itself would give 0 and check nothing.
	 */
	size_t min_sz = sizeof(u64) * (ARM_SPE_PMU_TYPE + 1);
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	return 0;

err_free:
	free(spe);
	return err;
}

31
tools/perf/util/arm-spe.h Normal file
View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#ifndef INCLUDE__PERF_ARM_SPE_H__
#define INCLUDE__PERF_ARM_SPE_H__

/* NOTE(review): presumably a prefix matched against PMU names — confirm at the users */
#define ARM_SPE_PMU_NAME "arm_spe_"

/* Indices into the u64 priv[] array of the SPE auxtrace info event. */
enum {
	ARM_SPE_PMU_TYPE,
	ARM_SPE_PER_CPU_MMAPS,
	ARM_SPE_AUXTRACE_PRIV_MAX,	/* number of entries, not an index */
};

/* Size in bytes of the full priv[] array. */
#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))

union perf_event;
struct perf_session;
struct perf_pmu;

/* Recording-side setup; defined outside this header's companion file. */
struct auxtrace_record *arm_spe_recording_init(int *err,
					       struct perf_pmu *arm_spe_pmu);

/* Report-side setup from an auxtrace info event; returns 0 or a negative error. */
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session);

struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);

#endif

View File

@ -54,6 +54,7 @@
#include "intel-pt.h" #include "intel-pt.h"
#include "intel-bts.h" #include "intel-bts.h"
#include "arm-spe.h"
#include "sane_ctype.h" #include "sane_ctype.h"
#include "symbol/kallsyms.h" #include "symbol/kallsyms.h"
@ -910,6 +911,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
return intel_pt_process_auxtrace_info(event, session); return intel_pt_process_auxtrace_info(event, session);
case PERF_AUXTRACE_INTEL_BTS: case PERF_AUXTRACE_INTEL_BTS:
return intel_bts_process_auxtrace_info(event, session); return intel_bts_process_auxtrace_info(event, session);
case PERF_AUXTRACE_ARM_SPE:
return arm_spe_process_auxtrace_info(event, session);
case PERF_AUXTRACE_CS_ETM: case PERF_AUXTRACE_CS_ETM:
case PERF_AUXTRACE_UNKNOWN: case PERF_AUXTRACE_UNKNOWN:
default: default:

View File

@ -43,6 +43,7 @@ enum auxtrace_type {
PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_PT,
PERF_AUXTRACE_INTEL_BTS, PERF_AUXTRACE_INTEL_BTS,
PERF_AUXTRACE_CS_ETM, PERF_AUXTRACE_CS_ETM,
PERF_AUXTRACE_ARM_SPE,
}; };
enum itrace_period_type { enum itrace_period_type {

View File

@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
CALLCHAIN_PARAM_DEFAULT CALLCHAIN_PARAM_DEFAULT
}; };
/*
* Are there any events using DWARF callchains?
*
* I.e.
*
* -e cycles/call-graph=dwarf/
*/
bool dwarf_callchain_users;
struct callchain_param callchain_param_default = { struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT CALLCHAIN_PARAM_DEFAULT
}; };
@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
ret = 0; ret = 0;
param->record_mode = CALLCHAIN_DWARF; param->record_mode = CALLCHAIN_DWARF;
param->dump_size = default_stack_dump_size; param->dump_size = default_stack_dump_size;
dwarf_callchain_users = true;
tok = strtok_r(NULL, ",", &saveptr); tok = strtok_r(NULL, ",", &saveptr);
if (tok) { if (tok) {

View File

@ -89,6 +89,8 @@ enum chain_value {
CCVAL_COUNT, CCVAL_COUNT,
}; };
extern bool dwarf_callchain_users;
struct callchain_param { struct callchain_param {
bool enabled; bool enabled;
enum perf_call_graph_mode record_mode; enum perf_call_graph_mode record_mode;

View File

@ -1760,7 +1760,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
switch (old_state) { switch (old_state) {
case BKW_MMAP_NOTREADY: { case BKW_MMAP_NOTREADY: {
if (state != BKW_MMAP_RUNNING) if (state != BKW_MMAP_RUNNING)
goto state_err;; goto state_err;
break; break;
} }
case BKW_MMAP_RUNNING: { case BKW_MMAP_RUNNING: {

View File

@ -651,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
return ret; return ret;
} }
void perf_evsel__config_callchain(struct perf_evsel *evsel, static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
struct record_opts *opts, struct record_opts *opts,
struct callchain_param *param) struct callchain_param *param)
{ {
bool function = perf_evsel__is_function_event(evsel); bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr; struct perf_event_attr *attr = &evsel->attr;
@ -699,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
} }
} }
void perf_evsel__config_callchain(struct perf_evsel *evsel,
struct record_opts *opts,
struct callchain_param *param)
{
if (param->enabled)
return __perf_evsel__config_callchain(evsel, opts, param);
}
static void static void
perf_evsel__reset_callgraph(struct perf_evsel *evsel, perf_evsel__reset_callgraph(struct perf_evsel *evsel,
struct callchain_param *param) struct callchain_param *param)
@ -718,19 +726,19 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel,
} }
static void apply_config_terms(struct perf_evsel *evsel, static void apply_config_terms(struct perf_evsel *evsel,
struct record_opts *opts) struct record_opts *opts, bool track)
{ {
struct perf_evsel_config_term *term; struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms; struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr; struct perf_event_attr *attr = &evsel->attr;
struct callchain_param param; /* callgraph default */
struct callchain_param param = {
.record_mode = callchain_param.record_mode,
};
u32 dump_size = 0; u32 dump_size = 0;
int max_stack = 0; int max_stack = 0;
const char *callgraph_buf = NULL; const char *callgraph_buf = NULL;
/* callgraph default */
param.record_mode = callchain_param.record_mode;
list_for_each_entry(term, config_terms, list) { list_for_each_entry(term, config_terms, list) {
switch (term->type) { switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD: case PERF_EVSEL__CONFIG_TERM_PERIOD:
@ -781,7 +789,7 @@ static void apply_config_terms(struct perf_evsel *evsel,
attr->write_backward = term->val.overwrite ? 1 : 0; attr->write_backward = term->val.overwrite ? 1 : 0;
break; break;
case PERF_EVSEL__CONFIG_TERM_DRV_CFG: case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
BUG_ON(1); break;
default: default:
break; break;
} }
@ -789,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
/* User explicitly set per-event callgraph, clear the old setting and reset. */ /* User explicitly set per-event callgraph, clear the old setting and reset. */
if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
bool sample_address = false;
if (max_stack) { if (max_stack) {
param.max_stack = max_stack; param.max_stack = max_stack;
if (callgraph_buf == NULL) if (callgraph_buf == NULL)
@ -808,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
evsel->name); evsel->name);
return; return;
} }
if (param.record_mode == CALLCHAIN_DWARF)
sample_address = true;
} }
} }
if (dump_size > 0) { if (dump_size > 0) {
@ -820,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
perf_evsel__reset_callgraph(evsel, &callchain_param); perf_evsel__reset_callgraph(evsel, &callchain_param);
/* set perf-event callgraph */ /* set perf-event callgraph */
if (param.enabled) if (param.enabled) {
if (sample_address) {
perf_evsel__set_sample_bit(evsel, ADDR);
perf_evsel__set_sample_bit(evsel, DATA_SRC);
evsel->attr.mmap_data = track;
}
perf_evsel__config_callchain(evsel, opts, &param); perf_evsel__config_callchain(evsel, opts, &param);
}
} }
} }
@ -1052,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
* Apply event specific term settings, * Apply event specific term settings,
* it overloads any global configuration. * it overloads any global configuration.
*/ */
apply_config_terms(evsel, opts); apply_config_terms(evsel, opts, track);
evsel->ignore_missing_thread = opts->ignore_missing_thread; evsel->ignore_missing_thread = opts->ignore_missing_thread;
} }

View File

@ -499,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
PyLong_FromUnsignedLongLong(sample->time)); PyLong_FromUnsignedLongLong(sample->time));
pydict_set_item_string_decref(dict_sample, "period", pydict_set_item_string_decref(dict_sample, "period",
PyLong_FromUnsignedLongLong(sample->period)); PyLong_FromUnsignedLongLong(sample->period));
pydict_set_item_string_decref(dict_sample, "phys_addr",
PyLong_FromUnsignedLongLong(sample->phys_addr));
set_sample_read_in_dict(dict_sample, sample, evsel); set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict, "sample", dict_sample); pydict_set_item_string_decref(dict, "sample", dict_sample);

View File

@ -116,7 +116,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
static int parse_percent(double *pcnt, char *str) static int parse_percent(double *pcnt, char *str)
{ {
char *c; char *c, *endptr;
double d;
c = strchr(str, '%'); c = strchr(str, '%');
if (c) if (c)
@ -124,8 +125,11 @@ static int parse_percent(double *pcnt, char *str)
else else
return -1; return -1;
*pcnt = atof(str) / 100.0; d = strtod(str, &endptr);
if (endptr != str + strlen(str))
return -1;
*pcnt = d / 100.0;
return 0; return 0;
} }
@ -257,6 +261,37 @@ static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
return i; return i;
} }
/*
 * Convert a lone percentage such as "50%" by rewriting it as "50%/1" and
 * handing the result to percent_slash_split().
 *
 * @ptime_buf: interval buffer forwarded to percent_slash_split()
 * @ostr:      the original option string
 * @start:     forwarded to percent_slash_split()
 * @end:       forwarded to percent_slash_split()
 * @c:         position of the '%' inside @ostr
 *
 * Returns -1 when '%' is not the final character of @ostr, -ENOMEM when the
 * temporary string cannot be allocated, 1 when percent_slash_split() returns
 * 0, and otherwise whatever percent_slash_split() returned.
 */
static int one_percent_convert(struct perf_time_interval *ptime_buf,
			       const char *ostr, u64 start, u64 end, char *c)
{
	static const char slice_suffix[] = "/1";
	int len = strlen(ostr);
	char *slice_str;
	int ret;

	/* The '%' that c points at has to terminate the string. */
	if (c != ostr + len - 1)
		return -1;

	/* Room for the input, the "/1" suffix and the trailing NUL. */
	slice_str = malloc(len + sizeof(slice_suffix));
	if (!slice_str)
		return -ENOMEM;

	/* Assemble "xx%/1"; the suffix copy brings its own terminator. */
	memcpy(slice_str, ostr, len);
	memcpy(slice_str + len, slice_suffix, sizeof(slice_suffix));

	ret = percent_slash_split(slice_str, ptime_buf, start, end);
	free(slice_str);

	/* A zero result from the split is reported to the caller as 1. */
	return ret == 0 ? 1 : ret;
}
int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
const char *ostr, u64 start, u64 end) const char *ostr, u64 start, u64 end)
{ {
@ -266,6 +301,7 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
* ostr example: * ostr example:
* 10%/2,10%/3: select the second 10% slice and the third 10% slice * 10%/2,10%/3: select the second 10% slice and the third 10% slice
* 0%-10%,30%-40%: multiple time range * 0%-10%,30%-40%: multiple time range
* 50%: just one percent
*/ */
memset(ptime_buf, 0, sizeof(*ptime_buf) * num); memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
@ -282,9 +318,41 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
end, percent_dash_split); end, percent_dash_split);
} }
c = strchr(ostr, '%');
if (c)
return one_percent_convert(ptime_buf, ostr, start, end, c);
return -1; return -1;
} }
/*
 * Allocate a zero-filled array of time intervals sized for the
 * comma-separated list in @ostr: one entry, plus one more for every ','
 * found in the string. A NULL @ostr yields a single entry.
 *
 * @ostr: option string to size the array for (may be NULL)
 * @size: receives the number of entries; it is written before the
 *        allocation is attempted, so it is set even if calloc() fails
 *
 * Returns the calloc()ed array, or NULL if the allocation failed.
 */
struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
{
	struct perf_time_interval *ranges;
	const char *comma;
	int nr_ranges = 1;	/* always allocate at least one range */

	if (ostr) {
		/* Every separator found adds one more range to allocate. */
		for (comma = strchr(ostr, ','); comma;
		     comma = strchr(comma + 1, ','))
			nr_ranges++;
	}

	*size = nr_ranges;
	ranges = calloc(nr_ranges, sizeof(*ranges));

	return ranges;
}
bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
{ {
/* if time is not set don't drop sample */ /* if time is not set don't drop sample */

View File

@ -16,6 +16,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
const char *ostr, u64 start, u64 end); const char *ostr, u64 start, u64 end);
struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,

View File

@ -631,9 +631,8 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread) static int _unwind__prepare_access(struct thread *thread)
{ {
if (callchain_param.record_mode != CALLCHAIN_DWARF) if (!dwarf_callchain_users)
return 0; return 0;
thread->addr_space = unw_create_addr_space(&accessors, 0); thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) { if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n"); pr_err("unwind: Can't create unwind address space.\n");
@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread)
{ {
if (callchain_param.record_mode != CALLCHAIN_DWARF) if (!dwarf_callchain_users)
return; return;
unw_flush_cache(thread->addr_space, 0, 0); unw_flush_cache(thread->addr_space, 0, 0);
} }
static void _unwind__finish_access(struct thread *thread) static void _unwind__finish_access(struct thread *thread)
{ {
if (callchain_param.record_mode != CALLCHAIN_DWARF) if (!dwarf_callchain_users)
return; return;
unw_destroy_addr_space(thread->addr_space); unw_destroy_addr_space(thread->addr_space);
} }

View File

@ -210,7 +210,7 @@ static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64
size -= ret; size -= ret;
off_in += ret; off_in += ret;
off_out -= ret; off_out += ret;
} }
munmap(ptr, off_in + size); munmap(ptr, off_in + size);