perf stat: Add per-core aggregation
This patch adds the --per-core option to perf stat.
This option is used to aggregate system-wide counts
on a per physical core basis. On processors with
hyperthreading, this means counts of all HT threads
running on a physical core are aggregated.
This mode is useful to find imbalance between physical
cores running a uniform workload. Cores are identified
by socket and core: S0-C1 means physical core 1 on socket 0. Note
that cores are identified using their physical core id,
thus their numbering may not be continuous.
Per core aggregation can be combined with interval printing:
# perf stat -a --per-core -I 1000 -e cycles sleep 1000
# time core cpus counts events
1.000090030 S0-C0 1 4,765,747 cycles
1.000090030 S0-C1 1 5,580,647 cycles
1.000090030 S0-C2 1 221,181 cycles
1.000090030 S0-C3 1 266,092 cycles
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360846649-6411-4-git-send-email-eranian@google.com
[ committer note: Remove parts already applied on 86ee6e1
to keep bisectability ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
d4304958a2
commit
12c08a9f59
@ -126,6 +126,12 @@ use --per-socket in addition to -a. (system-wide). The output includes the
|
||||
socket number and the number of online processors on that socket. This is
|
||||
useful to gauge the amount of aggregation.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements. This
|
||||
is a useful mode to detect imbalance between physical cores. To enable this mode,
|
||||
use --per-core in addition to -a (system-wide). The output includes the
|
||||
core number and the number of online logical processors on that physical processor.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
|
@ -80,6 +80,7 @@ enum aggr_mode {
|
||||
AGGR_NONE,
|
||||
AGGR_GLOBAL,
|
||||
AGGR_SOCKET,
|
||||
AGGR_CORE,
|
||||
};
|
||||
|
||||
static int run_count = 1;
|
||||
@ -384,6 +385,9 @@ static void print_interval(void)
|
||||
case AGGR_SOCKET:
|
||||
fprintf(output, "# time socket cpus counts events\n");
|
||||
break;
|
||||
case AGGR_CORE:
|
||||
fprintf(output, "# time core cpus counts events\n");
|
||||
break;
|
||||
case AGGR_NONE:
|
||||
fprintf(output, "# time CPU counts events\n");
|
||||
break;
|
||||
@ -397,6 +401,7 @@ static void print_interval(void)
|
||||
num_print_interval = 0;
|
||||
|
||||
switch (aggr_mode) {
|
||||
case AGGR_CORE:
|
||||
case AGGR_SOCKET:
|
||||
print_aggr(prefix);
|
||||
break;
|
||||
@ -566,13 +571,23 @@ static void print_noise(struct perf_evsel *evsel, double avg)
|
||||
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
|
||||
}
|
||||
|
||||
static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
|
||||
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
|
||||
{
|
||||
switch (aggr_mode) {
|
||||
case AGGR_CORE:
|
||||
fprintf(output, "S%d-C%*d%s%*d%s",
|
||||
cpu_map__id_to_socket(id),
|
||||
csv_output ? 0 : -8,
|
||||
cpu_map__id_to_cpu(id),
|
||||
csv_sep,
|
||||
csv_output ? 0 : 4,
|
||||
nr,
|
||||
csv_sep);
|
||||
break;
|
||||
case AGGR_SOCKET:
|
||||
fprintf(output, "S%*d%s%*d%s",
|
||||
csv_output ? 0 : -5,
|
||||
cpu,
|
||||
id,
|
||||
csv_sep,
|
||||
csv_output ? 0 : 4,
|
||||
nr,
|
||||
@ -581,7 +596,7 @@ static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
|
||||
case AGGR_NONE:
|
||||
fprintf(output, "CPU%*d%s",
|
||||
csv_output ? 0 : -4,
|
||||
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
|
||||
perf_evsel__cpus(evsel)->map[id], csv_sep);
|
||||
break;
|
||||
case AGGR_GLOBAL:
|
||||
default:
|
||||
@ -1095,6 +1110,7 @@ static void print_stat(int argc, const char **argv)
|
||||
}
|
||||
|
||||
switch (aggr_mode) {
|
||||
case AGGR_CORE:
|
||||
case AGGR_SOCKET:
|
||||
print_aggr(NULL);
|
||||
break;
|
||||
@ -1163,6 +1179,13 @@ static int perf_stat_init_aggr_mode(void)
|
||||
}
|
||||
aggr_get_id = cpu_map__get_socket;
|
||||
break;
|
||||
case AGGR_CORE:
|
||||
if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
|
||||
perror("cannot build core map");
|
||||
return -1;
|
||||
}
|
||||
aggr_get_id = cpu_map__get_core;
|
||||
break;
|
||||
case AGGR_NONE:
|
||||
case AGGR_GLOBAL:
|
||||
default:
|
||||
@ -1372,6 +1395,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
"print counts at regular interval in ms (>= 100)"),
|
||||
OPT_SET_UINT(0, "per-socket", &aggr_mode,
|
||||
"aggregate counts per processor socket", AGGR_SOCKET),
|
||||
OPT_SET_UINT(0, "per-core", &aggr_mode,
|
||||
"aggregate counts per physical processor core", AGGR_CORE),
|
||||
OPT_END()
|
||||
};
|
||||
const char * const stat_usage[] = {
|
||||
|
@ -267,7 +267,53 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cpu_map__get_core(struct cpu_map *map, int idx)
|
||||
{
|
||||
FILE *fp;
|
||||
const char *mnt;
|
||||
char path[PATH_MAX];
|
||||
int cpu, ret, s;
|
||||
|
||||
if (idx > map->nr)
|
||||
return -1;
|
||||
|
||||
cpu = map->map[idx];
|
||||
|
||||
mnt = sysfs_find_mountpoint();
|
||||
if (!mnt)
|
||||
return -1;
|
||||
|
||||
snprintf(path, PATH_MAX,
|
||||
"%s/devices/system/cpu/cpu%d/topology/core_id",
|
||||
mnt, cpu);
|
||||
|
||||
fp = fopen(path, "r");
|
||||
if (!fp)
|
||||
return -1;
|
||||
ret = fscanf(fp, "%d", &cpu);
|
||||
fclose(fp);
|
||||
if (ret != 1)
|
||||
return -1;
|
||||
|
||||
s = cpu_map__get_socket(map, idx);
|
||||
if (s == -1)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* encode socket in upper 16 bits
|
||||
* core_id is relative to socket, and
|
||||
* we need a global id. So we combine
|
||||
* socket+ core id
|
||||
*/
|
||||
return (s << 16) | (cpu & 0xffff);
|
||||
}
|
||||
|
||||
/*
 * Build the socket map for @cpus into @sockp by delegating to
 * cpu_map__build_map() with cpu_map__get_socket() as the id callback.
 * The return value of cpu_map__build_map() is passed through unchanged.
 */
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
|
||||
{
|
||||
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
|
||||
}
|
||||
|
||||
/*
 * Build the core map for @cpus into @corep by delegating to
 * cpu_map__build_map() with cpu_map__get_core() as the id callback, so
 * the ids are the combined socket/core values that function returns.
 * The return value of cpu_map__build_map() is passed through unchanged.
 */
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
|
||||
{
|
||||
return cpu_map__build_map(cpus, corep, cpu_map__get_core);
|
||||
}
|
||||
|
@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
|
||||
struct cpu_map *cpu_map__read(FILE *file);
|
||||
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
|
||||
int cpu_map__get_socket(struct cpu_map *map, int idx);
|
||||
int cpu_map__get_core(struct cpu_map *map, int idx);
|
||||
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
|
||||
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
|
||||
|
||||
static inline int cpu_map__socket(struct cpu_map *sock, int s)
|
||||
{
|
||||
@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
|
||||
return sock->map[s];
|
||||
}
|
||||
|
||||
/*
 * Return the socket part of a combined id, i.e. @id shifted right by 16.
 * cpu_map__get_core() stores the socket number in those upper bits.
 */
static inline int cpu_map__id_to_socket(int id)
|
||||
{
|
||||
return id >> 16;
|
||||
}
|
||||
|
||||
/*
 * Return the low 16 bits of a combined id. For ids produced by
 * cpu_map__get_core() this is the core_id read from sysfs, not a
 * logical CPU number, despite the function name.
 */
static inline int cpu_map__id_to_cpu(int id)
|
||||
{
|
||||
return id & 0xffff;
|
||||
}
|
||||
|
||||
static inline int cpu_map__nr(const struct cpu_map *map)
|
||||
{
|
||||
return map ? map->nr : 1;
|
||||
|
Loading…
Reference in New Issue
Block a user