perf sort: Fix the 'weight' sort key behavior
[ Upstream commit 784e8adda4cdb3e2510742023729851b6c08803c ] Currently, the 'weight' field in the perf sample has latency information for some instructions like in memory accesses. And perf tool has 'weight' and 'local_weight' sort keys to display the info. But it's somewhat confusing what it shows exactly. In my understanding, 'local_weight' shows a weight in a single sample, and (global) 'weight' shows a sum of the weights in the hist_entry. For example: $ perf mem record -t load dd if=/dev/zero of=/dev/null bs=4k count=1M $ perf report --stdio -n -s +local_weight ... # # Overhead Samples Command Shared Object Symbol Local Weight # ........ ....... ....... ................ ......................... ............ # 21.23% 313 dd [kernel.vmlinux] [k] lockref_get_not_zero 32 12.43% 183 dd [kernel.vmlinux] [k] lockref_get_not_zero 35 11.97% 159 dd [kernel.vmlinux] [k] lockref_get_not_zero 36 10.40% 141 dd [kernel.vmlinux] [k] lockref_put_return 32 7.63% 113 dd [kernel.vmlinux] [k] lockref_get_not_zero 33 6.37% 92 dd [kernel.vmlinux] [k] lockref_get_not_zero 34 6.15% 90 dd [kernel.vmlinux] [k] lockref_put_return 33 ... So let's look at the 'lockref_get_not_zero' symbols. The top entry shows that 313 samples were captured with 'local_weight' 32, so the total weight should be 313 x 32 = 10016. But it's not the case: $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ...... # 1.36% 4 dd [kernel.vmlinux] 36 144 0.47% 4 dd [kernel.vmlinux] 37 148 0.42% 4 dd [kernel.vmlinux] 32 128 0.40% 4 dd [kernel.vmlinux] 34 136 0.35% 4 dd [kernel.vmlinux] 36 144 0.34% 4 dd [kernel.vmlinux] 35 140 0.30% 4 dd [kernel.vmlinux] 36 144 0.30% 4 dd [kernel.vmlinux] 34 136 0.30% 4 dd [kernel.vmlinux] 32 128 0.30% 4 dd [kernel.vmlinux] 32 128 ... With the 'weight' sort key, it's divided to 4 samples even with the same info ('comm', 'dso', 'sym' and 'local_weight'). I don't think this is what we want. I found this because of the way it aggregates the 'weight' value. Since it's not a period, we should not add them in the he->stat. Otherwise, two 32 'weight' entries will create a 64 'weight' entry. After that, new 32 'weight' samples don't have a matching entry so it'd create a new entry and make it a 64 'weight' entry again and again. Later, they will be merged into 128 'weight' entries during the hists__collapse_resort() with 4 samples, multiple times like above. Let's keep the weight and display it differently. For 'local_weight', it can show the weight as is, and for (global) 'weight' it can display the number multiplied by the number of samples. With this change, I can see the expected numbers. $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ..... # 21.23% 313 dd [kernel.vmlinux] 32 10016 12.43% 183 dd [kernel.vmlinux] 35 6405 11.97% 159 dd [kernel.vmlinux] 36 5724 7.63% 113 dd [kernel.vmlinux] 33 3729 6.37% 92 dd [kernel.vmlinux] 34 3128 4.17% 59 dd [kernel.vmlinux] 37 2183 0.08% 1 dd [kernel.vmlinux] 269 269 0.08% 1 dd [kernel.vmlinux] 38 38 Reviewed-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20211105225617.151364-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
40e35c7744
commit
57482dc5ac
@ -290,11 +290,9 @@ static long hist_time(unsigned long htime)
|
||||
}
|
||||
|
||||
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
|
||||
u64 weight, u64 ins_lat, u64 p_stage_cyc)
|
||||
u64 ins_lat, u64 p_stage_cyc)
|
||||
{
|
||||
|
||||
he_stat->period += period;
|
||||
he_stat->weight += weight;
|
||||
he_stat->nr_events += 1;
|
||||
he_stat->ins_lat += ins_lat;
|
||||
he_stat->p_stage_cyc += p_stage_cyc;
|
||||
@ -308,9 +306,8 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
|
||||
dest->period_guest_sys += src->period_guest_sys;
|
||||
dest->period_guest_us += src->period_guest_us;
|
||||
dest->nr_events += src->nr_events;
|
||||
dest->weight += src->weight;
|
||||
dest->ins_lat += src->ins_lat;
|
||||
dest->p_stage_cyc += src->p_stage_cyc;
|
||||
dest->p_stage_cyc += src->p_stage_cyc;
|
||||
}
|
||||
|
||||
static void he_stat__decay(struct he_stat *he_stat)
|
||||
@ -598,7 +595,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
|
||||
struct hist_entry *he;
|
||||
int64_t cmp;
|
||||
u64 period = entry->stat.period;
|
||||
u64 weight = entry->stat.weight;
|
||||
u64 ins_lat = entry->stat.ins_lat;
|
||||
u64 p_stage_cyc = entry->stat.p_stage_cyc;
|
||||
bool leftmost = true;
|
||||
@ -619,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
|
||||
|
||||
if (!cmp) {
|
||||
if (sample_self) {
|
||||
he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
|
||||
he_stat__add_period(&he->stat, period, ins_lat, p_stage_cyc);
|
||||
hist_entry__add_callchain_period(he, period);
|
||||
}
|
||||
if (symbol_conf.cumulate_callchain)
|
||||
he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
|
||||
he_stat__add_period(he->stat_acc, period, ins_lat, p_stage_cyc);
|
||||
|
||||
/*
|
||||
* This mem info was allocated from sample__resolve_mem
|
||||
@ -733,7 +729,6 @@ __hists__add_entry(struct hists *hists,
|
||||
.stat = {
|
||||
.nr_events = 1,
|
||||
.period = sample->period,
|
||||
.weight = sample->weight,
|
||||
.ins_lat = sample->ins_lat,
|
||||
.p_stage_cyc = sample->p_stage_cyc,
|
||||
},
|
||||
@ -748,6 +743,7 @@ __hists__add_entry(struct hists *hists,
|
||||
.raw_size = sample->raw_size,
|
||||
.ops = ops,
|
||||
.time = hist_time(sample->time),
|
||||
.weight = sample->weight,
|
||||
}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
|
||||
|
||||
if (!hists->has_callchains && he && he->callchain_size != 0)
|
||||
|
@ -1325,45 +1325,35 @@ struct sort_entry sort_mispredict = {
|
||||
.se_width_idx = HISTC_MISPREDICT,
|
||||
};
|
||||
|
||||
static u64 he_weight(struct hist_entry *he)
|
||||
{
|
||||
return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
|
||||
}
|
||||
|
||||
static int64_t
|
||||
sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return he_weight(left) - he_weight(right);
|
||||
return left->weight - right->weight;
|
||||
}
|
||||
|
||||
static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
|
||||
}
|
||||
|
||||
struct sort_entry sort_local_weight = {
|
||||
.se_header = "Local Weight",
|
||||
.se_cmp = sort__local_weight_cmp,
|
||||
.se_cmp = sort__weight_cmp,
|
||||
.se_snprintf = hist_entry__local_weight_snprintf,
|
||||
.se_width_idx = HISTC_LOCAL_WEIGHT,
|
||||
};
|
||||
|
||||
static int64_t
|
||||
sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return left->stat.weight - right->stat.weight;
|
||||
}
|
||||
|
||||
static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
|
||||
return repsep_snprintf(bf, size, "%-*llu", width,
|
||||
he->weight * he->stat.nr_events);
|
||||
}
|
||||
|
||||
struct sort_entry sort_global_weight = {
|
||||
.se_header = "Weight",
|
||||
.se_cmp = sort__global_weight_cmp,
|
||||
.se_cmp = sort__weight_cmp,
|
||||
.se_snprintf = hist_entry__global_weight_snprintf,
|
||||
.se_width_idx = HISTC_GLOBAL_WEIGHT,
|
||||
};
|
||||
|
@ -49,7 +49,6 @@ struct he_stat {
|
||||
u64 period_us;
|
||||
u64 period_guest_sys;
|
||||
u64 period_guest_us;
|
||||
u64 weight;
|
||||
u64 ins_lat;
|
||||
u64 p_stage_cyc;
|
||||
u32 nr_events;
|
||||
@ -109,6 +108,7 @@ struct hist_entry {
|
||||
s32 socket;
|
||||
s32 cpu;
|
||||
u64 code_page_size;
|
||||
u64 weight;
|
||||
u8 cpumode;
|
||||
u8 depth;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user