6d18804b96
A common problem is confusing CPU map indices with the CPU, by wrapping the CPU with a struct then this is avoided. This approach is similar to atomic_t. Committer notes: To make it build with BUILD_BPF_SKEL=1 these files needed the conversions to 'struct perf_cpu' usage: tools/perf/util/bpf_counter.c tools/perf/util/bpf_counter_cgroup.c tools/perf/util/bpf_ftrace.c Also perf_env__get_cpu() was removed back in "perf cpumap: Switch cpu_map__build_map to cpu function". Additionally these needed to be fixed for the ARM builds to complete: tools/perf/arch/arm/util/cs-etm.c tools/perf/arch/arm64/util/pmu.c Suggested-by: John Garry <john.garry@huawei.com> Signed-off-by: Ian Rogers <irogers@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Clarke <pc@us.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Riccardo Mancini <rickyman7@gmail.com> Cc: Stephane Eranian <eranian@google.com> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Vineet Singh <vineet.singh@intel.com> Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220105061351.120843-49-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
153 lines
3.4 KiB
C
153 lines
3.4 KiB
C
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include "util/ftrace.h"
|
|
#include "util/cpumap.h"
|
|
#include "util/thread_map.h"
|
|
#include "util/debug.h"
|
|
#include "util/evlist.h"
|
|
#include "util/bpf_counter.h"
|
|
|
|
#include "util/bpf_skel/func_latency.skel.h"
|
|
|
|
/*
 * BPF skeleton shared by every function in this file.  It is assigned by
 * perf_ftrace__latency_prepare_bpf() and handed to
 * func_latency_bpf__destroy() by perf_ftrace__latency_cleanup_bpf().
 */
static struct func_latency_bpf *skel;
|
|
|
|
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
|
|
{
|
|
int fd, err;
|
|
int i, ncpus = 1, ntasks = 1;
|
|
struct filter_entry *func;
|
|
|
|
if (!list_is_singular(&ftrace->filters)) {
|
|
pr_err("ERROR: %s target function(s).\n",
|
|
list_empty(&ftrace->filters) ? "No" : "Too many");
|
|
return -1;
|
|
}
|
|
|
|
func = list_first_entry(&ftrace->filters, struct filter_entry, list);
|
|
|
|
skel = func_latency_bpf__open();
|
|
if (!skel) {
|
|
pr_err("Failed to open func latency skeleton\n");
|
|
return -1;
|
|
}
|
|
|
|
/* don't need to set cpu filter for system-wide mode */
|
|
if (ftrace->target.cpu_list) {
|
|
ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
|
|
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
|
|
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
|
|
}
|
|
|
|
set_max_rlimit();
|
|
|
|
err = func_latency_bpf__load(skel);
|
|
if (err) {
|
|
pr_err("Failed to load func latency skeleton\n");
|
|
goto out;
|
|
}
|
|
|
|
if (ftrace->target.cpu_list) {
|
|
u32 cpu;
|
|
u8 val = 1;
|
|
|
|
skel->bss->has_cpu = 1;
|
|
fd = bpf_map__fd(skel->maps.cpu_filter);
|
|
|
|
for (i = 0; i < ncpus; i++) {
|
|
cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu;
|
|
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
u32 pid;
|
|
u8 val = 1;
|
|
|
|
skel->bss->has_task = 1;
|
|
fd = bpf_map__fd(skel->maps.task_filter);
|
|
|
|
for (i = 0; i < ntasks; i++) {
|
|
pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
|
|
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
|
|
false, func->name);
|
|
if (IS_ERR(skel->links.func_begin)) {
|
|
pr_err("Failed to attach fentry program\n");
|
|
err = PTR_ERR(skel->links.func_begin);
|
|
goto out;
|
|
}
|
|
|
|
skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
|
|
true, func->name);
|
|
if (IS_ERR(skel->links.func_end)) {
|
|
pr_err("Failed to attach fexit program\n");
|
|
err = PTR_ERR(skel->links.func_end);
|
|
goto out;
|
|
}
|
|
|
|
/* XXX: we don't actually use this fd - just for poll() */
|
|
return open("/dev/null", O_RDONLY);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
/*
 * Set the 'enabled' flag in the loaded skeleton's .bss to 1.
 * Presumably the BPF programs test this flag before recording - confirm
 * against the func_latency BPF source.  Always returns 0.
 */
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	const int on = 1;

	skel->bss->enabled = on;

	return 0;
}
|
|
|
|
/*
 * Clear the 'enabled' flag in the loaded skeleton's .bss (set it to 0).
 * Presumably this makes the BPF programs stop recording - confirm against
 * the func_latency BPF source.  Always returns 0.
 */
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	const int off = 0;

	skel->bss->enabled = off;

	return 0;
}
|
|
|
|
/*
 * Read the latency histogram out of the skeleton's 'latency' map.
 *
 * For each of the NUM_BUCKET keys the map value is fetched into a scratch
 * array of cpu__max_cpu().cpu u64 slots and the slots are summed into
 * buckets[idx].  (Assumes 'latency' is a per-CPU map whose lookup fills one
 * u64 per CPU - confirm against the BPF source.)
 *
 * @buckets: output array with at least NUM_BUCKET elements.  Every element
 *           is overwritten: with the summed count on success, with 0 when
 *           the lookup for that bucket fails.  The previous contents are
 *           never read, so the caller need not zero the array first.
 *
 * Returns 0 on success or -ENOMEM if the scratch array cannot be allocated.
 */
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
				  int buckets[])
{
	int i, fd;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

	for (idx = 0; idx < NUM_BUCKET; idx++) {
		u64 total = 0;

		/*
		 * Start every bucket from zero.  Previously only the failed
		 * lookup path reset it, while the success path added onto
		 * whatever the caller had left in the array.
		 */
		buckets[idx] = 0;

		if (bpf_map_lookup_elem(fd, &idx, hist))
			continue;

		for (i = 0; i < ncpus; i++)
			total += hist[i];

		/* Narrowing to int matches the element type of 'buckets'. */
		buckets[idx] = (int)total;
	}

	free(hist);
	return 0;
}
|
|
|
|
/*
 * Destroy the function-latency skeleton and forget the pointer to it.
 * Always returns 0.
 */
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	/*
	 * Do not keep a dangling pointer in the file-scope variable: a later
	 * call into this file must not touch the destroyed skeleton.
	 */
	skel = NULL;

	return 0;
}
|