47d01e7b99
x86 and arm64 can both support direct access of event counters in userspace. The access sequence is less than trivial and currently exists in perf test code (tools/perf/arch/x86/tests/rdpmc.c) with copies in projects such as PAPI and libpfm4. In order to support userspace access, an event must be mmapped first with perf_evsel__mmap(). Then subsequent calls to perf_evsel__read() will use the fast path (assuming the arch supports it). Committer notes: Added a '__maybe_unused' attribute to the read_perf_counter() argument to fix the build on arches other than x86_64 and arm. Committer testing: Building and running the libperf tests in verbose mode (V=1) now shows those "loop = N, count = N" extra lines, testing user space counter access. # make V=1 -C tools/lib/perf tests make: Entering directory '/home/acme/git/perf/tools/lib/perf' make -f /home/acme/git/perf/tools/build/Makefile.build dir=. obj=libperf make -C /home/acme/git/perf/tools/lib/api/ O= libapi.a make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fd obj=libapi make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fs obj=libapi make -C tests gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-cpumap-a test-cpumap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-threadmap-a test-threadmap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evlist-a test-evlist.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evsel-a test-evsel.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc 
-I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-cpumap-so test-cpumap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-threadmap-so test-threadmap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evlist-so test-evlist.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evsel-so test-evsel.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf make -C tests run running static: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... loop = 65536, count = 333926 loop = 131072, count = 655781 loop = 262144, count = 1311141 loop = 524288, count = 2630126 loop = 1048576, count = 5256955 loop = 65536, count = 524594 loop = 131072, count = 1058916 loop = 262144, count = 2097458 loop = 524288, count = 4205429 loop = 1048576, count = 8406606 OK running dynamic: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... 
loop = 65536, count = 328102 loop = 131072, count = 655782 loop = 262144, count = 1317494 loop = 524288, count = 2627851 loop = 1048576, count = 5255187 loop = 65536, count = 524601 loop = 131072, count = 1048923 loop = 262144, count = 2107917 loop = 524288, count = 4194606 loop = 1048576, count = 8409322 OK make: Leaving directory '/home/acme/git/perf/tools/lib/perf' # Signed-off-by: Rob Herring <robh@kernel.org> Acked-by: Jiri Olsa <jolsa@redhat.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Itaru Kitayama <itaru.kitayama@gmail.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Link: http://lore.kernel.org/lkml/20210414155412.3697605-4-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
382 lines
8.0 KiB
C
382 lines
8.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <sys/syscall.h>
|
|
#include <perf/evsel.h>
|
|
#include <perf/cpumap.h>
|
|
#include <perf/threadmap.h>
|
|
#include <linux/list.h>
|
|
#include <internal/evsel.h>
|
|
#include <linux/zalloc.h>
|
|
#include <stdlib.h>
|
|
#include <internal/xyarray.h>
|
|
#include <internal/cpumap.h>
|
|
#include <internal/mmap.h>
|
|
#include <internal/threadmap.h>
|
|
#include <internal/lib.h>
|
|
#include <linux/string.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/mman.h>
|
|
|
|
void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
|
|
{
|
|
INIT_LIST_HEAD(&evsel->node);
|
|
evsel->attr = *attr;
|
|
}
|
|
|
|
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
|
|
{
|
|
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
|
|
|
|
if (evsel != NULL)
|
|
perf_evsel__init(evsel, attr);
|
|
|
|
return evsel;
|
|
}
|
|
|
|
/**
 * perf_evsel__delete - release the memory of an evsel.
 * @evsel: event selector to free; may be NULL.
 *
 * Only the evsel allocation itself is freed here; nothing is closed or
 * unmapped by this function.
 */
void perf_evsel__delete(struct perf_evsel *evsel)
{
	/* free(NULL) is a no-op, so no guard is required. */
	free(evsel);
}
|
|
|
|
/*
 * FD(e, x, y): lvalue for the int file descriptor stored at (x, y) in
 * (e)->fd.  (e)->fd must be non-NULL.
 *
 * MMAP(e, x, y): pointer to the struct perf_mmap slot at (x, y) in
 * (e)->mmap, or NULL when (e)->mmap has not been allocated.
 *
 * Every macro argument is parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#define FD(e, x, y) (*(int *) xyarray__entry((e)->fd, (x), (y)))
#define MMAP(e, x, y) ((e)->mmap ? ((struct perf_mmap *) xyarray__entry((e)->mmap, (x), (y))) : NULL)
|
|
|
|
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
|
|
{
|
|
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
|
|
|
|
if (evsel->fd) {
|
|
int cpu, thread;
|
|
for (cpu = 0; cpu < ncpus; cpu++) {
|
|
for (thread = 0; thread < nthreads; thread++) {
|
|
FD(evsel, cpu, thread) = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return evsel->fd != NULL ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads)
|
|
{
|
|
evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap));
|
|
|
|
return evsel->mmap != NULL ? 0 : -ENOMEM;
|
|
}
|
|
|
|
/*
 * Thin wrapper around the raw perf_event_open system call.
 *
 * The arguments are passed through unchanged.  The result of syscall() is
 * returned as an int; callers in this file treat a negative value as
 * failure and then read errno.
 */
static int
sys_perf_event_open(struct perf_event_attr *attr,
		    pid_t pid, int cpu, int group_fd,
		    unsigned long flags)
{
	int fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);

	return fd;
}
|
|
|
|
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
|
|
struct perf_thread_map *threads)
|
|
{
|
|
int cpu, thread, err = 0;
|
|
|
|
if (cpus == NULL) {
|
|
static struct perf_cpu_map *empty_cpu_map;
|
|
|
|
if (empty_cpu_map == NULL) {
|
|
empty_cpu_map = perf_cpu_map__dummy_new();
|
|
if (empty_cpu_map == NULL)
|
|
return -ENOMEM;
|
|
}
|
|
|
|
cpus = empty_cpu_map;
|
|
}
|
|
|
|
if (threads == NULL) {
|
|
static struct perf_thread_map *empty_thread_map;
|
|
|
|
if (empty_thread_map == NULL) {
|
|
empty_thread_map = perf_thread_map__new_dummy();
|
|
if (empty_thread_map == NULL)
|
|
return -ENOMEM;
|
|
}
|
|
|
|
threads = empty_thread_map;
|
|
}
|
|
|
|
if (evsel->fd == NULL &&
|
|
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
|
|
return -ENOMEM;
|
|
|
|
for (cpu = 0; cpu < cpus->nr; cpu++) {
|
|
for (thread = 0; thread < threads->nr; thread++) {
|
|
int fd;
|
|
|
|
fd = sys_perf_event_open(&evsel->attr,
|
|
threads->map[thread].pid,
|
|
cpus->map[cpu], -1, 0);
|
|
|
|
if (fd < 0)
|
|
return -errno;
|
|
|
|
FD(evsel, cpu, thread) = fd;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
|
|
{
|
|
int thread;
|
|
|
|
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
|
|
if (FD(evsel, cpu, thread) >= 0)
|
|
close(FD(evsel, cpu, thread));
|
|
FD(evsel, cpu, thread) = -1;
|
|
}
|
|
}
|
|
|
|
void perf_evsel__close_fd(struct perf_evsel *evsel)
|
|
{
|
|
int cpu;
|
|
|
|
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
|
|
perf_evsel__close_fd_cpu(evsel, cpu);
|
|
}
|
|
|
|
void perf_evsel__free_fd(struct perf_evsel *evsel)
|
|
{
|
|
xyarray__delete(evsel->fd);
|
|
evsel->fd = NULL;
|
|
}
|
|
|
|
void perf_evsel__close(struct perf_evsel *evsel)
|
|
{
|
|
if (evsel->fd == NULL)
|
|
return;
|
|
|
|
perf_evsel__close_fd(evsel);
|
|
perf_evsel__free_fd(evsel);
|
|
}
|
|
|
|
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
|
|
{
|
|
if (evsel->fd == NULL)
|
|
return;
|
|
|
|
perf_evsel__close_fd_cpu(evsel, cpu);
|
|
}
|
|
|
|
void perf_evsel__munmap(struct perf_evsel *evsel)
|
|
{
|
|
int cpu, thread;
|
|
|
|
if (evsel->fd == NULL || evsel->mmap == NULL)
|
|
return;
|
|
|
|
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
|
|
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
|
|
int fd = FD(evsel, cpu, thread);
|
|
struct perf_mmap *map = MMAP(evsel, cpu, thread);
|
|
|
|
if (fd < 0)
|
|
continue;
|
|
|
|
perf_mmap__munmap(map);
|
|
}
|
|
}
|
|
|
|
xyarray__delete(evsel->mmap);
|
|
evsel->mmap = NULL;
|
|
}
|
|
|
|
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
|
|
{
|
|
int ret, cpu, thread;
|
|
struct perf_mmap_param mp = {
|
|
.prot = PROT_READ | PROT_WRITE,
|
|
.mask = (pages * page_size) - 1,
|
|
};
|
|
|
|
if (evsel->fd == NULL || evsel->mmap)
|
|
return -EINVAL;
|
|
|
|
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
|
|
return -ENOMEM;
|
|
|
|
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
|
|
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
|
|
int fd = FD(evsel, cpu, thread);
|
|
struct perf_mmap *map = MMAP(evsel, cpu, thread);
|
|
|
|
if (fd < 0)
|
|
continue;
|
|
|
|
perf_mmap__init(map, NULL, false, NULL);
|
|
|
|
ret = perf_mmap__mmap(map, &mp, fd, cpu);
|
|
if (ret) {
|
|
perf_evsel__munmap(evsel);
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
|
|
{
|
|
if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL)
|
|
return NULL;
|
|
|
|
return MMAP(evsel, cpu, thread)->base;
|
|
}
|
|
|
|
int perf_evsel__read_size(struct perf_evsel *evsel)
|
|
{
|
|
u64 read_format = evsel->attr.read_format;
|
|
int entry = sizeof(u64); /* value */
|
|
int size = 0;
|
|
int nr = 1;
|
|
|
|
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
|
|
size += sizeof(u64);
|
|
|
|
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
|
|
size += sizeof(u64);
|
|
|
|
if (read_format & PERF_FORMAT_ID)
|
|
entry += sizeof(u64);
|
|
|
|
if (read_format & PERF_FORMAT_GROUP) {
|
|
nr = evsel->nr_members;
|
|
size += sizeof(u64);
|
|
}
|
|
|
|
size += entry * nr;
|
|
return size;
|
|
}
|
|
|
|
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
|
|
struct perf_counts_values *count)
|
|
{
|
|
size_t size = perf_evsel__read_size(evsel);
|
|
|
|
memset(count, 0, sizeof(*count));
|
|
|
|
if (FD(evsel, cpu, thread) < 0)
|
|
return -EINVAL;
|
|
|
|
if (MMAP(evsel, cpu, thread) &&
|
|
!perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
|
|
return 0;
|
|
|
|
if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
|
|
return -errno;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
|
|
int ioc, void *arg,
|
|
int cpu)
|
|
{
|
|
int thread;
|
|
|
|
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
|
|
int fd = FD(evsel, cpu, thread),
|
|
err = ioctl(fd, ioc, arg);
|
|
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
|
|
{
|
|
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
|
|
}
|
|
|
|
int perf_evsel__enable(struct perf_evsel *evsel)
|
|
{
|
|
int i;
|
|
int err = 0;
|
|
|
|
for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
|
|
err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i);
|
|
return err;
|
|
}
|
|
|
|
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
|
|
{
|
|
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
|
|
}
|
|
|
|
int perf_evsel__disable(struct perf_evsel *evsel)
|
|
{
|
|
int i;
|
|
int err = 0;
|
|
|
|
for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
|
|
err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i);
|
|
return err;
|
|
}
|
|
|
|
int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
|
|
{
|
|
int err = 0, i;
|
|
|
|
for (i = 0; i < evsel->cpus->nr && !err; i++)
|
|
err = perf_evsel__run_ioctl(evsel,
|
|
PERF_EVENT_IOC_SET_FILTER,
|
|
(void *)filter, i);
|
|
return err;
|
|
}
|
|
|
|
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
|
|
{
|
|
return evsel->cpus;
|
|
}
|
|
|
|
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel)
|
|
{
|
|
return evsel->threads;
|
|
}
|
|
|
|
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel)
|
|
{
|
|
return &evsel->attr;
|
|
}
|
|
|
|
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
|
|
{
|
|
if (ncpus == 0 || nthreads == 0)
|
|
return 0;
|
|
|
|
if (evsel->system_wide)
|
|
nthreads = 1;
|
|
|
|
evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
|
|
if (evsel->sample_id == NULL)
|
|
return -ENOMEM;
|
|
|
|
evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
|
|
if (evsel->id == NULL) {
|
|
xyarray__delete(evsel->sample_id);
|
|
evsel->sample_id = NULL;
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void perf_evsel__free_id(struct perf_evsel *evsel)
|
|
{
|
|
xyarray__delete(evsel->sample_id);
|
|
evsel->sample_id = NULL;
|
|
zfree(&evsel->id);
|
|
evsel->ids = 0;
|
|
}
|