9823ae6f68
As noted in commit 7d54a4acd8 ("perf test: Skip watchpoint tests if no
watchpoints available"), hardware breakpoints are not available on the
power9 platform, and because of that the 'perf bench breakpoint' run fails
on power9.
Check the return value of perf_event_open() in the breakpoint benchmarks
and skip the 'perf bench breakpoint' run if hardware breakpoints are not
available.
Result on power9 system before patch changes:
[command]# perf bench breakpoint thread
perf_event_open: No such device
Result on power9 system after patch changes:
[command]# ./perf bench breakpoint thread
Skipping perf bench breakpoint thread: No hardware support
Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Naveen N Rao <naveen@kernel.org>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Disha Goel <disgoel@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lore.kernel.org/r/20230823075103.190565-1-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
263 lines
7.5 KiB
C
263 lines
7.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <subcmd/parse-options.h>
|
|
#include <linux/hw_breakpoint.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/time64.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/time.h>
|
|
#include <pthread.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include "bench.h"
|
|
#include "futex.h"
|
|
|
|
/*
 * Parameters of the 'perf bench breakpoint thread' benchmark.
 * Every field defaults to 1 and is filled in through thread_options.
 */
struct {
	unsigned int nbreakpoints;	/* set by -b / --breakpoints */
	unsigned int nparallel;		/* set by -p / --parallelism */
	unsigned int nthreads;		/* set by -t / --threads */
} thread_params = { .nbreakpoints = 1, .nparallel = 1, .nthreads = 1 };
|
|
|
|
static const struct option thread_options[] = {
|
|
OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
|
|
"Specify amount of breakpoints"),
|
|
OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"),
|
|
OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"),
|
|
OPT_END()
|
|
};
|
|
|
|
/* NULL-terminated usage lines for 'perf bench breakpoint thread'. */
static const char * const thread_usage[] = {
	"perf bench breakpoint thread <options>",
	NULL
};
|
|
|
|
/* One breakpoint: the event file descriptor and the byte whose address is watched. */
struct breakpoint {
	int  fd;	/* result of breakpoint_setup() for &watched */
	char watched;	/* only its address is used, as the breakpoint target */
};
|
|
|
|
/*
 * Open a perf breakpoint event on the byte at @addr.
 *
 * The event is of type PERF_TYPE_BREAKPOINT, triggers on reads and writes
 * (HW_BREAKPOINT_RW) of a single byte (HW_BREAKPOINT_LEN_1), has the inherit
 * flag set and excludes kernel and hypervisor. It is opened with
 * perf_event_open(&attr, 0, -1, -1, 0).
 *
 * Returns the new file descriptor on success, or the negated errno value
 * when the syscall fails.
 */
static int breakpoint_setup(void *addr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.inherit	= 1,
		.exclude_kernel	= 1,
		.exclude_hv	= 1,
		.bp_type	= HW_BREAKPOINT_RW,
		.bp_addr	= (unsigned long)addr,
		.bp_len		= HW_BREAKPOINT_LEN_1,
	};
	int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

	return fd < 0 ? -errno : fd;
}
|
|
|
|
/*
 * Thread body that sleeps until the flag is raised.
 *
 * @arg points to an unsigned int. While it reads as zero the thread calls
 * futex_wait() on it (expected value 0, no timeout); once it reads as
 * non-zero the thread returns NULL.
 */
static void *passive_thread(void *arg)
{
	unsigned int *flag = arg;

	for (;;) {
		if (__atomic_load_n(flag, __ATOMIC_RELAXED))
			break;
		futex_wait(flag, 0, NULL, 0);
	}

	return NULL;
}
|
|
|
|
/*
 * Thread body that busy-waits until the flag is raised.
 *
 * @arg points to an unsigned int that is polled with relaxed atomic loads;
 * the thread returns NULL as soon as it reads a non-zero value.
 */
static void *active_thread(void *arg)
{
	unsigned int *flag = arg;

	while (__atomic_load_n(flag, __ATOMIC_RELAXED) == 0)
		continue;	/* spin on purpose: this thread must stay runnable */

	return NULL;
}
|
|
|
|
static void *breakpoint_thread(void *arg)
|
|
{
|
|
unsigned int i, done;
|
|
int *repeat = (int *)arg;
|
|
pthread_t *threads;
|
|
|
|
threads = calloc(thread_params.nthreads, sizeof(threads[0]));
|
|
if (!threads)
|
|
exit((perror("calloc"), EXIT_FAILURE));
|
|
|
|
while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
|
|
done = 0;
|
|
for (i = 0; i < thread_params.nthreads; i++) {
|
|
if (pthread_create(&threads[i], NULL, passive_thread, &done))
|
|
exit((perror("pthread_create"), EXIT_FAILURE));
|
|
}
|
|
__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
|
|
futex_wake(&done, thread_params.nthreads, 0);
|
|
for (i = 0; i < thread_params.nthreads; i++)
|
|
pthread_join(threads[i], NULL);
|
|
}
|
|
free(threads);
|
|
return NULL;
|
|
}
|
|
|
|
// The benchmark creates nbreakpoints inheritable breakpoints,
|
|
// then starts nparallel threads which create and join bench_repeat batches of nthreads threads.
|
|
int bench_breakpoint_thread(int argc, const char **argv)
|
|
{
|
|
unsigned int i, result_usec;
|
|
int repeat = bench_repeat;
|
|
struct breakpoint *breakpoints;
|
|
pthread_t *parallel;
|
|
struct timeval start, stop, diff;
|
|
|
|
if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
|
|
usage_with_options(thread_usage, thread_options);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
|
|
parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
|
|
if (!breakpoints || !parallel)
|
|
exit((perror("calloc"), EXIT_FAILURE));
|
|
|
|
for (i = 0; i < thread_params.nbreakpoints; i++) {
|
|
breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
|
|
|
|
if (breakpoints[i].fd < 0) {
|
|
if (breakpoints[i].fd == -ENODEV) {
|
|
printf("Skipping perf bench breakpoint thread: No hardware support\n");
|
|
return 0;
|
|
}
|
|
exit((perror("perf_event_open"), EXIT_FAILURE));
|
|
}
|
|
}
|
|
gettimeofday(&start, NULL);
|
|
for (i = 0; i < thread_params.nparallel; i++) {
|
|
if (pthread_create(¶llel[i], NULL, breakpoint_thread, &repeat))
|
|
exit((perror("pthread_create"), EXIT_FAILURE));
|
|
}
|
|
for (i = 0; i < thread_params.nparallel; i++)
|
|
pthread_join(parallel[i], NULL);
|
|
gettimeofday(&stop, NULL);
|
|
timersub(&stop, &start, &diff);
|
|
for (i = 0; i < thread_params.nbreakpoints; i++)
|
|
close(breakpoints[i].fd);
|
|
free(parallel);
|
|
free(breakpoints);
|
|
switch (bench_format) {
|
|
case BENCH_FORMAT_DEFAULT:
|
|
printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
|
|
bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
|
|
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
|
|
(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
|
|
result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
|
|
printf(" %14lf usecs/op\n",
|
|
(double)result_usec / bench_repeat / thread_params.nthreads);
|
|
printf(" %14lf usecs/op/cpu\n",
|
|
(double)result_usec / bench_repeat /
|
|
thread_params.nthreads * thread_params.nparallel);
|
|
break;
|
|
case BENCH_FORMAT_SIMPLE:
|
|
printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
|
|
break;
|
|
default:
|
|
fprintf(stderr, "Unknown format: %d\n", bench_format);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parameters of the 'perf bench breakpoint enable' benchmark.
 * Both counts default to 0 and are filled in through enable_options.
 */
struct {
	unsigned int npassive;	/* set by -p / --passive */
	unsigned int nactive;	/* set by -a / --active */
} enable_params = { .npassive = 0, .nactive = 0 };
|
|
|
|
static const struct option enable_options[] = {
|
|
OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"),
|
|
OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"),
|
|
OPT_END()
|
|
};
|
|
|
|
/* NULL-terminated usage lines for 'perf bench breakpoint enable'. */
static const char * const enable_usage[] = {
	"perf bench breakpoint enable <options>",
	NULL
};
|
|
|
|
// The benchmark creates an inheritable breakpoint,
|
|
// then starts npassive threads that block and nactive threads that actively spin
|
|
// and then disables and enables the breakpoint bench_repeat times.
|
|
int bench_breakpoint_enable(int argc, const char **argv)
|
|
{
|
|
unsigned int i, nthreads, result_usec, done = 0;
|
|
char watched;
|
|
int fd;
|
|
pthread_t *threads;
|
|
struct timeval start, stop, diff;
|
|
|
|
if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
|
|
usage_with_options(enable_usage, enable_options);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
fd = breakpoint_setup(&watched);
|
|
|
|
if (fd < 0) {
|
|
if (fd == -ENODEV) {
|
|
printf("Skipping perf bench breakpoint enable: No hardware support\n");
|
|
return 0;
|
|
}
|
|
exit((perror("perf_event_open"), EXIT_FAILURE));
|
|
}
|
|
nthreads = enable_params.npassive + enable_params.nactive;
|
|
threads = calloc(nthreads, sizeof(threads[0]));
|
|
if (!threads)
|
|
exit((perror("calloc"), EXIT_FAILURE));
|
|
|
|
for (i = 0; i < nthreads; i++) {
|
|
if (pthread_create(&threads[i], NULL,
|
|
i < enable_params.npassive ? passive_thread : active_thread, &done))
|
|
exit((perror("pthread_create"), EXIT_FAILURE));
|
|
}
|
|
usleep(10000); // let the threads block
|
|
gettimeofday(&start, NULL);
|
|
for (i = 0; i < bench_repeat; i++) {
|
|
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
|
|
exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
|
|
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
|
|
exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
|
|
}
|
|
gettimeofday(&stop, NULL);
|
|
timersub(&stop, &start, &diff);
|
|
__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
|
|
futex_wake(&done, enable_params.npassive, 0);
|
|
for (i = 0; i < nthreads; i++)
|
|
pthread_join(threads[i], NULL);
|
|
free(threads);
|
|
close(fd);
|
|
switch (bench_format) {
|
|
case BENCH_FORMAT_DEFAULT:
|
|
printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
|
|
bench_repeat, enable_params.npassive, enable_params.nactive);
|
|
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
|
|
(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
|
|
result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
|
|
printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat);
|
|
break;
|
|
case BENCH_FORMAT_SIMPLE:
|
|
printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
|
|
break;
|
|
default:
|
|
fprintf(stderr, "Unknown format: %d\n", bench_format);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
return 0;
|
|
}
|