Joanne Koong ec151037af selftest/bpf/benchs: Add bpf_loop benchmark
Add benchmark to measure the throughput and latency of the bpf_loop
call.

Testing this on my dev machine on 1 thread, the data is as follows:

        nr_loops: 10
bpf_loop - throughput: 198.519 ± 0.155 M ops/s, latency: 5.037 ns/op

        nr_loops: 100
bpf_loop - throughput: 247.448 ± 0.305 M ops/s, latency: 4.041 ns/op

        nr_loops: 500
bpf_loop - throughput: 260.839 ± 0.380 M ops/s, latency: 3.834 ns/op

        nr_loops: 1000
bpf_loop - throughput: 262.806 ± 0.629 M ops/s, latency: 3.805 ns/op

        nr_loops: 5000
bpf_loop - throughput: 264.211 ± 1.508 M ops/s, latency: 3.785 ns/op

        nr_loops: 10000
bpf_loop - throughput: 265.366 ± 3.054 M ops/s, latency: 3.768 ns/op

        nr_loops: 50000
bpf_loop - throughput: 235.986 ± 20.205 M ops/s, latency: 4.238 ns/op

        nr_loops: 100000
bpf_loop - throughput: 264.482 ± 0.279 M ops/s, latency: 3.781 ns/op

        nr_loops: 500000
bpf_loop - throughput: 309.773 ± 87.713 M ops/s, latency: 3.228 ns/op

        nr_loops: 1000000
bpf_loop - throughput: 262.818 ± 4.143 M ops/s, latency: 3.805 ns/op

From this data, we can see that the latency per loop decreases as the
number of loops increases. On this particular machine, each loop had an
overhead of about 4 ns, and we were able to run ~250 million loops
per second.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-5-joannekoong@fb.com
2021-11-30 10:56:28 -08:00

106 lines
1.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <argp.h>
#include "bench.h"
#include "bpf_loop_bench.skel.h"
/* bpf_loop helper benchmark */
/*
 * Runtime state of the benchmark.
 *
 * skel is the loaded bpf_loop_bench skeleton; it is filled in by setup()
 * and read back by measure().
 */
struct ctx {
	struct bpf_loop_bench *skel;
};

static struct ctx ctx;
/*
 * Command-line tunables of the benchmark.
 *
 * nr_loops can be overridden with the "nr_loops" option and is copied into
 * the BPF program's .bss by setup(); it defaults to 10.
 */
static struct {
	__u32 nr_loops;
} args = { .nr_loops = 10 };
/* argp key identifying the "nr_loops" long option of this benchmark */
enum { ARG_NR_LOOPS = 4000 };
/*
 * Option table handed to argp: a single long option, "nr_loops", that takes
 * a mandatory argument. The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{
		.name  = "nr_loops",
		.key   = ARG_NR_LOOPS,
		.arg   = "nr_loops",
		.flags = 0,
		.doc   = "Set number of loops for the bpf_loop helper",
	},
	{},
};
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
switch (key) {
case ARG_NR_LOOPS:
args.nr_loops = strtol(arg, NULL, 10);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
/*
 * argp descriptor of this benchmark (option table plus parser callback).
 * Non-static: exported into the benchmark runner.
 */
const struct argp bench_bpf_loop_argp = {
	.parser  = parse_arg,
	.options = opts,
};
/*
 * Check the runner configuration before the benchmark starts: exactly one
 * consumer must be configured, otherwise print an error and exit(1).
 */
static void validate(void)
{
	if (env.consumer_cnt == 1)
		return;

	fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
	exit(1);
}
/*
 * Producer thread body: issue the getpgid syscall in an endless loop, which
 * triggers the BPF program. @input is unused. The loop never terminates, so
 * the trailing return only satisfies the thread-function signature.
 */
static void *producer(void *input)
{
	for (;;) {
		/* trigger the bpf program */
		syscall(__NR_getpgid);
	}

	return NULL;
}
/*
 * Consumer thread body. This benchmark has nothing to consume, so the
 * thread returns immediately; @input is ignored.
 */
static void *consumer(void *input)
{
	(void)input;

	return NULL;
}
static void measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
}
/*
 * One-time benchmark setup: initialise libbpf, open and load the
 * bpf_loop_bench skeleton, attach its "benchmark" program and publish the
 * requested loop count to the BPF side. Any failure prints a message to
 * stderr and terminates the process with exit status 1.
 */
static void setup(void)
{
	struct bpf_link *attached;

	setup_libbpf();

	/* open + load the skeleton */
	ctx.skel = bpf_loop_bench__open_and_load();
	if (ctx.skel == NULL) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* attach the benchmark program; the link is never released here */
	attached = bpf_program__attach(ctx.skel->progs.benchmark);
	if (attached == NULL) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}

	/* hand the command-line loop count to the BPF program via .bss */
	ctx.skel->bss->nr_loops = args.nr_loops;
}
/*
 * Benchmark descriptor for "bpf-loop": wires this file's callbacks together
 * with the ops_report_* reporting helpers.
 */
const struct bench bench_bpf_loop = {
	.name            = "bpf-loop",

	/* lifecycle */
	.validate        = validate,
	.setup           = setup,

	/* worker threads */
	.producer_thread = producer,
	.consumer_thread = consumer,

	/* sampling and reporting */
	.measure         = measure,
	.report_progress = ops_report_progress,
	.report_final    = ops_report_final,
};