samples/bpf: bpf_tail_call example for tracing
kprobe example that demonstrates what future seccomp programs may look like.
It attaches to the seccomp_phase1() function and tail-calls other BPF programs
depending on the syscall number.

Existing optimized classic BPF seccomp programs generated by Chrome look like:
if (sd.nr < 121) {
  if (sd.nr < 57) {
    if (sd.nr < 22) {
      if (sd.nr < 7) {
        if (sd.nr < 4) {
          if (sd.nr < 1) {
            check sys_read
          } else {
            if (sd.nr < 3) {
              check sys_write and sys_open
            } else {
              check sys_close
            }
          }
        } else {
      } else {
    } else {
  } else {
} else {
}

The future seccomp using native eBPF may look like:
  bpf_tail_call(&sd, &syscall_jmp_table, sd.nr);
which is simpler, faster and leaves more room for per-syscall checks.

Usage:
$ sudo ./tracex5
<...>-366 [001] d... 4.870033: : read(fd=1, buf=00007f6d5bebf000, size=771)
<...>-369 [003] d... 4.870066: : mmap
<...>-369 [003] d... 4.870077: : syscall=110 (one of get/set uid/pid/gid)
<...>-369 [003] d... 4.870089: : syscall=107 (one of get/set uid/pid/gid)
sh-369 [000] d... 4.891740: : read(fd=0, buf=00000000023d1000, size=512)
sh-369 [000] d... 4.891747: : write(fd=1, buf=00000000023d3000, size=512)
sh-369 [000] d... 4.891747: : read(fd=1, buf=00000000023d3000, size=512)

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b52f00e6a7
commit
5bacd7805a
@ -10,6 +10,7 @@ hostprogs-y += tracex1
|
||||
hostprogs-y += tracex2
|
||||
hostprogs-y += tracex3
|
||||
hostprogs-y += tracex4
|
||||
hostprogs-y += tracex5
|
||||
|
||||
test_verifier-objs := test_verifier.o libbpf.o
|
||||
test_maps-objs := test_maps.o libbpf.o
|
||||
@ -20,6 +21,7 @@ tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
|
||||
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
|
||||
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
|
||||
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
|
||||
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
|
||||
|
||||
# Tell kbuild to always build the programs
|
||||
always := $(hostprogs-y)
|
||||
@ -29,6 +31,7 @@ always += tracex1_kern.o
|
||||
always += tracex2_kern.o
|
||||
always += tracex3_kern.o
|
||||
always += tracex4_kern.o
|
||||
always += tracex5_kern.o
|
||||
always += tcbpf1_kern.o
|
||||
|
||||
HOSTCFLAGS += -I$(objtree)/usr/include
|
||||
@ -40,6 +43,7 @@ HOSTLOADLIBES_tracex1 += -lelf
|
||||
HOSTLOADLIBES_tracex2 += -lelf
|
||||
HOSTLOADLIBES_tracex3 += -lelf
|
||||
HOSTLOADLIBES_tracex4 += -lelf -lrt
|
||||
HOSTLOADLIBES_tracex5 += -lelf
|
||||
|
||||
# point this to your LLVM backend with bpf support
|
||||
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
|
||||
|
@ -21,6 +21,8 @@ static unsigned long long (*bpf_ktime_get_ns)(void) =
|
||||
(void *) BPF_FUNC_ktime_get_ns;
|
||||
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
|
||||
(void *) BPF_FUNC_trace_printk;
|
||||
static void (*bpf_tail_call)(void *ctx, void *map, int index) =
|
||||
(void *) BPF_FUNC_tail_call;
|
||||
|
||||
/* llvm builtin functions that eBPF C program may use to
|
||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <poll.h>
|
||||
#include <ctype.h>
|
||||
#include "libbpf.h"
|
||||
#include "bpf_helpers.h"
|
||||
#include "bpf_load.h"
|
||||
@ -29,6 +30,19 @@ int map_fd[MAX_MAPS];
|
||||
int prog_fd[MAX_PROGS];
|
||||
int event_fd[MAX_PROGS];
|
||||
int prog_cnt;
|
||||
int prog_array_fd = -1;
|
||||
|
||||
static int populate_prog_array(const char *event, int prog_fd)
|
||||
{
|
||||
int ind = atoi(event), err;
|
||||
|
||||
err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
|
||||
if (err < 0) {
|
||||
printf("failed to store prog_fd in prog_array\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
||||
{
|
||||
@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
||||
return -1;
|
||||
}
|
||||
|
||||
fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
|
||||
if (fd < 0) {
|
||||
printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
prog_fd[prog_cnt++] = fd;
|
||||
|
||||
if (is_socket) {
|
||||
event += 6;
|
||||
if (*event != '/')
|
||||
return 0;
|
||||
event++;
|
||||
if (!isdigit(*event)) {
|
||||
printf("invalid prog number\n");
|
||||
return -1;
|
||||
}
|
||||
return populate_prog_array(event, fd);
|
||||
}
|
||||
|
||||
if (is_kprobe || is_kretprobe) {
|
||||
if (is_kprobe)
|
||||
event += 7;
|
||||
else
|
||||
event += 10;
|
||||
|
||||
if (*event == 0) {
|
||||
printf("event name cannot be empty\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (isdigit(*event))
|
||||
return populate_prog_array(event, fd);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
|
||||
is_kprobe ? 'p' : 'r', event, event);
|
||||
@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
||||
}
|
||||
}
|
||||
|
||||
fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
|
||||
|
||||
if (fd < 0) {
|
||||
printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
prog_fd[prog_cnt++] = fd;
|
||||
|
||||
if (is_socket)
|
||||
return 0;
|
||||
|
||||
strcpy(buf, DEBUGFS);
|
||||
strcat(buf, "events/kprobes/");
|
||||
strcat(buf, event);
|
||||
@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len)
|
||||
maps[i].max_entries);
|
||||
if (map_fd[i] < 0)
|
||||
return 1;
|
||||
|
||||
if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
|
||||
prog_array_fd = map_fd[i];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
75
samples/bpf/tracex5_kern.c
Normal file
75
samples/bpf/tracex5_kern.c
Normal file
@ -0,0 +1,75 @@
|
||||
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/version.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <uapi/linux/seccomp.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
|
||||
|
||||
struct bpf_map_def SEC("maps") progs = {
|
||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||
.key_size = sizeof(u32),
|
||||
.value_size = sizeof(u32),
|
||||
.max_entries = 1024,
|
||||
};
|
||||
|
||||
/*
 * Entry program, placed in the "kprobe/seccomp_phase1" section.
 *
 * Copies a struct seccomp_data from the address held in ctx->di
 * (NOTE(review): presumably the first argument of seccomp_phase1() on
 * x86-64 - confirm) and tail-calls the program registered for that syscall
 * number. If execution continues past the tail call, a trace line is
 * printed for syscall numbers in [__NR_getuid, __NR_getsid].
 *
 * Always returns 0.
 */
SEC("kprobe/seccomp_phase1")
int bpf_prog1(struct pt_regs *ctx)
{
	char msg[] = "syscall=%d (one of get/set uid/pid/gid)\n";
	struct seccomp_data data = {};

	bpf_probe_read(&data, sizeof(data), (void *)ctx->di);

	/* hand over to the per-syscall program, if one is registered */
	bpf_tail_call(ctx, &progs, data.nr);

	/* still here -> no program for this syscall number */
	if (data.nr < __NR_getuid || data.nr > __NR_getsid)
		return 0;

	bpf_trace_printk(msg, sizeof(msg), data.nr);
	return 0;
}
|
||||
|
||||
/* we jump here when syscall number == __NR_write */
|
||||
PROG(__NR_write)(struct pt_regs *ctx)
|
||||
{
|
||||
struct seccomp_data sd = {};
|
||||
|
||||
bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
|
||||
if (sd.args[2] == 512) {
|
||||
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
|
||||
bpf_trace_printk(fmt, sizeof(fmt),
|
||||
sd.args[0], sd.args[1], sd.args[2]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
PROG(__NR_read)(struct pt_regs *ctx)
|
||||
{
|
||||
struct seccomp_data sd = {};
|
||||
|
||||
bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
|
||||
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
|
||||
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
|
||||
bpf_trace_printk(fmt, sizeof(fmt),
|
||||
sd.args[0], sd.args[1], sd.args[2]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tail-call target for syscall number __NR_mmap.
 * Prints a fixed "mmap" trace line without inspecting ctx. Always returns 0.
 */
PROG(__NR_mmap)(struct pt_regs *ctx)
{
	char msg[] = "mmap\n";

	bpf_trace_printk(msg, sizeof(msg));
	return 0;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
u32 _version SEC("version") = LINUX_VERSION_CODE;
|
46
samples/bpf/tracex5_user.c
Normal file
46
samples/bpf/tracex5_user.c
Normal file
@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <sys/prctl.h>
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
|
||||
/*
 * Install a fake seccomp program to enable the seccomp code path inside the
 * kernel, so that our kprobe attached to seccomp_phase1() can be triggered.
 *
 * The filter is a single classic-BPF instruction returning
 * SECCOMP_RET_ALLOW. Failure of prctl() is reported with perror() but is
 * not fatal to the caller.
 */
static void install_accept_all_seccomp(void)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
		.filter = filter,
	};

	/* SECCOMP_MODE_FILTER is the named constant for the former literal 2 */
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		perror("prctl");
}
|
||||
|
||||
int main(int ac, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
char filename[256];
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
|
||||
if (load_bpf_file(filename)) {
|
||||
printf("%s", bpf_log_buf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
install_accept_all_seccomp();
|
||||
|
||||
f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
|
||||
(void) f;
|
||||
|
||||
read_trace_pipe();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user