5e21bb4e81
These two types of XDP progs (BPF_XDP_DEVMAP, BPF_XDP_CPUMAP) will not be executed directly in the driver, therefore we should also not directly run them from here. To run in these two situations, there must be further preparations done, otherwise these may cause a kernel panic. For more details, see also dev_xdp_attach(). [ 46.982479] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 46.984295] #PF: supervisor read access in kernel mode [ 46.985777] #PF: error_code(0x0000) - not-present page [ 46.987227] PGD 800000010dca4067 P4D 800000010dca4067 PUD 10dca6067 PMD 0 [ 46.989201] Oops: 0000 [#1] SMP PTI [ 46.990304] CPU: 7 PID: 562 Comm: a.out Not tainted 5.13.0+ #44 [ 46.992001] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/24 [ 46.995113] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 46.996586] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.001562] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.003115] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.005163] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.007135] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.009171] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.011172] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.013244] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.015705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.017475] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.019558] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.021595] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.023574] PKRU: 55555554 [ 47.024571] Call Trace: [ 47.025424] __bpf_prog_run32+0x32/0x50 [ 47.026296] ? printk+0x53/0x6a [ 47.027066] ? 
ktime_get+0x39/0x90 [ 47.027895] bpf_test_run.cold.28+0x23/0x123 [ 47.028866] ? printk+0x53/0x6a [ 47.029630] bpf_prog_test_run_xdp+0x149/0x1d0 [ 47.030649] __sys_bpf+0x1305/0x23d0 [ 47.031482] __x64_sys_bpf+0x17/0x20 [ 47.032316] do_syscall_64+0x3a/0x80 [ 47.033165] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 47.034254] RIP: 0033:0x7f04a51364dd [ 47.035133] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 48 [ 47.038768] RSP: 002b:00007fff8f9fc518 EFLAGS: 00000213 ORIG_RAX: 0000000000000141 [ 47.040344] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f04a51364dd [ 47.041749] RDX: 0000000000000048 RSI: 0000000020002a80 RDI: 000000000000000a [ 47.043171] RBP: 00007fff8f9fc530 R08: 0000000002049300 R09: 0000000020000100 [ 47.044626] R10: 0000000000000004 R11: 0000000000000213 R12: 0000000000401070 [ 47.046088] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 47.047579] Modules linked in: [ 47.048318] CR2: 0000000000000000 [ 47.049120] ---[ end trace 7ad34443d5be719a ]--- [ 47.050273] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 47.051343] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.054943] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.056068] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.057522] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.058961] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.060390] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.061803] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.063249] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.065070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.066307] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.067747] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 [ 47.069217] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.070652] PKRU: 55555554 [ 47.071318] Kernel panic - not syncing: Fatal exception [ 47.072854] Kernel Offset: disabled [ 47.073683] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes:9216477449
("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Fixes:fbee97feed
("bpf: Add support to attach bpf program to a devmap entry") Reported-by: Abaci <abaci@linux.alibaba.com> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Acked-by: David Ahern <dsahern@kernel.org> Acked-by: Song Liu <songliubraving@fb.com> Link: https://lore.kernel.org/bpf/20210708080409.73525-1-xuanzhuo@linux.alibaba.com
967 lines
22 KiB
C
967 lines
22 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Copyright (c) 2017 Facebook
|
|
*/
|
|
#include <linux/bpf.h>
|
|
#include <linux/btf_ids.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/filter.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <net/bpf_sk_storage.h>
|
|
#include <net/sock.h>
|
|
#include <net/tcp.h>
|
|
#include <net/net_namespace.h>
|
|
#include <linux/error-injection.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/sock_diag.h>
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include <trace/events/bpf_test_run.h>
|
|
|
|
struct bpf_test_timer {
|
|
enum { NO_PREEMPT, NO_MIGRATE } mode;
|
|
u32 i;
|
|
u64 time_start, time_spent;
|
|
};
|
|
|
|
static void bpf_test_timer_enter(struct bpf_test_timer *t)
|
|
__acquires(rcu)
|
|
{
|
|
rcu_read_lock();
|
|
if (t->mode == NO_PREEMPT)
|
|
preempt_disable();
|
|
else
|
|
migrate_disable();
|
|
|
|
t->time_start = ktime_get_ns();
|
|
}
|
|
|
|
static void bpf_test_timer_leave(struct bpf_test_timer *t)
|
|
__releases(rcu)
|
|
{
|
|
t->time_start = 0;
|
|
|
|
if (t->mode == NO_PREEMPT)
|
|
preempt_enable();
|
|
else
|
|
migrate_enable();
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/*
 * Account for one finished iteration and decide whether to run another.
 *
 * Returns true when the caller should loop again. Otherwise returns false,
 * resets the iteration counter and sets *err:
 *   0      - @repeat iterations completed; *duration holds the average
 *            time per iteration in ns, clamped to U32_MAX.
 *   -EINTR - a signal is pending; *duration is left untouched.
 */
static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
	__must_hold(rcu)
{
	t->i++;

	if (t->i < repeat && !signal_pending(current)) {
		if (need_resched()) {
			/*
			 * Bank the time spent so far, then drop out of the
			 * timed section so we are allowed to reschedule.
			 */
			t->time_spent += ktime_get_ns() - t->time_start;
			bpf_test_timer_leave(t);
			cond_resched();
			bpf_test_timer_enter(t);
		}

		/* Do another round. */
		return true;
	}

	if (t->i >= repeat) {
		/* We're done. */
		t->time_spent += ktime_get_ns() - t->time_start;
		do_div(t->time_spent, t->i);
		if (t->time_spent > U32_MAX)
			*duration = U32_MAX;
		else
			*duration = (u32)t->time_spent;
		*err = 0;
	} else {
		/* During iteration: we've been cancelled, abort. */
		*err = -EINTR;
	}

	t->i = 0;
	return false;
}
|
|
|
|
/*
 * Run @prog on @ctx @repeat times (a @repeat of 0 means once) with
 * migration disabled. The last program return value is stored in @retval
 * and, on success, the average time per run in @time. When @xdp is true
 * the program is invoked through bpf_prog_run_xdp(), otherwise through
 * BPF_PROG_RUN().
 *
 * Returns 0 on success, -ENOMEM if cgroup storage cannot be allocated, the
 * error from bpf_cgroup_storage_set(), or -EINTR if a signal interrupted
 * the loop.
 */
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
			u32 *retval, u32 *time, bool xdp)
{
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
	struct bpf_test_timer timer = { NO_MIGRATE };
	enum bpf_cgroup_storage_type stype;
	int ret;

	for_each_cgroup_storage_type(stype) {
		struct bpf_cgroup_storage *entry;

		entry = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(entry)) {
			/* The failed slot stays NULL; free the others. */
			ret = -ENOMEM;
			goto free_storage;
		}
		storage[stype] = entry;
	}

	if (repeat == 0)
		repeat = 1;

	bpf_test_timer_enter(&timer);
	for (;;) {
		ret = bpf_cgroup_storage_set(storage);
		if (ret)
			break;

		*retval = xdp ? bpf_prog_run_xdp(prog, ctx) :
				BPF_PROG_RUN(prog, ctx);

		bpf_cgroup_storage_unset();

		if (!bpf_test_timer_continue(&timer, repeat, &ret, time))
			break;
	}
	bpf_test_timer_leave(&timer);

free_storage:
	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storage[stype]);

	return ret;
}
|
|
|
|
static int bpf_test_finish(const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr, const void *data,
|
|
u32 size, u32 retval, u32 duration)
|
|
{
|
|
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
|
|
int err = -EFAULT;
|
|
u32 copy_size = size;
|
|
|
|
/* Clamp copy if the user has provided a size hint, but copy the full
|
|
* buffer if not to retain old behaviour.
|
|
*/
|
|
if (kattr->test.data_size_out &&
|
|
copy_size > kattr->test.data_size_out) {
|
|
copy_size = kattr->test.data_size_out;
|
|
err = -ENOSPC;
|
|
}
|
|
|
|
if (data_out && copy_to_user(data_out, data, copy_size))
|
|
goto out;
|
|
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
|
|
goto out;
|
|
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
|
|
goto out;
|
|
if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
|
|
goto out;
|
|
if (err != -ENOSPC)
|
|
err = 0;
|
|
out:
|
|
trace_bpf_test_finish(&err);
|
|
return err;
|
|
}
|
|
|
|
/* Integer types of various sizes and pointer combinations cover variety of
|
|
* architecture dependent calling conventions. 7+ can be supported in the
|
|
* future.
|
|
*/
|
|
__diag_push();
|
|
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
|
|
"Global functions as their definitions will be in vmlinux BTF");
|
|
int noinline bpf_fentry_test1(int a)
|
|
{
|
|
return a + 1;
|
|
}
|
|
|
|
/* int + u64 arguments; returns their sum converted to int. */
int noinline bpf_fentry_test2(int a, u64 b)
{
	u64 sum = a + b;

	return sum;
}
|
|
|
|
/* char + int + u64 arguments; returns their sum converted to int. */
int noinline bpf_fentry_test3(char a, int b, u64 c)
{
	u64 sum = a + b + c;

	return sum;
}
|
|
|
|
/*
 * pointer + char + int + u64 arguments; the pointer is treated as an
 * integer and everything is summed and converted to int.
 */
int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
{
	u64 sum = (long)a + b + c + d;

	return sum;
}
|
|
|
|
/*
 * Five mixed arguments; the pointer is treated as an integer and
 * everything is summed and converted to int.
 */
int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
{
	u64 sum = a + (long)b + c + d + e;

	return sum;
}
|
|
|
|
/*
 * Six mixed arguments; both pointers are treated as integers and
 * everything is summed and converted to int.
 */
int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
{
	u64 sum = a + (long)b + c + d + (long)e + f;

	return sum;
}
|
|
|
|
struct bpf_fentry_test_t {
|
|
struct bpf_fentry_test_t *a;
|
|
};
|
|
|
|
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
|
|
{
|
|
return (long)arg;
|
|
}
|
|
|
|
/*
 * Struct-pointer argument that is dereferenced; returns arg->a converted
 * to int. @arg must not be NULL.
 */
int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
{
	long member = (long)arg->a;

	return member;
}
|
|
|
|
/*
 * Increments *@b as a visible side effect, then returns @a plus the
 * updated *@b.
 */
int noinline bpf_modify_return_test(int a, int *b)
{
	(*b)++;

	return a + *b;
}
|
|
|
|
/* Returns the 64-bit sum of the four scalar arguments; @sk is unused. */
u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
{
	u64 sum = a;

	sum += b;
	sum += c;
	sum += d;

	return sum;
}
|
|
|
|
/* Returns the 32-bit sum of @a and @b converted to int; @sk is unused. */
int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
{
	u32 sum = a + b;

	return sum;
}
|
|
|
|
/* Identity function: hands back the socket pointer it was given. */
struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
{
	struct sock *same = sk;

	return same;
}

__diag_pop();
|
|
|
|
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
|
|
|
|
BTF_SET_START(test_sk_kfunc_ids)
|
|
BTF_ID(func, bpf_kfunc_call_test1)
|
|
BTF_ID(func, bpf_kfunc_call_test2)
|
|
BTF_ID(func, bpf_kfunc_call_test3)
|
|
BTF_SET_END(test_sk_kfunc_ids)
|
|
|
|
bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
|
|
{
|
|
return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
|
|
}
|
|
|
|
/*
 * Allocate a zeroed buffer of @headroom + @size + @tailroom bytes and copy
 * the user's test.data_in (test.data_size_in bytes) to offset @headroom.
 *
 * Returns the buffer, which the caller must free, or an ERR_PTR:
 *   -EINVAL   @size is below ETH_HLEN or does not fit in a page together
 *             with @headroom and @tailroom
 *   -EMSGSIZE the user supplied more than @size bytes
 *   -ENOMEM   allocation failed
 *   -EFAULT   copying from user space failed
 */
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
			   u32 headroom, u32 tailroom)
{
	void __user *user_data = u64_to_user_ptr(kattr->test.data_in);
	u32 user_len = kattr->test.data_size_in;
	void *buf;

	if (size < ETH_HLEN)
		return ERR_PTR(-EINVAL);
	if (size > PAGE_SIZE - headroom - tailroom)
		return ERR_PTR(-EINVAL);

	if (user_len > size)
		return ERR_PTR(-EMSGSIZE);

	buf = kzalloc(size + headroom + tailroom, GFP_USER);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	if (!copy_from_user(buf + headroom, user_data, user_len))
		return buf;

	kfree(buf);
	return ERR_PTR(-EFAULT);
}
|
|
|
|
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
|
|
const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_fentry_test_t arg = {};
|
|
u16 side_effect = 0, ret = 0;
|
|
int b = 2, err = -EFAULT;
|
|
u32 retval = 0;
|
|
|
|
if (kattr->test.flags || kattr->test.cpu)
|
|
return -EINVAL;
|
|
|
|
switch (prog->expected_attach_type) {
|
|
case BPF_TRACE_FENTRY:
|
|
case BPF_TRACE_FEXIT:
|
|
if (bpf_fentry_test1(1) != 2 ||
|
|
bpf_fentry_test2(2, 3) != 5 ||
|
|
bpf_fentry_test3(4, 5, 6) != 15 ||
|
|
bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
|
|
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
|
|
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
|
|
bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
|
|
bpf_fentry_test8(&arg) != 0)
|
|
goto out;
|
|
break;
|
|
case BPF_MODIFY_RETURN:
|
|
ret = bpf_modify_return_test(1, &b);
|
|
if (b != 2)
|
|
side_effect = 1;
|
|
break;
|
|
default:
|
|
goto out;
|
|
}
|
|
|
|
retval = ((u32)side_effect << 16) | ret;
|
|
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
|
|
goto out;
|
|
|
|
err = 0;
|
|
out:
|
|
trace_bpf_test_finish(&err);
|
|
return err;
|
|
}
|
|
|
|
struct bpf_raw_tp_test_run_info {
|
|
struct bpf_prog *prog;
|
|
void *ctx;
|
|
u32 retval;
|
|
};
|
|
|
|
static void
|
|
__bpf_prog_test_run_raw_tp(void *data)
|
|
{
|
|
struct bpf_raw_tp_test_run_info *info = data;
|
|
|
|
rcu_read_lock();
|
|
info->retval = BPF_PROG_RUN(info->prog, info->ctx);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
|
|
const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
|
|
__u32 ctx_size_in = kattr->test.ctx_size_in;
|
|
struct bpf_raw_tp_test_run_info info;
|
|
int cpu = kattr->test.cpu, err = 0;
|
|
int current_cpu;
|
|
|
|
/* doesn't support data_in/out, ctx_out, duration, or repeat */
|
|
if (kattr->test.data_in || kattr->test.data_out ||
|
|
kattr->test.ctx_out || kattr->test.duration ||
|
|
kattr->test.repeat)
|
|
return -EINVAL;
|
|
|
|
if (ctx_size_in < prog->aux->max_ctx_offset ||
|
|
ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
|
|
return -EINVAL;
|
|
|
|
if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
|
|
return -EINVAL;
|
|
|
|
if (ctx_size_in) {
|
|
info.ctx = kzalloc(ctx_size_in, GFP_USER);
|
|
if (!info.ctx)
|
|
return -ENOMEM;
|
|
if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
|
|
err = -EFAULT;
|
|
goto out;
|
|
}
|
|
} else {
|
|
info.ctx = NULL;
|
|
}
|
|
|
|
info.prog = prog;
|
|
|
|
current_cpu = get_cpu();
|
|
if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
|
|
cpu == current_cpu) {
|
|
__bpf_prog_test_run_raw_tp(&info);
|
|
} else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
|
|
/* smp_call_function_single() also checks cpu_online()
|
|
* after csd_lock(). However, since cpu is from user
|
|
* space, let's do an extra quick check to filter out
|
|
* invalid value before smp_call_function_single().
|
|
*/
|
|
err = -ENXIO;
|
|
} else {
|
|
err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
|
|
&info, 1);
|
|
}
|
|
put_cpu();
|
|
|
|
if (!err &&
|
|
copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
|
|
err = -EFAULT;
|
|
|
|
out:
|
|
kfree(info.ctx);
|
|
return err;
|
|
}
|
|
|
|
/*
 * Build a kernel copy of the user supplied test context.
 *
 * Returns NULL when the user passed neither ctx_in nor ctx_out. Otherwise
 * returns a zeroed buffer of @max_size bytes, filled from ctx_in when
 * present, that the caller must free; or an ERR_PTR: -ENOMEM on allocation
 * failure, the error from bpf_check_uarg_tail_zero(), or -EFAULT if the
 * copy from user space fails.
 */
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
	void __user *user_in = u64_to_user_ptr(kattr->test.ctx_in);
	void __user *user_out = u64_to_user_ptr(kattr->test.ctx_out);
	u32 in_size = kattr->test.ctx_size_in;
	void *ctx;
	int err;

	if (!user_in && !user_out)
		return NULL;

	ctx = kzalloc(max_size, GFP_USER);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	/* Output only: hand back the zeroed context. */
	if (!user_in)
		return ctx;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(user_in), max_size, in_size);
	if (err)
		goto err_free;

	/* Never copy more than the kernel-side context can hold. */
	in_size = min_t(u32, max_size, in_size);
	if (copy_from_user(ctx, user_in, in_size)) {
		err = -EFAULT;
		goto err_free;
	}

	return ctx;

err_free:
	kfree(ctx);
	return ERR_PTR(err);
}
|
|
|
|
static int bpf_ctx_finish(const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr, const void *data,
|
|
u32 size)
|
|
{
|
|
void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
|
|
int err = -EFAULT;
|
|
u32 copy_size = size;
|
|
|
|
if (!data || !data_out)
|
|
return 0;
|
|
|
|
if (copy_size > kattr->test.ctx_size_out) {
|
|
copy_size = kattr->test.ctx_size_out;
|
|
err = -ENOSPC;
|
|
}
|
|
|
|
if (copy_to_user(data_out, data, copy_size))
|
|
goto out;
|
|
if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
|
|
goto out;
|
|
if (err != -ENOSPC)
|
|
err = 0;
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* range_is_zero - test whether buffer is initialized
|
|
* @buf: buffer to check
|
|
* @from: check from this position
|
|
* @to: check up until (excluding) this position
|
|
*
|
|
* This function returns true if the there is a non-zero byte
|
|
* in the buf in the range [from,to).
|
|
*/
|
|
static inline bool range_is_zero(void *buf, size_t from, size_t to)
|
|
{
|
|
return !memchr_inv((u8 *)buf + from, 0, to - from);
|
|
}
|
|
|
|
static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
|
|
{
|
|
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
|
|
|
|
if (!__skb)
|
|
return 0;
|
|
|
|
/* make sure the fields we don't use are zeroed */
|
|
if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
|
|
return -EINVAL;
|
|
|
|
/* mark is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
|
|
offsetof(struct __sk_buff, priority)))
|
|
return -EINVAL;
|
|
|
|
/* priority is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
|
|
offsetof(struct __sk_buff, ifindex)))
|
|
return -EINVAL;
|
|
|
|
/* ifindex is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
|
|
offsetof(struct __sk_buff, cb)))
|
|
return -EINVAL;
|
|
|
|
/* cb is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
|
|
offsetof(struct __sk_buff, tstamp)))
|
|
return -EINVAL;
|
|
|
|
/* tstamp is allowed */
|
|
/* wire_len is allowed */
|
|
/* gso_segs is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
|
|
offsetof(struct __sk_buff, gso_size)))
|
|
return -EINVAL;
|
|
|
|
/* gso_size is allowed */
|
|
|
|
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
|
|
sizeof(struct __sk_buff)))
|
|
return -EINVAL;
|
|
|
|
skb->mark = __skb->mark;
|
|
skb->priority = __skb->priority;
|
|
skb->tstamp = __skb->tstamp;
|
|
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
|
|
|
|
if (__skb->wire_len == 0) {
|
|
cb->pkt_len = skb->len;
|
|
} else {
|
|
if (__skb->wire_len < skb->len ||
|
|
__skb->wire_len > GSO_MAX_SIZE)
|
|
return -EINVAL;
|
|
cb->pkt_len = __skb->wire_len;
|
|
}
|
|
|
|
if (__skb->gso_segs > GSO_MAX_SEGS)
|
|
return -EINVAL;
|
|
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
|
|
skb_shinfo(skb)->gso_size = __skb->gso_size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
|
|
{
|
|
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
|
|
|
|
if (!__skb)
|
|
return;
|
|
|
|
__skb->mark = skb->mark;
|
|
__skb->priority = skb->priority;
|
|
__skb->ifindex = skb->dev->ifindex;
|
|
__skb->tstamp = skb->tstamp;
|
|
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
|
|
__skb->wire_len = cb->pkt_len;
|
|
__skb->gso_segs = skb_shinfo(skb)->gso_segs;
|
|
}
|
|
|
|
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
bool is_l2 = false, is_direct_pkt_access = false;
|
|
struct net *net = current->nsproxy->net_ns;
|
|
struct net_device *dev = net->loopback_dev;
|
|
u32 size = kattr->test.data_size_in;
|
|
u32 repeat = kattr->test.repeat;
|
|
struct __sk_buff *ctx = NULL;
|
|
u32 retval, duration;
|
|
int hh_len = ETH_HLEN;
|
|
struct sk_buff *skb;
|
|
struct sock *sk;
|
|
void *data;
|
|
int ret;
|
|
|
|
if (kattr->test.flags || kattr->test.cpu)
|
|
return -EINVAL;
|
|
|
|
data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
|
|
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
|
|
if (IS_ERR(data))
|
|
return PTR_ERR(data);
|
|
|
|
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
|
|
if (IS_ERR(ctx)) {
|
|
kfree(data);
|
|
return PTR_ERR(ctx);
|
|
}
|
|
|
|
switch (prog->type) {
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
|
case BPF_PROG_TYPE_SCHED_ACT:
|
|
is_l2 = true;
|
|
fallthrough;
|
|
case BPF_PROG_TYPE_LWT_IN:
|
|
case BPF_PROG_TYPE_LWT_OUT:
|
|
case BPF_PROG_TYPE_LWT_XMIT:
|
|
is_direct_pkt_access = true;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
sk = kzalloc(sizeof(struct sock), GFP_USER);
|
|
if (!sk) {
|
|
kfree(data);
|
|
kfree(ctx);
|
|
return -ENOMEM;
|
|
}
|
|
sock_net_set(sk, net);
|
|
sock_init_data(NULL, sk);
|
|
|
|
skb = build_skb(data, 0);
|
|
if (!skb) {
|
|
kfree(data);
|
|
kfree(ctx);
|
|
kfree(sk);
|
|
return -ENOMEM;
|
|
}
|
|
skb->sk = sk;
|
|
|
|
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
|
__skb_put(skb, size);
|
|
if (ctx && ctx->ifindex > 1) {
|
|
dev = dev_get_by_index(net, ctx->ifindex);
|
|
if (!dev) {
|
|
ret = -ENODEV;
|
|
goto out;
|
|
}
|
|
}
|
|
skb->protocol = eth_type_trans(skb, dev);
|
|
skb_reset_network_header(skb);
|
|
|
|
switch (skb->protocol) {
|
|
case htons(ETH_P_IP):
|
|
sk->sk_family = AF_INET;
|
|
if (sizeof(struct iphdr) <= skb_headlen(skb)) {
|
|
sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
|
|
sk->sk_daddr = ip_hdr(skb)->daddr;
|
|
}
|
|
break;
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
case htons(ETH_P_IPV6):
|
|
sk->sk_family = AF_INET6;
|
|
if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
|
|
sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
|
|
sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (is_l2)
|
|
__skb_push(skb, hh_len);
|
|
if (is_direct_pkt_access)
|
|
bpf_compute_data_pointers(skb);
|
|
ret = convert___skb_to_skb(skb, ctx);
|
|
if (ret)
|
|
goto out;
|
|
ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
|
|
if (ret)
|
|
goto out;
|
|
if (!is_l2) {
|
|
if (skb_headroom(skb) < hh_len) {
|
|
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
|
|
|
|
if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
}
|
|
memset(__skb_push(skb, hh_len), 0, hh_len);
|
|
}
|
|
convert_skb_to___skb(skb, ctx);
|
|
|
|
size = skb->len;
|
|
/* bpf program can never convert linear skb to non-linear */
|
|
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
|
|
size = skb_headlen(skb);
|
|
ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
|
|
if (!ret)
|
|
ret = bpf_ctx_finish(kattr, uattr, ctx,
|
|
sizeof(struct __sk_buff));
|
|
out:
|
|
if (dev && dev != net->loopback_dev)
|
|
dev_put(dev);
|
|
kfree_skb(skb);
|
|
bpf_sk_storage_free(sk);
|
|
kfree(sk);
|
|
kfree(ctx);
|
|
return ret;
|
|
}
|
|
|
|
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
|
u32 headroom = XDP_PACKET_HEADROOM;
|
|
u32 size = kattr->test.data_size_in;
|
|
u32 repeat = kattr->test.repeat;
|
|
struct netdev_rx_queue *rxqueue;
|
|
struct xdp_buff xdp = {};
|
|
u32 retval, duration;
|
|
u32 max_data_sz;
|
|
void *data;
|
|
int ret;
|
|
|
|
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
|
|
prog->expected_attach_type == BPF_XDP_CPUMAP)
|
|
return -EINVAL;
|
|
if (kattr->test.ctx_in || kattr->test.ctx_out)
|
|
return -EINVAL;
|
|
|
|
/* XDP have extra tailroom as (most) drivers use full page */
|
|
max_data_sz = 4096 - headroom - tailroom;
|
|
|
|
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
|
|
if (IS_ERR(data))
|
|
return PTR_ERR(data);
|
|
|
|
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
|
|
xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
|
|
&rxqueue->xdp_rxq);
|
|
xdp_prepare_buff(&xdp, data, headroom, size, true);
|
|
|
|
bpf_prog_change_xdp(NULL, prog);
|
|
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
|
|
if (ret)
|
|
goto out;
|
|
if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
|
|
size = xdp.data_end - xdp.data;
|
|
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
|
|
out:
|
|
bpf_prog_change_xdp(prog, NULL);
|
|
kfree(data);
|
|
return ret;
|
|
}
|
|
|
|
static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
|
|
{
|
|
/* make sure the fields we don't use are zeroed */
|
|
if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
|
|
return -EINVAL;
|
|
|
|
/* flags is allowed */
|
|
|
|
if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
|
|
sizeof(struct bpf_flow_keys)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
|
const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_test_timer t = { NO_PREEMPT };
|
|
u32 size = kattr->test.data_size_in;
|
|
struct bpf_flow_dissector ctx = {};
|
|
u32 repeat = kattr->test.repeat;
|
|
struct bpf_flow_keys *user_ctx;
|
|
struct bpf_flow_keys flow_keys;
|
|
const struct ethhdr *eth;
|
|
unsigned int flags = 0;
|
|
u32 retval, duration;
|
|
void *data;
|
|
int ret;
|
|
|
|
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
|
|
return -EINVAL;
|
|
|
|
if (kattr->test.flags || kattr->test.cpu)
|
|
return -EINVAL;
|
|
|
|
if (size < ETH_HLEN)
|
|
return -EINVAL;
|
|
|
|
data = bpf_test_init(kattr, size, 0, 0);
|
|
if (IS_ERR(data))
|
|
return PTR_ERR(data);
|
|
|
|
eth = (struct ethhdr *)data;
|
|
|
|
if (!repeat)
|
|
repeat = 1;
|
|
|
|
user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
|
|
if (IS_ERR(user_ctx)) {
|
|
kfree(data);
|
|
return PTR_ERR(user_ctx);
|
|
}
|
|
if (user_ctx) {
|
|
ret = verify_user_bpf_flow_keys(user_ctx);
|
|
if (ret)
|
|
goto out;
|
|
flags = user_ctx->flags;
|
|
}
|
|
|
|
ctx.flow_keys = &flow_keys;
|
|
ctx.data = data;
|
|
ctx.data_end = (__u8 *)data + size;
|
|
|
|
bpf_test_timer_enter(&t);
|
|
do {
|
|
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
|
|
size, flags);
|
|
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
|
|
bpf_test_timer_leave(&t);
|
|
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
|
|
retval, duration);
|
|
if (!ret)
|
|
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
|
|
sizeof(struct bpf_flow_keys));
|
|
|
|
out:
|
|
kfree(user_ctx);
|
|
kfree(data);
|
|
return ret;
|
|
}
|
|
|
|
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_test_timer t = { NO_PREEMPT };
|
|
struct bpf_prog_array *progs = NULL;
|
|
struct bpf_sk_lookup_kern ctx = {};
|
|
u32 repeat = kattr->test.repeat;
|
|
struct bpf_sk_lookup *user_ctx;
|
|
u32 retval, duration;
|
|
int ret = -EINVAL;
|
|
|
|
if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
|
|
return -EINVAL;
|
|
|
|
if (kattr->test.flags || kattr->test.cpu)
|
|
return -EINVAL;
|
|
|
|
if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
|
|
kattr->test.data_size_out)
|
|
return -EINVAL;
|
|
|
|
if (!repeat)
|
|
repeat = 1;
|
|
|
|
user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
|
|
if (IS_ERR(user_ctx))
|
|
return PTR_ERR(user_ctx);
|
|
|
|
if (!user_ctx)
|
|
return -EINVAL;
|
|
|
|
if (user_ctx->sk)
|
|
goto out;
|
|
|
|
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
|
|
goto out;
|
|
|
|
if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
|
|
ret = -ERANGE;
|
|
goto out;
|
|
}
|
|
|
|
ctx.family = (u16)user_ctx->family;
|
|
ctx.protocol = (u16)user_ctx->protocol;
|
|
ctx.dport = (u16)user_ctx->local_port;
|
|
ctx.sport = (__force __be16)user_ctx->remote_port;
|
|
|
|
switch (ctx.family) {
|
|
case AF_INET:
|
|
ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
|
|
ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
|
|
break;
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
case AF_INET6:
|
|
ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
|
|
ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
|
|
break;
|
|
#endif
|
|
|
|
default:
|
|
ret = -EAFNOSUPPORT;
|
|
goto out;
|
|
}
|
|
|
|
progs = bpf_prog_array_alloc(1, GFP_KERNEL);
|
|
if (!progs) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
progs->items[0].prog = prog;
|
|
|
|
bpf_test_timer_enter(&t);
|
|
do {
|
|
ctx.selected_sk = NULL;
|
|
retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
|
|
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
|
|
bpf_test_timer_leave(&t);
|
|
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
user_ctx->cookie = 0;
|
|
if (ctx.selected_sk) {
|
|
if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
|
|
ret = -EOPNOTSUPP;
|
|
goto out;
|
|
}
|
|
|
|
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
|
|
}
|
|
|
|
ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
|
|
if (!ret)
|
|
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
|
|
|
|
out:
|
|
bpf_prog_array_free(progs);
|
|
kfree(user_ctx);
|
|
return ret;
|
|
}
|
|
|
|
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
|
|
const union bpf_attr *kattr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
|
|
__u32 ctx_size_in = kattr->test.ctx_size_in;
|
|
void *ctx = NULL;
|
|
u32 retval;
|
|
int err = 0;
|
|
|
|
/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
|
|
if (kattr->test.data_in || kattr->test.data_out ||
|
|
kattr->test.ctx_out || kattr->test.duration ||
|
|
kattr->test.repeat || kattr->test.flags)
|
|
return -EINVAL;
|
|
|
|
if (ctx_size_in < prog->aux->max_ctx_offset ||
|
|
ctx_size_in > U16_MAX)
|
|
return -EINVAL;
|
|
|
|
if (ctx_size_in) {
|
|
ctx = kzalloc(ctx_size_in, GFP_USER);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
|
|
err = -EFAULT;
|
|
goto out;
|
|
}
|
|
}
|
|
retval = bpf_prog_run_pin_on_cpu(prog, ctx);
|
|
|
|
if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
|
|
err = -EFAULT;
|
|
goto out;
|
|
}
|
|
if (ctx_size_in)
|
|
if (copy_to_user(ctx_in, ctx, ctx_size_in))
|
|
err = -EFAULT;
|
|
out:
|
|
kfree(ctx);
|
|
return err;
|
|
}
|