From 07236eab7a3139da97aef9f5f21f403be82a82ea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:14 -0800 Subject: [PATCH 001/260] bpf: factor out fetching basic kfunc metadata Factor out logic to fetch basic kfunc metadata based on struct bpf_insn. This is not exactly short or trivial code to just copy/paste and this information is sometimes necessary in other parts of the verifier logic. Subsequent patches will rely on this to determine if an instruction is a kfunc call to iterator next method. No functional changes intended, including that verbose() warning behavior when kfunc is not allowed for a particular program type. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 92 +++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b2116ca78d9a..8d40fba6a1c0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10079,24 +10079,21 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 0; } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx_p) +static int fetch_kfunc_meta(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_kfunc_call_arg_meta *meta, + const char **kfunc_name) { - const struct btf_type *t, *func, *func_proto, *ptr_type; - u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id; - struct bpf_reg_state *regs = cur_regs(env); - const char *func_name, *ptr_type_name; - bool sleepable, rcu_lock, rcu_unlock; - struct bpf_kfunc_call_arg_meta meta; - int err, insn_idx = *insn_idx_p; - const struct btf_param *args; - const struct btf_type *ret_t; + const struct btf_type *func, *func_proto; + u32 func_id, *kfunc_flags; + const char *func_name; struct btf *desc_btf; - u32 *kfunc_flags; - /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (kfunc_name) + *kfunc_name = NULL; + if (!insn->imm) - return 0; + return -EINVAL; desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) @@ -10105,22 +10102,51 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_id = insn->imm; func = btf_type_by_id(desc_btf, func_id); func_name = btf_name_by_offset(desc_btf, func->name_off); + if (kfunc_name) + *kfunc_name = func_name; func_proto = btf_type_by_id(desc_btf, func->type); kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); if (!kfunc_flags) { - verbose(env, "calling kernel function %s is not allowed\n", - func_name); return -EACCES; } - /* Prepare kfunc call metadata */ - memset(&meta, 0, sizeof(meta)); - meta.btf = desc_btf; - meta.func_id = func_id; - meta.kfunc_flags = *kfunc_flags; - meta.func_proto = func_proto; - meta.func_name = func_name; + memset(meta, 0, sizeof(*meta)); + meta->btf = desc_btf; + meta->func_id = func_id; + meta->kfunc_flags = *kfunc_flags; + meta->func_proto = func_proto; + meta->func_name = func_name; + + return 0; +} + +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) +{ + const struct btf_type *t, *ptr_type; + u32 i, nargs, ptr_type_id, release_ref_obj_id; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + bool sleepable, rcu_lock, rcu_unlock; + struct bpf_kfunc_call_arg_meta meta; + struct bpf_insn_aux_data *insn_aux; + 
int err, insn_idx = *insn_idx_p; + const struct btf_param *args; + const struct btf_type *ret_t; + struct btf *desc_btf; + + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + err = fetch_kfunc_meta(env, insn, &meta, &func_name); + if (err == -EACCES && func_name) + verbose(env, "calling kernel function %s is not allowed\n", func_name); + if (err) + return err; + desc_btf = meta.btf; + insn_aux = &env->insn_aux_data[insn_idx]; if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); @@ -10173,7 +10199,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10185,14 +10211,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", - func_name, func_id); + func_name, meta.func_id); return err; } err = release_reference(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10202,7 +10228,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, set_rbtree_add_callback_state); if (err) { verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10211,7 +10237,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL); if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { /* Only exception is bpf_obj_new_impl */ @@ -10260,11 +10286,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; - env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; - env->insn_aux_data[insn_idx].kptr_struct_meta = + insn_aux->obj_new_size = ret_t->size; + insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { - env->insn_aux_data[insn_idx].kptr_struct_meta = + insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_obj_drop.btf, meta.arg_obj_drop.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || @@ -10397,8 +10423,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ - nargs = btf_type_vlen(func_proto); - args = (const struct btf_param *)(func_proto + 1); + nargs = btf_type_vlen(meta.func_proto); + args = (const struct btf_param *)(meta.func_proto + 1); for (i = 0; i < nargs; i++) { u32 regno = i + 1; From 215bf4962f6c9605710012fad222a5fec001b3ad Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:15 -0800 Subject: [PATCH 002/260] bpf: add iterator kfuncs registration and validation logic Add ability to register kfuncs that 
implement the BPF open-coded iterator contract and enforce naming and
function proto conventions. Enforcement happens at the time of kfunc
registration and significantly simplifies the rest of the iterator logic in
the verifier.

More details follow in subsequent patches, but we enforce the following
conditions. All kfuncs (constructor, next, destructor) have to be named
consistently as bpf_iter_<type>_{new,next,destroy}(), respectively. <type>
represents the iterator type, and the iterator state should be represented
as a matching `struct bpf_iter_<type>` state type. Also, all iter kfuncs
should have a pointer to this `struct bpf_iter_<type>` as the very first
argument.

Additionally:
  - Constructor, i.e., bpf_iter_<type>_new(), can have an arbitrary number
    of extra arguments. Return type is not enforced either.
  - Next method, i.e., bpf_iter_<type>_next(), has to return a pointer type
    and should have exactly one argument: `struct bpf_iter_<type> *`
    (const/volatile/restrict and typedefs are ignored).
  - Destructor, i.e., bpf_iter_<type>_destroy(), should return void and
    should have exactly one argument, similar to the next method.
  - struct bpf_iter_<type> size is enforced to be positive and a multiple
    of 8 bytes (to fit stack slots correctly).

Such strictness and consistency allow building generic helpers abstracting
important, but boilerplate, details, making it possible to use open-coded
iterators effectively and ergonomically (see bpf_for_each() in subsequent
patches). It also simplifies the verifier logic in some places. At the same
time, this doesn't hurt the generality of possible iterator
implementations. Win-win.

The constructor kfunc is marked with the new KF_ITER_NEW flag, the next
method is marked with KF_ITER_NEXT (and should also have KF_RET_NULL, of
course), while the destructor kfunc is marked as KF_ITER_DESTROY.

Additionally, we add a trivial kfunc name validation: it should be a valid
non-NULL and non-empty string.

Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230308184121.1165081-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h |   2 +
 include/linux/btf.h          |   4 ++
 kernel/bpf/btf.c             | 112 ++++++++++++++++++++++++++++++++++-
 3 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 18538bad2b8c..e2dc7f064449 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -59,6 +59,8 @@ struct bpf_active_lock {
 	u32 id;
 };

+#define ITER_PREFIX "bpf_iter_"
+
 struct bpf_reg_state {
 	/* Ordering of fields matters. See states_equal() */
 	enum bpf_reg_type type;
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 556b3e2e7471..1bba0827e8c4 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -71,6 +71,10 @@
 #define KF_SLEEPABLE   (1 << 5) /* kfunc may sleep */
 #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
 #define KF_RCU         (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
+/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */
+#define KF_ITER_NEW     (1 << 8) /* kfunc implements BPF iter constructor */
+#define KF_ITER_NEXT    (1 << 9) /* kfunc implements BPF iter next method */
+#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */

 /*
  * Tag marking a kernel function as a kfunc.
This is meant to minimize the

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a8cb09e5973b..71758cd15b07 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -7596,6 +7596,108 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
 BTF_TRACING_TYPE_xxx
 #undef BTF_TRACING_TYPE

+static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
+				 const struct btf_type *func, u32 func_flags)
+{
+	u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
+	const char *name, *sfx, *iter_name;
+	const struct btf_param *arg;
+	const struct btf_type *t;
+	char exp_name[128];
+	u32 nr_args;
+
+	/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
+	if (!flags || (flags & (flags - 1)))
+		return -EINVAL;
+
+	/* any BPF iter kfunc should have `struct bpf_iter_<type> *` first arg */
+	nr_args = btf_type_vlen(func);
+	if (nr_args < 1)
+		return -EINVAL;
+
+	arg = &btf_params(func)[0];
+	t = btf_type_skip_modifiers(btf, arg->type, NULL);
+	if (!t || !btf_type_is_ptr(t))
+		return -EINVAL;
+	t = btf_type_skip_modifiers(btf, t->type, NULL);
+	if (!t || !__btf_type_is_struct(t))
+		return -EINVAL;
+
+	name = btf_name_by_offset(btf, t->name_off);
+	if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
+		return -EINVAL;
+
+	/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
+	 * fit nicely in stack slots
+	 */
+	if (t->size == 0 || (t->size % 8))
+		return -EINVAL;
+
+	/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
+	 * naming pattern
+	 */
+	iter_name = name + sizeof(ITER_PREFIX) - 1;
+	if (flags & KF_ITER_NEW)
+		sfx = "new";
+	else if (flags & KF_ITER_NEXT)
+		sfx = "next";
+	else /* (flags & KF_ITER_DESTROY) */
+		sfx = "destroy";
+
+	snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx);
+	if (strcmp(func_name, exp_name))
+		return -EINVAL;
+
+	/* only iter constructor should have extra arguments */
+	if (!(flags & KF_ITER_NEW) && nr_args != 1)
+		return -EINVAL;
+
+	if (flags & KF_ITER_NEXT) {
+		/* bpf_iter_<type>_next() should return pointer */
+		t = btf_type_skip_modifiers(btf, func->type, NULL);
+		if (!t || !btf_type_is_ptr(t))
+			return -EINVAL;
+	}
+
+	if (flags & KF_ITER_DESTROY) {
+		/* bpf_iter_<type>_destroy() should return void */
+		t = btf_type_by_id(btf, func->type);
+		if (!t || !btf_type_is_void(t))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
+{
+	const struct btf_type *func;
+	const char *func_name;
+	int err;
+
+	/* any kfunc should be FUNC -> FUNC_PROTO */
+	func = btf_type_by_id(btf, func_id);
+	if (!func || !btf_type_is_func(func))
+		return -EINVAL;
+
+	/* sanity check kfunc name */
+	func_name = btf_name_by_offset(btf, func->name_off);
+	if (!func_name || !func_name[0])
+		return -EINVAL;
+
+	func = btf_type_by_id(btf, func->type);
+	if (!func || !btf_type_is_func_proto(func))
+		return -EINVAL;
+
+	if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) {
+		err = btf_check_iter_kfuncs(btf, func_name, func, func_flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /* Kernel Function (kfunc) BTF ID set registration API */

 static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
@@ -7772,7 +7874,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
 				       const struct btf_kfunc_id_set *kset)
 {
 	struct btf *btf;
-	int ret;
+	int ret, i;

 	btf = btf_get_module_btf(kset->owner);
 	if (!btf) {
@@ -7789,7 +7891,15 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
 	if (IS_ERR(btf))
 		return PTR_ERR(btf);

+	for
(i = 0; i < kset->set->cnt; i++) {
+		ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id,
+					     kset->set->pairs[i].flags);
+		if (ret)
+			goto err_out;
+	}
+
 	ret = btf_populate_kfunc_set(btf, hook, kset->set);
+err_out:
 	btf_put(btf);
 	return ret;
 }

From 06accc8779c1d558a5b5a21f2ac82b0c95827ddd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 10:41:16 -0800
Subject: [PATCH 003/260] bpf: add support for open-coded iterator loops

Teach the verifier about the concept of open-coded (or inline) iterators.

This patch adds generic iterator loop verification logic, a new STACK_ITER
stack slot type to contain iterator state, and the necessary kfunc plumbing
for an iterator's constructor, destructor and next methods. The next patch
implements the first specific iterator (a numbers iterator for implementing
for() loop logic). Such a split allows more focused commits for the verifier
logic and a separate commit that we could later point to as a demonstration
of what it takes to add a new kind of iterator.

Each kind of iterator has its own associated struct bpf_iter_<type>, where
<type> denotes a specific type of iterator. struct bpf_iter_<type> state is
supposed to live on the BPF program stack, so there will be no way to change
its size later on without breaking backwards compatibility, so choose
wisely! But given this struct is specific to a given <type> of iterator,
this allows a lot of flexibility: simple iterators could be fine with just
one stack slot (8 bytes), like the numbers iterator in the next patch, while
some other more complicated iterators might need way more to keep their
iterator state. Either way, such a design avoids runtime memory allocations,
which otherwise would be necessary if we fixed the on-the-stack size and it
turned out to be too small for a given iterator implementation.

The way the BPF verifier logic is implemented, there are no artificial
restrictions on the number of active iterators; it should work correctly
with multiple active iterators at the same time. This also means you can
have multiple nested iteration loops. A struct bpf_iter_<type> reference can
be safely passed to subprograms as well.

The general flow is easiest to demonstrate with a simple example using the
numbers iterator implemented in the next patch. Here's the simplest possible
loop:

	struct bpf_iter_num it;
	int *v;

	bpf_iter_num_new(&it, 2, 5);
	while ((v = bpf_iter_num_next(&it))) {
		bpf_printk("X = %d", *v);
	}
	bpf_iter_num_destroy(&it);

The above snippet should output "X = 2", "X = 3", "X = 4". Note that 5 is
exclusive and is not returned. This matches similar APIs (e.g., slices in Go
or Rust) that implement a range of elements, where the end index is
non-inclusive.

In the above example, we see a trio of functions:

  - constructor, bpf_iter_num_new(), which initializes iterator state
    (struct bpf_iter_num it) on the stack. If any of the input arguments are
    invalid, the constructor should make sure to still initialize it such
    that subsequent bpf_iter_num_next() calls will return NULL. I.e., on
    error, return an error and construct an empty iterator.

  - next method, bpf_iter_num_next(), which accepts a pointer to iterator
    state and produces an element. The next method should always return a
    pointer. The contract with the BPF verifier is that the next method will
    always eventually return NULL when elements are exhausted. Once NULL is
    returned, subsequent next calls should keep returning NULL. In the case
    of the numbers iterator, bpf_iter_num_next() returns a pointer to an int
    (storage for this integer is inside the iterator state itself), which
    can be dereferenced after the corresponding NULL check.
  - once done with the iterator, it's mandated that the user cleans up its
    state with a call to the destructor, bpf_iter_num_destroy() in this
    case. The destructor frees up any resources and marks the stack space
    used by struct bpf_iter_num as usable for something else.

Any other iterator implementation will have to implement at least these
three methods (a sketch of such a trio for a hypothetical iterator type is
shown at the end of this description). It is enforced that for any given
type of iterator only the applicable constructor/destructor/next methods are
callable. I.e., the verifier ensures you can't pass a numbers iterator state
into, say, a cgroup iterator's next method.

It is important to keep the naming pattern consistent to be able to create
generic macros to help with BPF iter usability. E.g., one of the follow-up
patches adds a generic bpf_for_each() macro to bpf_misc.h in selftests,
which allows utilizing the iterator "trio" nicely without having to code the
above somewhat tedious loop explicitly every time. This naming pattern is
enforced at kfunc registration point by one of the previous patches in this
series.

At the implementation level, iterator state tracking for verification
purposes is very similar to dynptr. We add a STACK_ITER stack slot type,
reserve the necessary number of slots, depending on
sizeof(struct bpf_iter_<type>), and keep track of the necessary extra state
in the "main" slot, which is marked with a non-zero ref_obj_id. Other slots
are also marked as STACK_ITER, but have zero ref_obj_id. This is simpler
than having a separate "is_first_slot" flag.

Another big distinction is that STACK_ITER is *always refcounted*, which
simplifies the implementation without sacrificing usability. So no need for
an extra "iter_id", no need to anticipate reuse of STACK_ITER slots for new
constructors, etc. Keeping it simple here.

As far as the verification logic goes, there are two extensive comments, in
process_iter_next_call() and iter_active_depths_differ(), explaining some
important and sometimes subtle aspects. Please refer to them for details.
But from a 10,000-foot point of view, next methods are the points of forking
a verification state, which are conceptually similar to what the verifier is
doing when validating a conditional jump. We branch out at a
`call bpf_iter_<type>_next` instruction and simulate two outcomes: NULL
(iteration is done) and non-NULL (new element is returned). NULL is
simulated first and is supposed to reach exit without looping. After that
the non-NULL case is validated and it either reaches exit (for trivial
examples with no real loop), or reaches another `call bpf_iter_<type>_next`
instruction with a state equivalent to an already (partially) validated one.
State equivalence at that point means we are technically going to be looping
forever without "breaking out" of the established "state envelope" (i.e.,
subsequent iterations don't add any new knowledge or constraints to the
verifier state, so running 1, 2, 10, or a million of them doesn't matter).
But taking into account the contract stating that the iterator next method
*has to* return NULL eventually, we can conclude that the loop body is safe
and will eventually terminate. Given we validated logic outside of the loop
(NULL case), and concluded that the loop body is safe (though potentially
looping many times), the verifier can claim safety of the overall program
logic.

The rest of the patch is necessary plumbing for state tracking, marking,
validation, and necessary further kfunc plumbing to allow implementing
iterator constructor, destructor, and next methods.
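To make the contract concrete, here is a sketch of the kfunc trio for a
hypothetical "foo" iterator type (not part of this series; the names and the
constructor's extra argument are purely illustrative):

	/* iterator state, lives on the BPF program stack; its size must be
	 * a positive multiple of 8 bytes
	 */
	struct bpf_iter_foo {
		__u64 __opaque[1];
	} __attribute__((aligned(8)));

	/* constructor (KF_ITER_NEW): may take extra arguments and must
	 * initialize the state even when it returns an error
	 */
	__bpf_kfunc int bpf_iter_foo_new(struct bpf_iter_foo *it, int some_arg);

	/* next method (KF_ITER_NEXT | KF_RET_NULL): NULL means exhausted */
	__bpf_kfunc int *bpf_iter_foo_next(struct bpf_iter_foo *it);

	/* destructor (KF_ITER_DESTROY): must be called before program exit */
	__bpf_kfunc void bpf_iter_foo_destroy(struct bpf_iter_foo *it);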
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230308184121.1165081-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h |  23 ++
 kernel/bpf/verifier.c        | 595 ++++++++++++++++++++++++++++++++++-
 2 files changed, 610 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e2dc7f064449..0c052bc79940 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -61,6 +61,12 @@ struct bpf_active_lock {

 #define ITER_PREFIX "bpf_iter_"

+enum bpf_iter_state {
+	BPF_ITER_STATE_INVALID, /* for non-first slot */
+	BPF_ITER_STATE_ACTIVE,
+	BPF_ITER_STATE_DRAINED,
+};
+
 struct bpf_reg_state {
 	/* Ordering of fields matters. See states_equal() */
 	enum bpf_reg_type type;
@@ -105,6 +111,18 @@ struct bpf_reg_state {
 			bool first_slot;
 		} dynptr;

+		/* For bpf_iter stack slots */
+		struct {
+			/* BTF container and BTF type ID describing
+			 * struct bpf_iter_<type> of an iterator state
+			 */
+			struct btf *btf;
+			u32 btf_id;
+			/* packing following two fields to fit iter state into 16 bytes */
+			enum bpf_iter_state state:2;
+			int depth:30;
+		} iter;
+
 		/* Max size from any of the above. */
 		struct {
 			unsigned long raw1;
@@ -143,6 +161,8 @@ struct bpf_reg_state {
 	 * same reference to the socket, to determine proper reference freeing.
 	 * For stack slots that are dynptrs, this is used to track references to
 	 * the dynptr to determine proper reference freeing.
+	 * Similarly to dynptrs, we use ID to track "belonging" of a reference
+	 * to a specific instance of bpf_iter.
 	 */
 	u32 id;
 	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
@@ -213,9 +233,11 @@ enum bpf_stack_slot_type {
 	 * is stored in bpf_stack_state->spilled_ptr.dynptr.type
 	 */
 	STACK_DYNPTR,
+	STACK_ITER,
 };

 #define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
+
 #define BPF_DYNPTR_SIZE		sizeof(struct bpf_dynptr_kern)
 #define BPF_DYNPTR_NR_SLOTS		(BPF_DYNPTR_SIZE / BPF_REG_SIZE)

@@ -450,6 +472,7 @@ struct bpf_insn_aux_data {
 	bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
 	bool zext_dst; /* this insn zero extends dst reg */
 	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
+	bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
 	u8 alu_state; /* used in combination with alu_limit */

 	/* below fields are initialized once */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8d40fba6a1c0..45a082284464 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -302,6 +302,10 @@ struct bpf_kfunc_call_arg_meta {
 		enum bpf_dynptr_type type;
 		u32 id;
 	} initialized_dynptr;
+	struct {
+		u8 spi;
+		u8 frameno;
+	} iter;
 	u64 mem_size;
 };

@@ -668,6 +672,7 @@ static char slot_type_char[] = {
 	[STACK_MISC]	= 'm',
 	[STACK_ZERO]	= '0',
 	[STACK_DYNPTR]	= 'd',
+	[STACK_ITER]	= 'i',
 };

 static void print_liveness(struct bpf_verifier_env *env,
@@ -742,6 +747,11 @@ static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *re
 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
 }

+static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
+{
+	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
+}
+
 static const char *kernel_type_name(const struct btf* btf, u32 id)
 {
 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
@@ -766,6 +776,30 @@ static const char *dynptr_type_str(enum bpf_dynptr_type type)
 	}
 }

+static const char *iter_type_str(const struct btf *btf, u32 btf_id)
+{
+	if (!btf || btf_id == 0)
+		return
""; + + /* we already validated that type is valid and has conforming name */ + return kernel_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; +} + +static const char *iter_state_str(enum bpf_iter_state state) +{ + switch (state) { + case BPF_ITER_STATE_ACTIVE: + return "active"; + case BPF_ITER_STATE_DRAINED: + return "drained"; + case BPF_ITER_STATE_INVALID: + return ""; + default: + WARN_ONCE(1, "unknown iter state %d\n", state); + return ""; + } +} + static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; @@ -1118,6 +1152,157 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg } } +static void __mark_reg_known_zero(struct bpf_reg_state *reg); + +static int mark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int insn_idx, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j, id; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = i == 0 ? id : 0; + st->iter.btf = btf; + st->iter.btf_id = btf_id; + st->iter.state = BPF_ITER_STATE_ACTIVE; + st->iter.depth = 0; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_ITER; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static int unmark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + if (i == 0) + WARN_ON_ONCE(release_reference(env, st->ref_obj_id)); + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. 
+ */ + spi = iter_get_spi(env, reg, nr_slots); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] == STACK_ITER) + return false; + } + + return true; +} + +static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + /* only main (first) slot has ref_obj_id set */ + if (i == 0 && !st->ref_obj_id) + return false; + if (i != 0 && st->ref_obj_id) + return false; + if (st->iter.btf != btf || st->iter.btf_id != btf_id) + return false; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] != STACK_ITER) + return false; + } + + return true; +} + +/* Check if given stack slot is "special": + * - spilled register state (STACK_SPILL); + * - dynptr state (STACK_DYNPTR); + * - iter state (STACK_ITER). + */ +static bool is_stack_slot_special(const struct bpf_stack_state *stack) +{ + enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1]; + + switch (type) { + case STACK_SPILL: + case STACK_DYNPTR: + case STACK_ITER: + return true; + case STACK_INVALID: + case STACK_MISC: + case STACK_ZERO: + return false; + default: + WARN_ONCE(1, "unknown stack slot type %d\n", type); + return true; + } +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. */ @@ -1267,6 +1452,19 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (reg->ref_obj_id) verbose(env, "(ref_id=%d)", reg->ref_obj_id); break; + case STACK_ITER: + /* only main slot has ref_obj_id set; skip others */ + reg = &state->stack[i].spilled_ptr; + if (!reg->ref_obj_id) + continue; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", + iter_type_str(reg->iter.btf, reg->iter.btf_id), + reg->ref_obj_id, iter_state_str(reg->iter.state), + reg->iter.depth); + break; case STACK_MISC: case STACK_ZERO: default: @@ -2710,6 +2908,25 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state * state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); } +static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int err, i; + + for (i = 0; i < nr_slots; i++) { + struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; + + err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); + if (err) + return err; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + /* This function is supposed to be used by the following 32-bit optimization * code only. It returns TRUE if the source or destination register operates * on 64-bit, otherwise return FALSE. @@ -3691,8 +3908,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; - /* Mark slots as STACK_MISC if they belonged to spilled ptr. 
*/
-	if (is_spilled_reg(&state->stack[spi]))
+	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
+	if (is_stack_slot_special(&state->stack[spi]))
 		for (i = 0; i < BPF_REG_SIZE; i++)
 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
@@ -6506,6 +6723,203 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
 	return err;
 }

+static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
+{
+	struct bpf_func_state *state = func(env, reg);
+
+	return state->stack[spi].spilled_ptr.ref_obj_id;
+}
+
+static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
+}
+
+static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & KF_ITER_NEW;
+}
+
+static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & KF_ITER_NEXT;
+}
+
+static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->kfunc_flags & KF_ITER_DESTROY;
+}
+
+static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
+{
+	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
+	 * kfunc is iter state pointer
+	 */
+	return arg == 0 && is_iter_kfunc(meta);
+}
+
+static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
+			    struct bpf_kfunc_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	const struct btf_type *t;
+	const struct btf_param *arg;
+	int spi, err, i, nr_slots;
+	u32 btf_id;
+
+	/* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
+	arg = &btf_params(meta->func_proto)[0];
+	t = btf_type_skip_modifiers(meta->btf, arg->type, NULL);	/* PTR */
+	t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id);	/* STRUCT */
+	nr_slots = t->size / BPF_REG_SIZE;
+
+	spi = iter_get_spi(env, reg, nr_slots);
+	if (spi < 0 && spi != -ERANGE)
+		return spi;
+
+	meta->iter.spi = spi;
+	meta->iter.frameno = reg->frameno;
+
+	if (is_iter_new_kfunc(meta)) {
+		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
+		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
+			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
+				iter_type_str(meta->btf, btf_id), regno);
+			return -EINVAL;
+		}
+
+		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
+			err = check_mem_access(env, insn_idx, regno,
+					       i, BPF_DW, BPF_WRITE, -1, false);
+			if (err)
+				return err;
+		}
+
+		err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
+		if (err)
+			return err;
+	} else {
+		/* iter_next() or iter_destroy() expect initialized iter state */
+		if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
+			verbose(env, "expected an initialized iter_%s as arg #%d\n",
+				iter_type_str(meta->btf, btf_id), regno);
+			return -EINVAL;
+		}
+
+		err = mark_iter_read(env, reg, spi, nr_slots);
+		if (err)
+			return err;
+
+		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
+
+		if (is_iter_destroy_kfunc(meta)) {
+			err = unmark_stack_slots_iter(env, reg, nr_slots);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* process_iter_next_call() is called when verifier gets to iterator's next
+ * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
+ * to it as just "iter_next()" in comments below.
+ * + * BPF verifier relies on a crucial contract for any iter_next() + * implementation: it should *eventually* return NULL, and once that happens + * it should keep returning NULL. That is, once iterator exhausts elements to + * iterate, it should never reset or spuriously return new elements. + * + * With the assumption of such contract, process_iter_next_call() simulates + * a fork in the verifier state to validate loop logic correctness and safety + * without having to simulate infinite amount of iterations. + * + * In current state, we first assume that iter_next() returned NULL and + * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such + * conditions we should not form an infinite loop and should eventually reach + * exit. + * + * Besides that, we also fork current state and enqueue it for later + * verification. In a forked state we keep iterator state as ACTIVE + * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We + * also bump iteration depth to prevent erroneous infinite loop detection + * later on (see iter_active_depths_differ() comment for details). In this + * state we assume that we'll eventually loop back to another iter_next() + * calls (it could be in exactly same location or in some other instruction, + * it doesn't matter, we don't make any unnecessary assumptions about this, + * everything revolves around iterator state in a stack slot, not which + * instruction is calling iter_next()). When that happens, we either will come + * to iter_next() with equivalent state and can conclude that next iteration + * will proceed in exactly the same way as we just verified, so it's safe to + * assume that loop converges. If not, we'll go on another iteration + * simulation with a different input state, until all possible starting states + * are validated or we reach maximum number of instructions limit. + * + * This way, we will either exhaustively discover all possible input states + * that iterator loop can start with and eventually will converge, or we'll + * effectively regress into bounded loop simulation logic and either reach + * maximum number of instructions if loop is not provably convergent, or there + * is some statically known limit on number of iterations (e.g., if there is + * an explicit `if n > 100 then break;` statement somewhere in the loop). + * + * One very subtle but very important aspect is that we *always* simulate NULL + * condition first (as the current state) before we simulate non-NULL case. + * This has to do with intricacies of scalar precision tracking. By simulating + * "exit condition" of iter_next() returning NULL first, we make sure all the + * relevant precision marks *that will be set **after** we exit iterator loop* + * are propagated backwards to common parent state of NULL and non-NULL + * branches. Thanks to that, state equivalence checks done later in forked + * state, when reaching iter_next() for ACTIVE iterator, can assume that + * precision marks are finalized and won't change. Because simulating another + * ACTIVE iterator iteration won't change them (because given same input + * states we'll end up with exactly same output states which we are currently + * comparing; and verification after the loop already propagated back what + * needs to be **additionally** tracked as precise). It's subtle, grok + * precision tracking for more intuitive understanding. 
+ */ +static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; + struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; + struct bpf_reg_state *cur_iter, *queued_iter; + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + BTF_TYPE_EMIT(struct bpf_iter); + + cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + + if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && + cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { + verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n", + cur_iter->iter.state, iter_state_str(cur_iter->iter.state)); + return -EFAULT; + } + + if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { + /* branch out active iter state */ + queued_st = push_stack(env, insn_idx + 1, insn_idx, false); + if (!queued_st) + return -ENOMEM; + + queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; + queued_iter->iter.depth++; + + queued_fr = queued_st->frame[queued_st->curframe]; + mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); + } + + /* switch to DRAINED state, but keep the depth unchanged */ + /* mark current iter state as drained and assume returned NULL */ + cur_iter->iter.state = BPF_ITER_STATE_DRAINED; + __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]); + + return 0; +} + static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -9099,6 +9513,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_ITER, KF_ARG_PTR_TO_LIST_HEAD, KF_ARG_PTR_TO_LIST_NODE, KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ @@ -9220,6 +9635,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR; + if (is_kfunc_arg_iter(meta, argno)) + return KF_ARG_PTR_TO_ITER; + if (is_kfunc_arg_list_head(meta->btf, &args[argno])) return KF_ARG_PTR_TO_LIST_HEAD; @@ -9848,6 +10266,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_KPTR: case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: case KF_ARG_PTR_TO_LIST_NODE: case KF_ARG_PTR_TO_RB_ROOT: @@ -9944,6 +10363,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } + case KF_ARG_PTR_TO_ITER: + ret = process_iter_arg(env, regno, insn_idx, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { @@ -10148,6 +10572,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc_btf = meta.btf; insn_aux = &env->insn_aux_data[insn_idx]; + insn_aux->is_iter_next = is_iter_next_kfunc(&meta); + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); return -EACCES; @@ -10436,6 +10862,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_btf_func_reg_size(env, regno, t->size); } + if (is_iter_next_kfunc(&meta)) { + err = process_iter_next_call(env, insn_idx, &meta); + if (err) + return err; + } + return 0; } @@ -13548,6 +13980,13 @@ static int visit_insn(int t, 
struct bpf_verifier_env *env)
 	 * async state will be pushed for further exploration.
 	 */
 	mark_prune_point(env, t);
+	if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+		struct bpf_kfunc_call_arg_meta meta;
+
+		ret = fetch_kfunc_meta(env, insn, &meta, NULL);
+		if (ret == 0 && is_iter_next_kfunc(&meta))
+			mark_prune_point(env, t);
+	}
 	return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
 case BPF_JA:
@@ -14301,6 +14740,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 	 * didn't use them
 	 */
 	for (i = 0; i < old->allocated_stack; i++) {
+		struct bpf_reg_state *old_reg, *cur_reg;
+
 		spi = i / BPF_REG_SIZE;

 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
@@ -14357,9 +14798,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 			return false;
 		break;
 	case STACK_DYNPTR:
-	{
-		const struct bpf_reg_state *old_reg, *cur_reg;
-
 		old_reg = &old->stack[spi].spilled_ptr;
 		cur_reg = &cur->stack[spi].spilled_ptr;
 		if (old_reg->dynptr.type != cur_reg->dynptr.type ||
 		    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
 			return false;
 		break;
-	}
+	case STACK_ITER:
+		old_reg = &old->stack[spi].spilled_ptr;
+		cur_reg = &cur->stack[spi].spilled_ptr;
+		/* iter.depth is not compared between states as it
+		 * doesn't matter for correctness and would otherwise
+		 * prevent convergence; we maintain it only to prevent
+		 * infinite loop check triggering, see
+		 * iter_active_depths_differ()
+		 */
+		if (old_reg->iter.btf != cur_reg->iter.btf ||
+		    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
+		    old_reg->iter.state != cur_reg->iter.state ||
+		    /* ignore {old_reg,cur_reg}->iter.depth, see above */
+		    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+			return false;
+		break;
 	case STACK_MISC:
 	case STACK_ZERO:
 	case STACK_INVALID:
@@ -14626,6 +15079,92 @@ static bool states_maybe_looping(struct bpf_verifier_state *old,
 	return true;
 }

+static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].is_iter_next;
+}
+
+/* is_state_visited() handles iter_next() (see process_iter_next_call() for
+ * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
+ * states to match, which otherwise would look like an infinite loop. So while
+ * iter_next() calls are taken care of, we still need to be careful and
+ * prevent erroneous and too eager declaration of "infinite loop", when
+ * iterators are involved.
+ *
+ * Here's a situation in pseudo-BPF assembly form:
+ *
+ *   0: again:                        ; set up iter_next() call args
+ *   1:   r1 = &it                    ;
+ *   2:   call bpf_iter_num_next      ; this is iter_next() call
+ *   3:   if r0 == 0 goto done
+ *   4:   ... something useful here ...
+ *   5:   goto again                  ; another iteration
+ *   6: done:
+ *   7:   r1 = &it
+ *   8:   call bpf_iter_num_destroy   ; clean up iter state
+ *   9:   exit
+ *
+ * This is a typical loop. Let's assume that we have a prune point at 1:,
+ * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
+ * again`, assuming other heuristics don't get in the way).
+ *
+ * When we first come to 1:, let's say we have some state X. We proceed
+ * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
+ * Now we come back to validate that forked ACTIVE state. We proceed through
+ * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
+ * are converging.
 * But the problem is that we don't know that yet, as this convergence has to
 * happen at the iter_next() call site only. So if nothing is done, at 1: the
 * verifier will use bounded loop logic and declare infinite looping (and
 * would be *technically* correct, if not for the iterator's "eventual sticky
 * NULL" contract, see process_iter_next_call()). But we don't want that. So
 * what we do in process_iter_next_call() when we go on another ACTIVE
 * iteration, we bump slot->iter.depth, to mark that it's a different
 * iteration. So when we suspect an infinite loop, we additionally check if
 * any of the *ACTIVE* iterator states' depths differ. If yes, we pretend we
 * are not looping and wait for the next iter_next() call.
 *
 * This only applies to ACTIVE state. In DRAINED state we don't expect to
 * loop, because that would actually mean infinite loop, as DRAINED state is
 * "sticky", and so we'll keep returning into the same instruction with the
 * same state (at least in one of possible code paths).
 *
 * This approach allows keeping the infinite loop heuristic even in the face
 * of an active iterator. E.g., the C snippet below is and will be detected
 * as infinitely looping:
 *
 *   struct bpf_iter_num it;
 *   int *p, x;
 *
 *   bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
 *       while (x--) {} // <<-- infinite loop here
 *   }
 *
 */
+static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
+{
+	struct bpf_reg_state *slot, *cur_slot;
+	struct bpf_func_state *state;
+	int i, fr;
+
+	for (fr = old->curframe; fr >= 0; fr--) {
+		state = old->frame[fr];
+		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+			if (state->stack[i].slot_type[0] != STACK_ITER)
+				continue;
+
+			slot = &state->stack[i].spilled_ptr;
+			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
+				continue;
+
+			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
+			if (cur_slot->iter.depth != slot->iter.depth)
+				return true;
+		}
+	}
+	return false;
+}

 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
@@ -14673,8 +15212,46 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	 * Since the verifier still needs to catch infinite loops
 	 * inside async callbacks.
 	 */
-		} else if (states_maybe_looping(&sl->state, cur) &&
-			   states_equal(env, &sl->state, cur)) {
+			goto skip_inf_loop_check;
+		}
+		/* BPF open-coded iterators loop detection is special.
+		 * states_maybe_looping() logic is too simplistic in detecting
+		 * states that *might* be equivalent, because it doesn't know
+		 * about ID remapping, so don't even perform it.
+		 * See process_iter_next_call() and iter_active_depths_differ()
+		 * for an overview of the logic. When the current and one of the
+		 * parent states are detected as equivalent, it's a good thing:
+		 * we prove convergence and can stop simulating further
+		 * iterations. It's safe to assume that the iterator loop will
+		 * finish, taking into account the iter_next() contract of
+		 * eventually returning a sticky NULL result.
+		 */
+		if (is_iter_next_insn(env, insn_idx)) {
+			if (states_equal(env, &sl->state, cur)) {
+				struct bpf_func_state *cur_frame;
+				struct bpf_reg_state *iter_state, *iter_reg;
+				int spi;
+
+				cur_frame = cur->frame[cur->curframe];
+				/* btf_check_iter_kfuncs() enforces that
+				 * iter state pointer is always the first arg
+				 */
+				iter_reg = &cur_frame->regs[BPF_REG_1];
+				/* current state is valid due to states_equal(),
+				 * so we can assume valid iter and reg state,
+				 * no need for extra (re-)validations
+				 */
+				spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
+				iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
+				if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE)
+					goto hit;
+			}
+			goto skip_inf_loop_check;
+		}
+		/* attempt to detect infinite loop to avoid unnecessary doomed work */
+		if (states_maybe_looping(&sl->state, cur) &&
+		    states_equal(env, &sl->state, cur) &&
+		    !iter_active_depths_differ(&sl->state, cur)) {
 			verbose_linfo(env, insn_idx, "; ");
 			verbose(env, "infinite loop detected at insn %d\n", insn_idx);
 			return -EINVAL;
@@ -14691,6 +15268,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		 * This threshold shouldn't be too high either, since states
 		 * at the end of the loop are likely to be useful in pruning.
 		 */
+skip_inf_loop_check:
 		if (!env->test_state_freq &&
 		    env->jmps_processed - env->prev_jmps_processed < 20 &&
 		    env->insn_processed - env->prev_insn_processed < 100)
@@ -14698,6 +15276,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			goto miss;
 		}
 		if (states_equal(env, &sl->state, cur)) {
+hit:
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
 			 * prune the search.

From 6018e1f407cccf39b804d1f75ad4de7be4e6cc45 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 10:41:17 -0800
Subject: [PATCH 004/260] bpf: implement numbers iterator

Implement the first open-coded iterator type over a range of integers.

Its public API consists of:
  - bpf_iter_num_new() constructor, which accepts the [start, end) range
    (that is, start is inclusive, end is exclusive).
  - bpf_iter_num_next() which will keep returning a read-only pointer to int
    until the range is exhausted, at which point NULL will be returned. If
    bpf_iter_num_next() keeps being called after this, NULL will be
    persistently returned.
  - bpf_iter_num_destroy() destructor, which needs to be called at some
    point to clean up the iterator state. The BPF verifier enforces that the
    iterator destructor is called at some point before the BPF program
    exits.

Note that `start = end = X` is a valid combination to set up an empty
iterator. bpf_iter_num_new() will return 0 (success) for any such
combination.

If bpf_iter_num_new() detects an invalid combination of input arguments, it
returns an error and resets the iterator state to, effectively, an empty
iterator, so any subsequent call to bpf_iter_num_next() will keep returning
NULL.

The BPF verifier has no knowledge that the returned integers are in the
[start, end) value range, as both `start` and `end` are not statically known
and enforced: they are runtime values.

While the implementation is pretty trivial, some care needs to be taken to
avoid overflows and underflows. Subsequent selftests will validate the
correctness of the [start, end) semantics, especially around extremes
(INT_MIN and INT_MAX).

Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS
iterations can be specified.
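To make these semantics concrete, here is a minimal BPF-side usage sketch
(assuming the kfunc declarations are in scope, e.g., via vmlinux.h or
explicit __ksym externs):

	struct bpf_iter_num it;
	int *v, sum = 0;

	/* iterate over [100, 200); on error the iterator is left empty */
	bpf_iter_num_new(&it, 100, 200);
	while ((v = bpf_iter_num_next(&it)))
		sum += *v; /* v points into the iterator state itself */
	/* the destructor has to be called even if the constructor failed */
	bpf_iter_num_destroy(&it);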
bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded BPF
loops and the bpf_loop() helper, and is the basis for implementing ergonomic
BPF loops with no statically known or verified bounds. Subsequent patches
implement the bpf_for() macro, demonstrating how this can be wrapped into
something that works and feels like a normal for() loop in the C language.

Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230308184121.1165081-5-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf.h            |  8 +++-
 include/uapi/linux/bpf.h       |  8 ++++
 kernel/bpf/bpf_iter.c          | 70 ++++++++++++++++++++++++++++++++++
 kernel/bpf/helpers.c           |  3 ++
 tools/include/uapi/linux/bpf.h |  8 ++++
 5 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6792a7940e1e..e64ff1e89fb2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1617,8 +1617,12 @@ struct bpf_array {
 #define BPF_COMPLEXITY_LIMIT_INSNS	1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33

-/* Maximum number of loops for bpf_loop */
-#define BPF_MAX_LOOPS	BIT(23)
+/* Maximum number of loops for bpf_loop and bpf_iter_num.
+ * It's an enum to expose it (and thus make it discoverable) through BTF.
+ */
+enum {
+	BPF_MAX_LOOPS = 8 * 1024 * 1024,
+};

 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 976b194eb775..4abddb668a10 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7112,4 +7112,12 @@ enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
 };

+/* BPF numbers iterator state */
+struct bpf_iter_num {
+	/* opaque iterator state; having __u64 here allows preserving correct
+	 * alignment requirements in vmlinux.h, generated from BTF
+	 */
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 5dc307bdeaeb..96856f130cbf 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = {
 	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
 	.arg4_type	= ARG_ANYTHING,
 };
+
+struct bpf_iter_num_kern {
+	int cur; /* current value, inclusive */
+	int end; /* final value, exclusive */
+} __aligned(8);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
+
+	BTF_TYPE_EMIT(struct bpf_iter_num);
+
+	/* start == end is legit, it's an empty range and we'll just get NULL
+	 * on first (and any subsequent) bpf_iter_num_next() call
+	 */
+	if (start > end) {
+		s->cur = s->end = 0;
+		return -EINVAL;
+	}
+
+	/* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */
+	if ((s64)end - (s64)start > BPF_MAX_LOOPS) {
+		s->cur = s->end = 0;
+		return -E2BIG;
+	}
+
+	/* user will call bpf_iter_num_next() first,
+	 * which will set s->cur to exactly start value;
+	 * underflow shouldn't matter
+	 */
+	s->cur = start - 1;
+	s->end = end;
+
+	return 0;
+}
+
+__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num *it)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	/* check failed initialization or if we are done (same behavior);
+	 * need to be careful about overflow, so convert to s64
for checks,
	 * e.g., if s->cur == s->end == INT_MAX, we can't just do
	 * s->cur + 1 >= s->end
	 */
+	if ((s64)(s->cur + 1) >= s->end) {
+		s->cur = s->end = 0;
+		return NULL;
+	}
+
+	s->cur++;
+
+	return &s->cur;
+}
+
+__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	s->cur = s->end = 0;
+}
+
+__diag_pop();
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 637ac4e92e75..f9b7eeedce08 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock)
 BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
 BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
 BTF_SET8_END(common_btf_ids)

 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 976b194eb775..4abddb668a10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7112,4 +7112,12 @@ enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
 };

+/* BPF numbers iterator state */
+struct bpf_iter_num {
+	/* opaque iterator state; having __u64 here allows preserving correct
+	 * alignment requirements in vmlinux.h, generated from BTF
+	 */
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */

From 8c2b5e90505e474f36ecc3b7f3f8298b59d72e91 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 10:41:18 -0800
Subject: [PATCH 005/260] selftests/bpf: add bpf_for_each(), bpf_for(), and
 bpf_repeat() macros

Add bpf_for_each(), bpf_for(), and bpf_repeat() macros that make writing
open-coded iterator-based loops much more convenient and natural. These
macros utilize the cleanup attribute to ensure proper destruction of the
iterator and, thanks to that, manage to provide ergonomics very close to
the C language's for() construct. A typical loop looks like:

	int i;
	int arr[N];

	bpf_for(i, 0, N) {
		/* verifier will know that i >= 0 && i < N, so i could be
		 * used to directly access array elements with no extra
		 * checks
		 */
		arr[i] = i;
	}

bpf_repeat() is very similar, but it doesn't expose the iteration number
and is meant as a simple "repeat action N times" loop:

	bpf_repeat(N) { /* whatever, N times */ }

Note that `break` and `continue` statements inside the {} block work as
expected.

bpf_for_each() is a generalization over any kind of BPF open-coded
iterator, allowing a for-each-like approach instead of calling the
low-level bpf_iter_<type>_{new,next,destroy}() APIs explicitly. E.g.:

	struct cgroup *cg;

	bpf_for_each(cgroup, cg, some, input, args) {
		/* do something with each cg */
	}

would call (not-yet-implemented) bpf_iter_cgroup_{new,next,destroy}()
functions to form a loop over cgroups, where `some, input, args` are passed
verbatim into the constructor as bpf_iter_cgroup_new(&it, some, input,
args).

As a first demonstration, add a pyperf variant based on the bpf_for() loop.

Also clean up a few tests that either included the bpf_misc.h header
unnecessarily from user-space, which is unsupported, or included it before
any common types were defined (potentially leading to unnecessary
compilation warnings).
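For reference, bpf_for(i, 0, N) saves the user from open-coding roughly the
following (a sketch; the actual macro below additionally scopes the iterator
to the loop via the cleanup attribute and re-checks the bounds so that the
verifier can track i's range):

	struct bpf_iter_num it;
	int *t, i;

	bpf_iter_num_new(&it, 0, N);
	while ((t = bpf_iter_num_next(&it))) {
		i = *t;
		/* ... loop body ... */
	}
	bpf_iter_num_destroy(&it);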
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230308184121.1165081-6-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 .../bpf/prog_tests/bpf_verif_scale.c          |  6 ++
 .../bpf/prog_tests/uprobe_autoattach.c        |  1 -
 tools/testing/selftests/bpf/progs/bpf_misc.h  | 99 +++++++++++++++++++
 tools/testing/selftests/bpf/progs/lsm.c       |  4 +-
 tools/testing/selftests/bpf/progs/pyperf.h    | 14 ++-
 .../selftests/bpf/progs/pyperf600_iter.c      |  7 ++
 .../selftests/bpf/progs/pyperf600_nounroll.c  |  3 -
 7 files changed, 124 insertions(+), 10 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_iter.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 5ca252823294..731c343897d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -144,6 +144,12 @@ void test_verif_scale_pyperf600_nounroll()
 	scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
+void test_verif_scale_pyperf600_iter()
+{
+	/* open-coded BPF iterator version */
+	scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
 void test_verif_scale_loop1()
 {
 	scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index 6558c857e620..d5b3377aa33c 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -3,7 +3,6 @@
 #include
 #include "test_uprobe_autoattach.skel.h"
-#include "progs/bpf_misc.h"
 
 /* uprobe attach point */
 static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index f704885aa534..597688a188ae 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -75,5 +75,104 @@
 #define FUNC_REG_ARG_CNT 5
 #endif
 
+struct bpf_iter_num;
+
+extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
+extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
+extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;
+
+#ifndef bpf_for_each
+/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
+ * using BPF open-coded iterators without having to write mundane explicit
+ * low-level loop logic. Instead, it provides for()-like generic construct
+ * that can be used pretty naturally. E.g., for some hypothetical cgroup
+ * iterator, you'd write:
+ *
+ * struct cgroup *cg, *parent_cg = <...>;
+ *
+ * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
+ *     bpf_printk("Child cgroup id = %d", cg->cgroup_id);
+ *     if (cg->cgroup_id == 123)
+ *         break;
+ * }
+ *
+ * I.e., it looks almost like a high-level for-each loop in other languages,
+ * supports continue/break, and is verifiable by the BPF verifier.
+ *
+ * For iterating integers, the difference between bpf_for_each(num, i, N, M)
+ * and bpf_for(i, N, M) is that bpf_for() provides additional proof to the
+ * verifier that i is in [N, M) range, while in the bpf_for_each() case i is
+ * `int *`, not just `int`. So for integers bpf_for() is more convenient.
+ *
+ * Note: this macro relies on the C99 feature of allowing to declare variables
+ * inside for() loop, bound to for() loop lifetime. It also utilizes the GCC
+ * extension __attribute__((cleanup())), supported by both GCC and
+ * Clang.
+ */
+#define bpf_for_each(type, cur, args...) for (						\
+	/* initialize and define destructor */						\
+	struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */ \
+						    cleanup(bpf_iter_##type##_destroy))), \
+	/* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \
+		*___p = (bpf_iter_##type##_new(&___it, ##args),				\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */	\
+	/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */	\
+		(void)bpf_iter_##type##_destroy, (void *)0);				\
+	/* iteration and termination check */						\
+	(((cur) = bpf_iter_##type##_next(&___it)));					\
+)
+#endif /* bpf_for_each */
+
+#ifndef bpf_for
+/* bpf_for(i, start, end) implements a for()-like looping construct that sets
+ * provided integer variable *i* to values starting from *start* through,
+ * but not including, *end*. It also proves to BPF verifier that *i* belongs
+ * to range [start, end), so this can be used for accessing arrays without
+ * extra checks.
+ *
+ * Note: *start* and *end* are assumed to be expressions with no side effects
+ * and whose values do not change throughout bpf_for() loop execution. They do
+ * not have to be statically known or constant, though.
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_for(i, start, end) for (							\
+	/* initialize and define destructor */						\
+	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+						 cleanup(bpf_iter_num_destroy))),	\
+	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+		*___p = (bpf_iter_num_new(&___it, (start), (end)),			\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */	\
+	/* for bpf_iter_num_destroy() when used from cleanup() attribute */		\
+		(void)bpf_iter_num_destroy, (void *)0);					\
+	({										\
+		/* iteration step */							\
+		int *___t = bpf_iter_num_next(&___it);					\
+		/* termination and bounds check */					\
+		(___t && ((i) = *___t, (i) >= (start) && (i) < (end)));		\
+	});										\
+)
+#endif /* bpf_for */
+
+#ifndef bpf_repeat
+/* bpf_repeat(N) performs N iterations without exposing the iteration number
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_repeat(N) for (								\
+	/* initialize and define destructor */						\
+	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+						 cleanup(bpf_iter_num_destroy))),	\
+	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+		*___p = (bpf_iter_num_new(&___it, 0, (N)),				\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */	\
+	/* for bpf_iter_num_destroy() when used from cleanup() attribute */		\
+		(void)bpf_iter_num_destroy, (void *)0);					\
+	bpf_iter_num_next(&___it);							\
+	/* nothing here */								\
+)
+#endif /* bpf_repeat */
+
 #endif
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index dc93887ed34c..fadfdd98707c 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -4,12 +4,12 @@ /*
  * Copyright 2020 Google LLC.
  */
 
-#include "bpf_misc.h"
 #include "vmlinux.h"
+#include
 #include
 #include
 #include
-#include
+#include "bpf_misc.h"
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 6c7b1fb268d6..f2e7a31c8d75 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include "bpf_misc.h"
 
 #define FUNCTION_NAME_LEN 64
 #define FILE_NAME_LEN 128
@@ -294,17 +295,22 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 	if (ctx.done)
 		return 0;
 #else
-#ifdef NO_UNROLL
+#if defined(USE_ITER)
+/* no for loop, no unrolling */
+#elif defined(NO_UNROLL)
 #pragma clang loop unroll(disable)
-#else
-#ifdef UNROLL_COUNT
+#elif defined(UNROLL_COUNT)
 #pragma clang loop unroll_count(UNROLL_COUNT)
 #else
 #pragma clang loop unroll(full)
-#endif
 #endif /* NO_UNROLL */
 	/* Unwind python stack */
+#ifdef USE_ITER
+	int i;
+	bpf_for(i, 0, STACK_MAX_LEN) {
+#else /* !USE_ITER */
 	for (int i = 0; i < STACK_MAX_LEN; ++i) {
+#endif
 		if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
 			int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
 			int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
new file mode 100644
index 000000000000..d62e1b200c30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+#define STACK_MAX_LEN 600
+#define SUBPROGS
+#define NO_UNROLL
+#define USE_ITER
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
index 6beff7502f4d..520b58c4f8db 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -2,7 +2,4 @@
 // Copyright (c) 2019 Facebook
 #define STACK_MAX_LEN 600
 #define NO_UNROLL
-/* clang will not unroll at all.
- * Total program size is around 2k insns
- */
 #include "pyperf.h"

From 57400dcce6c2cf3985120c4ee28b37a1f4238dbb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 10:41:19 -0800
Subject: [PATCH 006/260] selftests/bpf: add iterators tests

Add various tests for open-coded iterators. Some of them exercise
various possible coding patterns in C, some go down to low-level
assembly for more control over various conditions, especially invalid
ones. We also make use of the bpf_for(), bpf_for_each(), and
bpf_repeat() macros in some of these tests.
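To give a flavor of how these tests are structured, a minimal negative
case would look like the following (a hypothetical simplified example,
not one of the tests added here), using the __failure/__msg annotations
from bpf_misc.h to assert that the verifier rejects the program with
the expected message:

  SEC("?raw_tp")
  __failure __msg("Unreleased reference")
  int iter_leak_example(const void *ctx)
  {
      struct bpf_iter_num it;

      bpf_iter_num_new(&it, 0, 10);
      /* missing bpf_iter_num_destroy(&it): the verifier must flag
       * the leaked iterator reference and reject the program
       */
      return 0;
  }

Valid patterns are annotated with __success instead and must pass
verification.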
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/iters.c | 15 + tools/testing/selftests/bpf/progs/bpf_misc.h | 1 + tools/testing/selftests/bpf/progs/iters.c | 720 ++++++++++++++++++ .../selftests/bpf/progs/iters_looping.c | 163 ++++ .../selftests/bpf/progs/iters_state_safety.c | 426 +++++++++++ 5 files changed, 1325 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/iters.c create mode 100644 tools/testing/selftests/bpf/progs/iters.c create mode 100644 tools/testing/selftests/bpf/progs/iters_looping.c create mode 100644 tools/testing/selftests/bpf/progs/iters_state_safety.c diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c new file mode 100644 index 000000000000..414fb8d82145 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include + +#include "iters.skel.h" +#include "iters_state_safety.skel.h" +#include "iters_looping.skel.h" + +void test_iters(void) +{ + RUN_TESTS(iters_state_safety); + RUN_TESTS(iters_looping); + RUN_TESTS(iters); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 597688a188ae..43b154a639e7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -36,6 +36,7 @@ #define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory" #define __imm(name) [name]"i"(name) #define __imm_addr(name) [name]"i"(&name) +#define __imm_ptr(name) [name]"p"(&name) #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c new file mode 100644 index 000000000000..84e5dc10243c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include "bpf_misc.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static volatile int zero = 0; + +int my_pid; +int arr[256]; +int small_arr[16] SEC(".data.small_arr"); + +#ifdef REAL_TEST +#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 +#else +#define MY_PID_GUARD() ({ }) +#endif + +SEC("?raw_tp") +__failure __msg("math between map_value pointer and register with unbounded min value is not allowed") +int iter_err_unsafe_c_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = zero; /* obscure initial value of i */ + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 1000); + while ((v = bpf_iter_num_next(&it))) { + i++; + } + bpf_iter_num_destroy(&it); + + small_arr[i] = 123; /* invalid */ + + return 0; +} + +SEC("?raw_tp") +__failure __msg("unbounded memory access") +int iter_err_unsafe_asm_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0; + + MY_PID_GUARD(); + + asm volatile ( + "r6 = %[zero];" /* iteration counter */ + "r1 = %[it];" /* iterator state */ + "r2 = 0;" + "r3 = 1000;" + "r4 = 1;" + "call %[bpf_iter_num_new];" + "loop:" + "r1 = %[it];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto out;" + "r6 += 1;" + "goto loop;" + "out:" + "r1 = %[it];" + "call %[bpf_iter_num_destroy];" + "r1 = %[small_arr];" + "r2 = r6;" + "r2 <<= 2;" + "r1 += r2;" + "*(u32 *)(r1 + 0) = r6;" /* invalid */ + : + : [it]"r"(&it), + [small_arr]"p"(small_arr), + [zero]"p"(zero), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_common, "r6" + ); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop_auto_cleanup(const void *ctx) +{ + __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + /* (!) no explicit bpf_iter_num_destroy() */ + + return 0; +} + +SEC("raw_tp") +__success +int iter_for_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_each_macro(const void *ctx) +{ + int *v; + + MY_PID_GUARD(); + + bpf_for_each(num, v, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_macro(const void *ctx) +{ + int i; + + MY_PID_GUARD(); + + bpf_for(i, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", i); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_pragma_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? 
*v : -1); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_manual_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_multiple_sequential_loops(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_limit_cond_break_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0, sum = 0; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v); + sum += *v; + + i++; + if (i > 3) + break; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_SIMPLE: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_obfuscate_counter(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + /* Make i's initial value unknowable for verifier to prevent it from + * pruning if/else branch inside the loop body and marking i as precise. + */ + int i = zero; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + int x; + + i += 1; + + /* If we initialized i as `int i = 0;` above, verifier would + * track that i becomes 1 on first iteration after increment + * above, and here verifier would eagerly prune else branch + * and mark i as precise, ruining open-coded iterator logic + * completely, as each next iteration would have a different + * *precise* value of i, and thus there would be no + * convergence of state. This would result in reaching maximum + * instruction limit, no matter what the limit is. 
+ */ + if (i == 1) + x = 123; + else + x = i * 3 + 1; + + bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x); + + sum += x; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_search_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, *elem = NULL; + bool found = false; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SEARCH_LOOP: v=%d", *v); + + if (*v == 2) { + found = true; + elem = v; + barrier_var(elem); + } + } + + /* should fail to verify if bpf_iter_num_destroy() is here */ + + if (found) + /* here found element will be wrong, we should have copied + * value to a variable, but here we want to make sure we can + * access memory after the loop anyways + */ + bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem); + else + bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n"); + + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_array_fill(const void *ctx) +{ + int sum, i; + + MY_PID_GUARD(); + + bpf_for(i, 0, ARRAY_SIZE(arr)) { + arr[i] = i * 2; + } + + sum = 0; + bpf_for(i, 0, ARRAY_SIZE(arr)) { + sum += arr[i]; + } + + bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256); + + return 0; +} + +static int arr2d[4][5]; +static int arr2d_row_sums[4]; +static int arr2d_col_sums[5]; + +SEC("raw_tp") +__success +int iter_nested_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + } + + bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? 
"\n" : ""); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_nested_deeply_iters(const void *ctx) +{ + int sum = 0; + + MY_PID_GUARD(); + + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + sum += 1; + } + } + } + } + /* validate that we can break from inside bpf_repeat() */ + break; + } + + return sum; +} + +static __noinline void fill_inner_dimension(int row) +{ + int col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } +} + +static __noinline int sum_inner_dimension(int row) +{ + int sum = 0, col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_subprog_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + fill_inner_dimension(row); + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + sum += sum_inner_dimension(row); + } + + bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d", + row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : ""); + } + + return 0; +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1000); +} arr_map SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int iter_err_too_permissive1(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = NULL; + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive2(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive3(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + found = true; + } + + if (found) + *map_val = 123; + + return 0; +} + +SEC("raw_tp") +__success +int iter_tricky_but_fine(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (map_val) { + found = true; + break; + } + } + + if (found) + *map_val = 123; + + return 0; +} + +#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0) + +SEC("raw_tp") +__success +int iter_stack_array_loop(const void *ctx) +{ + long arr1[16], arr2[16], sum = 0; + int *v, i; + + MY_PID_GUARD(); + + /* zero-init arr1 and arr2 in such a way that verifier doesn't know + * it's all zeros; if we don't do that, we'll make BPF verifier track + * all combination of zero/non-zero 
stack slots for arr1/arr2, which + * will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different + * states + */ + __bpf_memzero(arr1, sizeof(arr1)); + __bpf_memzero(arr2, sizeof(arr1)); + + /* validate that we can break and continue when using bpf_for() */ + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + if (i & 1) { + arr1[i] = i; + continue; + } else { + arr2[i] = i; + break; + } + } + + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + sum += arr1[i] + arr2[i]; + } + + return sum; +} + +static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) +{ + int *t, i; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + arr[i] = i * mul; + } +} + +static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) +{ + int *t, i, sum = 0;; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + sum += arr[i]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_pass_iter_ptr_to_subprog(const void *ctx) +{ + int arr1[16], arr2[32]; + struct bpf_iter_num it; + int n, sum1, sum2; + + MY_PID_GUARD(); + + /* fill arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr1, n, 2); + bpf_iter_num_destroy(&it); + + /* fill arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr2, n, 10); + bpf_iter_num_destroy(&it); + + /* sum arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + sum1 = sum(&it, arr1, n); + bpf_iter_num_destroy(&it); + + /* sum arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + sum2 = sum(&it, arr2, n); + bpf_iter_num_destroy(&it); + + bpf_printk("sum1=%d, sum2=%d", sum1, sum2); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c new file mode 100644 index 000000000000..05fa5ce7fc59 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + __imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success +int consume_first_item_only(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + "r0 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R0 invalid mem access 'scalar'") +int missing_null_check_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + /* FAIL: deref with no NULL check */ + "r1 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure +__msg("invalid access to memory, mem_size=4 off=0 size=8") +__msg("R0 min value is outside of the allowed memory range") +int wrong_sized_read_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + /* FAIL: deref more than available 4 bytes */ + "r0 = *(u64 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +int simplest_loop(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + "r6 = 0;" /* init sum */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto 2f;" + "r0 = *(u32 *)(r0 + 0);" + "r6 += r0;" /* accumulate sum */ + "goto 1b;" + + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c new file mode 100644 index 000000000000..d47e59aba6de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + 
__imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int create_and_destroy(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int create_and_forget_to_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int destroy_without_creating_fail(void *ctx) +{ + /* init with zeros to stop verifier complaining about uninit stack */ + struct bpf_iter_num iter; + + asm volatile ( + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_direct_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* don't destroy iter, leaking ref, which should fail */ + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_helper_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* overwrite 8th byte with bpf_probe_read_kernel() */ + "r1 = %[iter];" + "r1 += 7;" + "r2 = 1;" + "r3 = 0;" /* NULL */ + "call %[bpf_probe_read_kernel];" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel) + : __clobber_common + ); + + return 0; +} + +static __noinline void subprog_with_iter(void) +{ + struct bpf_iter_num iter; + + bpf_iter_num_new(&iter, 0, 1); + + return; +} + +SEC("?raw_tp") +__failure +/* ensure there was a call to subprog, which might happen without __noinline */ +__msg("returning from callee:") 
+__msg("Unreleased reference id=1") +int leak_iter_from_subprog_fail(void *ctx) +{ + subprog_with_iter(); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int valid_stack_reuse(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + + /* now reuse same stack slots */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected uninitialized iter_num as arg #1") +int double_create_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* (attempt to) create iterator again */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int double_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* (attempt to) destroy iterator again */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_without_new_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_after_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid read from stack") +int __naked read_from_iter_slot_fail(void) +{ + asm volatile ( + /* r6 points to struct bpf_iter_num on the stack */ + "r6 = r10;" + "r6 += -24;" + + /* create iterator */ + "r1 = r6;" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* attemp to leak bpf_iter_num state */ + "r7 = *(u64 *)(r6 + 0);" + "r8 = *(u64 *)(r6 + 8);" + + /* destroy iterator */ + "r1 = r6;" + "call %[bpf_iter_num_destroy];" + + /* leak bpf_iter_num state */ + "r0 = r7;" + "if r7 > r8 goto +1;" + "r0 = r8;" + "exit;" + : + : ITER_HELPERS + : __clobber_common, "r6", "r7", "r8" + ); +} + +int zero; + +SEC("?raw_tp") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__msg("Unreleased reference") +int 
stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* Create a fork in logic, with general setup as follows: + * - fallthrough (first) path is valid; + * - branch (second) path is invalid. + * Then depending on what we do in fallthrough vs branch path, + * we try to detect bugs in func_states_equal(), regsafe(), + * refsafe(), stack_safe(), and similar by tricking verifier + * into believing that branch state is a valid subset of + * a fallthrough state. Verifier should reject overall + * validation, unless there is a bug somewhere in verifier + * logic. + */ + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + + "if r6 > r7 goto bad;" /* fork */ + + /* spill r6 into stack slot of bpf_iter_num var */ + "*(u64 *)(%[iter] + 0) = r6;" + + "goto skip_bad;" + + "bad:" + /* create iterator in the same stack slot */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* but then forget about it and overwrite it back to r6 spill */ + "*(u64 *)(%[iter] + 0) = r6;" + + "skip_bad:" + "goto +0;" /* force checkpoint */ + + /* corrupt stack slots, if they are really dynptr */ + "*(u64 *)(%[iter] + 0) = r6;" + : + : __imm_ptr(iter), + __imm_addr(zero), + __imm(bpf_get_prandom_u32), + __imm(bpf_dynptr_from_mem), + ITER_HELPERS + : __clobber_common, "r6", "r7" + ); + + return 0; +} From f59b146092653bcf014ccdc9bd8bc94e79065ce3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:20 -0800 Subject: [PATCH 007/260] selftests/bpf: add number iterator tests Add number iterator (bpf_iter_num_{new,next,destroy}()) tests, validating the correct handling of various corner and common cases *at runtime*. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/iters.c | 49 ++++ tools/testing/selftests/bpf/progs/iters_num.c | 242 ++++++++++++++++++ 2 files changed, 291 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/iters_num.c diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 414fb8d82145..2e7caff9523e 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -6,10 +6,59 @@ #include "iters.skel.h" #include "iters_state_safety.skel.h" #include "iters_looping.skel.h" +#include "iters_num.skel.h" + +static void subtest_num_iters(void) +{ + struct iters_num *skel; + int err; + + skel = iters_num__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_num__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_num__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty_zero); + VALIDATE_CASE(empty_int_min); + VALIDATE_CASE(empty_int_max); + VALIDATE_CASE(empty_minus_one); + + VALIDATE_CASE(simple_sum); + VALIDATE_CASE(neg_sum); + VALIDATE_CASE(very_neg_sum); + VALIDATE_CASE(neg_pos_sum); + + VALIDATE_CASE(invalid_range); + VALIDATE_CASE(max_range); + VALIDATE_CASE(e2big_range); + + VALIDATE_CASE(succ_elem_cnt); + VALIDATE_CASE(overfetched_elem_cnt); + VALIDATE_CASE(fail_elem_cnt); + +#undef VALIDATE_CASE + +cleanup: + iters_num__destroy(skel); +} void test_iters(void) { RUN_TESTS(iters_state_safety); RUN_TESTS(iters_looping); 
RUN_TESTS(iters); + + if (test__start_subtest("num")) + subtest_num_iters(); } diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c new file mode 100644 index 000000000000..7a77a8daee0d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_num.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +const volatile __s64 exp_empty_zero = 0 + 1; +__s64 res_empty_zero; + +SEC("raw_tp/sys_enter") +int num_empty_zero(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 0) sum += i; + res_empty_zero = 1 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_min = 0 + 2; +__s64 res_empty_int_min; + +SEC("raw_tp/sys_enter") +int num_empty_int_min(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN) sum += i; + res_empty_int_min = 2 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_max = 0 + 3; +__s64 res_empty_int_max; + +SEC("raw_tp/sys_enter") +int num_empty_int_max(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX, INT_MAX) sum += i; + res_empty_int_max = 3 + sum; + + return 0; +} + +const volatile __s64 exp_empty_minus_one = 0 + 4; +__s64 res_empty_minus_one; + +SEC("raw_tp/sys_enter") +int num_empty_minus_one(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -1, -1) sum += i; + res_empty_minus_one = 4 + sum; + + return 0; +} + +const volatile __s64 exp_simple_sum = 9 * 10 / 2; +__s64 res_simple_sum; + +SEC("raw_tp/sys_enter") +int num_simple_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 10) sum += i; + res_simple_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_sum = -11 * 10 / 2; +__s64 res_neg_sum; + +SEC("raw_tp/sys_enter") +int num_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -10, 0) sum += i; + res_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1); +__s64 res_very_neg_sum; + +SEC("raw_tp/sys_enter") +int num_very_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN + 2) sum += i; + res_very_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2); +__s64 res_very_big_sum; + +SEC("raw_tp/sys_enter") +int num_very_big_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX - 2, INT_MAX) sum += i; + res_very_big_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_pos_sum = -3; +__s64 res_neg_pos_sum; + +SEC("raw_tp/sys_enter") +int num_neg_pos_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -3, 3) sum += i; + res_neg_pos_sum = sum; + + return 0; +} + +const volatile __s64 exp_invalid_range = -EINVAL; +__s64 res_invalid_range; + +SEC("raw_tp/sys_enter") +int num_invalid_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_invalid_range = bpf_iter_num_new(&it, 1, 0); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_max_range = 0 + 10; +__s64 res_max_range; + +SEC("raw_tp/sys_enter") +int num_max_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_e2big_range = -E2BIG; +__s64 res_e2big_range; + +SEC("raw_tp/sys_enter") +int num_e2big_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_e2big_range = bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS); + 
bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_succ_elem_cnt = 10; +__s64 res_succ_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_succ_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v; + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + cnt++; + } + bpf_iter_num_destroy(&it); + + res_succ_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 exp_overfetched_elem_cnt = 5; +__s64 res_overfetched_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_overfetched_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 0, 5); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_overfetched_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 exp_fail_elem_cnt = 20 + 0; +__s64 res_fail_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_fail_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 100, 10); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_fail_elem_cnt = 20 + cnt; + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 7e86a8c4ac8d5dcf7dd58f5a4779d1a6ff0a827d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:21 -0800 Subject: [PATCH 008/260] selftests/bpf: implement and test custom testmod_seq iterator Implement a trivial iterator returning same specified integer value N times as part of bpf_testmod kernel module. Add selftests to validate everything works end to end. We also reuse these tests as "verification-only" tests to validate that kernel prints the state of custom kernel module-defined iterator correctly: fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0) "testmod_seq" part is an iterator type, and is coming from module's BTF data dynamically at runtime. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 42 +++++++++- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 6 ++ .../testing/selftests/bpf/prog_tests/iters.c | 42 ++++++++++ .../selftests/bpf/progs/iters_testmod_seq.c | 79 +++++++++++++++++++ 5 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/iters_testmod_seq.c diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index a02a085e7f32..34cb8b2de8ca 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -8,6 +8,7 @@ dynptr/test_dynptr_skb_data dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +iters/testmod_seq* # s390x doesn't support kfuncs in modules yet kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) 
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 46500636d8cd..5e6e85c8d77d 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -65,6 +65,34 @@ bpf_testmod_test_mod_kfunc(int i) *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } +__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) +{ + if (cnt < 0) { + it->cnt = 0; + return -EINVAL; + } + + it->value = value; + it->cnt = cnt; + + return 0; +} + +__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it) +{ + if (it->cnt <= 0) + return NULL; + + it->cnt--; + + return &it->value; +} + +__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) +{ + it->cnt = 0; +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -220,6 +248,17 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_SET8_END(bpf_testmod_common_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_testmod_common_kfunc_ids, +}; + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_SET8_END(bpf_testmod_check_kfunc_ids) @@ -235,7 +274,8 @@ static int bpf_testmod_init(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 0d71e2607832..f32793efe095 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -22,4 +22,10 @@ struct bpf_testmod_test_writable_ctx { int val; }; +/* BPF iter that returns *value* *n* times in a row */ +struct bpf_iter_testmod_seq { + s64 value; + int cnt; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 2e7caff9523e..10804ae5ae97 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -7,6 +7,7 @@ #include "iters_state_safety.skel.h" #include "iters_looping.skel.h" #include "iters_num.skel.h" +#include "iters_testmod_seq.skel.h" static void subtest_num_iters(void) { @@ -53,12 +54,53 @@ cleanup: iters_num__destroy(skel); } +static void subtest_testmod_seq_iters(void) +{ + struct iters_testmod_seq *skel; + int err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + skel = iters_testmod_seq__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_testmod_seq__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_testmod_seq__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty); + 
VALIDATE_CASE(full); + VALIDATE_CASE(truncated); + +#undef VALIDATE_CASE + +cleanup: + iters_testmod_seq__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); RUN_TESTS(iters_looping); RUN_TESTS(iters); + if (env.has_testmod) + RUN_TESTS(iters_testmod_seq); + if (test__start_subtest("num")) subtest_num_iters(); + if (test__start_subtest("testmod_seq")) + subtest_testmod_seq_iters(); } diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c new file mode 100644 index 000000000000..3873fb6c292a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +struct bpf_iter_testmod_seq { + u64 :64; + u64 :64; +}; + +extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym; +extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym; +extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym; + +const volatile __s64 exp_empty = 0 + 1; +__s64 res_empty; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_empty(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 0) sum += *i; + res_empty = 1 + sum; + + return 0; +} + +const volatile __s64 exp_full = 1000000; +__s64 res_full; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_full(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i; + res_full = sum; + + return 0; +} + +const volatile __s64 exp_truncated = 10 * 1000000; +__s64 res_truncated; + +static volatile int zero = 0; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_truncated(const void *ctx) +{ + __s64 sum = 0, *i; + int cnt = zero; + + bpf_for_each(testmod_seq, i, 10, 2000000) { + sum += *i; + cnt++; + if (cnt >= 1000000) + break; + } + res_truncated = sum; + + return 0; +} + +char _license[] SEC("license") = "GPL"; From a6865576317f6249f3f83cf4c10ab56e627ee153 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Mar 2023 22:02:44 -0800 Subject: [PATCH 009/260] selftests/bpf: Fix flaky fib_lookup test There is a report that fib_lookup test is flaky when running in parallel. A symptom of slowness or delay. An example: Testing IPv6 stale neigh set_lookup_params:PASS:inet_pton(IPV6_IFACE_ADDR) 0 nsec test_fib_lookup:PASS:bpf_prog_test_run_opts 0 nsec test_fib_lookup:FAIL:fib_lookup_ret unexpected fib_lookup_ret: actual 0 != expected 7 test_fib_lookup:FAIL:dmac not match unexpected dmac not match: actual 1 != expected 0 dmac expected 11:11:11:11:11:11 actual 00:00:00:00:00:00 [ Note that the "fib_lookup_ret unexpected fib_lookup_ret actual 0 ..." is reversed in terms of expected and actual value. Fixing in this patch also. 
] One possibility is the testing stale neigh entry was marked dead by the gc (in neigh_periodic_work). The default gc_stale_time sysctl is 60s. This patch increases it to 15 mins. It also: - fixes the reversed arg (actual vs expected) in one of the ASSERT_EQ test - removes the nodad command arg when adding v4 neigh entry which currently has a warning. Fixes: 168de0233586 ("selftests/bpf: Add bpf_fib_lookup test") Reported-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309060244.3242491-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/fib_lookup.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 429393caf612..a1e712105811 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -54,11 +54,19 @@ static int setup_netns(void) SYS(fail, "ip link add veth1 type veth peer name veth2"); SYS(fail, "ip link set dev veth1 up"); + err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)")) + goto fail; + + err = write_sysctl("/proc/sys/net/ipv6/neigh/veth1/gc_stale_time", "900"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.neigh.veth1.gc_stale_time)")) + goto fail; + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); - SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR); SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); @@ -158,7 +166,7 @@ void test_fib_lookup(void) if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) continue; - ASSERT_EQ(tests[i].expected_ret, skel->bss->fib_lookup_ret, + ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); From 5a70f4a63000ba68004fb3c1aaf2f90303dd228f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Thu, 9 Mar 2023 14:38:23 +0100 Subject: [PATCH 010/260] bpf: Fix a typo for BPF_F_ANY_ALIGNMENT in bpf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix s/BPF_PROF_LOAD/BPF_PROG_LOAD/ typo in the documentation comment for BPF_F_ANY_ALIGNMENT in bpf.h. Signed-off-by: Michael Weiß Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309133823.944097-1-michael.weiss@aisec.fraunhofer.de --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4abddb668a10..d8c534e05b0a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,7 +1108,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. 
On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4abddb668a10..d8c534e05b0a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1108,7 +1108,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and From 27a36bc3cdd5e0420eea90762d69bea34daf97e1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 18:32:40 +0100 Subject: [PATCH 011/260] selftests/bpf: Use ifname instead of ifindex in XDP compliance test tool Rely on interface name instead of interface index in error messages or logs from XDP compliance test tool. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/7dc5a8ff56c252b1a7ae29b059d0b2b1543c8b5d.1678382940.git.lorenzo@kernel.org --- tools/testing/selftests/bpf/xdp_features.c | 44 +++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index fce12165213b..b060a0d24e44 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -25,6 +25,7 @@ static struct env { bool verbosity; + char ifname[IF_NAMESIZE]; int ifindex; bool is_tester; struct { @@ -179,7 +180,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.ifindex = if_nametoindex(arg); if (!env.ifindex) env.ifindex = strtoul(arg, NULL, 0); - if (!env.ifindex) { + if (!env.ifindex || !if_indextoname(env.ifindex, env.ifname)) { fprintf(stderr, "Bad interface index or name (%d): %s\n", errno, strerror(errno)); @@ -205,6 +206,7 @@ static void set_env_default(void) env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; env.feature.action = -EINVAL; env.ifindex = -ENODEV; + strcpy(env.ifname, "unknown"); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT, @@ -248,15 +250,18 @@ static int dut_run_echo_thread(pthread_t *t, int *sockfd) sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL, DUT_ECHO_PORT, 0, 1); if (!sockfd) { - fprintf(stderr, "Failed to create echo socket\n"); + fprintf(stderr, + "Failed creating data UDP socket on device %s\n", + env.ifname); return -errno; } /* start echo channel */ err = pthread_create(t, NULL, dut_echo_thread, sockfd); if (err) { - fprintf(stderr, "Failed creating dut_echo thread: %s\n", - strerror(-err)); + fprintf(stderr, + "Failed creating data UDP thread on device %s: %s\n", + env.ifname, strerror(-err)); free_fds(sockfd, 1); return -EINVAL; } @@ -320,9 +325,8 @@ static int dut_attach_xdp_prog(struct xdp_features *skel, int flags) err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); if (err) - fprintf(stderr, - "Failed to attach XDP program to ifindex %d\n", - env.ifindex); + fprintf(stderr, "Failed attaching XDP program to device %s\n", + env.ifname); return err; } @@ -358,13 +362,16 @@ static int dut_run(struct xdp_features *skel) sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL, DUT_CTRL_PORT, 
0, 1); if (!sockfd) { - fprintf(stderr, "Failed to create DUT socket\n"); + fprintf(stderr, + "Failed creating control socket on device %s\n", env.ifname); return -errno; } ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen); if (ctrl_sockfd < 0) { - fprintf(stderr, "Failed to accept connection on DUT socket\n"); + fprintf(stderr, + "Failed accepting connections on device %s control socket\n", + env.ifname); free_fds(sockfd, 1); return -errno; } @@ -422,8 +429,8 @@ static int dut_run(struct xdp_features *skel) &opts); if (err) { fprintf(stderr, - "Failed to query XDP cap for ifindex %d\n", - env.ifindex); + "Failed querying XDP cap for device %s\n", + env.ifname); goto end_thread; } @@ -540,7 +547,9 @@ static int send_echo_msg(void) sockfd = socket(AF_INET6, SOCK_DGRAM, 0); if (sockfd < 0) { - fprintf(stderr, "Failed to create echo socket\n"); + fprintf(stderr, + "Failed creating data UDP socket on device %s\n", + env.ifname); return -errno; } @@ -596,8 +605,8 @@ static int tester_run(struct xdp_features *skel) err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); if (err) { - fprintf(stderr, "Failed to attach XDP program to ifindex %d\n", - env.ifindex); + fprintf(stderr, "Failed attaching XDP program to device %s\n", + env.ifname); goto out; } @@ -653,7 +662,7 @@ int main(int argc, char **argv) return err; if (env.ifindex < 0) { - fprintf(stderr, "Invalid ifindex\n"); + fprintf(stderr, "Invalid device name %s\n", env.ifname); return -ENODEV; } @@ -684,11 +693,12 @@ int main(int argc, char **argv) if (env.is_tester) { /* Tester */ - fprintf(stdout, "Starting tester on device %d\n", env.ifindex); + fprintf(stdout, "Starting tester service on device %s\n", + env.ifname); err = tester_run(skel); } else { /* DUT */ - fprintf(stdout, "Starting DUT on device %d\n", env.ifindex); + fprintf(stdout, "Starting test on device %s\n", env.ifname); err = dut_run(skel); } From c1cd734c1bb3f4d9db75c51c23306e29d8749783 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 18:32:41 +0100 Subject: [PATCH 012/260] selftests/bpf: Improve error logs in XDP compliance test tool Improve some error logs reported in the XDP compliance test tool. 
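
These log changes build on the previous patch, which resolves the
command-line argument into both an ifindex and a printable name. As a
rough standalone userspace sketch of that resolution step (a
hypothetical example, not part of either patch; the real logic lives
in parse_arg() above):

#include <errno.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	char ifname[IF_NAMESIZE] = "unknown";
	unsigned int ifindex;

	if (argc < 2)
		return 1;

	/* accept an interface name first, fall back to a numeric index */
	ifindex = if_nametoindex(argv[1]);
	if (!ifindex)
		ifindex = strtoul(argv[1], NULL, 0);
	/* resolve the index back to a name so later messages can use it */
	if (!ifindex || !if_indextoname(ifindex, ifname)) {
		fprintf(stderr, "Bad interface index or name (%d): %s\n",
			errno, strerror(errno));
		return 1;
	}

	printf("using device %s (ifindex %u)\n", ifname, ifindex);
	return 0;
}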
Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/212fc5bd214ff706f6ef1acbe7272cf4d803ca9c.1678382940.git.lorenzo@kernel.org --- tools/testing/selftests/bpf/xdp_features.c | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index b060a0d24e44..b449788fbd39 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -152,20 +152,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'D': if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT, &env.dut_addr, NULL)) { - fprintf(stderr, "Invalid DUT address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Device Under Test: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; case 'C': if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL)) { - fprintf(stderr, "Invalid DUT CTRL address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Device Under Test: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; case 'T': if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) { - fprintf(stderr, "Invalid Tester address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Tester device: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; @@ -454,7 +460,8 @@ static int dut_run(struct xdp_features *skel) &key, sizeof(key), &val, sizeof(val), 0); if (err) { - fprintf(stderr, "bpf_map_lookup_elem failed\n"); + fprintf(stderr, + "bpf_map_lookup_elem failed (%d)\n", err); goto end_thread; } @@ -496,7 +503,7 @@ static bool tester_collect_detected_cap(struct xdp_features *skel, err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key), &val, sizeof(val), 0); if (err) { - fprintf(stderr, "bpf_map_lookup_elem failed\n"); + fprintf(stderr, "bpf_map_lookup_elem failed (%d)\n", err); return false; } @@ -574,7 +581,8 @@ static int tester_run(struct xdp_features *skel) sockfd = socket(AF_INET6, SOCK_STREAM, 0); if (sockfd < 0) { - fprintf(stderr, "Failed to create tester socket\n"); + fprintf(stderr, + "Failed creating tester service control socket\n"); return -errno; } @@ -584,7 +592,8 @@ static int tester_run(struct xdp_features *skel) err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr, sizeof(env.dut_ctrl_addr)); if (err) { - fprintf(stderr, "Failed to connect to the DUT\n"); + fprintf(stderr, + "Failed connecting to the Device Under Test control socket\n"); return -errno; } From 63d78b7e8ca2d0eb8c687a355fa19d01b6fcc723 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 9 Mar 2023 17:24:10 -0800 Subject: [PATCH 013/260] selftests/bpf: Workaround verification failure for fexit_bpf2bpf/func_replace_return_code With latest llvm17, selftest fexit_bpf2bpf/func_replace_return_code has the following verification failure: 0: R1=ctx(off=0,imm=0) R10=fp0 ; int connect_v4_prog(struct bpf_sock_addr *ctx) 0: (bf) r7 = r1 ; R1=ctx(off=0,imm=0) R7_w=ctx(off=0,imm=0) 1: (b4) w6 = 0 ; R6_w=0 ; memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); ... ; return do_bind(ctx) ? 1 : 0; 179: (bf) r1 = r7 ; R1=ctx(off=0,imm=0) R7=ctx(off=0,imm=0) 180: (85) call pc+147 Func#3 is global and valid. Skipping. 
181: R0_w=scalar()
181: (bc) w6 = w0 ; R0_w=scalar() R6_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff))
182: (05) goto pc-129
; }
54: (bc) w0 = w6 ; R0_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff))
55: (95) exit
At program exit the register R0 has value (0x0; 0xffffffff) should have been in (0x0; 0x1)
processed 281 insns (limit 1000000) max_states_per_insn 1 total_states 26 peak_states 26 mark_read 13
-- END PROG LOAD LOG --
libbpf: prog 'connect_v4_prog': failed to load: -22

The corresponding source code:

__attribute__ ((noinline))
int do_bind(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in sa = {};

	sa.sin_family = AF_INET;
	sa.sin_port = bpf_htons(0);
	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);

	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
		return 0;

	return 1;
}
...
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
	...
	return do_bind(ctx) ? 1 : 0;
}

Insn 180 is a call to 'do_bind'. The call's return value is also the
return value for the program. Since do_bind() returns 0/1, it is
legitimate for the compiler to optimize 'return do_bind(ctx) ? 1 : 0'
to 'return do_bind(ctx)'. However, such an optimization breaks the
verifier, as the return value of 'do_bind()' is marked as an arbitrary
scalar, which violates the requirement that the prog return value be
0/1.

There are two ways to fix this problem: (1) changing 'return 1' in
do_bind() to e.g. 'return 10' so the compiler has to emit
'do_bind(ctx) ? 1 : 0', or (2) as suggested by Andrii, marking
do_bind() with the __weak attribute so the compiler cannot make any
assumptions about do_bind()'s return value.

This patch adopts the __weak approach, which is simpler and more
resistant to potential compiler optimizations.

Suggested-by: Andrii Nakryiko
Signed-off-by: Yonghong Song
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20230310012410.2920570-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/connect4_prog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index ec25371de789..7ef49ec04838 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -32,7 +32,7 @@
 #define IFNAMSIZ 16
 #endif
 
-__attribute__ ((noinline))
+__attribute__ ((noinline)) __weak
 int do_bind(struct bpf_sock_addr *ctx)
 {
 	struct sockaddr_in sa = {};

From 2498e6231bfd44f8f85afbc838b37441551a4028 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 21:40:12 -0800
Subject: [PATCH 014/260] selftests/bpf: prevent unused variable warning in bpf_for()

Add __attribute__((unused)) to the inner ___p variable inside the
bpf_for(), bpf_for_each(), and bpf_repeat() macros to avoid compiler
warnings about an unused variable.
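
The warning the attribute suppresses is easy to reproduce outside of
BPF. Below is a stripped-down userspace model of the same macro shape
(a sketch under assumed names like my_iter; it is not the selftests
macro itself): the for-init clause declares a helper pointer whose
only job is to run the init call exactly once, so it is written but
never read, and -Wall flags it in every macro expansion unless the
declaration carries the attribute.

#include <stdio.h>

struct my_iter { int cur, end, val; };

static void my_iter_init(struct my_iter *it, int start, int end)
{
	it->cur = start;
	it->end = end;
}

static int *my_iter_next(struct my_iter *it)
{
	if (it->cur >= it->end)
		return NULL;
	it->val = it->cur++;
	return &it->val;
}

/* ___p exists only so my_iter_init() runs once in the init clause;
 * it is set but never read, hence __attribute__((unused)) */
#define my_for(v, start, end)						\
	for (struct my_iter ___it,					\
	     *___p __attribute__((unused)) =				\
		(my_iter_init(&___it, (start), (end)), (void *)0);	\
	     (v = my_iter_next(&___it));				\
	     /* no increment clause; next() advances the iterator */)

int main(void)
{
	int *v, sum = 0;

	my_for(v, 1, 5)
		sum += *v;
	printf("sum = %d\n", sum); /* 1 + 2 + 3 + 4 = 10 */
	return 0;
}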
Reported-by: Tejun Heo
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230309054015.4068562-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/progs/bpf_misc.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 43b154a639e7..c95eb603403c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -115,7 +115,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;
 	struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */,	\
 						    cleanup(bpf_iter_##type##_destroy))),	\
 	/* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */	\
-	*___p = (bpf_iter_##type##_new(&___it, ##args),	\
+	*___p __attribute__((unused)) = (			\
+		bpf_iter_##type##_new(&___it, ##args),	\
 	/* this is a workaround for Clang bug: it currently doesn't emit BTF */		\
 	/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */	\
 		(void)bpf_iter_##type##_destroy, (void *)0);	\
@@ -143,7 +144,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;
 	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
 						 cleanup(bpf_iter_num_destroy))),	\
 	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */	\
-	*___p = (bpf_iter_num_new(&___it, (start), (end)),	\
+	*___p __attribute__((unused)) = (			\
+		bpf_iter_num_new(&___it, (start), (end)),	\
 	/* this is a workaround for Clang bug: it currently doesn't emit BTF */	\
 	/* for bpf_iter_num_destroy() when used from cleanup() attribute */	\
 		(void)bpf_iter_num_destroy, (void *)0);	\
@@ -167,7 +169,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;
 	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
 						 cleanup(bpf_iter_num_destroy))),	\
 	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */	\
-	*___p = (bpf_iter_num_new(&___it, 0, (N)),	\
+	*___p __attribute__((unused)) = (		\
+		bpf_iter_num_new(&___it, 0, (N)),	\
 	/* this is a workaround for Clang bug: it currently doesn't emit BTF */	\
 	/* for bpf_iter_num_destroy() when used from cleanup() attribute */	\
 		(void)bpf_iter_num_destroy, (void *)0);	\

From 713461b895ef958ef444b00cc2d979f3ca3a82e2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 8 Mar 2023 21:40:13 -0800
Subject: [PATCH 015/260] selftests/bpf: add __sink() macro to fake variable consumption

Add a __sink(expr) macro that forces the compiler to believe that the
passed-in expression is both read and written. It uses a simple
embedded asm statement for this. This is useful in a lot of tests
where we assign a value to some variable to trigger some action, but
never read the variable afterwards, causing the compiler to complain
(if the corresponding compiler warnings are turned on, which we'll do
in the next patch).
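
The hunk below adds the macro as a one-line empty asm statement. As a
standalone userspace illustration of the trick (an assumed example,
not taken from the patch): the "+g" constraint declares the operand
as read-write in a general location, so the compiler must assume the
value is consumed and cannot flag it as set-but-unused.

#include <stdio.h>

/* same trick as the selftests helper: an empty asm body whose "+g"
 * operand makes the compiler treat 'expr' as both read and written */
#define __sink(expr) asm volatile("" : "+g"(expr))

int main(void)
{
	int status = 42;	/* written to trigger some action... */

	__sink(status);		/* ...but never read again afterwards */
	printf("done\n");
	return 0;
}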
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index c95eb603403c..3c03ec8056ce 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -76,6 +76,9 @@ #define FUNC_REG_ARG_CNT 5 #endif +/* make it look to compiler like value is read and written */ +#define __sink(expr) asm volatile("" : "+g"(expr)) + struct bpf_iter_num; extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym; From c8ed66859397237c649998c58a68a86b8ea5f417 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:14 -0800 Subject: [PATCH 016/260] selftests/bpf: fix lots of silly mistakes pointed out by compiler Once we enable -Wall for BPF sources, compiler will complain about lots of unused variables, variables that are set but never read, etc. Fix all these issues first before enabling -Wall in Makefile. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/bpf_iter_ksym.c | 1 - .../selftests/bpf/progs/bpf_iter_setsockopt.c | 1 - tools/testing/selftests/bpf/progs/bpf_loop.c | 2 - tools/testing/selftests/bpf/progs/cb_refs.c | 1 - .../bpf/progs/cgroup_skb_sk_lookup_kern.c | 1 - .../selftests/bpf/progs/cgrp_kfunc_failure.c | 1 + .../bpf/progs/cgrp_ls_attach_cgroup.c | 1 - .../selftests/bpf/progs/cgrp_ls_sleepable.c | 1 - tools/testing/selftests/bpf/progs/core_kern.c | 2 +- .../selftests/bpf/progs/cpumask_failure.c | 3 ++ .../selftests/bpf/progs/cpumask_success.c | 1 - .../testing/selftests/bpf/progs/dynptr_fail.c | 5 ++- .../selftests/bpf/progs/dynptr_success.c | 5 +-- .../selftests/bpf/progs/fexit_bpf2bpf.c | 2 - .../bpf/progs/freplace_attach_probe.c | 2 +- tools/testing/selftests/bpf/progs/iters.c | 11 +++-- .../selftests/bpf/progs/linked_funcs1.c | 3 ++ .../selftests/bpf/progs/linked_funcs2.c | 3 ++ .../testing/selftests/bpf/progs/linked_list.c | 4 -- .../selftests/bpf/progs/linked_list_fail.c | 1 - .../selftests/bpf/progs/local_storage.c | 1 - tools/testing/selftests/bpf/progs/map_kptr.c | 3 -- .../testing/selftests/bpf/progs/netcnt_prog.c | 1 - .../selftests/bpf/progs/netif_receive_skb.c | 1 - .../selftests/bpf/progs/perfbuf_bench.c | 1 - tools/testing/selftests/bpf/progs/pyperf.h | 2 +- .../progs/rbtree_btf_fail__wrong_node_type.c | 11 ----- .../testing/selftests/bpf/progs/rbtree_fail.c | 3 +- .../selftests/bpf/progs/rcu_read_lock.c | 4 -- .../bpf/progs/read_bpf_task_storage_busy.c | 1 - .../selftests/bpf/progs/recvmsg4_prog.c | 2 - .../selftests/bpf/progs/recvmsg6_prog.c | 2 - .../selftests/bpf/progs/sendmsg4_prog.c | 2 - .../bpf/progs/sockmap_verdict_prog.c | 4 ++ .../testing/selftests/bpf/progs/strobemeta.h | 1 - .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 11 +++++ .../selftests/bpf/progs/tailcall_bpf2bpf6.c | 3 ++ .../selftests/bpf/progs/task_kfunc_failure.c | 1 + .../selftests/bpf/progs/task_kfunc_success.c | 6 --- .../testing/selftests/bpf/progs/test_bpf_nf.c | 1 - .../bpf/progs/test_cls_redirect_dynptr.c | 1 - .../progs/test_core_reloc_bitfields_probed.c | 1 - .../selftests/bpf/progs/test_global_func1.c | 4 ++ .../selftests/bpf/progs/test_global_func2.c | 4 ++ .../selftests/bpf/progs/test_hash_large_key.c | 2 +- 
.../bpf/progs/test_ksyms_btf_write_check.c | 1 - .../selftests/bpf/progs/test_legacy_printk.c | 2 +- .../selftests/bpf/progs/test_map_lock.c | 2 +- .../testing/selftests/bpf/progs/test_obj_id.c | 2 + .../bpf/progs/test_parse_tcp_hdr_opt.c | 1 - .../bpf/progs/test_parse_tcp_hdr_opt_dynptr.c | 2 +- .../selftests/bpf/progs/test_pkt_access.c | 5 +++ .../selftests/bpf/progs/test_ringbuf.c | 1 - .../bpf/progs/test_ringbuf_map_key.c | 1 + .../selftests/bpf/progs/test_ringbuf_multi.c | 1 - .../bpf/progs/test_select_reuseport_kern.c | 2 +- .../selftests/bpf/progs/test_sk_assign.c | 4 +- .../selftests/bpf/progs/test_sk_lookup.c | 9 +--- .../selftests/bpf/progs/test_sk_lookup_kern.c | 2 - .../selftests/bpf/progs/test_sock_fields.c | 2 +- .../selftests/bpf/progs/test_sockmap_kern.h | 14 ++++-- .../selftests/bpf/progs/test_spin_lock.c | 3 ++ .../selftests/bpf/progs/test_tc_dtime.c | 4 +- .../selftests/bpf/progs/test_tc_neigh.c | 4 +- .../selftests/bpf/progs/test_tcpbpf_kern.c | 2 - .../selftests/bpf/progs/test_tunnel_kern.c | 6 --- .../selftests/bpf/progs/test_usdt_multispec.c | 2 - .../selftests/bpf/progs/test_verif_scale1.c | 2 +- .../selftests/bpf/progs/test_verif_scale2.c | 2 +- .../selftests/bpf/progs/test_verif_scale3.c | 2 +- .../bpf/progs/test_xdp_adjust_tail_grow.c | 2 - .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 - .../selftests/bpf/progs/test_xdp_dynptr.c | 2 - .../selftests/bpf/progs/test_xdp_noinline.c | 43 ------------------- .../selftests/bpf/progs/test_xdp_vlan.c | 13 ------ tools/testing/selftests/bpf/progs/type_cast.c | 1 - tools/testing/selftests/bpf/progs/udp_limit.c | 2 - .../bpf/progs/user_ringbuf_success.c | 6 --- .../selftests/bpf/progs/xdp_features.c | 1 - .../testing/selftests/bpf/progs/xdping_kern.c | 2 - tools/testing/selftests/bpf/progs/xdpwall.c | 1 - 81 files changed, 90 insertions(+), 187 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 9ba14c37bbcc..5ddcc46fd886 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -33,7 +33,6 @@ int dump_ksym(struct bpf_iter__ksym *ctx) __u32 seq_num = ctx->meta->seq_num; unsigned long value; char type; - int ret; if (!iter) return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index b77adfd55d73..ec7f91850dec 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -42,7 +42,6 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx) char cur_cc[TCP_CA_NAME_MAX]; struct tcp_sock *tp; struct sock *sk; - int ret; if (!bpf_tcp_sk(ctx->sk_common)) return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index de1fc82d2710..1d194455b109 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -138,8 +138,6 @@ static int callback_set_0f(int i, void *ctx) SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_non_constant_callback(void *ctx) { - struct callback_ctx data = {}; - if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index ce96b33e38d6..50f95ec61165 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -52,7 +52,6 @@ int leak_prog(void *ctx) { struct 
prog_test_ref_kfunc *p; struct map_value *v; - unsigned long sl; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 88638315c582..ac86a8a61605 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -66,7 +66,6 @@ static inline int is_allowed_peer_cg(struct __sk_buff *skb, SEC("cgroup_skb/ingress") int ingress_lookup(struct __sk_buff *skb) { - __u32 serv_port_key = 0; struct ipv6hdr ip6h; struct tcphdr tcph; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index b42291ed9586..807fb0ac41e9 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -109,6 +109,7 @@ int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *pat acquired = bpf_cgroup_acquire(cgrp); /* Acquired cgroup is never released. */ + __sink(acquired); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c index 6652d18465b2..8aeba1b75c83 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -84,7 +84,6 @@ int BPF_PROG(update_cookie_tracing, struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct socket_cookie *p; - struct tcp_sock *tcp_sk; if (uaddr->sa_family != AF_INET6) return 0; diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 7615dc23d301..4c7844e1dbfa 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -24,7 +24,6 @@ void bpf_rcu_read_unlock(void) __ksym; SEC("?iter.s/cgroup") int cgroup_iter(struct bpf_iter__cgroup *ctx) { - struct seq_file *seq = ctx->meta->seq; struct cgroup *cgrp = ctx->cgroup; long *ptr; diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c index 2715fe27d4cf..004f2acef2eb 100644 --- a/tools/testing/selftests/bpf/progs/core_kern.c +++ b/tools/testing/selftests/bpf/progs/core_kern.c @@ -77,7 +77,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index c16f7563b84e..cfe83f0ef9e2 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -23,6 +23,7 @@ int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags) struct bpf_cpumask *cpumask; cpumask = create_cpumask(); + __sink(cpumask); /* cpumask is never released. */ return 0; @@ -51,6 +52,7 @@ int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_fla /* Can't acquire a non-struct bpf_cpumask. */ cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr); + __sink(cpumask); return 0; } @@ -63,6 +65,7 @@ int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags) /* Can't set the CPU of a non-struct bpf_cpumask. 
*/ bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr); + __sink(cpumask); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 1d38bc65d4b0..97ed08c4ff03 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -353,7 +353,6 @@ SEC("tp_btf/task_newtask") int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; cpumask = create_cpumask(); if (!cpumask) diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 20ce920d891d..759eb5c245cd 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -271,7 +271,7 @@ SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) { - __u32 key = 0, map_val; + __u32 map_val; struct bpf_dynptr ptr; void *data; @@ -388,7 +388,6 @@ int data_slice_missing_null_check2(void *ctx) /* this should fail */ *data2 = 3; -done: bpf_ringbuf_discard_dynptr(&ptr, 0); return 0; } @@ -440,6 +439,7 @@ int invalid_write1(void *ctx) /* this should fail */ data = bpf_dynptr_data(&ptr, 0, 1); + __sink(data); return 0; } @@ -1374,6 +1374,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) * changing packet data */ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + __sink(hdr); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index c8358a7c7924..b2fa6c47ecc0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -35,7 +35,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep") int test_read_write(void *ctx) { char write_data[64] = "hello there, world!!"; - char read_data[64] = {}, buf[64] = {}; + char read_data[64] = {}; struct bpf_dynptr ptr; int i; @@ -170,7 +170,6 @@ int test_skb_readonly(struct __sk_buff *skb) { __u8 write_data[2] = {1, 2}; struct bpf_dynptr ptr; - __u64 *data; int ret; if (bpf_dynptr_from_skb(skb, 0, &ptr)) { @@ -191,10 +190,8 @@ int test_skb_readonly(struct __sk_buff *skb) SEC("?cgroup_skb/egress") int test_dynptr_skb_data(struct __sk_buff *skb) { - __u8 write_data[2] = {1, 2}; struct bpf_dynptr ptr; __u64 *data; - int ret; if (bpf_dynptr_from_skb(skb, 0, &ptr)) { err = 1; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 4547b059d487..983b7c233382 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -120,8 +120,6 @@ int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var) void *data = (void *)(long)skb->data; struct ipv6hdr ip6, *ip6p; int ifindex = skb->ifindex; - __u32 eth_proto; - __u32 nh_off; /* check that BPF extension can read packet via direct packet access */ if (data + 14 + sizeof(ip6) > data_end) diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c index bb2a77c5b62b..370a0e1922e0 100644 --- a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -23,7 +23,7 @@ struct { SEC("freplace/handle_kprobe") int new_handle_kprobe(struct pt_regs *ctx) { - struct hmap_elem zero = {}, 
*val; + struct hmap_elem *val; int key = 0; val = bpf_map_lookup_elem(&hash_map, &key); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 84e5dc10243c..6b9b3c56f009 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -45,7 +45,6 @@ __failure __msg("unbounded memory access") int iter_err_unsafe_asm_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i = 0; MY_PID_GUARD(); @@ -88,7 +87,7 @@ __success int iter_while_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -106,7 +105,7 @@ __success int iter_while_loop_auto_cleanup(const void *ctx) { __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -124,7 +123,7 @@ __success int iter_for_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -192,7 +191,7 @@ __success int iter_manual_unroll_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -621,7 +620,7 @@ __success int iter_stack_array_loop(const void *ctx) { long arr1[16], arr2[16], sum = 0; - int *v, i; + int i; MY_PID_GUARD(); diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index b05571bc67d5..c4b49ceea967 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" /* weak and shared between two files */ const volatile int my_tid __weak; @@ -51,6 +52,7 @@ __weak int set_output_weak(int x) * cause problems for BPF static linker */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); output_weak1 = x; return x; @@ -71,6 +73,7 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id) /* make sure we have CO-RE relocations in main program */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); set_output_val2(1000); set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index ee7e3848ee4f..013ff0645f0c 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" /* weak and shared between both files */ const volatile int my_tid __weak; @@ -51,6 +52,7 @@ __weak int set_output_weak(int x) * cause problems for BPF static linker */ whatever = 2 * bpf_core_type_size(struct task_struct); + __sink(whatever); output_weak2 = x; return 2 * x; @@ -71,6 +73,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) /* make sure we have CO-RE relocations in main program */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); set_output_val1(2000); set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 4fa4a9b01bde..53ded51a3abb 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -313,7 +313,6 @@ SEC("tc") int map_list_push_pop_multiple(void *ctx) { struct map_value *v; - int ret; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) @@ -326,7 +325,6 @@ int inner_map_list_push_pop_multiple(void *ctx) { struct map_value *v; void *map; - 
int ret; map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); if (!map) @@ -352,7 +350,6 @@ SEC("tc") int map_list_in_list(void *ctx) { struct map_value *v; - int ret; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) @@ -365,7 +362,6 @@ int inner_map_list_in_list(void *ctx) { struct map_value *v; void *map; - int ret; map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); if (!map) diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 69cdc07cba13..41978b46f58e 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -557,7 +557,6 @@ SEC("?tc") int incorrect_head_off2(void *ctx) { struct foo *f; - struct bar *b; f = bpf_obj_new(typeof(*f)); if (!f) diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 19423ed862e3..01c74bc870ae 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -77,7 +77,6 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; int err; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 3903d30217b8..dae5dab1bbf7 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -515,7 +515,6 @@ int test_ls_map_kptr_ref1(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) @@ -534,7 +533,6 @@ int test_ls_map_kptr_ref2(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) @@ -550,7 +548,6 @@ int test_ls_map_kptr_ref_del(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index f718b2c212dc..f9ef8aee56f1 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -26,7 +26,6 @@ SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { union percpu_net_cnt *percpu_cnt; - char fmt[] = "%d %llu %llu\n"; union net_cnt *cnt; __u64 ts, dt; int ret; diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index 1d8918dfbd3f..c0062645fc68 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -53,7 +53,6 @@ static int __strncmp(const void *m1, const void *m2, size_t len) do { \ static const char _expectedval[EXPECTED_STRSIZE] = \ _expected; \ - static const char _ptrtype[64] = #_type; \ __u64 _hflags = _flags | BTF_F_COMPACT; \ static _type _ptrdata = __VA_ARGS__; \ static struct btf_ptr _ptr = { }; \ diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c index 45204fe0c570..29c1639fc78a 100644 --- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c @@ -22,7 +22,6 @@ long dropped __attribute__((aligned(128))) = 0; SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_perfbuf(void *ctx) { - __u64 
*sample; int i; for (i = 0; i < batch_cnt; i++) { diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index f2e7a31c8d75..026d573ce179 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -345,7 +345,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { - int i, ret = 0; + int ret = 0; ret |= __on_event(ctx); ret |= __on_event(ctx); ret |= __on_event(ctx); diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c index 340f97da1084..7651843f5a80 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c +++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c @@ -16,17 +16,6 @@ struct node_data { struct bpf_list_node node; }; -static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) -{ - struct node_data *node_a; - struct node_data *node_b; - - node_a = container_of(a, struct node_data, node); - node_b = container_of(b, struct node_data, node); - - return node_a->key < node_b->key; -} - #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_data, node); diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 1ced900f3fce..46d7d18a218f 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=2 alloc_insn=11") +__failure __msg("Unreleased reference id=2 alloc_insn=10") long rbtree_api_remove_no_drop(void *ctx) { struct bpf_rb_node *res; @@ -119,6 +119,7 @@ long rbtree_api_remove_no_drop(void *ctx) res = bpf_rbtree_remove(&groot, res); n = container_of(res, struct node_data, node); + __sink(n); bpf_spin_unlock(&glock); /* bpf_obj_drop(n) is missing here */ diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 7250bb76d18a..6a8c88e58df2 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -179,8 +179,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int miss_lock(void *ctx) { struct task_struct *task; - struct css_set *cgroups; - struct cgroup *dfl_cgrp; /* missing bpf_rcu_read_lock() */ task = bpf_get_current_task_btf(); @@ -195,8 +193,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int miss_unlock(void *ctx) { struct task_struct *task; - struct css_set *cgroups; - struct cgroup *dfl_cgrp; /* missing bpf_rcu_read_unlock() */ task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c index a47bb0120719..76556e0b42b2 100644 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -23,7 +23,6 @@ SEC("raw_tp/sys_enter") int BPF_PROG(read_bpf_task_storage_busy) { int *value; - int key; if (!CONFIG_PREEMPT) return 0; diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c index 3d1ae8b3402f..59748c95471a 100644 --- 
a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c @@ -17,8 +17,6 @@ SEC("cgroup/recvmsg4") int recvmsg4_prog(struct bpf_sock_addr *ctx) { struct bpf_sock *sk; - __u32 user_ip4; - __u16 user_port; sk = ctx->sk; if (!sk) diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c index 27dfb21b21b4..d9a4016596d5 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c @@ -20,8 +20,6 @@ SEC("cgroup/recvmsg6") int recvmsg6_prog(struct bpf_sock_addr *ctx) { struct bpf_sock *sk; - __u32 user_ip4; - __u16 user_port; sk = ctx->sk; if (!sk) diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index ea75a44cb7fc..351e79aef2fa 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -21,8 +21,6 @@ SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { - int prio; - if (ctx->type != SOCK_DGRAM) return 0; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index e2468a6d01a5..0660f29dca95 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -1,6 +1,7 @@ #include #include #include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); @@ -40,6 +41,9 @@ int bpf_prog2(struct __sk_buff *skb) __u8 *d = data; __u8 sk, map; + __sink(lport); + __sink(rport); + if (data + 8 > data_end) return SK_DROP; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e562be6356f3..e02cfd380746 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -391,7 +391,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_raw map; void *location; uint64_t len; - int i; descr->tag_len = 0; /* presume no tag is set */ descr->cnt = -1; /* presume no value is set */ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c index 7fab39a3bb12..99c8d1d8a187 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -2,6 +2,7 @@ #include #include #include "bpf_legacy.h" +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -20,6 +21,8 @@ int subprog_tail2(struct __sk_buff *skb) else bpf_tail_call_static(skb, &jmp_table, 1); + __sink(arr[sizeof(arr) - 1]); + return skb->len; } @@ -30,6 +33,8 @@ int subprog_tail(struct __sk_buff *skb) bpf_tail_call_static(skb, &jmp_table, 0); + __sink(arr[sizeof(arr) - 1]); + return skb->len * 2; } @@ -38,6 +43,8 @@ int classifier_0(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return subprog_tail2(skb); } @@ -46,6 +53,8 @@ int classifier_1(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return skb->len * 3; } @@ -54,6 +63,8 @@ int entry(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c index 41ce83da78e8..4a9f63bea66c 100644 --- 
a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include "bpf_misc.h" #define __unused __attribute__((unused)) @@ -36,6 +37,8 @@ int entry(struct __sk_buff *skb) /* Have data on stack which size is not a multiple of 8 */ volatile char arr[1] = {}; + __sink(arr[0]); + return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index f19d54eda4f1..002c7f69e47f 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -109,6 +109,7 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_ acquired = bpf_task_acquire(task); /* Acquired task is never released. */ + __sink(acquired); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 9f359cfd29e7..aebc4bb14e7d 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -171,8 +171,6 @@ static void lookup_compare_pid(const struct task_struct *p) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) { - struct task_struct *acquired; - if (!is_test_kfunc_task()) return 0; @@ -183,8 +181,6 @@ int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags) { - struct task_struct *current, *acquired; - if (!is_test_kfunc_task()) return 0; @@ -208,8 +204,6 @@ static int is_pid_lookup_valid(s32 pid) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags) { - struct task_struct *acquired; - if (!is_test_kfunc_task()) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 9fc603c9d673..77ad8adf68da 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -75,7 +75,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; - int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index f45a7095de7a..f41c81212ee9 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -455,7 +455,6 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynpt static ret_t skip_next_hops(__u64 *offset, int n) { - __u32 res; switch (n) { case 1: *offset += sizeof(struct in_addr); diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c index ab1e647aeb31..b86fdda2a6ea 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c @@ -42,7 +42,6 @@ int test_core_bitfields(void *ctx) { struct core_reloc_bitfields *in = (void *)&data.in; 
struct core_reloc_bitfields_output *out = (void *)&data.out; - uint64_t res; out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1); out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2); diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 23970a20b324..b85fc8c423ba 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return f0(0, skb) + skb->len; } @@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c index 3dce97fb52a4..2beab9c3b68a 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func2.c +++ b/tools/testing/selftests/bpf/progs/test_global_func2.c @@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return f0(0, skb) + skb->len; } @@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c index 473a22794a62..8b438128f46b 100644 --- a/tools/testing/selftests/bpf/progs/test_hash_large_key.c +++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c @@ -28,7 +28,7 @@ struct bigelement { SEC("raw_tracepoint/sys_enter") int bpf_hash_large_key_test(void *ctx) { - int zero = 0, err = 1, value = 42; + int zero = 0, value = 42; struct bigelement *key; key = bpf_map_lookup_elem(&key_map, &zero); diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c index a72a5bf3812a..27109b877714 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c @@ -35,7 +35,6 @@ SEC("raw_tp/sys_enter") int handler2(const void *ctx) { int *active; - __u32 cpu; active = bpf_this_cpu_ptr(&bpf_prog_active); write_active(active); diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c index 64c2d9ced529..42718cd8e6a4 100644 --- a/tools/testing/selftests/bpf/progs/test_legacy_printk.c +++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c @@ -56,7 +56,7 @@ int handle_legacy(void *ctx) SEC("tp/raw_syscalls/sys_enter") int handle_modern(void *ctx) { - int zero = 0, cur_pid; + int cur_pid; cur_pid = bpf_get_current_pid_tgid() >> 32; if (cur_pid != my_pid_var) diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index acf073db9e8b..1c02511b73cd 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -33,7 +33,7 @@ struct { SEC("cgroup/skb") int bpf_map_lock_test(struct __sk_buff *skb) { - struct hmap_elem zero = {}, *val; + struct hmap_elem *val; int rnd = bpf_get_prandom_u32(); int key = 0, err = 1, i; struct array_elem *q; diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 
ded71b3ff6b4..2850ae788a91 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -4,6 +4,7 @@ #include #include #include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -19,6 +20,7 @@ int test_obj_id(void *ctx) __u64 *value; value = bpf_map_lookup_elem(&test_map_id, &key); + __sink(value); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c index 79bab9b50e9e..d9b2ba7ac340 100644 --- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c @@ -87,7 +87,6 @@ int xdp_ingress_v6(struct xdp_md *xdp) __u8 tcp_hdr_opt_len = 0; struct tcphdr *tcp_hdr; __u64 tcp_offset = 0; - __u32 off; int err; tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c index d3b319722e30..dc6e43bc6a62 100644 --- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c @@ -30,7 +30,7 @@ __u32 server_id; static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining, __u32 *server_id) { - __u8 *tcp_opt, kind, hdr_len; + __u8 kind, hdr_len; __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)]; __u8 *data; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 5cd7c096f62d..bce7173152c6 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -13,6 +13,7 @@ #include #include #include +#include "bpf_misc.h" /* llvm will optimize both subprograms into exactly the same BPF assembly * @@ -51,6 +52,8 @@ int get_skb_len(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->len; } @@ -73,6 +76,8 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 5bdc0d38efc0..501cefa97633 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -41,7 +41,6 @@ int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; struct sample *sample; - int zero = 0; if (cur_pid != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c index 2760bf60d05a..21bb7da90ea5 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -53,6 +53,7 @@ int test_ringbuf_mem_map_key(void *ctx) /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg */ lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + __sink(lookup_val); /* workaround - memcpy is necessary so that verifier doesn't * complain with: diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index e416e0ce12b7..9626baa6779c 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ 
-59,7 +59,6 @@ int test_ringbuf(void *ctx) int cur_pid = bpf_get_current_pid_tgid() >> 32; struct sample *sample; void *rb; - int zero = 0; if (cur_pid != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 7d56ed47cd4d..5eb25c6ad75b 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -64,7 +64,7 @@ SEC("sk_reuseport") int _select_by_skb_data(struct sk_reuseport_md *reuse_md) { __u32 linum, index = 0, flags = 0, index_zero = 0; - __u32 *result_cnt, *linum_value; + __u32 *result_cnt; struct data_check data_check = {}; struct cmd *cmd, cmd_copy; void *data, *data_end; diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 21b19b758c4e..3079244c7f96 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -15,6 +15,7 @@ #include #include #include +#include "bpf_misc.h" #if defined(IPROUTE2_HAVE_LIBBPF) /* Use a new-style map definition. */ @@ -57,7 +58,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) void *data = (void *)(long)skb->data; struct bpf_sock_tuple *result; struct ethhdr *eth; - __u64 tuple_len; __u8 proto = 0; __u64 ihl_len; @@ -94,6 +94,7 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) return NULL; *tcp = (proto == IPPROTO_TCP); + __sink(ihl_len); return result; } @@ -173,7 +174,6 @@ int bpf_sk_assign_test(struct __sk_buff *skb) struct bpf_sock_tuple *tuple; bool ipv4 = false; bool tcp = false; - int tuple_len; int ret = 0; tuple = get_tuple(skb, &ipv4, &tcp); diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 6058dcb11b36..71f844b9b902 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -391,7 +391,6 @@ SEC("sk_lookup") int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; - int err, family; __u32 val_u32; bool v4; @@ -645,9 +644,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) SEC("sk_lookup") int multi_prog_redir1(struct bpf_sk_lookup *ctx) { - int ret; - - ret = select_server_a(ctx); + (void)select_server_a(ctx); bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_PASS; } @@ -655,9 +652,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx) SEC("sk_lookup") int multi_prog_redir2(struct bpf_sk_lookup *ctx) { - int ret; - - ret = select_server_a(ctx); + (void)select_server_a(ctx); bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index 6ccf6d546074..e9efc3263022 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -110,7 +110,6 @@ int err_modify_sk_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - __u32 family; sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { @@ -125,7 +124,6 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - __u32 family; sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; diff --git 
a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 9f4b8f9f1181..bbad3c2d9aa5 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -121,7 +121,7 @@ static void tpcpy(struct bpf_tcp_sock *dst, SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { - struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; + struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F }; struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 6c85b00f27b2..baf9ebc6d903 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -14,6 +14,7 @@ #include #include #include +#include "bpf_misc.h" /* Sockmap sample program connects a client and a backend together * using cgroups. @@ -111,12 +112,15 @@ int bpf_prog2(struct __sk_buff *skb) int len, *f, ret, zero = 0; __u64 flags = 0; + __sink(rport); if (lport == 10000) ret = 10; else ret = 1; len = (__u32)skb->data_end - (__u32)skb->data; + __sink(len); + f = bpf_map_lookup_elem(&sock_skb_opts, &zero); if (f && *f) { ret = 3; @@ -180,7 +184,6 @@ int bpf_prog3(struct __sk_buff *skb) if (err) return SK_DROP; bpf_write_pass(skb, 13); -tls_out: return ret; } @@ -188,8 +191,7 @@ SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) { __u32 lport, rport; - int op, err = 0, index, key, ret; - + int op, err, ret; op = (int) skops->op; @@ -228,6 +230,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) break; } + __sink(err); + return 0; } @@ -321,6 +325,10 @@ int bpf_prog8(struct sk_msg_md *msg) } else { return SK_DROP; } + + __sink(data_end); + __sink(data); + return SK_PASS; } SEC("sk_msg4") diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 5bd10409285b..b2440a0ff422 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -3,6 +3,7 @@ #include #include #include +#include "bpf_misc.h" struct hmap_elem { volatile int cnt; @@ -89,6 +90,8 @@ int bpf_spin_lock_test(struct __sk_buff *skb) credit = q->credit; bpf_spin_unlock(&q->lock); + __sink(credit); + /* spin_lock in cgroup local storage */ cls = bpf_get_local_storage(&cls_map, 0); bpf_spin_lock(&cls->lock); diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 125beec31834..74ec09f040b7 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -163,9 +163,9 @@ static int skb_get_type(struct __sk_buff *skb) ip6h = data + sizeof(struct ethhdr); if (ip6h + 1 > data_end) return -1; - if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src)) + if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}})) ns = SRC_NS; - else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) + else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}})) ns = DST_NS; inet_proto = ip6h->nexthdr; trans = ip6h + 1; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 3e32ea375ab4..de15155f2609 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ 
b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -94,7 +94,7 @@ int tc_dst(struct __sk_buff *skb) redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); break; case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); + redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_src}}); break; } @@ -119,7 +119,7 @@ int tc_src(struct __sk_buff *skb) redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); break; case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); + redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_dst}}); break; } diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 3ded05280757..cf7ed8cbb1fe 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -46,8 +46,6 @@ int bpf_testcb(struct bpf_sock_ops *skops) struct bpf_sock_ops *reuse = skops; struct tcphdr *thdr; int window_clamp = 9216; - int good_call_rv = 0; - int bad_call_rv = 0; int save_syn = 1; int rv = -1; int v = 0; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 95b4aa0928ba..9ab2d55ab7c0 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -209,7 +209,6 @@ int erspan_get_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; - __u32 index; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); @@ -289,7 +288,6 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; - __u32 index; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), @@ -405,8 +403,6 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; - __u32 orig_daddr; - __u32 index = 0; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_FLAGS); @@ -443,9 +439,7 @@ int veth_set_outer_dst(struct __sk_buff *skb) void *data_end = (void *)(long)skb->data_end; struct udphdr *udph; struct iphdr *iph; - __u32 index = 0; int ret = 0; - int shrink; __s64 csum; if ((void *)eth + sizeof(*eth) > data_end) { diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c index aa6de32b50d1..962f3462066a 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c +++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c @@ -18,8 +18,6 @@ int usdt_100_sum; SEC("usdt//proc/self/exe:test:usdt_100") int BPF_USDT(usdt_100, int x) { - long tmp; - if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c index ac6135d9374c..323a73fb2e8c 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index f90ffcafd1e8..f5318f757084 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ 
b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c index ca33a9b711c4..2e06dbb1ad5c 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 32; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 297c260fc364..81bb38d72ced 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -5,8 +5,6 @@ SEC("xdp") int _xdp_adjust_tail_grow(struct xdp_md *xdp) { - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; int data_len = bpf_xdp_get_buff_len(xdp); int offset = 0; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 3379d303f41a..ee48c4963971 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -45,8 +45,6 @@ SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { struct meta meta; - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; meta.ifindex = xdp->rxq->dev->ifindex; meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 7521a805b506..25ee4a22e48d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -82,7 +82,6 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xd struct iptnl_info *tnl; struct ethhdr *new_eth; struct ethhdr *old_eth; - __u32 transport_hdr_sz; struct iphdr *iph; __u16 *next_iph; __u16 payload_len; @@ -165,7 +164,6 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xd struct iptnl_info *tnl; struct ethhdr *new_eth; struct ethhdr *old_eth; - __u32 transport_hdr_sz; struct ipv6hdr *ip6h; __u16 payload_len; struct vip vip = {}; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index ba48fcb98ab2..42c8f6ded0e4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -371,45 +371,6 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, return true; } -static __attribute__ ((noinline)) -bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) -{ - struct eth_hdr *new_eth; - struct eth_hdr *old_eth; - - old_eth = *data; - new_eth = *data + sizeof(struct ipv6hdr); - memcpy(new_eth->eth_source, old_eth->eth_source, 6); - memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); - if (inner_v4) - new_eth->eth_proto = 8; - else - new_eth->eth_proto = 56710; - if 
(bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return false; - *data = (void *)(long)xdp->data; - *data_end = (void *)(long)xdp->data_end; - return true; -} - -static __attribute__ ((noinline)) -bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) -{ - struct eth_hdr *new_eth; - struct eth_hdr *old_eth; - - old_eth = *data; - new_eth = *data + sizeof(struct iphdr); - memcpy(new_eth->eth_source, old_eth->eth_source, 6); - memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); - new_eth->eth_proto = 8; - if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return false; - *data = (void *)(long)xdp->data; - *data_end = (void *)(long)xdp->data_end; - return true; -} - static __attribute__ ((noinline)) int swap_mac_and_send(void *data, void *data_end) { @@ -430,7 +391,6 @@ int send_icmp_reply(void *data, void *data_end) __u16 *next_iph_u16; __u32 tmp_addr = 0; struct iphdr *iph; - __u32 csum1 = 0; __u32 csum = 0; __u64 off = 0; @@ -662,7 +622,6 @@ static int process_l3_headers_v4(struct packet_description *pckt, void *data_end) { struct iphdr *iph; - __u64 iph_len; int action; iph = data + off; @@ -696,7 +655,6 @@ static int process_packet(void *data, __u64 off, void *data_end, struct packet_description pckt = { }; struct vip_definition vip = { }; struct lb_stats *data_stats; - struct eth_hdr *eth = data; void *lru_map = &lru_cache; struct vip_meta *vip_info; __u32 lru_stats_key = 513; @@ -704,7 +662,6 @@ static int process_packet(void *data, __u64 off, void *data_end, __u32 stats_key = 512; struct ctl_value *cval; __u16 pkt_bytes; - __u64 iph_len; __u8 protocol; __u32 vip_num; int action; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 4ddcb6dfe500..f3ec8086482d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -209,19 +209,6 @@ int xdp_prognum2(struct xdp_md *ctx) return XDP_PASS; } -static __always_inline -void shift_mac_4bytes_16bit(void *data) -{ - __u16 *p = data; - - p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ - p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ - p[5] = p[3]; - p[4] = p[2]; - p[3] = p[1]; - p[2] = p[0]; -} - static __always_inline void shift_mac_4bytes_32bit(void *data) { diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c index eb78e6f03129..a9629ac230fd 100644 --- a/tools/testing/selftests/bpf/progs/type_cast.c +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -63,7 +63,6 @@ SEC("?tp_btf/sys_enter") int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) { struct task_struct *task, *task_dup; - long *ptr; task = bpf_get_current_task_btf(); task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c index 165e3c2dd9a3..4767451b59ac 100644 --- a/tools/testing/selftests/bpf/progs/udp_limit.c +++ b/tools/testing/selftests/bpf/progs/udp_limit.c @@ -17,7 +17,6 @@ SEC("cgroup/sock_create") int sock(struct bpf_sock *ctx) { int *sk_storage; - __u32 key; if (ctx->type != SOCK_DGRAM) return 1; @@ -46,7 +45,6 @@ SEC("cgroup/sock_release") int sock_release(struct bpf_sock *ctx) { int *sk_storage; - __u32 key; if (ctx->type != SOCK_DGRAM) return 1; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c index 0ade1110613b..dd3bdf672633 100644 --- 
a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -162,8 +162,6 @@ SEC("fentry/" SYS_PREFIX "sys_prctl") int test_user_ringbuf_protocol(void *ctx) { long status = 0; - struct sample *sample = NULL; - struct bpf_dynptr ptr; if (!is_test_process()) return 0; @@ -183,10 +181,6 @@ int test_user_ringbuf_protocol(void *ctx) SEC("fentry/" SYS_PREFIX "sys_getpgid") int test_user_ringbuf(void *ctx) { - int status = 0; - struct sample *sample = NULL; - struct bpf_dynptr ptr; - if (!is_test_process()) return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c index 87c247d56f72..67424084a38a 100644 --- a/tools/testing/selftests/bpf/progs/xdp_features.c +++ b/tools/testing/selftests/bpf/progs/xdp_features.c @@ -70,7 +70,6 @@ xdp_process_echo_packet(struct xdp_md *xdp, bool dut) struct tlv_hdr *tlv; struct udphdr *uh; __be16 port; - __u8 *cmd; if (eh + 1 > (struct ethhdr *)data_end) return -EINVAL; diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 4ad73847b8a5..54cf1765118b 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -89,7 +89,6 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type) SEC("xdp") int xdping_client(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct pinginfo *pinginfo = NULL; struct ethhdr *eth = data; @@ -153,7 +152,6 @@ int xdping_client(struct xdp_md *ctx) SEC("xdp") int xdping_server(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; struct icmphdr *icmph; diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c index 7a891a0c3a39..c2dd0c28237a 100644 --- a/tools/testing/selftests/bpf/progs/xdpwall.c +++ b/tools/testing/selftests/bpf/progs/xdpwall.c @@ -321,7 +321,6 @@ int edgewall(struct xdp_md *ctx) void *data = (void *)(long)(ctx->data); struct fw_match_info match_info = {}; struct pkt_info info = {}; - __u8 parse_err = NO_ERR; void *transport_hdr; struct ethhdr *eth; bool filter_res; From 3d5a55ddc25508fe950991603d0224c0bba60558 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:15 -0800 Subject: [PATCH 017/260] selftests/bpf: make BPF compiler flags stricter We recently added -Wuninitialized, but it's not enough to catch various silly mistakes or omissions. Let's go all the way to -Wall, just like we do for user-space code. 
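For reference, the __sink() calls added in the selftest diffs above rely on a small helper from the newly included bpf_misc.h; a minimal sketch of such a macro (the exact in-tree definition may differ) is:

	/* Make the compiler treat 'expr' as used without emitting any real
	 * code, so -Wall's set-but-unused warnings stay quiet while the
	 * access itself (which the test relies on) is kept alive.
	 */
	#define __sink(expr) asm volatile("" : "+g"(expr))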
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 16f404aa1b23..606e2d738dd8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -352,12 +352,12 @@ CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ +BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types -Wuninitialized + -Wno-compare-distinct-pointer-types $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline From 4b5ce570dbef57a20acdd71b0c65376009012354 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Mar 2023 22:01:49 -0800 Subject: [PATCH 018/260] bpf: ensure state checkpointing at iter_next() call sites The state equivalence check and checkpointing performed in is_state_visited() employ certain heuristics to try to save memory by avoiding state checkpoints if not enough jumps and instructions happened since the last checkpoint. This makes it unpredictable whether a particular instruction will be checkpointed and how regularly. While normally this does not cause many problems (except inconveniences for predictable verifier tests, which we overcome with the BPF_F_TEST_STATE_FREQ flag), it turns out that's not the case for open-coded iterators. Checking and saving state checkpoints at each iter_next() call is crucial for fast convergence of open-coded iterator loop logic, so we need to force it. If we don't do that, is_state_visited() might skip saving a checkpoint, causing an unnecessarily long sequence of non-checkpointed instructions and jumps, leading to exhaustion of the jump history buffer, and potentially other undesired outcomes. It is expected that with correct open-coded iterators convergence will happen quickly, so we don't run a risk of exhausting memory. This patch adds, in addition to the prune and jump instruction marks, a "forced checkpoint" mark, and makes sure that any iter_next() call instruction is marked as such.
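To make the convergence argument concrete, here is a hedged sketch of the kind of open-coded iterator loop this change targets; the bpf_iter_num_* declarations mirror the iterator kfuncs registered earlier in this series, while the section name, program name, and loop bound are purely illustrative:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	/* assumed declarations, per the iterator patches in this series */
	extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
	extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
	extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

	char _license[] SEC("license") = "GPL";

	SEC("raw_tp/sys_enter")
	int iter_sum(const void *ctx)
	{
		struct bpf_iter_num it;
		int *v, sum = 0;

		bpf_iter_num_new(&it, 0, 1000);
		/* each bpf_iter_num_next() call site is now a forced
		 * checkpoint, so looping verifier states get compared and
		 * converge instead of exhausting the jump history buffer
		 */
		while ((v = bpf_iter_num_next(&it)))
			sum += *v;
		bpf_iter_num_destroy(&it);
		return sum;
	}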
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230310060149.625887-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 6 +++++- kernel/bpf/verifier.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0c052bc79940..81d525d057c7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -477,8 +477,12 @@ struct bpf_insn_aux_data { /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ - bool prune_point; bool jmp_point; + bool prune_point; + /* ensure we check state equivalence and save state checkpoint and + * this instruction, regardless of any heuristics + */ + bool force_checkpoint; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 45a082284464..13fd4c893f3b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13865,6 +13865,17 @@ static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].prune_point; } +static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].force_checkpoint = true; +} + +static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].force_checkpoint; +} + + enum { DONE_EXPLORING = 0, KEEP_EXPLORING = 1, @@ -13984,8 +13995,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) struct bpf_kfunc_call_arg_meta meta; ret = fetch_kfunc_meta(env, insn, &meta, NULL); - if (ret == 0 && is_iter_next_kfunc(&meta)) + if (ret == 0 && is_iter_next_kfunc(&meta)) { mark_prune_point(env, t); + /* Checking and saving state checkpoints at iter_next() call + * is crucial for fast convergence of open-coded iterator loop + * logic, so we need to force it. If we don't do that, + * is_state_visited() might skip saving a checkpoint, causing + * unnecessarily long sequence of not checkpointed + * instructions and jumps, leading to exhaustion of jump + * history buffer, and potentially other undesired outcomes. + * It is expected that with correct open-coded iterators + * convergence will happen quickly, so we don't run a risk of + * exhausting memory. + */ + mark_force_checkpoint(env, t); + } } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); @@ -15172,7 +15196,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - bool add_new_state = env->test_state_freq ? true : false; + bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); + bool add_new_state = force_new_state; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -15269,7 +15294,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * at the end of the loop are likely to be useful in pruning. 
*/ skip_inf_loop_check: - if (!env->test_state_freq && + if (!force_new_state && env->jmps_processed - env->prev_jmps_processed < 20 && env->insn_processed - env->prev_insn_processed < 100) add_new_state = false; From 52c2b005a3c18c565fc70cfd0ca49375f301e952 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Mar 2023 14:41:31 -0800 Subject: [PATCH 019/260] bpf: take into account liveness when propagating precision When doing state comparison, if the old state has a register that is not marked as REG_LIVE_READ, we just skip the comparison, regardless of the state of the corresponding register in the current state. This is because a register that is not REG_LIVE_READ is irrelevant for further program execution and correctness. All good here. But when we get to precision propagation, after two states were declared equivalent, we don't take into account the old register's liveness, and thus attempt to propagate precision for a register in the current state even if that register in the old state was no longer REG_LIVE_READ. This is bad, because the register in the current state could be anything at all, and this could cause -EFAULT due to internal logic bugs. Fix this by taking the REG_LIVE_READ liveness mark into account, keeping precision propagation in sync with the state comparison logic. Fixes: a3ce685dd01a ("bpf: fix precision tracking") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309224131.57449-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 13fd4c893f3b..0aaf7703326b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15058,7 +15058,8 @@ static int propagate_precision(struct bpf_verifier_env *env, state_reg = state->regs; for (i = 0; i < BPF_REG_FP; i++, state_reg++) { if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) + !state_reg->precise || + !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating r%d\n", i, fr); @@ -15072,7 +15073,8 @@ static int propagate_precision(struct bpf_verifier_env *env, state_reg = &state->stack[i].spilled_ptr; if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) + !state_reg->precise || + !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating fp%d\n", From 4a54de65964d37c3929379271ab31355e93ccddf Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 10 Mar 2023 00:19:09 -0600 Subject: [PATCH 020/260] bpf/selftests: Fix send_signal tracepoint tests The send_signal tracepoint tests are non-deterministically failing in CI. The test works as follows: 1. Two pairs of file descriptors are created using the pipe() function. One pair is used to communicate between a parent process -> child process, and the other for the reverse direction. 2. A child is fork()'ed. The child process registers a signal handler, notifies its parent that the signal handler is registered, and then waits for its parent to have enabled a BPF program that sends a signal. 3. The parent opens and loads a BPF skeleton with programs that send signals to the child process. The different programs are triggered by different perf events (either NMI or normal perf), or by regular tracepoints. The signal is delivered to the child whenever the child triggers the program. 4. The child's signal handler is invoked, which sets a flag saying that the signal handler was reached.
The child then signals to the parent that it received the signal, and the test ends. The perf testcases (send_signal_perf{_thread} and send_signal_nmi{_thread}) work 100% of the time, but the tracepoint testcases fail non-deterministically because the tracepoint is not always fired for the child. There are two tracepoint programs registered in the test: 'tracepoint/sched/sched_switch', and 'tracepoint/syscalls/sys_enter_nanosleep'. The child never intentionally blocks or sleeps, so neither tracepoint is guaranteed to be triggered. To fix this, we can have the child trigger the nanosleep program with a usleep(). Before this patch, the test would fail locally every 2-3 runs. Now, it doesn't fail after more than 1000 runs. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230310061909.1420887-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d63a20fbed33..b15b343ebb6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,8 +64,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 1000000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) { j /= i + j + 1; + if (!attr) + /* trigger the nanosleep tracepoint program. */ + usleep(1); + } buf[0] = sigusr1_received ? '2' : '0'; ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); From 4cbd23cc92c49173e402753cab62b8a7754ed18f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:20 -0800 Subject: [PATCH 021/260] bpf: Move a few bpf_local_storage functions to static scope This patch makes bpf_local_storage_free_rcu() and bpf_selem_unlink_map() static because they are not used outside of bpf_local_storage.c.
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 3 --- kernel/bpf/bpf_local_storage.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index d934248b8e81..502ad7093f13 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -147,8 +147,6 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); - struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool charge_mem, gfp_t gfp_flags); @@ -163,7 +161,6 @@ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags, gfp_t gfp_flags); -void bpf_local_storage_free_rcu(struct rcu_head *rcu); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); #endif /* _BPF_LOCAL_STORAGE_H */ diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index d3ba3f2db640..1904a4245ebe 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -95,7 +95,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } -void bpf_local_storage_free_rcu(struct rcu_head *rcu) +static void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; @@ -251,7 +251,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, hlist_add_head_rcu(&selem->snode, &local_storage->list); } -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; From 2ffcb6fc50174d1efc8f98633eb2647d84483c68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:21 -0800 Subject: [PATCH 022/260] bpf: Refactor codes into bpf_local_storage_destroy This patch first renames bpf_local_storage_unlink_nolock to bpf_local_storage_destroy(). It better reflects that it is only used when the storage's owner (sk/task/cgrp/inode) is being kfree()'d. All of bpf_local_storage_destroy()'s callers take the spin lock and then free the storage. This patch also moves these two steps into bpf_local_storage_destroy(). This is preparation work for a later patch that uses bpf_mem_cache_alloc/free in the bpf_local_storage.
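Illustratively, each owner's free path collapses from open-coded locking to a single call; the following sketch restates the pattern from the diffs below and is not the verbatim kernel code:

	/* before: every owner (sk/task/cgrp/inode) open-coded this */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	free_storage = bpf_local_storage_unlink_nolock(local_storage);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	if (free_storage)
		kfree_rcu(local_storage, rcu);

	/* after: the lock/unlink/free sequence lives in one place */
	bpf_local_storage_destroy(local_storage);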
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-3-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/bpf_cgrp_storage.c | 9 +-------- kernel/bpf/bpf_inode_storage.c | 8 +------- kernel/bpf/bpf_local_storage.c | 8 ++++++-- kernel/bpf/bpf_task_storage.c | 9 +-------- net/core/bpf_sk_storage.c | 8 +------- 6 files changed, 11 insertions(+), 33 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 502ad7093f13..5908a954ddc2 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -128,7 +128,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache, diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 9ae07aedaf23..492594d69a86 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -46,8 +46,6 @@ static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; - bool free_cgroup_storage = false; - unsigned long flags; rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); @@ -57,14 +55,9 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) } bpf_cgrp_storage_lock(); - raw_spin_lock_irqsave(&local_storage->lock, flags); - free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_local_storage_destroy(local_storage); bpf_cgrp_storage_unlock(); rcu_read_unlock(); - - if (free_cgroup_storage) - kfree_rcu(local_storage, rcu); } static struct bpf_local_storage_data * diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 43e2619c8167..2d25bcfa371b 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -57,7 +57,6 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, void bpf_inode_storage_free(struct inode *inode) { struct bpf_local_storage *local_storage; - bool free_inode_storage = false; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); @@ -72,13 +71,8 @@ void bpf_inode_storage_free(struct inode *inode) return; } - raw_spin_lock_bh(&local_storage->lock); - free_inode_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_bh(&local_storage->lock); + bpf_local_storage_destroy(local_storage); rcu_read_unlock(); - - if (free_inode_storage) - kfree_rcu(local_storage, rcu); } static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 1904a4245ebe..e19f9f50a60d 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -652,11 +652,12 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { struct bpf_local_storage_elem *selem; bool free_storage = false; struct hlist_node *n; + unsigned long flags; /* Neither the bpf_prog nor the bpf_map's syscall * could be 
modifying the local_storage->list now. @@ -667,6 +668,7 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ + raw_spin_lock_irqsave(&local_storage->lock, flags); hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { /* Always unlink from map before unlinking from * local_storage. @@ -681,8 +683,10 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) free_storage = bpf_selem_unlink_storage_nolock( local_storage, selem, false, false); } + raw_spin_unlock_irqrestore(&local_storage->lock, flags); - return free_storage; + if (free_storage) + kfree_rcu(local_storage, rcu); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 20f942229f3c..4dcef28744d1 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -72,8 +72,6 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; - bool free_task_storage = false; - unsigned long flags; rcu_read_lock(); @@ -84,14 +82,9 @@ void bpf_task_storage_free(struct task_struct *task) } bpf_task_storage_lock(); - raw_spin_lock_irqsave(&local_storage->lock, flags); - free_task_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); rcu_read_unlock(); - - if (free_task_storage) - kfree_rcu(local_storage, rcu); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7a36353dbc22..8f56438c104b 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -49,7 +49,6 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; - bool free_sk_storage = false; rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -58,13 +57,8 @@ void bpf_sk_storage_free(struct sock *sk) return; } - raw_spin_lock_bh(&sk_storage->lock); - free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage); - raw_spin_unlock_bh(&sk_storage->lock); + bpf_local_storage_destroy(sk_storage); rcu_read_unlock(); - - if (free_sk_storage) - kfree_rcu(sk_storage, rcu); } static void bpf_sk_storage_map_free(struct bpf_map *map) From 62827d612ae525695799b3635a087cb49c55e977 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:22 -0800 Subject: [PATCH 023/260] bpf: Remove __bpf_local_storage_map_alloc bpf_local_storage_map_alloc() is the only caller of __bpf_local_storage_map_alloc(). The remaining logic in bpf_local_storage_map_alloc() is only a one liner setting the smap->cache_idx. Remove __bpf_local_storage_map_alloc() to simplify code. 
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 63 ++++++++++++++-------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index e19f9f50a60d..f7234a8d4959 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -601,40 +601,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr) -{ - struct bpf_local_storage_map *smap; - unsigned int i; - u32 nbuckets; - - smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); - if (!smap) - return ERR_PTR(-ENOMEM); - bpf_map_init_from_attr(&smap->map, attr); - - nbuckets = roundup_pow_of_two(num_possible_cpus()); - /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ - nbuckets = max_t(u32, 2, nbuckets); - smap->bucket_log = ilog2(nbuckets); - - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); - if (!smap->buckets) { - bpf_map_area_free(smap); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < nbuckets; i++) { - INIT_HLIST_HEAD(&smap->buckets[i].list); - raw_spin_lock_init(&smap->buckets[i].lock); - } - - smap->elem_size = offsetof(struct bpf_local_storage_elem, - sdata.data[attr->value_size]); - - return smap; -} - int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, @@ -704,10 +670,33 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache) { struct bpf_local_storage_map *smap; + unsigned int i; + u32 nbuckets; - smap = __bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); + smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); + if (!smap) + return ERR_PTR(-ENOMEM); + bpf_map_init_from_attr(&smap->map, attr); + + nbuckets = roundup_pow_of_two(num_possible_cpus()); + /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ + nbuckets = max_t(u32, 2, nbuckets); + smap->bucket_log = ilog2(nbuckets); + + smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), + nbuckets, GFP_USER | __GFP_NOWARN); + if (!smap->buckets) { + bpf_map_area_free(smap); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < nbuckets; i++) { + INIT_HLIST_HEAD(&smap->buckets[i].list); + raw_spin_lock_init(&smap->buckets[i].lock); + } + + smap->elem_size = offsetof(struct bpf_local_storage_elem, + sdata.data[attr->value_size]); smap->cache_idx = bpf_local_storage_cache_idx_get(cache); return &smap->map; From 121f31f3e00dfc1acbca43f6f35779e050b56cfc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:23 -0800 Subject: [PATCH 024/260] bpf: Remove the preceding __ from __bpf_selem_unlink_storage __bpf_selem_unlink_storage takes the spin lock, and there is no name collision either. Having the preceding '__' is confusing when reviewing the later patch.
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index f7234a8d4959..70df8dcb2066 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -216,8 +216,8 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor return free_local_storage; } -static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) +static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, + bool use_trace_rcu) { struct bpf_local_storage *local_storage; bool free_local_storage = false; @@ -288,7 +288,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) * the local_storage. */ bpf_selem_unlink_map(selem); - __bpf_selem_unlink_storage(selem, use_trace_rcu); + bpf_selem_unlink_storage(selem, use_trace_rcu); } /* If cacheit_lockit is false, this lookup function is lockless */ From fc6652aab6ad545de70b772550da9043d0b47f1c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:24 -0800 Subject: [PATCH 025/260] bpf: Remember smap in bpf_local_storage This patch remembers which smap triggers the allocation of a 'struct bpf_local_storage' object. The local_storage is allocated when the very first selem is added to the owner. The smap pointer is needed when using bpf_mem_cache_free in a later patch, because the memory needs to be freed to the correct smap's bpf_mem_alloc object. When a selem is being removed, it needs to check if it is the selem that triggered the creation of the local_storage. If it is, the local_storage->smap pointer will be reset to NULL. This NULL reset is done under the local_storage->lock in bpf_selem_unlink_storage_nolock() when a selem is being removed. Also note that the local_storage may not go away even when local_storage->smap is NULL, because there may be other selems still stored in the local_storage. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-6-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 + kernel/bpf/bpf_local_storage.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 5908a954ddc2..613b1805ed9f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -83,6 +83,7 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; + struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem.
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 70df8dcb2066..5585dbfd9c66 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -213,6 +213,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor kfree_rcu(selem, rcu); } + if (rcu_access_pointer(local_storage->smap) == smap) + RCU_INIT_POINTER(local_storage->smap, NULL); + return free_local_storage; } @@ -368,6 +371,7 @@ int bpf_local_storage_alloc(void *owner, goto uncharge; } + RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); raw_spin_lock_init(&storage->lock); storage->owner = owner; From a47eabf216f77cb6f22ceb38d46f1bb95968579c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:25 -0800 Subject: [PATCH 026/260] bpf: Repurpose use_trace_rcu to reuse_now in bpf_local_storage This patch re-purposes use_trace_rcu to mean whether the freed memory can be reused immediately. use_trace_rcu is renamed to reuse_now. Other than the boolean test being reversed, it should be a no-op. The following explains the reason for the rename and how it will be used in a later patch. In a later patch, bpf_mem_cache_alloc/free will be used in the bpf_local_storage. The bpf mem allocator will reuse the freed memory immediately. Some of the free paths in bpf_local_storage do not support the memory being reused immediately. These paths are the "delete" elem cases from the bpf_*_storage_delete() helper and the map_delete_elem() syscall. Note that deleting an elem before the owner's (sk/task/cgrp/inode) lifetime has ended is not the common usage for the local storage. The common free path, bpf_local_storage_destroy(), can reuse the memory immediately. This common path means the storage stays with its owner until the owner is destroyed. The above mentioned "delete" elem paths that cannot reuse immediately always have 'use_trace_rcu == true'. The cases that are safe for immediate reuse always have 'use_trace_rcu == false'. Instead of adding another arg in a later patch, this patch re-purposes this arg as reuse_now and reverses the test logic. In a later patch, 'reuse_now == true' will free to bpf_mem_cache_free(), where the memory can be reused immediately. 'reuse_now == false' will go through the call_rcu_tasks_trace().
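A condensed sketch of the resulting selem free dispatch, simplified from bpf_selem_unlink_storage_nolock() in the hunks below (the special-fields branch is omitted):

	if (!reuse_now)
		/* "delete" elem paths: sleepable progs may still be
		 * reading the selem, so wait for an RCU Tasks Trace
		 * grace period first
		 */
		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
	else
		/* owner-destroy path: memory can be made reusable sooner */
		kfree_rcu(selem, rcu);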
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-7-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/bpf_cgrp_storage.c | 2 +- kernel/bpf/bpf_inode_storage.c | 2 +- kernel/bpf/bpf_local_storage.c | 24 ++++++++++++------------ kernel/bpf/bpf_task_storage.c | 2 +- net/core/bpf_sk_storage.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 613b1805ed9f..18a31add2255 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -143,7 +143,7 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 492594d69a86..c975cacdd16b 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -121,7 +121,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 2d25bcfa371b..ad2ab0187e45 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -122,7 +122,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 5585dbfd9c66..70c34a948c3c 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -147,7 +147,7 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) */ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool use_trace_rcu) + bool uncharge_mem, bool reuse_now) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -201,7 +201,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor * any special fields. 
*/ rec = smap->map.record; - if (use_trace_rcu) { + if (!reuse_now) { if (!IS_ERR_OR_NULL(rec)) call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); else @@ -220,7 +220,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor } static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) + bool reuse_now) { struct bpf_local_storage *local_storage; bool free_local_storage = false; @@ -235,11 +235,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, use_trace_rcu); + local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) { - if (use_trace_rcu) + if (!reuse_now) call_rcu_tasks_trace(&local_storage->rcu, bpf_local_storage_free_rcu); else @@ -284,14 +284,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, raw_spin_unlock_irqrestore(&b->lock, flags); } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) { /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from * the local_storage. */ bpf_selem_unlink_map(selem); - bpf_selem_unlink_storage(selem, use_trace_rcu); + bpf_selem_unlink_storage(selem, reuse_now); } /* If cacheit_lockit is false, this lookup function is lockless */ @@ -538,7 +538,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - false, true); + false, false); } unlock: @@ -651,7 +651,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * of the loop will set the free_cgroup_storage to true. */ free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false, false); + local_storage, selem, false, true); } raw_spin_unlock_irqrestore(&local_storage->lock, flags); @@ -745,7 +745,7 @@ void bpf_local_storage_map_free(struct bpf_map *map, migrate_disable(); this_cpu_inc(*busy_counter); } - bpf_selem_unlink(selem, false); + bpf_selem_unlink(selem, true); if (busy_counter) { this_cpu_dec(*busy_counter); migrate_enable(); @@ -783,8 +783,8 @@ void bpf_local_storage_map_free(struct bpf_map *map, /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() * is true, because while call_rcu invocation is skipped in that * case in bpf_selem_free_fields_trace_rcu (and all local - * storage maps pass use_trace_rcu = true), there can be - * call_rcu callbacks based on use_trace_rcu = false in the + * storage maps pass reuse_now = false), there can be + * call_rcu callbacks based on reuse_now = true in the * while ((selem = ...)) loop above or when owner's free path * calls bpf_local_storage_unlink_nolock. 
*/ diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 4dcef28744d1..c88cc04c17c1 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -168,7 +168,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, if (!nobusy) return -EBUSY; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 8f56438c104b..a5f185b8e50a 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } From c609981342dca634e5dea8c6ca175b6533581261 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:26 -0800 Subject: [PATCH 027/260] bpf: Remove bpf_selem_free_fields*_rcu This patch removes the bpf_selem_free_fields*_rcu callbacks. The bpf_obj_free_fields() can be done before the call_rcu_tasks_trace() and kfree_rcu(). It is needed when a later patch uses bpf_mem_cache_alloc/free. In bpf hashtab, bpf_obj_free_fields() is also called before calling bpf_mem_cache_free. The discussion can be found in https://lore.kernel.org/bpf/f67021ee-21d9-bfae-6134-4ca542fab843@linux.dev/ Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-8-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 67 +++------------------------------- 1 file changed, 5 insertions(+), 62 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 70c34a948c3c..715deaaefe13 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,27 +109,6 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_fields_rcu(struct rcu_head *rcu) -{ - struct bpf_local_storage_elem *selem; - struct bpf_local_storage_map *smap; - - selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - /* protected by the rcu_barrier*() */ - smap = rcu_dereference_protected(SDATA(selem)->smap, true); - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - kfree(selem); -} - -static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu) -{ - /* Free directly if Tasks Trace RCU GP also implies RCU GP */ - if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_fields_rcu(rcu); - else - call_rcu(rcu, bpf_selem_free_fields_rcu); -} - static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -151,7 +130,6 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor { struct bpf_local_storage_map *smap; bool free_local_storage; - struct btf_record *rec; void *owner; smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); @@ -192,26 +170,11 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - /* A different RCU callback is chosen whenever we need to free - * additional fields in selem data before freeing selem.
- * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU - * callbacks when it has special fields, hence we can only conditionally - * dereference smap, as by this time the map might have already been - * freed without waiting for our call_rcu callback if it did not have - * any special fields. - */ - rec = smap->map.record; - if (!reuse_now) { - if (!IS_ERR_OR_NULL(rec)) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); - else - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - } else { - if (!IS_ERR_OR_NULL(rec)) - call_rcu(&selem->rcu, bpf_selem_free_fields_rcu); - else - kfree_rcu(selem, rcu); - } + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + if (!reuse_now) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + else + kfree_rcu(selem, rcu); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -769,26 +732,6 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); - /* Only delay freeing of smap, buckets are not needed anymore */ kvfree(smap->buckets); - - /* When local storage has special fields, callbacks for - * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will - * keep using the map BTF record, we need to execute an RCU barrier to - * wait for them as the record will be freed right after our map_free - * callback. - */ - if (!IS_ERR_OR_NULL(smap->map.record)) { - rcu_barrier_tasks_trace(); - /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() - * is true, because while call_rcu invocation is skipped in that - * case in bpf_selem_free_fields_trace_rcu (and all local - * storage maps pass reuse_now = false), there can be - * call_rcu callbacks based on reuse_now = true in the - * while ((selem = ...)) loop above or when owner's free path - * calls bpf_local_storage_unlink_nolock. - */ - rcu_barrier(); - } bpf_map_area_free(smap); } From f8ccf30c179ec1ac16654f6e6ceb40cce1530b91 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:27 -0800 Subject: [PATCH 028/260] bpf: Add bpf_selem_free_rcu callback Add bpf_selem_free_rcu() callback to do the kfree() instead of using kfree_rcu. It is a preparation work for using bpf_mem_cache_alloc/free in a later patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-9-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 715deaaefe13..146e9caeda96 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,15 +109,20 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + kfree(selem); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ if (rcu_trace_implies_rcu_gp()) - kfree(selem); + bpf_selem_free_rcu(rcu); else - kfree_rcu(selem, rcu); + call_rcu(rcu, bpf_selem_free_rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. 
@@ -174,7 +179,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor if (!reuse_now) call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); else - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, bpf_selem_free_rcu); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); From c0d63f309186d8492577c67c67984c714b6b72bc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:28 -0800 Subject: [PATCH 029/260] bpf: Add bpf_selem_free() This patch refactors the selem freeing logic into bpf_selem_free(). It is a preparation work for a later patch using bpf_mem_cache_alloc/free. The other kfree(selem) cases are also changed to bpf_selem_free(..., reuse_now = true). Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-10-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 4 ++++ kernel/bpf/bpf_local_storage.c | 21 ++++++++++++++------- net/core/bpf_sk_storage.c | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 18a31add2255..a34f61467a2f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -152,6 +152,10 @@ struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool charge_mem, gfp_t gfp_flags); +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now); + int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 146e9caeda96..512943aac435 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -125,6 +125,17 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) call_rcu(rcu, bpf_selem_free_rcu); } +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now) +{ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + if (!reuse_now) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + else + call_rcu(&selem->rcu, bpf_selem_free_rcu); +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. 
@@ -175,11 +186,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - if (!reuse_now) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - else - call_rcu(&selem->rcu, bpf_selem_free_rcu); + bpf_selem_free(selem, smap, reuse_now); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -423,7 +430,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags); if (err) { - kfree(selem); + bpf_selem_free(selem, smap, true); mem_uncharge(smap, owner, smap->elem_size); return ERR_PTR(err); } @@ -517,7 +524,7 @@ unlock_err: raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (selem) { mem_uncharge(smap, owner, smap->elem_size); - kfree(selem); + bpf_selem_free(selem, smap, true); } return ERR_PTR(err); } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index a5f185b8e50a..24c3dc0d62e5 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -197,7 +197,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); if (ret) { - kfree(copy_selem); + bpf_selem_free(copy_selem, smap, true); atomic_sub(smap->elem_size, &newsk->sk_omem_alloc); bpf_map_put(map); From 1288aaa2786b1e58c9e88e53f7654d520ebe0f3b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:29 -0800 Subject: [PATCH 030/260] bpf: Add bpf_local_storage_free_rcu callback The existing bpf_local_storage_free_rcu is renamed to bpf_local_storage_free_trace_rcu. A new bpf_local_storage_free_rcu callback is added to do the kfree() instead of using kfree_rcu(). It is a preparation work for a later patch using bpf_mem_cache_alloc/free. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-11-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 512943aac435..0fbc477c8b27 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -99,14 +99,19 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + kfree(local_storage); +} + +static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) +{ /* If RCU Tasks Trace grace period implies RCU grace period, do * kfree(), else do kfree_rcu().
*/ - local_storage = container_of(rcu, struct bpf_local_storage, rcu); if (rcu_trace_implies_rcu_gp()) - kfree(local_storage); + bpf_local_storage_free_rcu(rcu); else - kfree_rcu(local_storage, rcu); + call_rcu(rcu, bpf_local_storage_free_rcu); } static void bpf_selem_free_rcu(struct rcu_head *rcu) @@ -216,9 +221,9 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, if (free_local_storage) { if (!reuse_now) call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_rcu); + bpf_local_storage_free_trace_rcu); else - kfree_rcu(local_storage, rcu); + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); } } @@ -631,7 +636,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_storage) - kfree_rcu(local_storage, rcu); + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) From 7e30a8477b0bdd13dfd0b24e4f32b26d22b96e6c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:30 -0800 Subject: [PATCH 031/260] bpf: Add bpf_local_storage_free() This patch refactors local_storage freeing logic into bpf_local_storage_free(). It is a preparation work for a later patch that uses bpf_mem_cache_alloc/free. The other kfree(local_storage) cases are also changed to bpf_local_storage_free(..., reuse_now = true). Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-12-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 0fbc477c8b27..351d991694cb 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -114,6 +114,16 @@ static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) call_rcu(rcu, bpf_local_storage_free_rcu); } +static void bpf_local_storage_free(struct bpf_local_storage *local_storage, + bool reuse_now) +{ + if (!reuse_now) + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_trace_rcu); + else + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); +} + static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -218,13 +228,8 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); - if (free_local_storage) { - if (!reuse_now) - call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_trace_rcu); - else - call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); - } + if (free_local_storage) + bpf_local_storage_free(local_storage, reuse_now); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -391,7 +396,7 @@ int bpf_local_storage_alloc(void *owner, return 0; uncharge: - kfree(storage); + bpf_local_storage_free(storage, true); mem_uncharge(smap, owner, sizeof(*storage)); return err; } @@ -636,7 +641,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_storage) - call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); + bpf_local_storage_free(local_storage, true); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) From 57ef77152b58770cbd54d624babd8f5d90805ea7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau 
Date: Tue, 7 Mar 2023 22:59:34 -0800 Subject: [PATCH 032/260] selftests/bpf: Replace CHECK with ASSERT in test_local_storage This patch migrates the CHECK macro to ASSERT macro. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-16-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/test_local_storage.c | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 9c77cd6b1eaf..563a9c746b7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -13,8 +13,6 @@ #include "network_helpers.h" #include "task_local_storage_helpers.h" -static unsigned int duration; - #define TEST_STORAGE_VALUE 0xbeefdead struct storage { @@ -60,36 +58,30 @@ static bool check_syscall_operations(int map_fd, int obj_fd) /* Looking up an existing element should fail initially */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem")) return false; /* Create a new element */ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); - if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) return false; /* Lookup the newly created element */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) return false; /* Check the value of the newly created element */ - if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem", - "value got = %x errno:%d", lookup_val.value, val.value)) + if (!ASSERT_EQ(lookup_val.value, val.value, "bpf_map_lookup_elem")) return false; err = bpf_map_delete_elem(map_fd, &obj_fd); - if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_delete_elem()")) return false; /* The lookup should fail, now that the element has been deleted */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem")) return false; return true; @@ -104,35 +96,32 @@ void test_test_local_storage(void) char cmd[256]; skel = local_storage__open_and_load(); - if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto close_prog; err = local_storage__attach(skel); - if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + if (!ASSERT_OK(err, "attach")) goto close_prog; task_fd = sys_pidfd_open(getpid(), 0); - if (CHECK(task_fd < 0, "pidfd_open", - "failed to get pidfd err:%d, errno:%d", task_fd, errno)) + if (!ASSERT_GE(task_fd, 0, "pidfd_open")) goto close_prog; if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map), task_fd)) goto close_prog; - if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp", - "unable to create tmpdir: %d\n", errno)) + if (!ASSERT_OK_PTR(mkdtemp(tmp_dir_path), "mkdtemp")) goto close_prog; snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", tmp_dir_path); snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path); - if (CHECK_FAIL(system(cmd))) + if (!ASSERT_OK(system(cmd), 
"system(cp)")) goto close_prog_rmdir; rm_fd = open(tmp_exec_path, O_RDONLY); - if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", - tmp_exec_path, rm_fd, errno)) + if (!ASSERT_GE(rm_fd, 0, "open(tmp_exec_path)")) goto close_prog_rmdir; if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), @@ -145,7 +134,7 @@ void test_test_local_storage(void) * LSM program. */ err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); - if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) + if (!ASSERT_EQ(err, EPERM, "run_self_unlink")) goto close_prog_rmdir; /* Set the process being monitored to be the current process */ @@ -156,18 +145,16 @@ void test_test_local_storage(void) */ snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr", tmp_dir_path, tmp_dir_path); - if (CHECK_FAIL(system(cmd))) + if (!ASSERT_OK(system(cmd), "system(mv)")) goto close_prog_rmdir; - CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", - "inode_local_storage not set\n"); + ASSERT_EQ(skel->data->inode_storage_result, 0, "inode_storage_result"); serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); - if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + if (!ASSERT_GE(serv_sk, 0, "start_server")) goto close_prog_rmdir; - CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", - "sk_local_storage not set\n"); + ASSERT_EQ(skel->data->sk_storage_result, 0, "sk_storage_result"); if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), serv_sk)) From 1f443d0f2b5702bad5f03aab544858ddd33999b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:35 -0800 Subject: [PATCH 033/260] selftests/bpf: Check freeing sk->sk_local_storage with sk_local_storage->smap is NULL This patch tweats the socket_bind bpf prog to test the local_storage->smap == NULL case in the bpf_local_storage_free() code path. The idea is to create the local_storage with the sk_storage_map's selem first. Then add the sk_storage_map2's selem and then delete the earlier sk_storeage_map's selem. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-17-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/local_storage.c | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 01c74bc870ae..c8ba7207f5a5 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -108,18 +108,17 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, { __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; - int err; if (pid != monitored_pid) return 0; - storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 0); if (!storage) return 0; + sk_storage_result = -1; if (storage->value != DUMMY_STORAGE_VALUE) - sk_storage_result = -1; + return 0; /* This tests that we can associate multiple elements * with the local storage. 
@@ -129,14 +128,26 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (!storage) return 0; - err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); - if (err) + if (bpf_sk_storage_delete(&sk_storage_map2, sock->sk)) return 0; - err = bpf_sk_storage_delete(&sk_storage_map2, sock->sk); - if (!err) - sk_storage_result = err; + storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + if (bpf_sk_storage_delete(&sk_storage_map, sock->sk)) + return 0; + + /* Ensure that the sk_storage_map is disconnected from the storage. + * The storage memory should not be freed back to the + * bpf_mem_alloc of the sk_bpf_storage_map because + * sk_bpf_storage_map may already be gone. + */ + if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap) + return 0; + + sk_storage_result = 0; return 0; } From 4659fba121dac21a3516986a3c2cf8459c7ac3bc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:36 -0800 Subject: [PATCH 034/260] selftests/bpf: Add local-storage-create benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch tests how many kmallocs are needed to create and free a batch of UDP sockets, where each socket has a 64-byte bpf storage. It also measures how fast the UDP sockets can be created. The result is from my qemu setup. Before bpf_mem_cache_alloc/free: ./bench -p 1 local-storage-create Setting up benchmark 'local-storage-create'... Benchmark 'local-storage-create' started. Iter 0 ( 73.193us): creates 213.552k/s (213.552k/prod), 3.09 kmallocs/create Iter 1 (-20.724us): creates 211.908k/s (211.908k/prod), 3.09 kmallocs/create Iter 2 ( 9.280us): creates 212.574k/s (212.574k/prod), 3.12 kmallocs/create Iter 3 ( 11.039us): creates 213.209k/s (213.209k/prod), 3.12 kmallocs/create Iter 4 (-11.411us): creates 213.351k/s (213.351k/prod), 3.12 kmallocs/create Iter 5 ( -7.915us): creates 214.754k/s (214.754k/prod), 3.12 kmallocs/create Iter 6 ( 11.317us): creates 210.942k/s (210.942k/prod), 3.12 kmallocs/create Summary: creates 212.789 ± 1.310k/s (212.789k/prod), 3.12 kmallocs/create After bpf_mem_cache_alloc/free: ./bench -p 1 local-storage-create Setting up benchmark 'local-storage-create'... Benchmark 'local-storage-create' started.
Iter 0 ( 68.265us): creates 243.984k/s (243.984k/prod), 1.04 kmallocs/create Iter 1 ( 30.357us): creates 238.424k/s (238.424k/prod), 1.04 kmallocs/create Iter 2 (-18.712us): creates 232.963k/s (232.963k/prod), 1.04 kmallocs/create Iter 3 (-15.885us): creates 238.879k/s (238.879k/prod), 1.04 kmallocs/create Iter 4 ( 5.590us): creates 237.490k/s (237.490k/prod), 1.04 kmallocs/create Iter 5 ( 8.577us): creates 237.521k/s (237.521k/prod), 1.04 kmallocs/create Iter 6 ( -6.263us): creates 238.508k/s (238.508k/prod), 1.04 kmallocs/create Summary: creates 237.298 ± 2.198k/s (237.298k/prod), 1.04 kmallocs/create Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-18-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/bench.c | 2 + .../bpf/benchs/bench_local_storage_create.c | 141 ++++++++++++++++++ .../bpf/progs/bench_local_storage_create.c | 57 +++++++ 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/bpf/benchs/bench_local_storage_create.c create mode 100644 tools/testing/selftests/bpf/progs/bench_local_storage_create.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 606e2d738dd8..55811c448eb7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -639,6 +639,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h +$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm @@ -656,6 +657,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_local_storage.o \ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \ $(OUTPUT)/bench_bpf_hashmap_lookup.o \ + $(OUTPUT)/bench_local_storage_create.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 0b2a53bb8460..dc3827c1f139 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -515,6 +515,7 @@ extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; extern const struct bench bench_bpf_hashmap_lookup; +extern const struct bench bench_local_storage_create; static const struct bench *benchs[] = { &bench_count_global, @@ -555,6 +556,7 @@ static const struct bench *benchs[] = { &bench_local_storage_cache_hashmap_control, &bench_local_storage_tasks_trace, &bench_bpf_hashmap_lookup, + &bench_local_storage_create, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c new file mode 100644 index 000000000000..f8b2a640ccbe --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include + +#include "bench.h" +#include "bench_local_storage_create.skel.h" + +#define BATCH_SZ 32 + +struct thread { + int fds[BATCH_SZ]; +}; + +static struct bench_local_storage_create *skel; +static struct thread *threads; +static long socket_errs; + +static void validate(void) +{ + if (env.consumer_cnt > 1) { + fprintf(stderr, + "local-storage-create benchmark does not need consumer\n"); + exit(1); + } +} + +static void setup(void) +{ + skel = bench_local_storage_create__open_and_load(); + if (!skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + skel->bss->bench_pid = getpid(); + + if (!bpf_program__attach(skel->progs.socket_post_create)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + if (!bpf_program__attach(skel->progs.kmalloc)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + threads = calloc(env.producer_cnt, sizeof(*threads)); + + if (!threads) { + fprintf(stderr, "cannot alloc thread_res\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&skel->bss->create_cnts, 0); + res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + int *fds = t->fds; + int i; + + while (true) { + for (i = 0; i < BATCH_SZ; i++) { + fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); + if (fds[i] == -1) + atomic_inc(&socket_errs); + } + + for (i = 0; i < BATCH_SZ; i++) { + if (fds[i] != -1) + close(fds[i]); + } + } + + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double creates_per_sec, kmallocs_per_create; + + creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0); + kmallocs_per_create = (double)res->drops / res->hits; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("creates %8.3lfk/s (%7.3lfk/prod), ", + creates_per_sec, creates_per_sec / env.producer_cnt); + printf("%3.2lf kmallocs/create\n", kmallocs_per_create); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double creates_mean = 0.0, creates_stddev = 0.0; + long total_creates = 0, total_kmallocs = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + total_creates += res[i].hits; + total_kmallocs += res[i].drops; + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + creates_stddev += (creates_mean - res[i].hits / 1000.0) * + (creates_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + creates_stddev = sqrt(creates_stddev); + } + printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ", + creates_mean, creates_stddev, creates_mean / env.producer_cnt); + printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates); + if (socket_errs || skel->bss->create_errs) + printf("socket() errors %ld create_errs %ld\n", socket_errs, + skel->bss->create_errs); +} + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_local_storage_create = { + .name = "local-storage-create", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c new file mode 100644 index 
000000000000..2814bab54d28 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +long create_errs = 0; +long create_cnts = 0; +long kmalloc_cnts = 0; +__u32 bench_pid = 0; + +struct storage { + __u8 data[64]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct storage); +} sk_storage_map SEC(".maps"); + +SEC("raw_tp/kmalloc") +int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, + int node) +{ + __sync_fetch_and_add(&kmalloc_cnts, 1); + + return 0; +} + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct storage *stg; + __u32 pid; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != bench_pid) + return 0; + + stg = bpf_sk_storage_get(&sk_storage_map, sock->sk, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + if (stg) + __sync_fetch_and_add(&create_cnts, 1); + else + __sync_fetch_and_add(&create_errs, 1); + + return 0; +} + +char __license[] SEC("license") = "GPL"; From b32a5dae44cc7346835839c2e92356ff4609c823 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:06 -0800 Subject: [PATCH 035/260] bpf: verifier: Rename kernel_type_name helper to btf_type_name kernel_type_name was introduced in commit 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF") with type signature: const char *kernel_type_name(u32 id) At that time the function used global btf_vmlinux BTF for all id lookups. Later, in commit 22dc4a0f5ed1 ("bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier"), the type signature was changed to: static const char *kernel_type_name(const struct btf* btf, u32 id) With the btf parameter used for lookups instead of global btf_vmlinux. The helper will function as expected for type name lookup using non-kernel BTFs, and will be used for such in further patches in the series. Let's rename it to avoid incorrect assumptions that might arise when seeing the current name. 
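To illustrate with a hedged, made-up call site (btf_type_name itself is the real helper touched by the diff below; the surrounding variables are borrowed from existing verifier call sites purely for exposition), the renamed helper reads naturally for any BTF object:

	/* Resolves a type name from whichever BTF object the register
	 * carries; works the same for vmlinux, module, and program BTF.
	 */
	const char *name = btf_type_name(reg->btf, reg->btf_id);

	verbose(env, "R%d is of type %s\n", regno, name);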
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0aaf7703326b..073c56dbec63 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -752,7 +752,7 @@ static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); } -static const char *kernel_type_name(const struct btf* btf, u32 id) +static const char *btf_type_name(const struct btf *btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } @@ -782,7 +782,7 @@ static const char *iter_type_str(const struct btf *btf, u32 btf_id) return ""; /* we already validated that type is valid and has conforming name */ - return kernel_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; + return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; } static const char *iter_state_str(enum bpf_iter_state state) @@ -1349,7 +1349,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "%s", reg_type_str(env, t)); if (base_type(t) == PTR_TO_BTF_ID) - verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); + verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); verbose(env, "("); /* * _a stands for append, was shortened to avoid multiline statements below. @@ -4518,7 +4518,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct btf_field *kptr_field, struct bpf_reg_state *reg, u32 regno) { - const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); + const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; const char *reg_name = ""; @@ -4534,7 +4534,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, return -EINVAL; } /* We need to verify reg->type and reg->btf, before accessing reg->btf */ - reg_name = kernel_type_name(reg->btf, reg->btf_id); + reg_name = btf_type_name(reg->btf, reg->btf_id); /* For ref_ptr case, release function check should ensure we get one * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the @@ -7177,8 +7177,8 @@ found: btf_vmlinux, *arg_btf_id, strict_type_match)) { verbose(env, "R%d is of type %s but %s is expected\n", - regno, kernel_type_name(reg->btf, reg->btf_id), - kernel_type_name(btf_vmlinux, *arg_btf_id)); + regno, btf_type_name(reg->btf, reg->btf_id), + btf_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } } @@ -7248,7 +7248,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, verbose(env, "R%d must have zero offset when passed to release func\n", regno); verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno, - kernel_type_name(reg->btf, reg->btf_id), reg->off); + btf_type_name(reg->btf, reg->btf_id), reg->off); return -EINVAL; } From a4aa38897b6a6dad4318bed036edc7ed0c8a4578 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:07 -0800 Subject: [PATCH 036/260] bpf: btf: Remove unused btf_field_info_type enum This enum was added and used in commit aa3496accc41 ("bpf: Refactor kptr_off_tab into btf_record"). Later refactoring in commit db559117828d ("bpf: Consolidate spin_lock, timer management into btf_record") resulted in the enum values no longer being used anywhere. Let's remove them. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 71758cd15b07..37779ceefd09 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3231,12 +3231,6 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -enum btf_field_info_type { - BTF_FIELD_SPIN_LOCK, - BTF_FIELD_TIMER, - BTF_FIELD_KPTR, -}; - enum { BTF_FIELD_IGNORE = 0, BTF_FIELD_FOUND = 1, From 74843b57ec70af7b67b7e6153374834ee18d139f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:08 -0800 Subject: [PATCH 037/260] bpf: Change btf_record_find enum parameter to field_mask btf_record_find's 3rd parameter can be multiple enum btf_field_type's masked together. The function is called with BPF_KPTR in two places in verifier.c, so it works with masked values already. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e64ff1e89fb2..3a38db315f7f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1925,7 +1925,7 @@ void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, - u32 offset, enum btf_field_type type); + u32 offset, u32 field_mask); void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f406dfa13792..cc4b7684910c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -520,14 +520,14 @@ static int btf_field_cmp(const void *a, const void *b) } struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, - enum btf_field_type type) + u32 field_mask) { struct btf_field *field; - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) return NULL; field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); - if (!field || !(field->type & type)) + if (!field || !(field->type & field_mask)) return NULL; return field; } From c1f9e14e3b676eb88fe1c9488c0b5f4fc9108a1c Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Wed, 8 Mar 2023 20:53:03 +0000 Subject: [PATCH 038/260] bpf, docs: Explain helper functions Add brief text about existence of helper functions, with details to go in separate psABI text. Note that text about runtime functions (kfuncs) is part of a separate patch, not this one. 
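For readers new to the topic, a minimal sketch of what a helper call looks like from C (assuming the usual vmlinux.h/bpf_helpers.h includes; the program and section names are illustrative): bpf_get_current_pid_tgid and bpf_trace_printk are helpers 14 and 6 in the uapi enum bpf_func_id, and each call below compiles down to a BPF_JMP | BPF_CALL instruction carrying that integer in its imm field:

	SEC("tracepoint/syscalls/sys_enter_execve")
	int show_helper_calls(void *ctx)
	{
		/* emitted as: call 14 */
		__u64 pid_tgid = bpf_get_current_pid_tgid();

		/* bpf_printk() wraps bpf_trace_printk(); emitted as: call 6 */
		bpf_printk("pid_tgid: %llu", pid_tgid);
		return 0;
	}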
Signed-off-by: Dave Thaler Link: https://lore.kernel.org/r/20230308205303.1308-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/clang-notes.rst | 6 ++++++ Documentation/bpf/instruction-set.rst | 9 ++++++++- Documentation/bpf/linux-notes.rst | 8 ++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst index 528feddf2db9..2c872a1ee08e 100644 --- a/Documentation/bpf/clang-notes.rst +++ b/Documentation/bpf/clang-notes.rst @@ -20,6 +20,12 @@ Arithmetic instructions For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with ``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included. +Jump instructions +================= + +If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d) +instruction, which is not supported by the Linux kernel verifier. + Atomic operations ================= diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index db8789e6969e..5e43e14abe80 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -253,7 +253,7 @@ BPF_JSET 0x40 PC += off if dst & src BPF_JNE 0x50 PC += off if dst != src BPF_JSGT 0x60 PC += off if dst > src signed BPF_JSGE 0x70 PC += off if dst >= src signed -BPF_CALL 0x80 function call +BPF_CALL 0x80 function call see `Helper functions`_ BPF_EXIT 0x90 function / program return BPF_JMP only BPF_JLT 0xa0 PC += off if dst < src unsigned BPF_JLE 0xb0 PC += off if dst <= src unsigned @@ -264,6 +264,13 @@ BPF_JSLE 0xd0 PC += off if dst <= src signed The eBPF program needs to store the return value into register R0 before doing a BPF_EXIT. +Helper functions +~~~~~~~~~~~~~~~~ + +Helper functions are a concept whereby BPF programs can call into a +set of function calls exposed by the runtime. Each helper +function is identified by an integer used in a ``BPF_CALL`` instruction. +The available helper functions may differ for each program type. Load and store instructions =========================== diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst index 956b0c86699d..f43b9c797bcb 100644 --- a/Documentation/bpf/linux-notes.rst +++ b/Documentation/bpf/linux-notes.rst @@ -12,6 +12,14 @@ Byte swap instructions ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively. +Jump instructions +================= + +``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function +integer would be read from a specified register, is not currently supported +by the verifier. Any programs with this instruction will fail to load +until such support is added. + Legacy BPF Packet access instructions ===================================== From c8e18754091479fac3f5b6c053c6bc4be0b7fb11 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:41 -0800 Subject: [PATCH 039/260] bpf: Support __kptr to local kptrs If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module BTF - it must have been allocated by bpf_obj_new and therefore must be free'd with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new. This patch adds support for treating __kptr-tagged pointers to "local kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr acquire / release semantics. 
Consider the following example: struct node_data { long key; long data; struct bpf_rb_node node; }; struct map_value { struct node_data __kptr *node; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); __type(value, struct map_value); __uint(max_entries, 1); } some_nodes SEC(".maps"); If struct node_data had a matching definition in kernel BTF, the verifier would expect a destructor for the type to be registered. Since struct node_data does not match any type in kernel BTF, the verifier knows that there is no kfunc that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can only come from bpf_obj_new. So instead of searching for a registered dtor, a bpf_obj_drop dtor can be assumed. This allows the runtime to properly destruct such kptrs in bpf_obj_free_fields, which enables maps to clean up map_vals w/ such kptrs when going away. Implementation notes: * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr. Before this patch, the variable would only ever point to vmlinux or module BTFs, but now it can point to some program BTF for local kptr type. It's later used to populate the (btf, btf_id) pair in kptr btf field. * It's necessary to btf_get the program BTF when populating btf_field for local kptr. btf_record_free later does a btf_put. * Behavior for non-local referenced kptrs is not modified, as bpf_find_btf_id helper only searches vmlinux and module BTFs for matching BTF type. If such a type is found, btf_field_kptr's btf will pass btf_is_kernel check, and the associated release function is some one-argument dtor. If btf_is_kernel check fails, associated release function is two-arg bpf_obj_drop_impl. Before this patch only btf_field_kptr's w/ kernel or module BTFs were created. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++++++++- include/linux/btf.h | 2 -- kernel/bpf/btf.c | 37 ++++++++++++++++++++++++++++--------- kernel/bpf/helpers.c | 11 ++++++++--- kernel/bpf/syscall.c | 14 +++++++++++++- 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3a38db315f7f..756b85f0d0d3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -189,10 +189,19 @@ enum btf_field_type { BPF_RB_NODE | BPF_RB_ROOT, }; +typedef void (*btf_dtor_kfunc_t)(void *); +typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *); + struct btf_field_kptr { struct btf *btf; struct module *module; - btf_dtor_kfunc_t dtor; + union { + /* dtor used if btf_is_kernel(btf), otherwise the type + * is program-allocated and obj_drop is used + */ + btf_dtor_kfunc_t dtor; + btf_dtor_obj_drop obj_drop; + }; u32 btf_id; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bba0827e8c4..d53b10cc55f2 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -121,8 +121,6 @@ struct btf_struct_metas { struct btf_struct_meta types[]; }; -typedef void (*btf_dtor_kfunc_t)(void *); - extern const struct file_operations btf_fops; void btf_get(struct btf *btf); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 37779ceefd09..66fad7a16b6c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, return -EINVAL; } +extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) 
{ struct module *mod = NULL; const struct btf_type *t; - struct btf *kernel_btf; + /* If a matching btf type is found in kernel or module BTFs, kptr_ref + * is that BTF, otherwise it's program BTF + */ + struct btf *kptr_btf; int ret; s32 id; @@ -3565,7 +3570,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, */ t = btf_type_by_id(btf, info->kptr.type_id); id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), - &kernel_btf); + &kptr_btf); + if (id == -ENOENT) { + /* btf_parse_kptr should only be called w/ btf = program BTF */ + WARN_ON_ONCE(btf_is_kernel(btf)); + + /* Type exists only in program BTF. Assume that it's a MEM_ALLOC + * kptr allocated via bpf_obj_new + */ + field->kptr.dtor = (void *)&__bpf_obj_drop_impl; + id = info->kptr.type_id; + kptr_btf = (struct btf *)btf; + btf_get(kptr_btf); + goto found_dtor; + } if (id < 0) return id; @@ -3582,20 +3600,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, * can be used as a referenced pointer and be stored in a map at * the same time. */ - dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); + dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id); if (dtor_btf_id < 0) { ret = dtor_btf_id; goto end_btf; } - dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); + dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id); if (!dtor_func) { ret = -ENOENT; goto end_btf; } - if (btf_is_module(kernel_btf)) { - mod = btf_try_get_module(kernel_btf); + if (btf_is_module(kptr_btf)) { + mod = btf_try_get_module(kptr_btf); if (!mod) { ret = -ENXIO; goto end_btf; @@ -3605,7 +3623,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, /* We already verified dtor_func to be btf_type_is_func * in register_btf_id_dtor_kfuncs. */ - dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); + dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off); addr = kallsyms_lookup_name(dtor_func_name); if (!addr) { ret = -EINVAL; @@ -3614,14 +3632,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, field->kptr.dtor = (void *)addr; } +found_dtor: field->kptr.btf_id = id; - field->kptr.btf = kernel_btf; + field->kptr.btf = kptr_btf; field->kptr.module = mod; return 0; end_mod: module_put(mod); end_btf: - btf_put(kernel_btf); + btf_put(kptr_btf); return ret; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f9b7eeedce08..77d64b6951b9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) +{ + if (rec) + bpf_obj_free_fields(rec, p); + bpf_mem_free(&bpf_global_ma, p); +} + __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; - if (meta) - bpf_obj_free_fields(meta->record, p); - bpf_mem_free(&bpf_global_ma, p); + __bpf_obj_drop_impl(p, meta ? 
meta->record : NULL); } static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cc4b7684910c..0684febc447a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) return; fields = rec->fields; for (i = 0; i < rec->cnt; i++) { + struct btf_struct_meta *pointee_struct_meta; const struct btf_field *field = &fields[i]; void *field_ptr = obj + field->offset; + void *xchgd_field; switch (fields[i].type) { case BPF_SPIN_LOCK: @@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) WRITE_ONCE(*(u64 *)field_ptr, 0); break; case BPF_KPTR_REF: - field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); + xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); + if (!btf_is_kernel(field->kptr.btf)) { + pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, + field->kptr.btf_id); + WARN_ON_ONCE(!pointee_struct_meta); + field->kptr.obj_drop(xchgd_field, pointee_struct_meta ? + pointee_struct_meta->record : + NULL); + } else { + field->kptr.dtor(xchgd_field); + } break; case BPF_LIST_HEAD: if (WARN_ON_ONCE(rec->spin_lock_off < 0)) From 738c96d5e2e3700b370f02ac84a9dc159ca81f25 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:42 -0800 Subject: [PATCH 040/260] bpf: Allow local kptrs to be exchanged via bpf_kptr_xchg The previous patch added necessary plumbing for verifier and runtime to know what to do with non-kernel PTR_TO_BTF_IDs in map values, but didn't provide any way to get such local kptrs into a map value. This patch modifies verifier handling of bpf_kptr_xchg to allow MEM_ALLOC kptr types. check_reg_type is modified to accept MEM_ALLOC-flagged input to bpf_kptr_xchg despite such types not being in btf_ptr_types. This could have been done with a MAYBE_MEM_ALLOC equivalent to MAYBE_NULL, but bpf_kptr_xchg is the only helper that I can foresee using MAYBE_MEM_ALLOC, so keep it special-cased for now. The verifier tags bpf_kptr_xchg retval MEM_ALLOC if and only if the BTF associated with the retval is not kernel BTF.
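A hedged sketch of the program-side pattern this enables (struct and map-value field names are illustrative; the selftest added later in this series exercises the same flow):

	struct node_data *n = bpf_obj_new(typeof(*n));	/* MEM_ALLOC local kptr */

	if (!n)
		return 0;
	n->key = 42;
	/* Whatever was stashed before comes back MEM_ALLOC-tagged (or NULL),
	 * so it must be released with bpf_obj_drop, not a kernel dtor.
	 */
	n = bpf_kptr_xchg(&mapval->node, n);
	if (n)
		bpf_obj_drop(n);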
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 073c56dbec63..519d465407ef 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7123,6 +7123,9 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (arg_type & PTR_MAYBE_NULL) type &= ~PTR_MAYBE_NULL; + if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC) + type &= ~MEM_ALLOC; + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; if (expected == NOT_INIT) @@ -7185,7 +7188,8 @@ found: break; } case PTR_TO_BTF_ID | MEM_ALLOC: - if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) { + if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && + meta->func_id != BPF_FUNC_kptr_xchg) { verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } @@ -9151,6 +9155,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (func_id == BPF_FUNC_kptr_xchg) { ret_btf = meta.kptr_field->kptr.btf; ret_btf_id = meta.kptr_field->kptr.btf_id; + if (!btf_is_kernel(ret_btf)) + regs[BPF_REG_0].type |= MEM_ALLOC; } else { if (fn->ret_btf_id == BPF_PTR_POISON) { verbose(env, "verifier internal error:"); From 5d8d6634cccf1ebd0db4e220e52e7128b030c7b4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:43 -0800 Subject: [PATCH 041/260] selftests/bpf: Add local kptr stashing test Add a new selftest, local_kptr_stash, which uses bpf_kptr_xchg to stash a bpf_obj_new-allocated object in a map. Test the following scenarios: * Stash two rb_nodes in an arraymap, don't unstash them, rely on map free to destruct them * Stash two rb_nodes in an arraymap, unstash the second one in a separate program, rely on map free to destruct first Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/local_kptr_stash.c | 60 ++++++++++ .../selftests/bpf/progs/local_kptr_stash.c | 108 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c create mode 100644 tools/testing/selftests/bpf/progs/local_kptr_stash.c diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c new file mode 100644 index 000000000000..76f1da877f81 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include + +#include "local_kptr_stash.skel.h" +static void test_local_kptr_stash_simple(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); + + local_kptr_stash__destroy(skel); +} + +static void test_local_kptr_stash_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.unstash_rb_node), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_EQ(opts.retval, 42, "local_kptr_stash_add_nodes retval"); + + local_kptr_stash__destroy(skel); +} + +void test_local_kptr_stash_success(void) +{ + if (test__start_subtest("local_kptr_stash_simple")) + test_local_kptr_stash_simple(); + if (test__start_subtest("local_kptr_stash_unstash")) + test_local_kptr_stash_unstash(); +} diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c new file mode 100644 index 000000000000..0ef286da092b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +struct map_value { + struct prog_test_ref_kfunc *not_kptr; + struct prog_test_ref_kfunc __kptr *val; + struct node_data __kptr *node; +}; + +/* This is necessary so that LLVM generates BTF for node_data struct + * If it's not included, a fwd reference for node_data will be generated but + * no struct. 
Example BTF of "node" field in map_value when not included: + * + * [10] PTR '(anon)' type_id=35 + * [34] FWD 'node_data' fwd_kind=struct + * [35] TYPE_TAG 'kptr_ref' type_id=34 + * + * (with no node_data struct defined) + * Had to do the same w/ bpf_kfunc_call_test_release below + */ +struct node_data *just_here_because_btf_bug; + +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 2); +} some_nodes SEC(".maps"); + +static int create_and_stash(int idx, int val) +{ + struct map_value *mapval; + struct node_data *res; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key = val; + + res = bpf_kptr_xchg(&mapval->node, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") +long stash_rb_nodes(void *ctx) +{ + return create_and_stash(0, 41) ?: create_and_stash(1, 42); +} + +SEC("tc") +long unstash_rb_node(void *ctx) +{ + struct map_value *mapval; + struct node_data *res; + long retval; + int key = 1; + + mapval = bpf_map_lookup_elem(&some_nodes, &key); + if (!mapval) + return 1; + + res = bpf_kptr_xchg(&mapval->node, NULL); + if (res) { + retval = res->key; + bpf_obj_drop(res); + return retval; + } + return 1; +} + +SEC("tc") +long stash_test_ref_kfunc(void *ctx) +{ + struct prog_test_ref_kfunc *res; + struct map_value *mapval; + int key = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &key); + if (!mapval) + return 1; + + res = bpf_kptr_xchg(&mapval->val, NULL); + if (res) + bpf_kfunc_call_test_release(res); + return 0; +} + +char _license[] SEC("license") = "GPL"; From 34f0677e7afd3a292bc1aadda7ce8e35faedb204 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Mar 2023 11:40:17 -0700 Subject: [PATCH 042/260] bpf: fix precision propagation verbose logging Fix wrong order of frame index vs register/slot index in precision propagation verbose (level 2) output. It's wrong and very confusing as is. Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230313184017.4083374-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 519d465407ef..883d4ff2e288 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15068,7 +15068,7 @@ static int propagate_precision(struct bpf_verifier_env *env, !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "frame %d: propagating r%d\n", i, fr); + verbose(env, "frame %d: propagating r%d\n", fr, i); err = mark_chain_precision_frame(env, fr, i); if (err < 0) return err; @@ -15084,7 +15084,7 @@ static int propagate_precision(struct bpf_verifier_env *env, continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating fp%d\n", - (-i - 1) * BPF_REG_SIZE, fr); + fr, (-i - 1) * BPF_REG_SIZE); err = mark_chain_precision_stack_frame(env, fr, i); if (err < 0) return err; From 22df776a9a866713d9decfb92b633bcfdb571954 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 15 Feb 2023 17:30:33 -0600 Subject: [PATCH 043/260] tasks: Extract rcu_users out of union In commit 3fbd7ee285b2b ("tasks: Add a count of task RCU users"), a count on the number of RCU users was added to struct task_struct. 
This was done so as to enable the removal of task_rcu_dereference(), and allow tasks to be protected by RCU even after exiting and being removed from the runqueue. In this commit, the 'refcount_t rcu_users' field that keeps track of this refcount was put into a union co-located with 'struct rcu_head rcu', so as to avoid taking up any extra space in task_struct. This was possible to do safely, because the field was only ever decremented by a static set of specific callers, and then never incremented again. While this restriction of there only being a small, static set of users of this field has worked fine, it prevents us from leveraging the field to use RCU to protect tasks in other contexts. During tracing, for example, it would be useful to be able to collect some tasks that performed a certain operation, put them in a map, and then periodically summarize who they are, which cgroup they're in, how much CPU time they've utilized, etc. While this can currently be done with 'usage', it becomes tricky when a task is already in a map, or if a reference should only be taken if a task is valid and will not soon be reaped. Ideally, we could do something like pass a reference to a map value, and then try to acquire a reference to the task in an RCU read region by using refcount_inc_not_zero(). Similarly, in sched_ext, schedulers are using integer pids to remember tasks, and then looking them up with find_task_by_pid_ns(). This is slow, error prone, and adds complexity. It would be more convenient and performant if BPF schedulers could instead store tasks directly in maps, and then leverage RCU to ensure they can be safely accessed with low overhead. Finally, overloading fields like this is error prone. Someone that wants to use 'rcu_users' could easily overlook the fact that once the rcu callback is scheduled, the refcount will go back to being nonzero, thus precluding the use of refcount_inc_not_zero(). Furthermore, as described below, it's possible to extract the fields of the union without changing the size of task_struct. There are several possible ways to enable this: 1. The lightest touch approach is likely the one proposed in this patch, which is to simply extract 'rcu_users' and 'rcu' from the union, so that scheduling the 'rcu' callback doesn't overwrite the 'rcu_users' refcount. If we have a trusted task pointer, this would allow us to use refcnt_inc_not_zero() inside of an RCU region to determine if we can safely acquire a reference to the task and store it in a map. As mentioned below, this can be done without changing the size of task_struct, by moving the location of the union to another location that has padding gaps we can fill in. 2. Removing 'refcount_t rcu_users', and instead having the entire task be freed in an rcu callback. This is likely the most sound overall design, though it changes the behavioral semantics exposed to callers, who currently expect that a task that's successfully looked up in e.g. the pid_list with find_task_by_pid_ns(), can always have a 'usage' reference acquired on them, as it's guaranteed to be > 0 until after the next gp. In order for this approach to work, we'd have to audit all callers. This approach also slightly changes behavior observed by user space by not invoking trace_sched_process_free() until the whole task_struct is actually being freed, rather than just after it's exited. It also may change timings, as memory will be freed in an RCU callback rather than immediately when the final 'usage' refcount drops to 0. 
This also is arguably a benefit, as it provides more predictable performance to callers who are refcounting tasks. 3. There may be other solutions as well that don't require changing the layout of task_struct. For example, we could possibly do something complex from the BPF side, such as listen for task exit and remove a task from a map when the task is exiting. This would likely require significant custom handling for task_struct in the verifier, so a more generalizable solution is likely warranted. As mentioned above, this patch proposes the lightest-touch approach which allows callers elsewhere in the kernel to use 'rcu_users' to ensure the lifetime of a task, by extracting 'rcu_users' and 'rcu' from the union. There is no size change in task_struct with this patch. Cc: Linus Torvalds Cc: Eric W. Biederman Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: David Vernet Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20230215233033.889644-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/sched.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 63d242164b1a..b11b4517760f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1318,11 +1318,6 @@ struct task_struct { struct tlbflush_unmap_batch tlb_ubc; - union { - refcount_t rcu_users; - struct rcu_head rcu; - }; - /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; @@ -1459,6 +1454,8 @@ struct task_struct { unsigned long saved_state_change; # endif #endif + struct rcu_head rcu; + refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; From 9e36a204bd43553a9cd4bd574612cd9a5df791ea Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 13 Mar 2023 14:46:41 -0700 Subject: [PATCH 044/260] bpf: Disable migration when freeing stashed local kptr using obj drop When a local kptr is stashed in a map and freed when the map goes away, currently an error like the below appears: [ 39.195695] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u32:15/2875 [ 39.196549] caller is bpf_mem_free+0x56/0xc0 [ 39.196958] CPU: 15 PID: 2875 Comm: kworker/u32:15 Tainted: G O 6.2.0-13016-g22df776a9a86 #4477 [ 39.197897] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 39.198949] Workqueue: events_unbound bpf_map_free_deferred [ 39.199470] Call Trace: [ 39.199703] [ 39.199911] dump_stack_lvl+0x60/0x70 [ 39.200267] check_preemption_disabled+0xbf/0xe0 [ 39.200704] bpf_mem_free+0x56/0xc0 [ 39.201032] ? bpf_obj_new_impl+0xa0/0xa0 [ 39.201430] bpf_obj_free_fields+0x1cd/0x200 [ 39.201838] array_map_free+0xad/0x220 [ 39.202193] ? finish_task_switch+0xe5/0x3c0 [ 39.202614] bpf_map_free_deferred+0xea/0x210 [ 39.203006] ? lockdep_hardirqs_on_prepare+0xe/0x220 [ 39.203460] process_one_work+0x64f/0xbe0 [ 39.203822] ? pwq_dec_nr_in_flight+0x110/0x110 [ 39.204264] ? do_raw_spin_lock+0x107/0x1c0 [ 39.204662] ? lockdep_hardirqs_on_prepare+0xe/0x220 [ 39.205107] worker_thread+0x74/0x7a0 [ 39.205451] ? process_one_work+0xbe0/0xbe0 [ 39.205818] kthread+0x171/0x1a0 [ 39.206111] ? kthread_complete_and_exit+0x20/0x20 [ 39.206552] ret_from_fork+0x1f/0x30 [ 39.206886] This happens because the call to __bpf_obj_drop_impl I added in the patch adding support for stashing local kptrs doesn't disable migration. 
Prior to that patch, __bpf_obj_drop_impl logic only ran when called by a BPF program, whereas now it can be called from the map free path, so it's necessary to explicitly disable migration. Also, refactor a bit to just call __bpf_obj_drop_impl directly instead of bothering with the dtor union and setting a pointer to obj_drop. Fixes: c8e187540914 ("bpf: Support __kptr to local kptrs") Reported-by: Alexei Starovoitov Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230313214641.3731908-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 12 ++++-------- kernel/bpf/btf.c | 4 +--- kernel/bpf/syscall.c | 10 +++++++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 756b85f0d0d3..71cc92a4ba48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,18 +190,14 @@ enum btf_field_type { }; typedef void (*btf_dtor_kfunc_t)(void *); -typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *); struct btf_field_kptr { struct btf *btf; struct module *module; - union { - /* dtor used if btf_is_kernel(btf), otherwise the type - * is program-allocated and obj_drop is used - */ - btf_dtor_kfunc_t dtor; - btf_dtor_obj_drop obj_drop; - }; + /* dtor used if btf_is_kernel(btf), otherwise the type is + * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used + */ + btf_dtor_kfunc_t dtor; u32 btf_id; }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 66fad7a16b6c..b7e5a5510b91 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3551,8 +3551,6 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, return -EINVAL; } -extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); - static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { @@ -3578,7 +3576,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, /* Type exists only in program BTF. Assume that it's a MEM_ALLOC * kptr allocated via bpf_obj_new */ - field->kptr.dtor = (void *)&__bpf_obj_drop_impl; + field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; btf_get(kptr_btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0684febc447a..5b88301a2ae0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -650,6 +650,8 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(obj + rec->timer_off); } +extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -679,9 +681,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, field->kptr.btf_id); WARN_ON_ONCE(!pointee_struct_meta); - field->kptr.obj_drop(xchgd_field, pointee_struct_meta ? - pointee_struct_meta->record : - NULL); + migrate_disable(); + __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? + pointee_struct_meta->record : + NULL); + migrate_enable(); } else { field->kptr.dtor(xchgd_field); } From 27d7fdf06fdb84455ff585b58c8034e2fab42583 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 13 Mar 2023 14:56:27 -0600 Subject: [PATCH 045/260] bpf: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing.
But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and samples in the bpf code still refer to this older debugfs path, so let's update them to avoid confusion. There are a few spots where the bpf code explicitly checks both tracefs and debugfs (tools/bpf/bpftool/tracelog.c and tools/lib/api/fs/fs.c) and I've left those alone so that the tools can continue to work with both paths. Signed-off-by: Ross Zwisler Acked-by: Michael S. Tsirkin Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230313205628.1058720-2-zwisler@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++---- samples/bpf/cpustat_kern.c | 4 ++-- samples/bpf/hbm.c | 4 ++-- samples/bpf/ibumad_kern.c | 4 ++-- samples/bpf/lwt_len_hist.sh | 2 +- samples/bpf/offwaketime_kern.c | 2 +- samples/bpf/task_fd_query_user.c | 4 ++-- samples/bpf/test_lwt_bpf.sh | 2 +- samples/bpf/test_overhead_tp.bpf.c | 4 ++-- tools/include/uapi/linux/bpf.h | 8 ++++---- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8c534e05b0a..13129df937cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1647,17 +1647,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c index 5aefd19cdfa1..944f13fe164a 100644 --- a/samples/bpf/cpustat_kern.c +++ b/samples/bpf/cpustat_kern.c @@ -76,8 +76,8 @@ struct { /* * The trace events for cpu_idle and cpu_frequency are taken from: - * /sys/kernel/debug/tracing/events/power/cpu_idle/format - * /sys/kernel/debug/tracing/events/power/cpu_frequency/format + * /sys/kernel/tracing/events/power/cpu_idle/format + * /sys/kernel/tracing/events/power/cpu_frequency/format * * These two events have same format, so define one common structure. 
*/ diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 516fbac28b71..ff58ec43f56a 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -65,7 +65,7 @@ static void Usage(void); static void read_trace_pipe2(void); static void do_error(char *msg, bool errno_flag); -#define DEBUGFS "/sys/kernel/debug/tracing/" +#define TRACEFS "/sys/kernel/tracing/" static struct bpf_program *bpf_prog; static struct bpf_object *obj; @@ -77,7 +77,7 @@ static void read_trace_pipe2(void) FILE *outf; char *outFname = "hbm_out.log"; - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0); if (trace_fd < 0) { printf("Error opening trace_pipe\n"); return; diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 9b193231024a..f07474c72525 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -39,8 +39,8 @@ struct { /* Taken from the current format defined in * include/trace/events/ib_umad.h * and - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format */ struct ib_umad_rw_args { u64 pad; diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh index 7078bfcc4f4d..381b2c634784 100755 --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -5,7 +5,7 @@ NS1=lwt_ns1 VETH0=tst_lwt1a VETH1=tst_lwt1b BPF_PROG=lwt_len_hist.bpf.o -TRACE_ROOT=/sys/kernel/debug/tracing +TRACE_ROOT=/sys/kernel/tracing function cleanup { # To reset saved histogram, remove pinned map diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index eb4d94742e6b..23f12b47e9e5 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -110,7 +110,7 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta) } #if 1 -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index a33d74bd3a4b..1e61f2180470 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -235,7 +235,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) struct bpf_link *link; ssize_t bytes; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events", event_type); kfd = open(buf, O_WRONLY | O_TRUNC, 0); CHECK_PERROR_RET(kfd < 0); @@ -252,7 +252,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) close(kfd); kfd = -1; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id", event_type, event_alias); efd = open(buf, O_RDONLY, 0); CHECK_PERROR_RET(efd < 0); diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh index 2e9f5126963b..0bf2d0f6bf4b 100755 --- a/samples/bpf/test_lwt_bpf.sh +++ b/samples/bpf/test_lwt_bpf.sh @@ -21,7 +21,7 @@ IP_LOCAL="192.168.99.1" PROG_SRC="test_lwt_bpf.c" BPF_PROG="test_lwt_bpf.o" -TRACE_ROOT=/sys/kernel/debug/tracing +TRACE_ROOT=/sys/kernel/tracing CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context) function lookup_mac() diff --git a/samples/bpf/test_overhead_tp.bpf.c 
b/samples/bpf/test_overhead_tp.bpf.c index 67cab3881969..8b498328e961 100644 --- a/samples/bpf/test_overhead_tp.bpf.c +++ b/samples/bpf/test_overhead_tp.bpf.c @@ -7,7 +7,7 @@ #include "vmlinux.h" #include -/* from /sys/kernel/debug/tracing/events/task/task_rename/format */ +/* from /sys/kernel/tracing/events/task/task_rename/format */ struct task_rename { __u64 pad; __u32 pid; @@ -21,7 +21,7 @@ int prog(struct task_rename *ctx) return 0; } -/* from /sys/kernel/debug/tracing/events/fib/fib_table_lookup/format */ +/* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */ struct fib_table_lookup { __u64 pad; __u32 tb_id; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d8c534e05b0a..13129df937cd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1647,17 +1647,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * From ab4c15feb2ebcf9f4abe31457d7cbc8f3de9c2ab Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 13 Mar 2023 14:56:28 -0600 Subject: [PATCH 046/260] selftests/bpf: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many tests in the bpf selftest code still refer to this older debugfs path, so let's update them to avoid confusion. Signed-off-by: Ross Zwisler Acked-by: Michael S. 
Tsirkin Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230313205628.1058720-3-zwisler@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/get_cgroup_id_user.c | 9 +++++++-- .../selftests/bpf/prog_tests/kprobe_multi_test.c | 7 ++++++- .../selftests/bpf/prog_tests/task_fd_query_tp.c | 9 +++++++-- .../selftests/bpf/prog_tests/tp_attach_query.c | 9 +++++++-- .../testing/selftests/bpf/prog_tests/trace_printk.c | 10 +++++++--- .../selftests/bpf/prog_tests/trace_vprintk.c | 10 +++++++--- .../selftests/bpf/progs/test_stacktrace_map.c | 2 +- tools/testing/selftests/bpf/progs/test_tracepoint.c | 2 +- tools/testing/selftests/bpf/test_ftrace.sh | 7 ++++++- tools/testing/selftests/bpf/test_tunnel.sh | 13 +++++++++---- tools/testing/selftests/bpf/trace_helpers.c | 8 ++++++-- 11 files changed, 64 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 156743cf5870..aefd83ebdcd7 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -86,8 +86,13 @@ int main(int argc, char **argv) pid = getpid(); bpf_map_update_elem(pidmap_fd, &key, &pid, 0); - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/%s/id", probe_name); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/%s/id", probe_name); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 113dba349a57..22be0a9a5a0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -338,7 +338,12 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel) * Filtering out duplicates by using hashmap__add, which won't * add existing entry. 
*/ - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + if (!f) return -EINVAL; diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c index c717741bf8b6..c91eda624657 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c @@ -17,8 +17,13 @@ static void test_task_fd_query_tp_core(const char *probe_name, if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto close_prog; - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/%s/id", probe_name); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/%s/id", probe_name); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index 770fcc3bb1ba..655d69f0ff0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -16,8 +16,13 @@ void serial_test_tp_attach_query(void) for (i = 0; i < num_progs; i++) obj[i] = NULL; - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/sched/sched_switch/id"); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index cade7f12315f..7b9124d506a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -5,7 +5,8 @@ #include "trace_printk.lskel.h" -#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" void serial_test_trace_printk(void) @@ -34,8 +35,11 @@ void serial_test_trace_printk(void) if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; - fp = fopen(TRACEBUF, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + if (access(TRACEFS_PIPE, F_OK) == 0) + fp = fopen(TRACEFS_PIPE, "r"); + else + fp = fopen(DEBUGFS_PIPE, "r"); + if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) goto cleanup; /* We do not want to wait forever if this test fails... 
*/ diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 7a4e313e8558..44ea2fd88f4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -5,7 +5,8 @@ #include "trace_vprintk.lskel.h" -#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" void serial_test_trace_vprintk(void) @@ -27,8 +28,11 @@ void serial_test_trace_vprintk(void) if (!ASSERT_OK(err, "trace_vprintk__attach")) goto cleanup; - fp = fopen(TRACEBUF, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + if (access(TRACEFS_PIPE, F_OK) == 0) + fp = fopen(TRACEFS_PIPE, "r"); + else + fp = fopen(DEBUGFS_PIPE, "r"); + if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) goto cleanup; /* We do not want to wait forever if this test fails... */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 728dbd39eff0..47568007b668 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -38,7 +38,7 @@ struct { __type(value, stack_trace_t); } stack_amap SEC(".maps"); -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 43bd7a20cc50..4cb8bbb6a320 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -4,7 +4,7 @@ #include #include -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh index 20de7bb873bc..f5109eb0e951 100755 --- a/tools/testing/selftests/bpf/test_ftrace.sh +++ b/tools/testing/selftests/bpf/test_ftrace.sh @@ -1,6 +1,11 @@ #!/bin/bash -TR=/sys/kernel/debug/tracing/ +if [[ -e /sys/kernel/tracing/trace ]]; then + TR=/sys/kernel/tracing/ +else + TR=/sys/kernel/debug/tracing/ +fi + clear_trace() { # reset trace output echo > $TR/trace } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 06857b689c11..2dec7dbf29a2 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -571,8 +571,13 @@ setup_xfrm_tunnel() test_xfrm_tunnel() { + if [[ -e /sys/kernel/tracing/trace ]]; then + TRACE=/sys/kernel/tracing/trace + else + TRACE=/sys/kernel/debug/tracing/trace + fi config_device - > /sys/kernel/debug/tracing/trace + > ${TRACE} setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} @@ -581,11 +586,11 @@ test_xfrm_tunnel() ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 sleep 1 - grep "reqid 1" /sys/kernel/debug/tracing/trace + grep "reqid 1" ${TRACE} check_err $? - grep "spi 0x1" /sys/kernel/debug/tracing/trace + grep "spi 0x1" ${TRACE} check_err $? 
- grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace + grep "remote ip 0xac100164" ${TRACE} check_err $? cleanup diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 09a16a77bae4..934bf28fc888 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -12,7 +12,8 @@ #include #include "trace_helpers.h" -#define DEBUGFS "/sys/kernel/debug/tracing/" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define MAX_SYMS 300000 static struct ksym syms[MAX_SYMS]; @@ -136,7 +137,10 @@ void read_trace_pipe(void) { int trace_fd; - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (access(TRACEFS_PIPE, F_OK) == 0) + trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0); + else + trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0); if (trace_fd < 0) return; From b9fe8e8d03d0df28b2431e3aaf8e115cf7bf2f65 Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Fri, 10 Mar 2023 23:38:14 +0000 Subject: [PATCH 047/260] bpf, docs: Add signed comparison example Improve clarity by adding an example of a signed comparison instruction Signed-off-by: Dave Thaler Acked-by: David Vernet Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230310233814.4641-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 5e43e14abe80..b44640589055 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -11,7 +11,8 @@ Documentation conventions ========================= For brevity, this document uses the type notion "u64", "u32", etc. -to mean an unsigned integer whose width is the specified number of bits. +to mean an unsigned integer whose width is the specified number of bits, +and "s32", etc. to mean a signed integer of the specified number of bits. Registers and calling convention ================================ @@ -264,6 +265,14 @@ BPF_JSLE 0xd0 PC += off if dst <= src signed The eBPF program needs to store the return value into register R0 before doing a BPF_EXIT. +Example: + +``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means:: + + if (s32)dst s>= (s32)src goto +offset + +where 's>=' indicates a signed '>=' comparison. + Helper functions ~~~~~~~~~~~~~~~~ From c9267aa8b794c2188d49c7d7bd2990e98b2d6b84 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:43 -0700 Subject: [PATCH 048/260] bpf: Fix bpf_strncmp proto. bpf_strncmp() doesn't write into its first argument. Make sure that the verifier knows about it. 
Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-2-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 77d64b6951b9..f753676ef652 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -571,7 +571,7 @@ static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_PTR_TO_CONST_STR, }; From 3e30be4288b31702d4898487a74e80ba14150a9f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:44 -0700 Subject: [PATCH 049/260] bpf: Allow helpers access trusted PTR_TO_BTF_ID. The verifier rejects the code: bpf_strncmp(task->comm, 16, "my_task"); with the message: 16: (85) call bpf_strncmp#182 R1 type=trusted_ptr_ expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf Teach the verifier that such access pattern is safe. Do not allow untrusted and legacy ptr_to_btf_id to be passed into helpers. Reported-by: David Vernet Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-3-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 883d4ff2e288..2bbd89279070 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6303,6 +6303,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER, meta); + case PTR_TO_BTF_ID: + return check_ptr_to_btf_access(env, regs, regno, reg->off, + access_size, BPF_READ, -1); case PTR_TO_CTX: /* in case the function doesn't know how to access the context, * (because we are in a program of type SYSCALL for example), we @@ -7014,6 +7017,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MEM, PTR_TO_MEM | MEM_RINGBUF, PTR_TO_BUF, + PTR_TO_BTF_ID | PTR_TRUSTED, }, }; @@ -7145,6 +7149,17 @@ found: if (base_type(reg->type) != PTR_TO_BTF_ID) return 0; + if (compatible == &mem_types) { + if (!(arg_type & MEM_RDONLY)) { + verbose(env, + "%s() may write into memory pointed by R%d type=%s\n", + func_id_name(meta->func_id), + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + return 0; + } + switch ((int)reg->type) { case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_TRUSTED: From f25fd6088216bd257902e5c212177cddcb291218 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:45 -0700 Subject: [PATCH 050/260] selftests/bpf: Add various tests to check helper access into ptr_to_btf_id. Add various tests to check helper access into ptr_to_btf_id. 
Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-4-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/progs/task_kfunc_failure.c | 36 +++++++++++++++++++ .../selftests/bpf/progs/task_kfunc_success.c | 4 +++ 2 files changed, 40 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 002c7f69e47f..27994d6b2914 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -301,3 +301,39 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) bpf_task_release(acquired); return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("access beyond the end of member comm") +int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags) +{ + bpf_strncmp(task->comm, 17, "foo"); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("access beyond the end of member comm") +int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags) +{ + bpf_strncmp(task->comm + 1, 16, "foo"); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("write into memory") +int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags) +{ + bpf_probe_read_kernel(task->comm, 16, task->comm); + return 0; +} + +SEC("fentry/__set_task_comm") +__failure __msg("R1 type=ptr_ expected") +int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec) +{ + /* + * task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee + * its safety. Hence it cannot be accessed with normal load insns. + */ + bpf_strncmp(task->comm, 16, "foo"); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index aebc4bb14e7d..4f61596b0242 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -207,6 +207,10 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla if (!is_test_kfunc_task()) return 0; + bpf_strncmp(task->comm, 12, "foo"); + bpf_strncmp(task->comm, 16, "foo"); + bpf_strncmp(&task->comm[8], 4, "foo"); + if (is_pid_lookup_valid(-1)) { err = 1; return 0; From 487deb3e3393cccff0f148c4703efb185d46e314 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:50 +0100 Subject: [PATCH 051/260] selftests/bpf: robustify test_xdp_do_redirect with more payload magics Currently, the test relies on that only dropped ("xmitted") frames will be recycled and if a frame became an skb, it will be freed later by the stack and never come back to its page_pool. So, it easily gets broken by trying to recycle skbs[0]: test_xdp_do_redirect:PASS:pkt_count_xdp 0 nsec test_xdp_do_redirect:FAIL:pkt_count_zero unexpected pkt_count_zero: actual 9936 != expected 2 test_xdp_do_redirect:PASS:pkt_count_tc 0 nsec That huge mismatch happened because after the TC ingress hook zeroes the magic, the page gets recycled when skb is freed, not returned to the MM layer. "Live frames" mode initializes only new pages and keeps the recycled ones as is by design, so they appear with zeroed magic on the Rx path again. 
Expand the possible magic values from two: 0 (was "xmitted"/dropped or did hit the TC hook) and 0x42 (hit the input XDP prog) to three: the new one will mark frames hit the TC hook, so that they will elide both @pkt_count_zero and @pkt_count_xdp. They can then be recycled to their page_pool or returned to the page allocator, this won't affect the counters anyhow. Just make sure to mark them as "input" (0x42) when they appear on the Rx path again. Also make an enum from those magics, so that they will be always visible and can be changed in just one place anytime. This also eases adding any new marks later on. Link: https://github.com/kernel-patches/bpf/actions/runs/4386538411/jobs/7681081789 Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-2-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/test_xdp_do_redirect.c | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 77a123071940..cd2d4e3258b8 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -4,6 +4,19 @@ #define ETH_ALEN 6 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr)) + +/** + * enum frame_mark - magics to distinguish page/packet paths + * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC. + * @MARK_IN: frame is being processed by the input XDP prog. + * @MARK_SKB: frame did hit the TC ingress hook as an skb. + */ +enum frame_mark { + MARK_XMIT = 0U, + MARK_IN = 0x42, + MARK_SKB = 0x45, +}; + const volatile int ifindex_out; const volatile int ifindex_in; const volatile __u8 expect_dst[ETH_ALEN]; @@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp) if (*metadata != 0x42) return XDP_ABORTED; - if (*payload == 0) { - *payload = 0x42; + if (*payload == MARK_XMIT) pkts_seen_zero++; - } + + *payload = MARK_IN; if (bpf_xdp_adjust_meta(xdp, 4)) return XDP_ABORTED; @@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp) return ret; } -static bool check_pkt(void *data, void *data_end) +static bool check_pkt(void *data, void *data_end, const __u32 mark) { struct ipv6hdr *iph = data + sizeof(struct ethhdr); __u8 *payload = data + HDR_SZ; @@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end) if (payload + 1 > data_end) return false; - if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42) + if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN) return false; /* reset the payload so the same packet doesn't get counted twice when * it cycles back through the kernel path and out the dst veth */ - *payload = 0; + *payload = mark; return true; } @@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; void *data_end = (void *)(long)xdp->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_XMIT)) pkts_seen_xdp++; - /* Return XDP_DROP to make sure the data page is recycled, like when it - * exits a physical NIC. Recycled pages will be counted in the + /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like + * it exited a physical NIC. Those pages will be counted in the * pkts_seen_zero counter above. 
*/ return XDP_DROP; @@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb) void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_SKB)) pkts_seen_tc++; + /* Will be either recycled or freed, %MARK_SKB makes sure it won't + * hit any of the counters above. + */ return 0; } From 2c854e5fcd7e243f5a7cf6a6afa0ef83060c903c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:51 +0100 Subject: [PATCH 052/260] net: page_pool, skbuff: make skb_mark_for_recycle() always available skb_mark_for_recycle() is guarded with CONFIG_PAGE_POOL, this creates unneeded complication when using it in the generic code. For now, it's only used in the drivers always selecting Page Pool, so this works. Move the guards so that preprocessor will cut out only the operation itself and the function will still be a noop on !PAGE_POOL systems, but available there as well. No functional changes. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303020342.Wi2PRFFH-lkp@intel.com Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-3-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe661011644b..3f3a2a82a86b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ From 9c94bbf9a87b264294f42e6cc0f76d87854733ec Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:52 +0100 Subject: [PATCH 053/260] xdp: recycle Page Pool backed skbs built from XDP frames __xdp_build_skb_from_frame() state(d): /* Until page_pool get SKB return path, release DMA here */ Page Pool got skb pages recycling in April 2021, but missed this function. xdp_release_frame() is relevant only for Page Pool backed frames and it detaches the page from the corresponding page_pool in order to make it freeable via page_frag_free(). It can instead just mark the output skb as eligible for recycling if the frame is backed by a pp. No change for other memory model types (the same condition check as before). cpumap redirect and veth on Page Pool drivers now become zero-alloc (or almost). 
Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-4-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- net/core/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc553317..a2237cfca8e9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -658,8 +658,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); From d4e492338d11937c55841b1279287280d6e35894 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:53 +0100 Subject: [PATCH 054/260] xdp: remove unused {__,}xdp_release_frame() __xdp_build_skb_from_frame() was the last user of {__,}xdp_release_frame(), which detaches pages from the page_pool. All the consumers now recycle Page Pool skbs and page, except mlx5, stmmac and tsnep drivers, which use page_pool_release_page() directly (might change one day). It's safe to assume this functionality is not needed anymore and can be removed (in favor of recycling). Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-5-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 29 ----------------------------- net/core/xdp.c | 15 --------------- 2 files changed, 44 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index d517bfac937b..5393b3ebe56e 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -/* When sending xdp_frame into the network stack, then there is no - * return point callback, which is needed to release e.g. DMA-mapping - * resources with page_pool. Thus, have explicit function to release - * frame resources. 
- */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem); -static inline void xdp_release_frame(struct xdp_frame *xdpf) -{ - struct xdp_mem_info *mem = &xdpf->mem; - struct skb_shared_info *sinfo; - int i; - - /* Curr only page_pool needs this */ - if (mem->type != MEM_TYPE_PAGE_POOL) - return; - - if (likely(!xdp_frame_has_frags(xdpf))) - goto out; - - sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); - - __xdp_release_frame(page_address(page), mem); - } -out: - __xdp_release_frame(xdpf->data, mem); -} - static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; diff --git a/net/core/xdp.c b/net/core/xdp.c index a2237cfca8e9..8d3ad315f18d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ out: } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { From 3c2611bac08a834697be918ac357eaff2e47d5b3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Mar 2023 15:28:11 -0700 Subject: [PATCH 055/260] selftests/bpf: Fix trace_virtqueue_add_sgs test issue with LLVM 17. LLVM commit https://reviews.llvm.org/D143726 introduced hoistMinMax optimization that transformed (i < VIRTIO_MAX_SGS) && (i < out_sgs) into i < MIN(VIRTIO_MAX_SGS, out_sgs) and caused the verifier to stop recognizing such loop as bounded. Which resulted in the following test failure: libbpf: prog 'trace_virtqueue_add_sgs': BPF program load failed: Bad address libbpf: prog 'trace_virtqueue_add_sgs': -- BEGIN PROG LOAD LOG -- The sequence of 8193 jumps is too complex. verification time 789206 usec stack depth 56 processed 156446 insns (limit 1000000) max_states_per_insn 7 total_states 1746 peak_states 1701 mark_read 12 -- END PROG LOAD LOG -- libbpf: prog 'trace_virtqueue_add_sgs': failed to load: -14 libbpf: failed to load object 'loop6.bpf.o' Workaround the verifier limitation for now with inline asm that prevents this particular optimization. 
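The barrier used in the hunks below is __sink() from bpf_misc.h; a minimal sketch of its expansion, quoted from memory (treat the exact definition as an assumption):

	/* pretend to read and modify "expr", so the compiler cannot reason
	 * about its value across this statement
	 */
	#define __sink(expr) asm volatile("" : "+g"(expr))

Because out_sgs and in_sgs are routed through an opaque asm operand on every iteration, LLVM can no longer prove that the (i < VIRTIO_MAX_SGS) && (i < out_sgs) bound is loop-invariant, the min() hoisting does not fire, and the verifier keeps seeing a bounded loop.
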
Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/loop6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c index 38de0331e6b4..e4ff97fbcce1 100644 --- a/tools/testing/selftests/bpf/progs/loop6.c +++ b/tools/testing/selftests/bpf/progs/loop6.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -76,6 +77,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, return 0; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) { + __sink(out_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); @@ -85,6 +87,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, } for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) { + __sink(in_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); From b8a2e3f93d412114a1539ea97b59b3e6ed6e1f9a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Mar 2023 11:59:49 -1000 Subject: [PATCH 056/260] cgroup: Make current_cgns_cgroup_dfl() safe to call after exit_task_namespaces() The commit 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") added bpf_cgroup_from_id() which calls current_cgns_cgroup_dfl() through cgroup_get_from_id(). However, BPF programs may be attached to a point where current->nsproxy has already been cleared to NULL by exit_task_namespaces() and calling bpf_cgroup_from_id() would cause an oops. Just return the system-wide root if nsproxy has been cleared. This allows all cgroups to be looked up after the task has passed through exit_task_namespaces(), which semantically makes sense. Given that the only way to get this behavior is through BPF programs, it seems safe, but let's see what others think. Fixes: 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/ZBDuVWiFj2jiz3i8@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/cgroup/cgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 935e8121b21e..8a5294f4ce72 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1465,8 +1465,18 @@ static struct cgroup *current_cgns_cgroup_dfl(void) { struct css_set *cset; - cset = current->nsproxy->cgroup_ns->root_cset; - return __cset_cgroup_from_root(cset, &cgrp_dfl_root); + if (current->nsproxy) { + cset = current->nsproxy->cgroup_ns->root_cset; + return __cset_cgroup_from_root(cset, &cgrp_dfl_root); + } else { + /* + * NOTE: This function may be called from bpf_cgroup_from_id() + * on a task which has already passed exit_task_namespaces() and + * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all + * cgroups visible for lookups. + */ + return &cgrp_dfl_root.cgrp; + } } /* look up cgroup associated with given css_set on the specified hierarchy */ From 31bf1dbccfb0a9861d4846755096b3fff5687f8a Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Fri, 10 Mar 2023 08:40:59 +0100 Subject: [PATCH 057/260] bpf: Fix attaching fentry/fexit/fmod_ret/lsm to modules This resolves two problems with attachment of fentry/fexit/fmod_ret/lsm to functions located in modules: 1. The verifier tries to find the address to attach to in kallsyms.
This is always done by searching the entire kallsyms, not respecting the module in which the function is located. Such approach causes an incorrect attachment address to be computed if the function to attach to is shadowed by a function of the same name located earlier in kallsyms. 2. If the address to attach to is located in a module, the module reference is only acquired in register_fentry. If the module is unloaded between the place where the address is found (bpf_check_attach_target in the verifier) and register_fentry, it is possible that another module is loaded to the same address which may lead to potential errors. Since the attachment must contain the BTF of the program to attach to, we extract the module from it and search for the function address in the correct module (resolving problem no. 1). Then, the module reference is taken directly in bpf_check_attach_target and stored in the bpf program (in bpf_prog_aux). The reference is only released when the program is unloaded (resolving problem no. 2). Signed-off-by: Viktor Malik Acked-by: Jiri Olsa Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/3f6a9d8ae850532b5ef864ef16327b0f7a669063.1678432753.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 6 ++++++ kernel/bpf/trampoline.c | 28 ---------------------------- kernel/bpf/verifier.c | 18 +++++++++++++++++- kernel/module/internal.h | 5 +++++ 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 71cc92a4ba48..3ef98fb92987 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1103,6 +1103,7 @@ struct bpf_trampoline { struct bpf_attach_target_info { struct btf_func_model fmodel; long tgt_addr; + struct module *tgt_mod; const char *tgt_name; const struct btf_type *tgt_type; }; @@ -1406,6 +1407,7 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + struct module *mod; u32 num_exentries; struct exception_table_entry *extable; union { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5b88301a2ae0..099e9068bcdd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2067,6 +2067,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) { bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); + module_put(prog->aux->mod); kvfree(prog->aux->jited_linfo); kvfree(prog->aux->linfo); kfree(prog->aux->kfunc_tab); @@ -3113,6 +3114,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, if (err) goto out_unlock; + if (tgt_info.tgt_mod) { + module_put(prog->aux->mod); + prog->aux->mod = tgt_info.tgt_mod; + } + tr = bpf_trampoline_get(key, &tgt_info); if (!tr) { err = -ENOMEM; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d0ed7d6f5eec..f61d5138b12b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -172,26 +171,6 @@ out: return tr; } -static int bpf_trampoline_module_get(struct bpf_trampoline *tr) -{ - struct module *mod; - int err = 0; - - preempt_disable(); - mod = __module_text_address((unsigned long) tr->func.addr); - if (mod && !try_module_get(mod)) - err = -ENOENT; - preempt_enable(); - tr->mod = mod; - return err; -} - -static void bpf_trampoline_module_put(struct bpf_trampoline *tr) -{ - module_put(tr->mod); - tr->mod = NULL; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -202,8 +181,6 @@ static 
int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); - if (!ret) - bpf_trampoline_module_put(tr); return ret; } @@ -238,9 +215,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) tr->func.ftrace_managed = true; } - if (bpf_trampoline_module_get(tr)) - return -ENOENT; - if (tr->func.ftrace_managed) { ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); @@ -248,8 +222,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); } - if (ret) - bpf_trampoline_module_put(tr); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bbd89279070..60793f793ca6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24,6 +24,7 @@ #include #include #include +#include "../module/internal.h" #include "disasm.h" @@ -18307,6 +18308,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, const char *tname; struct btf *btf; long addr = 0; + struct module *mod = NULL; if (!btf_id) { bpf_log(log, "Tracing programs must provide btf_id\n"); @@ -18480,8 +18482,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, else addr = (long) tgt_prog->aux->func[subprog]->bpf_func; } else { - addr = kallsyms_lookup_name(tname); + if (btf_is_module(btf)) { + mod = btf_try_get_module(btf); + if (mod) + addr = find_kallsyms_symbol_value(mod, tname); + else + addr = 0; + } else { + addr = kallsyms_lookup_name(tname); + } if (!addr) { + module_put(mod); bpf_log(log, "The address of function %s cannot be found\n", tname); @@ -18521,11 +18532,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, break; } if (ret) { + module_put(mod); bpf_log(log, "%s is not sleepable\n", tname); return ret; } } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { if (tgt_prog) { + module_put(mod); bpf_log(log, "can't modify return codes of BPF programs\n"); return -EINVAL; } @@ -18534,6 +18547,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, !check_attach_modify_return(addr, tname)) ret = 0; if (ret) { + module_put(mod); bpf_log(log, "%s() is not modifiable\n", tname); return ret; } @@ -18544,6 +18558,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, tgt_info->tgt_addr = addr; tgt_info->tgt_name = tname; tgt_info->tgt_type = t; + tgt_info->tgt_mod = mod; return 0; } @@ -18623,6 +18638,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) /* store info about the attachment target that will be used later */ prog->aux->attach_func_proto = tgt_info.tgt_type; prog->aux->attach_func_name = tgt_info.tgt_name; + prog->aux->mod = tgt_info.tgt_mod; if (tgt_prog) { prog->aux->saved_dst_prog_type = tgt_prog->type; diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 2e2bf236f558..5c9170f9135c 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -256,6 +256,11 @@ static inline bool sect_empty(const Elf_Shdr *sect) static inline void init_build_id(struct module *mod, const struct load_info *info) { } static inline void layout_symtab(struct module *mod, struct load_info *info) { } static inline void add_kallsyms(struct module *mod, const struct load_info *info) { } +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} #endif /* CONFIG_KALLSYMS */ #ifdef CONFIG_SYSFS From aa3d65de4b9004d799f97700751a86d3ebd7d5f9 Mon Sep 17 
00:00:00 2001 From: Viktor Malik Date: Fri, 10 Mar 2023 08:41:00 +0100 Subject: [PATCH 058/260] bpf/selftests: Test fentry attachment to shadowed functions Adds a new test that tries to attach a program to fentry of two functions of the same name, one located in vmlinux and the other in bpf_testmod. To avoid conflicts with existing tests, a new function "bpf_fentry_shadow_test" was created both in vmlinux and in bpf_testmod. The previous commit fixed a bug which caused this test to fail. The verifier would always use the vmlinux function's address as the target trampoline address, hence trying to create two trampolines for a single address, which is forbidden. The test (similarly to other fentry/fexit tests) is not working on arm64 at the moment. Signed-off-by: Viktor Malik Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/5fe2f364190b6f79b085066ed7c5989c5bc475fa.1678432753.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 5 + tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 6 + .../bpf/prog_tests/module_fentry_shadow.c | 128 ++++++++++++++++++ 4 files changed, 140 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6a8b33a103a4..71226f68270d 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -560,6 +560,11 @@ long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) return (long)a + (long)b + (long)c + d; } +int noinline bpf_fentry_shadow_test(int a) +{ + return a + 1; +} + struct prog_test_member1 { int a; }; diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 99cc33c51eaa..0a6837f97c32 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -44,6 +44,7 @@ lookup_key # test_lookup_key__attach unexp lru_bug # lru_bug__attach unexpected error: -524 (errno 524) modify_return # modify_return__attach failed unexpected error: -524 (errno 524) module_attach # skel_attach skeleton attach failed: -524 +module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0 mptcp/base # run_test mptcp unexpected error: -524 (errno 524) netcnt # packets unexpected packets: actual 10001 != expected 10000 rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5e6e85c8d77d..7999476b9446 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -268,6 +268,12 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .set = &bpf_testmod_check_kfunc_ids, }; +noinline int bpf_fentry_shadow_test(int a) +{ + return a + 2; +} +EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c new file mode 100644 index 000000000000..c7636e18b1eb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red Hat */ +#include +#include +#include "bpf/libbpf_internal.h" +#include "cgroup_helpers.h" + +static const char *module_name = "bpf_testmod"; +static const char *symbol_name = 
"bpf_fentry_shadow_test"; + +static int get_bpf_testmod_btf_fd(void) +{ + struct bpf_btf_info info; + char name[64]; + __u32 id = 0, len; + int err, fd; + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + log_err("failed to iterate BTF objects"); + return err; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; /* expected race: BTF was unloaded */ + err = -errno; + log_err("failed to get FD for BTF object #%d", id); + return err; + } + + len = sizeof(info); + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err = -errno; + log_err("failed to get info for BTF object #%d", id); + close(fd); + return err; + } + + if (strcmp(name, module_name) == 0) + return fd; + + close(fd); + } + return -ENOENT; +} + +void test_module_fentry_shadow(void) +{ + struct btf *vmlinux_btf = NULL, *mod_btf = NULL; + int err, i; + int btf_fd[2] = {}; + int prog_fd[2] = {}; + int link_fd[2] = {}; + __s32 btf_id[2] = {}; + + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_FENTRY, + ); + + const struct bpf_insn trace_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux_btf")) + return; + + btf_fd[1] = get_bpf_testmod_btf_fd(); + if (!ASSERT_GE(btf_fd[1], 0, "get_bpf_testmod_btf_fd")) + goto out; + + mod_btf = btf_get_from_fd(btf_fd[1], vmlinux_btf); + if (!ASSERT_OK_PTR(mod_btf, "btf_get_from_fd")) + goto out; + + btf_id[0] = btf__find_by_name_kind(vmlinux_btf, symbol_name, BTF_KIND_FUNC); + if (!ASSERT_GT(btf_id[0], 0, "btf_find_by_name")) + goto out; + + btf_id[1] = btf__find_by_name_kind(mod_btf, symbol_name, BTF_KIND_FUNC); + if (!ASSERT_GT(btf_id[1], 0, "btf_find_by_name")) + goto out; + + for (i = 0; i < 2; i++) { + load_opts.attach_btf_id = btf_id[i]; + load_opts.attach_btf_obj_fd = btf_fd[i]; + prog_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", + trace_program, + sizeof(trace_program) / sizeof(struct bpf_insn), + &load_opts); + if (!ASSERT_GE(prog_fd[i], 0, "bpf_prog_load")) + goto out; + + /* If the verifier incorrectly resolves addresses of the + * shadowed functions and uses the same address for both the + * vmlinux and the bpf_testmod functions, this will fail on + * attempting to create two trampolines for the same address, + * which is forbidden. + */ + link_fd[i] = bpf_link_create(prog_fd[i], 0, BPF_TRACE_FENTRY, NULL); + if (!ASSERT_GE(link_fd[i], 0, "bpf_link_create")) + goto out; + } + + err = bpf_prog_test_run_opts(prog_fd[0], NULL); + ASSERT_OK(err, "running test"); + +out: + btf__free(vmlinux_btf); + btf__free(mod_btf); + for (i = 0; i < 2; i++) { + if (btf_fd[i]) + close(btf_fd[i]); + if (prog_fd[i] > 0) + close(prog_fd[i]); + if (link_fd[i] > 0) + close(link_fd[i]); + } +} From ed01385c0d78a025bdc72128b7aa7c3309cd5852 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 15 Mar 2023 17:07:25 -0700 Subject: [PATCH 059/260] selftests/bpf: Use ASSERT_EQ instead ASSERT_OK for testing memcmp result In tcp_hdr_options test, it ensures the received tcp hdr option and the sk local storage have the expected values. It uses memcmp to check that. Testing the memcmp result with ASSERT_OK is confusing because ASSERT_OK will print out the errno which is not set. This patch uses ASSERT_EQ to check for 0 instead. 
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230316000726.1016773-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 5cf85d0f9827..13bcaeb028b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -151,7 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp, const struct bpf_test_option *act, const char *hdr_desc) { - if (!ASSERT_OK(memcmp(exp, act, sizeof(*exp)), hdr_desc)) { + if (!ASSERT_EQ(memcmp(exp, act, sizeof(*exp)), 0, hdr_desc)) { print_option(exp, "expected: "); print_option(act, " actual: "); return -1; @@ -169,7 +169,7 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd, "map_lookup(hdr_stg_map_fd)")) return -1; - if (!ASSERT_OK(memcmp(exp, &act, sizeof(*exp)), stg_desc)) { + if (!ASSERT_EQ(memcmp(exp, &act, sizeof(*exp)), 0, stg_desc)) { print_hdr_stg(exp, "expected: "); print_hdr_stg(&act, " actual: "); return -1; From 226efec2b0efad60d4a6c4b2c3a8710dafc4dc21 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 15 Mar 2023 17:07:26 -0700 Subject: [PATCH 060/260] selftests/bpf: Fix a fd leak in an error path in network_helpers.c In __start_server, it leaks a fd when setsockopt(SO_REUSEPORT) fails. This patch fixes it. Fixes: eed92afdd14c ("bpf: selftest: Test batching and bpf_(get|set)sockopt in bpf tcp iter") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230316000726.1016773-2-martin.lau@linux.dev --- tools/testing/selftests/bpf/network_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 01de33191226..596caa176582 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -95,7 +95,7 @@ static int __start_server(int type, int protocol, const struct sockaddr *addr, if (reuseport && setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { log_err("Failed to set SO_REUSEPORT"); - return -1; + goto error_close; } if (bind(fd, addr, addrlen) < 0) { From 6cb9430be1471d631e1b6b138e6d26657a9caa81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 15 Mar 2023 17:15:50 +0000 Subject: [PATCH 061/260] libbpf: Ignore warnings about "inefficient alignment" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some consumers of libbpf compile the code base with different warnings enabled. In a report for perf, for example, -Wpacked was set which caused warnings about "inefficient alignment" to be emitted on a subset of supported architectures. With this change we silence specifically those warnings, as we intentionally worked with packed structs. This is a similar resolution as in b2f10cd4e805 ("perf cpumap: Fix alignment for masks in event encoding"). 
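For context, the pattern the fix below relies on, as a minimal self-contained sketch (the struct and its fields are illustrative, not libbpf's):

```
#include <linux/types.h>

/* Silence -Wpacked/-Wattributes only around structs whose layout is
 * dictated by an external on-disk format, where packing is intentional.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpacked"
#pragma GCC diagnostic ignored "-Wattributes"

struct on_disk_record {
	__u32 magic;
	__u16 version;
} __attribute__((packed));

#pragma GCC diagnostic pop /* normal diagnostics resume past this point */
```

Clang accepts GCC diagnostic pragmas as well, so the annotation stays portable across the compilers libbpf consumers typically build with.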
Fixes: 1eebcb60633f ("libbpf: Implement basic zip archive parsing support") Reported-by: Linux Kernel Functional Testing Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Cc: Ian Rogers Link: https://lore.kernel.org/bpf/CA+G9fYtBnwxAWXi2+GyNByApxnf_DtP1-6+_zOKAdJKnJBexjg@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20230315171550.1551603-1-deso@posteo.net --- tools/lib/bpf/zip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index f561aa07438f..3f26d629b2b4 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -16,6 +16,10 @@ #include "libbpf_internal.h" #include "zip.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + /* Specification of ZIP file format can be found here: * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT * For a high level overview of the structure of a ZIP file see @@ -119,6 +123,8 @@ struct local_file_header { __u16 extra_field_length; } __attribute__((packed)); +#pragma GCC diagnostic pop + struct zip_archive { void *data; __u32 size; From 77473d1a962f3d4f7ba48324502b6d27b8ef2591 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:24 -0500 Subject: [PATCH 062/260] bpf: Free struct bpf_cpumask in call_rcu handler The struct bpf_cpumask type uses the bpf_mem_cache_{alloc,free}() APIs to allocate and free its cpumasks. The bpf_mem allocator may currently immediately reuse some memory when it's freed, without waiting for an RCU grace period to elapse. We want to be able to treat struct bpf_cpumask objects as completely RCU safe. This is necessary for two reasons: 1. bpf_cpumask_kptr_get() currently does an RCU-protected refcnt_inc_not_zero(). This of course assumes that the underlying memory is not reused, and is therefore unsafe in its current form. 2. We want to be able to get rid of bpf_cpumask_kptr_get() entirely, and instead use the superior kptr RCU semantics now afforded by the verifier. This patch fixes (1), and enables (2), by making struct bpf_cpumask RCU safe. A subsequent patch will update the verifier to allow struct bpf_cpumask * pointers to be passed to KF_RCU kfuncs, and then a later patch will remove bpf_cpumask_kptr_get(). Fixes: 516f4d3397c9 ("bpf: Enable cpumasks to be queried and used as kptrs") Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumask.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index b6587ec40f1b..98eea62b6b7b 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -9,6 +9,7 @@ /** * struct bpf_cpumask - refcounted BPF cpumask wrapper structure * @cpumask: The actual cpumask embedded in the struct. + * @rcu: The RCU head used to free the cpumask with RCU safety. * @usage: Object reference counter. When the refcount goes to 0, the * memory is released back to the BPF allocator, which provides * RCU safety. 
@@ -24,6 +25,7 @@ */ struct bpf_cpumask { cpumask_t cpumask; + struct rcu_head rcu; refcount_t usage; }; @@ -108,6 +110,16 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumas return cpumask; } +static void cpumask_free_cb(struct rcu_head *head) +{ + struct bpf_cpumask *cpumask; + + cpumask = container_of(head, struct bpf_cpumask, rcu); + migrate_disable(); + bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); + migrate_enable(); +} + /** * bpf_cpumask_release() - Release a previously acquired BPF cpumask. * @cpumask: The cpumask being released. @@ -121,11 +133,8 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) if (!cpumask) return; - if (refcount_dec_and_test(&cpumask->usage)) { - migrate_disable(); - bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); - migrate_enable(); - } + if (refcount_dec_and_test(&cpumask->usage)) + call_rcu(&cpumask->rcu, cpumask_free_cb); } /** From 63d2d83d21a6e2c6f019da5b2d5cdabe6d1cb951 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:25 -0500 Subject: [PATCH 063/260] bpf: Mark struct bpf_cpumask as rcu protected struct bpf_cpumask is a BPF wrapper around the struct cpumask type which can be instantiated by a BPF program, and then queried as a cpumask in similar fashion to normal kernel code. The previous patch in this series makes the type fully RCU safe, so the type can be included in the rcu_protected_type BTF ID list. A subsequent patch will remove bpf_cpumask_kptr_get(), as it's no longer useful now that we can just treat the type as RCU safe by default and do our own NULL check. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 60793f793ca6..15b5c5c729f9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4599,6 +4599,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) BTF_ID(struct, cgroup) +BTF_ID(struct, bpf_cpumask) BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) From a5a197df58c44ce32a86b57e970da4bd7b71b399 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:26 -0500 Subject: [PATCH 064/260] bpf/selftests: Test using global cpumask kptr with RCU Now that struct bpf_cpumask * is considered an RCU-safe type according to the verifier, we should add tests that validate its common usages. This patch adds those tests to the cpumask test suite. Subsequent changes will remove bpf_cpumask_kptr_get(), and will adjust the selftest and BPF documentation accordingly. 
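Distilled, the pattern these tests validate looks roughly like this (a sketch assuming the global_mask kptr, kfunc declarations, and SEC()/BPF_PROG() setup from cpumask_common.h shown in the diff below):

```
SEC("tp_btf/task_newtask")
int BPF_PROG(global_mask_sketch, struct task_struct *task, u64 clone_flags)
{
	struct bpf_cpumask *mask;

	bpf_rcu_read_lock();
	mask = global_mask;	/* plain load of the kptr under RCU */
	if (!mask) {		/* NULL check is still mandatory */
		bpf_rcu_read_unlock();
		return 0;
	}
	/* KF_RCU kfuncs accept the RCU pointer without an acquired ref */
	bpf_cpumask_test_cpu(0, (const struct cpumask *)mask);
	bpf_rcu_read_unlock();	/* mask must not be used past this point */
	return 0;
}
```

The failure tests added below probe exactly the two ways this can go wrong: using the pointer after leaving the RCU section, and skipping the NULL check.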
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-4-void@manifault.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cpumask.c | 1 + .../selftests/bpf/progs/cpumask_common.h | 6 ++ .../selftests/bpf/progs/cpumask_failure.c | 62 +++++++++++++++++++ .../selftests/bpf/progs/cpumask_success.c | 33 ++++++++++ 4 files changed, 102 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 5fbe457c4ebe..6c0fe23498c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -17,6 +17,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_insert_kptr_get_release", + "test_global_mask_rcu", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 65e5496ca1b2..7623782fbd62 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -9,6 +9,9 @@ int err; +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) +private(MASK) static struct bpf_cpumask __kptr * global_mask; + struct __cpumask_map_value { struct bpf_cpumask __kptr * cpumask; }; @@ -51,6 +54,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym u32 bpf_cpumask_any(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) { return (const struct cpumask *)cpumask; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index cfe83f0ef9e2..9f726d55f747 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -127,3 +127,65 @@ int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("R2 must be a rcu pointer") +int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_rcu_read_unlock(); + + /* RCU region is exited before calling KF_RCU kfunc. */ + + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("NULL pointer passed to trusted arg1") +int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + + /* No NULL check is performed on global cpumask kptr. 
*/ + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + + bpf_rcu_read_unlock(); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 97ed08c4ff03..fe928ff72a06 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -423,3 +423,36 @@ int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_f return 0; } + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + bpf_rcu_read_unlock(); + + return 0; +} From 1b403ce77dfbf234723a91bc411dfb03a0499d6e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:27 -0500 Subject: [PATCH 065/260] bpf: Remove bpf_cpumask_kptr_get() kfunc Now that struct bpf_cpumask is RCU safe, there's no need for this kfunc. Rather than doing the following: private(MASK) static struct bpf_cpumask __kptr *global; int BPF_PROG(prog, s32 cpu, ...) { struct bpf_cpumask *cpumask; bpf_rcu_read_lock(); cpumask = bpf_cpumask_kptr_get(&global); if (!cpumask) { bpf_rcu_read_unlock(); return -1; } bpf_cpumask_setall(cpumask); ... bpf_cpumask_release(cpumask); bpf_rcu_read_unlock(); } Programs can instead simply do (assume same global cpumask): int BPF_PROG(prog, ...) { struct bpf_cpumask *cpumask; bpf_rcu_read_lock(); cpumask = global; if (!cpumask) { bpf_rcu_read_unlock(); return -1; } bpf_cpumask_setall(cpumask); ... bpf_rcu_read_unlock(); } In other words, no extra atomic acquire / release, and less boilerplate code. This patch removes both the kfunc, as well as its selftests and documentation. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-5-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumask.c | 29 ------------------ .../selftests/bpf/prog_tests/cpumask.c | 1 - .../selftests/bpf/progs/cpumask_common.h | 1 - .../selftests/bpf/progs/cpumask_failure.c | 24 --------------- .../selftests/bpf/progs/cpumask_success.c | 30 ------------------- 5 files changed, 85 deletions(-) diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 98eea62b6b7b..db9da2194c1a 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -82,34 +82,6 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) return cpumask; } -/** - * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask - * stored in a map. - * @cpumaskp: A pointer to a BPF cpumask map value. - * - * Attempts to acquire a reference to a BPF cpumask stored in a map value. The - * cpumask returned by this function must either be embedded in a map as a - * kptr, or freed with bpf_cpumask_release(). This function may return NULL if - * no BPF cpumask was found in the specified map value. - */ -__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) -{ - struct bpf_cpumask *cpumask; - - /* The BPF memory allocator frees memory backing its caches in an RCU - * callback. 
Thus, we can safely use RCU to ensure that the cpumask is - * safe to read. - */ - rcu_read_lock(); - - cpumask = READ_ONCE(*cpumaskp); - if (cpumask && !refcount_inc_not_zero(&cpumask->usage)) - cpumask = NULL; - - rcu_read_unlock(); - return cpumask; -} - static void cpumask_free_cb(struct rcu_head *head) { struct bpf_cpumask *cpumask; @@ -435,7 +407,6 @@ BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 6c0fe23498c7..cdf4acc18e4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -16,7 +16,6 @@ static const char * const cpumask_success_testcases[] = { "test_copy_any_anyand", "test_insert_leave", "test_insert_remove_release", - "test_insert_kptr_get_release", "test_global_mask_rcu", }; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 7623782fbd62..0c5b785a93e4 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -26,7 +26,6 @@ struct array_map { struct bpf_cpumask *bpf_cpumask_create(void) __ksym; void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; -struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym; u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 9f726d55f747..db4f94e72b61 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -94,30 +94,6 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("Unreleased reference") -int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags) -{ - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; - - cpumask = create_cpumask(); - if (!cpumask) - return 0; - - if (cpumask_map_insert(cpumask)) - return 0; - - v = cpumask_map_value_lookup(); - if (!v) - return 0; - - cpumask = bpf_cpumask_kptr_get(&v->cpumask); - - /* cpumask is never released. 
*/ - return 0; -} - SEC("tp_btf/task_newtask") __failure __msg("NULL pointer passed to trusted arg0") int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fe928ff72a06..2fcdd7f68ac7 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -394,36 +394,6 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla return 0; } -SEC("tp_btf/task_newtask") -int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags) -{ - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; - - cpumask = create_cpumask(); - if (!cpumask) - return 0; - - if (cpumask_map_insert(cpumask)) { - err = 3; - return 0; - } - - v = cpumask_map_value_lookup(); - if (!v) { - err = 4; - return 0; - } - - cpumask = bpf_cpumask_kptr_get(&v->cpumask); - if (cpumask) - bpf_cpumask_release(cpumask); - else - err = 5; - - return 0; -} - SEC("tp_btf/task_newtask") int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) { From fec2c6d14fd5001e7d24a2ae44f0e9aea82a6149 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:28 -0500 Subject: [PATCH 066/260] bpf,docs: Remove bpf_cpumask_kptr_get() from documentation Now that the kfunc no longer exists, we can remove it and instead describe how RCU can be used to get a struct bpf_cpumask from a map value. This patch updates the BPF documentation accordingly. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-6-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/cpumasks.rst | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index 75344cd230e5..41efd8874eeb 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -117,12 +117,7 @@ For example: As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in a map, the reference can be removed from the map with bpf_kptr_xchg(), or -opportunistically acquired with bpf_cpumask_kptr_get(): - -.. kernel-doc:: kernel/bpf/cpumask.c - :identifiers: bpf_cpumask_kptr_get - -Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: +opportunistically acquired using RCU: .. code-block:: c @@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: /** * A simple example tracepoint program showing how a * struct bpf_cpumask * kptr that is stored in a map can - * be acquired using the bpf_cpumask_kptr_get() kfunc. + * be passed to kfuncs using RCU protection. */ SEC("tp_btf/cgroup_mkdir") int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path) @@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: if (!v) return -ENOENT; + bpf_rcu_read_lock(); /* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */ - kptr = bpf_cpumask_kptr_get(&v->cpumask); - if (!kptr) + kptr = v->cpumask; + if (!kptr) { /* If no bpf_cpumask was present in the map, it's because * we're racing with another CPU that removed it with * bpf_kptr_xchg() between the bpf_map_lookup_elem() - * above, and our call to bpf_cpumask_kptr_get(). 
- * bpf_cpumask_kptr_get() internally safely handles this - * race, and will return NULL if the cpumask is no longer - * present in the map by the time we invoke the kfunc. + * above, and our load of the pointer from the map. */ + bpf_rcu_read_unlock(); return -EBUSY; + } - /* Free the reference we just took above. Note that the - * original struct bpf_cpumask * kptr is still in the map. It will - * be freed either at a later time if another context deletes - * it from the map, or automatically by the BPF subsystem if - * it's still present when the map is destroyed. - */ - bpf_cpumask_release(kptr); + bpf_cpumask_setall(kptr); + bpf_rcu_read_unlock(); return 0; } From 082cdc69a4651dd2a77539d69416a359ed1214f5 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Wed, 15 Mar 2023 17:54:00 +0100 Subject: [PATCH 067/260] bpf: Remove misleading spec_v1 check on var-offset stack read For every BPF_ADD/SUB involving a pointer, adjust_ptr_min_max_vals() ensures that the resulting pointer has a constant offset if bypass_spec_v1 is false. This is ensured by calling sanitize_check_bounds() which in turn calls check_stack_access_for_ptr_arithmetic(). There, -EACCES is returned if the register's offset is not constant, thereby rejecting the program. In summary, an unprivileged user must never be able to create stack pointers with a variable offset. This must hold even though a corresponding check in check_stack_write() is missing: if users were able to create a variable-offset pointer, they could still use it in a stack-write operation to trigger unsafe speculative behavior [1]. Because unprivileged users must already be prevented from creating variable-offset stack pointers, viable options are to either remove this check (replacing it with a clarifying comment), or to turn it into a "verifier BUG" message, also adding a similar check in check_stack_write() (for consistency, as a second-level defense). This patch implements the first option to reduce verifier bloat. This check was introduced by commit 01f810ace9ed ("bpf: Allow variable-offset stack access") which correctly notes that "variable-offset reads and writes are disallowed (they were already disallowed for the indirect access case) because the speculative execution checking code doesn't support them". However, it does not further discuss why the check in check_stack_read() is necessary. The code which made this check obsolete was also introduced in this commit. I have compiled ~650 programs from the Linux selftests, Linux samples, Cilium, and libbpf/examples projects and confirmed that none of these trigger the check in check_stack_read() [2]. Instead, all of these programs are, as expected, already rejected when constructing the variable-offset pointers. Note that the check in check_stack_access_for_ptr_arithmetic() also prints "off=%d" while the code removed by this patch does not (the error removed does not appear in the "verification_error" values). For reproducibility, the repository linked in [2] includes the raw data and scripts used to create the plot. 
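For concreteness, the kind of construct in question, sketched in BPF C (illustrative; assumes the usual vmlinux.h/bpf_helpers.h includes and an unprivileged loader):

```
SEC("socket")
int var_off_sketch(void *ctx)
{
	char buf[64] = {};
	__u64 idx = bpf_get_prandom_u32() & 63; /* variable, though bounded */
	char *p = buf + idx;

	/* For !bypass_spec_v1, the BPF_ADD above is already rejected in
	 * check_stack_access_for_ptr_arithmetic() via
	 * adjust_ptr_min_max_vals(), so the stack read/write checks never
	 * see a variable-offset stack pointer like p.
	 */
	*p = 1;
	return 0;
}
```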
[1] https://arxiv.org/pdf/1807.03757.pdf [2] https://gitlab.cs.fau.de/un65esoq/bpf-spectre/-/raw/53dc19fcf459c186613b1156a81504b39c8d49db/data/plots/23-02-26_23-56_bpftool/bpftool/0004-errors.pdf?inline=false Fixes: 01f810ace9ed ("bpf: Allow variable-offset stack access") Signed-off-by: Luis Gerhorst Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230315165358.23701-1-gerhorst@cs.fau.de --- kernel/bpf/verifier.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 15b5c5c729f9..d62b7127ff2a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4303,17 +4303,13 @@ static int check_stack_read(struct bpf_verifier_env *env, } /* Variable offset is prohibited for unprivileged mode for simplicity * since it requires corresponding support in Spectre masking for stack - * ALU. See also retrieve_ptr_limit(). + * ALU. See also retrieve_ptr_limit(). The check in + * check_stack_access_for_ptr_arithmetic() called by + * adjust_ptr_min_max_vals() prevents users from creating stack pointers + * with variable offsets, therefore no check is required here. Further, + * just checking it here would be insufficient as speculative stack + * writes could still lead to unsafe speculative behaviour. */ - if (!env->bypass_spec_v1 && var_off) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", - ptr_regno, tn_buf); - return -EACCES; - } - if (!var_off) { off += reg->var_off.value; err = check_stack_read_fixed_off(env, state, off, size, From e5995bc7e2ba1a0d444f806016d2e4ea91c032d0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 16 Mar 2023 18:50:50 +0100 Subject: [PATCH 068/260] bpf, test_run: fix crashes due to XDP frame overwriting/corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot and Ilya faced the splats when %XDP_PASS happens for bpf_test_run after skb PP recycling was enabled for {__,}xdp_build_skb_from_frame(): BUG: kernel NULL pointer dereference, address: 0000000000000d28 RIP: 0010:memset_erms+0xd/0x20 arch/x86/lib/memset_64.S:66 [...] Call Trace: __finalize_skb_around net/core/skbuff.c:321 [inline] __build_skb_around+0x232/0x3a0 net/core/skbuff.c:379 build_skb_around+0x32/0x290 net/core/skbuff.c:444 __xdp_build_skb_from_frame+0x121/0x760 net/core/xdp.c:622 xdp_recv_frames net/bpf/test_run.c:248 [inline] xdp_test_run_batch net/bpf/test_run.c:334 [inline] bpf_test_run_xdp_live+0x1289/0x1930 net/bpf/test_run.c:362 bpf_prog_test_run_xdp+0xa05/0x14e0 net/bpf/test_run.c:1418 [...] This happens because xdp_scrub_frame() is called on the frame, which nullifies xdpf->data, and the bpf_test_run code doesn't reinitialize the frame when the XDP program doesn't adjust head or tail. Previously, %XDP_PASS meant the page would be released from the pool and returned to the MM layer, but now it returns to the pool with the nullified xdpf->data, which never gets reinitialized. So, in addition to checking whether the head and/or tail have been adjusted, check also for a potential XDP frame corruption. xdpf->data is 100% affected and also xdpf->flags is the field closest to the metadata / frame start. Checking for these two should be enough for non-extreme cases. 
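The added check boils down to comparing those two fields against their pristine copies (a sketch with assumed, simplified types; the real code below compares head->frm against head->orig_ctx):

```
#include <stdbool.h>

struct frame_view {		/* stand-in for the xdp_frame fields used */
	void *data;
	unsigned int flags;
};

static bool frame_was_scrubbed(const struct frame_view *frm,
			       const struct frame_view *orig)
{
	/* data is always nullified by xdp_scrub_frame(); flags sits closest
	 * to the metadata / frame start, so it is the likeliest collateral.
	 */
	return frm->data != orig->data || frm->flags != orig->flags;
}
```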
Fixes: 9c94bbf9a87b ("xdp: recycle Page Pool backed skbs built from XDP frames") Reported-by: syzbot+e1d1b65f7c32f2a86a9f@syzkaller.appspotmail.com Link: https://lore.kernel.org/bpf/000000000000f1985705f6ef2243@google.com Reported-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/e07dd94022ad5731705891b9487cc9ed66328b94.camel@linux.ibm.com Signed-off-by: Alexander Lobakin Acked-by: Toke Høiland-Jørgensen Tested-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230316175051.922550-2-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 71226f68270d..8d6b31209bd6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -208,6 +208,16 @@ static void xdp_test_run_teardown(struct xdp_test_data *xdp) kfree(xdp->skbs); } +static bool frame_was_changed(const struct xdp_page_head *head) +{ + /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, + * i.e. has the highest chances to be overwritten. If those two are + * untouched, it's most likely safe to skip the context reset. + */ + return head->frm.data != head->orig_ctx.data || + head->frm.flags != head->orig_ctx.flags; +} + static bool ctx_was_changed(struct xdp_page_head *head) { return head->orig_ctx.data != head->ctx.data || @@ -217,7 +227,7 @@ static bool ctx_was_changed(struct xdp_page_head *head) static void reset_ctx(struct xdp_page_head *head) { - if (likely(!ctx_was_changed(head))) + if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) return; head->ctx.data = head->orig_ctx.data; From 5640b6d894342d153b719644681b0345fd28ee96 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 16 Mar 2023 18:50:51 +0100 Subject: [PATCH 069/260] selftests/bpf: fix "metadata marker" getting overwritten by the netstack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexei noticed xdp_do_redirect test on BPF CI started failing on BE systems after skb PP recycling was enabled: test_xdp_do_redirect:PASS:prog_run 0 nsec test_xdp_do_redirect:PASS:pkt_count_xdp 0 nsec test_xdp_do_redirect:PASS:pkt_count_zero 0 nsec test_xdp_do_redirect:FAIL:pkt_count_tc unexpected pkt_count_tc: actual 220 != expected 9998 test_max_pkt_size:PASS:prog_run_max_size 0 nsec test_max_pkt_size:PASS:prog_run_too_big 0 nsec close_netns:PASS:setns 0 nsec #289 xdp_do_redirect:FAIL Summary: 270/1674 PASSED, 30 SKIPPED, 1 FAILED and it doesn't happen on LE systems. Ilya then hunted it down to: #0 0x0000000000aaeee6 in neigh_hh_output (hh=0x83258df0, skb=0x88142200) at linux/include/net/neighbour.h:503 #1 0x0000000000ab2cda in neigh_output (skip_cache=false, skb=0x88142200, n=) at linux/include/net/neighbour.h:544 #2 ip6_finish_output2 (net=net@entry=0x88edba00, sk=sk@entry=0x0, skb=skb@entry=0x88142200) at linux/net/ipv6/ip6_output.c:134 #3 0x0000000000ab4cbc in __ip6_finish_output (skb=0x88142200, sk=0x0, net=0x88edba00) at linux/net/ipv6/ip6_output.c:195 #4 ip6_finish_output (net=0x88edba00, sk=0x0, skb=0x88142200) at linux/net/ipv6/ip6_output.c:206 xdp_do_redirect test places a u32 marker (0x42) right before the Ethernet header to check it then in the XDP program and return %XDP_ABORTED if it's not there. Neigh xmit code likes to round up hard header length to speed up copying the header, so it overwrites two bytes in front of the Eth header. 
On LE systems, 0x42 is one byte at `data - 4`, while on BE it's `data - 1`, which explains why it happens only there. It didn't happen previously because %XDP_PASS meant the page would be discarded and replaced by a new one, but now it can be recycled as well, while bpf_test_run code doesn't reinitialize the content of recycled pages. This mark is limited to this particular test and its setup though, so there's no need to predict 1000 different possible cases. Just move it 4 bytes to the left, still keeping it 32 bit to match on more bytes. Fixes: 9c94bbf9a87b ("xdp: recycle Page Pool backed skbs built from XDP frames") Reported-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/CAADnVQ+B_JOU+EpP=DKhbY9yXdN6GiRPnpTTXfEZ9sNkUeb-yQ@mail.gmail.com Reported-by: Ilya Leoshkevich # + debugging Link: https://lore.kernel.org/bpf/8341c1d9f935f410438e79d3bd8a9cc50aefe105.camel@linux.ibm.com Signed-off-by: Alexander Lobakin Acked-by: Toke Høiland-Jørgensen Tested-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230316175051.922550-3-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 7 ++++--- tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 856cbc29e6a1..4eaa3dcaebc8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -86,12 +86,12 @@ static void test_max_pkt_size(int fd) void test_xdp_do_redirect(void) { int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst; - char data[sizeof(pkt_udp) + sizeof(__u32)]; + char data[sizeof(pkt_udp) + sizeof(__u64)]; struct test_xdp_do_redirect *skel = NULL; struct nstoken *nstoken = NULL; struct bpf_link *link; LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); - struct xdp_md ctx_in = { .data = sizeof(__u32), + struct xdp_md ctx_in = { .data = sizeof(__u64), .data_end = sizeof(data) }; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, @@ -105,8 +105,9 @@ void test_xdp_do_redirect(void) DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); - memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp)); + memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp)); *((__u32 *)data) = 0x42; /* metadata test value */ + *((__u32 *)data + 4) = 0; skel = test_xdp_do_redirect__open(); if (!ASSERT_OK_PTR(skel, "skel")) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index cd2d4e3258b8..5baaafed0d2d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -52,7 +52,7 @@ int xdp_redirect(struct xdp_md *xdp) *payload = MARK_IN; - if (bpf_xdp_adjust_meta(xdp, 4)) + if (bpf_xdp_adjust_meta(xdp, sizeof(__u64))) return XDP_ABORTED; if (retcode > XDP_PASS) From bd5314f8dd2d41330eecb60f0490c3fcfe1fc99d Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Fri, 17 Mar 2023 10:56:01 +0100 Subject: [PATCH 070/260] kallsyms, bpf: Move find_kallsyms_symbol_value out of internal header Move find_kallsyms_symbol_value from kernel/module/internal.h to include/linux/module.h. The reason is that internal.h is not prepared to be included when CONFIG_MODULES=n. 
find_kallsyms_symbol_value is used by kernel/bpf/verifier.c and including internal.h from it (without modules) leads into a compilation error: In file included from ../include/linux/container_of.h:5, from ../include/linux/list.h:5, from ../include/linux/timer.h:5, from ../include/linux/workqueue.h:9, from ../include/linux/bpf.h:10, from ../include/linux/bpf-cgroup.h:5, from ../kernel/bpf/verifier.c:7: ../kernel/bpf/../module/internal.h: In function 'mod_find': ../include/linux/container_of.h:20:54: error: invalid use of undefined type 'struct module' 20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \ | ^~ [...] This patch fixes the above error. Fixes: 31bf1dbccfb0 ("bpf: Fix attaching fentry/fexit/fmod_ret/lsm to modules") Reported-by: kernel test robot Signed-off-by: Viktor Malik Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/oe-kbuild-all/202303161404.OrmfCy09-lkp@intel.com/ Link: https://lore.kernel.org/bpf/20230317095601.386738-1-vmalik@redhat.com --- include/linux/module.h | 8 ++++++++ kernel/bpf/verifier.c | 2 +- kernel/module/internal.h | 6 ------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 4435ad9439ab..41cfd3be57e5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -616,6 +616,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); +unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); + extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); #define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) @@ -796,6 +798,12 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name) return 0; } +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} + static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d62b7127ff2a..99394a2f7ee4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24,7 +24,7 @@ #include #include #include -#include "../module/internal.h" +#include #include "disasm.h" diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 5c9170f9135c..1c877561a7d2 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -246,7 +246,6 @@ static inline void kmemleak_load_module(const struct module *mod, void init_build_id(struct module *mod, const struct load_info *info); void layout_symtab(struct module *mod, struct load_info *info); void add_kallsyms(struct module *mod, const struct load_info *info); -unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); static inline bool sect_empty(const Elf_Shdr *sect) { @@ -256,11 +255,6 @@ static inline bool sect_empty(const Elf_Shdr *sect) static inline void init_build_id(struct module *mod, const struct load_info *info) { } static inline void layout_symtab(struct module *mod, struct load_info *info) { } static inline void add_kallsyms(struct module *mod, const struct load_info *info) { } -static inline unsigned long find_kallsyms_symbol_value(struct module *mod, - const char *name) -{ - return 0; -} #endif /* CONFIG_KALLSYMS */ #ifdef CONFIG_SYSFS From 0f10f647f45545004ea50b73a7a7c5c3309ff286 Mon Sep 17 00:00:00 2001 
From: Bagas Sanjaya Date: Tue, 14 Mar 2023 14:44:49 +0700 Subject: [PATCH 071/260] bpf, docs: Use internal linking for link to netdev subsystem doc Commit d56b0c461d19da ("bpf, docs: Fix link to netdev-FAQ target") attempts to fix the linking problem to the undefined "netdev-FAQ" label introduced in 287f4fa99a5281 ("docs: Update references to netdev-FAQ") by changing the internal cross reference to the netdev subsystem documentation (Documentation/process/maintainer-netdev.rst) to an external one at docs.kernel.org. However, the linking problem is still not resolved, as the generated link points to a non-existent netdev-FAQ section of the external doc, which, when clicked, goes to the top of the doc instead. Revert to internal linking by simply mentioning the doc path while massaging the text leading into the link, since the netdev subsystem doc contains no FAQs but rather general information about the subsystem. Fixes: d56b0c461d19 ("bpf, docs: Fix link to netdev-FAQ target") Fixes: 287f4fa99a52 ("docs: Update references to netdev-FAQ") Signed-off-by: Bagas Sanjaya Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230314074449.23620-1-bagasdotme@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 5f5f9ccc3862..e151e61dff38 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -128,7 +128,8 @@ into the bpf-next tree will make their way into net-next tree. net and net-next are both run by David S. Miller. From there, they will go into the kernel mainline tree run by Linus Torvalds. To read up on the process of net and net-next being merged into the mainline tree, see -the `netdev-FAQ`_. +the documentation on netdev subsystem at +Documentation/process/maintainer-netdev.rst. @@ -147,7 +148,8 @@ request):: Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to? --------------------------------------------------------------------------------- -A: The process is the very same as described in the `netdev-FAQ`_, +A: The process is the very same as described in the netdev subsystem +documentation at Documentation/process/maintainer-netdev.rst, so please read up on it. The subject line must indicate whether the patch is a fix or rather "next-like" content in order to let the maintainers know whether it is targeted at bpf or bpf-next. @@ -206,8 +208,9 @@ ii) run extensive BPF test suite and Once the BPF pull request was accepted by David S. Miller, then the patches end up in net or net-next tree, respectively, and make their way from there further into mainline. Again, see the -`netdev-FAQ`_ for additional information e.g. on how often they are -merged to mainline. +documentation for netdev subsystem at +Documentation/process/maintainer-netdev.rst for additional information +e.g. on how often they are merged to mainline. Q: How long do I need to wait for feedback on my BPF patches? ------------------------------------------------------------- @@ -230,7 +233,8 @@ Q: Are patches applied to bpf-next when the merge window is open? ----------------------------------------------------------------- A: For the time when the merge window is open, bpf-next will not be processed. This is roughly analogous to net-next patch processing, -so feel free to read up on the `netdev-FAQ`_ about further details. 
+so feel free to read up on the netdev docs at +Documentation/process/maintainer-netdev.rst about further details. During those two weeks of merge window, we might ask you to resend your patch series once bpf-next is open again. Once Linus released @@ -394,7 +398,8 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up: netdev@vger.kernel.org The process in general is the same as on netdev itself, see also the -`netdev-FAQ`_. +the documentation on networking subsystem at +Documentation/process/maintainer-netdev.rst. Q: Do you also backport to kernels not currently maintained as stable? ---------------------------------------------------------------------- @@ -410,7 +415,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well What should I do? A: The same rules apply as with netdev patch submissions in general, see -the `netdev-FAQ`_. +the netdev docs at Documentation/process/maintainer-netdev.rst. Never add "``Cc: stable@vger.kernel.org``" to the patch description, but ask the BPF maintainers to queue the patches instead. This can be done @@ -685,7 +690,6 @@ when: .. Links .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/ -.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html .. _selftests: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ .. _Documentation/dev-tools/kselftest.rst: From 58aa2afbb1e61fcf35bfcc819952a3c13d9f9203 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:17 -0700 Subject: [PATCH 072/260] bpf: Allow ld_imm64 instruction to point to kfunc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow a ld_imm64 insn with BPF_PSEUDO_BTF_ID to hold the address of a kfunc. A ld_imm64 pointing to a valid kfunc will be seen as a non-null PTR_TO_MEM by the is_branch_taken() logic of the verifier, while libbpf will resolve the address of an unknown kfunc to "ld_imm64 reg, 0", which will also be recognized by is_branch_taken(), and the verifier will then proceed with dead code elimination. BPF programs can use this logic to detect at load time whether a kfunc is present in the kernel with the bpf_ksym_exists() macro that is introduced in the next patches. 
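Put together, the detection idiom this enables looks as follows (a sketch in BPF C; assumes the usual vmlinux.h/bpf_helpers.h/bpf_tracing.h includes, with bpf_task_acquire/bpf_task_release standing in for any kfunc pair, declared __ksym __weak as the selftest in a later patch does):

```
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
void bpf_task_release(struct task_struct *p) __ksym __weak;

SEC("tp_btf/task_newtask")
int BPF_PROG(kfunc_probe, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *acquired;

	/* If the kernel lacks the kfunc, libbpf emits "ld_imm64 reg, 0",
	 * is_branch_taken() proves the early return is always taken, and
	 * the otherwise-unverifiable call below is eliminated as dead code.
	 */
	if (!bpf_ksym_exists(bpf_task_acquire))
		return 0;

	acquired = bpf_task_acquire(task);
	if (acquired)
		bpf_task_release(acquired);
	return 0;
}
```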
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Martin KaFai Lau Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230317201920.62030-2-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99394a2f7ee4..8bc44f5dc5b6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15952,8 +15952,8 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, goto err_put; } - if (!btf_type_is_var(t)) { - verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id); + if (!btf_type_is_var(t) && !btf_type_is_func(t)) { + verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); err = -EINVAL; goto err_put; } @@ -15966,6 +15966,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, err = -ENOENT; goto err_put; } + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; + + if (btf_type_is_func(t)) { + aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; + aux->btf_var.mem_size = 0; + goto check_btf; + } datasec_id = find_btf_percpu_datasec(btf); if (datasec_id > 0) { @@ -15978,9 +15986,6 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, } } - insn[0].imm = (u32)addr; - insn[1].imm = addr >> 32; - type = t->type; t = btf_type_skip_modifiers(btf, type, NULL); if (percpu) { @@ -16008,7 +16013,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } - +check_btf: /* check whether we recorded this BTF (and maybe module) already */ for (i = 0; i < env->used_btf_cnt; i++) { if (env->used_btfs[i].btf == btf) { From 5fc13ad59b60708e52932188854d4d5bf2b0e10e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:18 -0700 Subject: [PATCH 073/260] libbpf: Fix relocation of kfunc ksym in ld_imm64 insn. void *p = kfunc; -> generates ld_imm64 insn. kfunc() -> generates bpf_call insn. libbpf patches bpf_call insn correctly while only btf_id part of ld_imm64 is set in the former case. Which means that pointers to kfuncs in modules are not patched correctly and the verifier rejects load of such programs due to btf_id being out of range. Fix libbpf to patch ld_imm64 for kfunc. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317201920.62030-3-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a557718401e4..4c34fbd7b5be 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7533,6 +7533,12 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ext->is_set = true; ext->ksym.kernel_btf_id = kfunc_id; ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; + /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() + * populates FD into ld_imm64 insn when it's used to point to kfunc. + * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. + * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. + */ + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); From 5cbd3fe3a91df46ea201cc5d8ab4e390332ec26e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:19 -0700 Subject: [PATCH 074/260] libbpf: Introduce bpf_ksym_exists() macro. 
Introduce bpf_ksym_exists() macro that can be used by BPF programs to detect at load time whether particular ksym (either variable or kfunc) is present in the kernel. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317201920.62030-4-alexei.starovoitov@gmail.com --- tools/lib/bpf/bpf_helpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 7d12d3e620cc..e7e1a8acc299 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -177,6 +177,11 @@ enum libbpf_tristate { #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ + !!sym; \ +}) + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif From 95fdf6e313a981b0729886f86916190cb418b04c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:20 -0700 Subject: [PATCH 075/260] selftests/bpf: Add test for bpf_ksym_exists(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add load and run time test for bpf_ksym_exists() and check that the verifier performs dead code elimination for non-existing kfunc. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Martin KaFai Lau Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230317201920.62030-5-alexei.starovoitov@gmail.com --- .../selftests/bpf/progs/task_kfunc_success.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 4f61596b0242..cfa7f12b84e8 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -17,6 +17,10 @@ int err, pid; * TP_PROTO(struct task_struct *p, u64 clone_flags) */ +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; +void invalid_kfunc(void) __ksym __weak; +void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; + static bool is_test_kfunc_task(void) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -26,7 +30,21 @@ static bool is_test_kfunc_task(void) static int test_acquire_release(struct task_struct *task) { - struct task_struct *acquired; + struct task_struct *acquired = NULL; + + if (!bpf_ksym_exists(bpf_task_acquire)) { + err = 3; + return 0; + } + if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) { + err = 4; + return 0; + } + if (bpf_ksym_exists(invalid_kfunc)) { + /* the verifier's dead code elimination should remove this */ + err = 5; + asm volatile ("goto -1"); /* for (;;); */ + } acquired = bpf_task_acquire(task); bpf_task_release(acquired); From 2be7aa76cc69633930fb747e1d85d33a63a60c02 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 17 Mar 2023 09:32:56 -0700 Subject: [PATCH 076/260] selftests/bpf: Add --json-summary option to test_progs Currently, test_progs outputs all stdout/stderr as it runs, and when it is done, prints a summary. It is non-trivial for tooling to parse that output and extract meaningful information from it. This change adds a new option, `--json-summary`/`-J` that let the caller specify a file where `test_progs{,-no_alu32}` can write a summary of the run in a json format that can later be parsed by tooling. 
Currently, it creates a summary section with successes/skipped/failures followed by a list of failed tests and subtests. A test contains the following fields: - name: the name of the test - number: the number of the test - message: the log message that was printed by the test. - failed: A boolean indicating whether the test failed or not. Currently we only output failed tests, but in the future, successful tests could be added. - subtests: A list of subtests associated with this test. A subtest contains the following fields: - name: same as above - number: same as above - message: the log message that was printed by the subtest. - failed: same as above but for the subtest An example run and json content below: ``` $ sudo ./test_progs -a $(grep -v '^#' ./DENYLIST.aarch64 | awk '{print $1","}' | tr -d '\n') -j -J /tmp/test_progs.json $ jq < /tmp/test_progs.json | head -n 30 { "success": 29, "success_subtest": 23, "skipped": 3, "failed": 28, "results": [ { "name": "bpf_cookie", "number": 10, "message": "test_bpf_cookie:PASS:skel_open 0 nsec\n", "failed": true, "subtests": [ { "name": "multi_kprobe_link_api", "number": 2, "message": "kprobe_multi_link_api_subtest:PASS:load_kallsyms 0 nsec\nlibbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", "failed": true }, { "name": "multi_kprobe_attach_api", "number": 3, "message": "libbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_attach_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", "failed": true }, { "name": "lsm", "number": 8, "message": "lsm_subtest:PASS:lsm.link_create 0 nsec\nlsm_subtest:FAIL:stack_mprotect unexpected stack_mprotect: actual 0 != expected -1\n", "failed": true } ``` The file can then be used to print a summary of the test run and list of failing tests/subtests: ``` $ jq -r < /tmp/test_progs.json '"Success: \(.success)/\(.success_subtest), Skipped: \(.skipped), Failed: \(.failed)"' Success: 29/23, Skipped: 3, Failed: 28 $ jq -r < /tmp/test_progs.json '.results | map([ if .failed then "#\(.number) \(.name)" else empty end, ( . 
as {name: $tname, number: $tnum} | .subtests | map( if .failed then "#\($tnum)/\(.number) \($tname)/\(.name)" else empty end ) ) ]) | flatten | .[]' | head -n 20 #10 bpf_cookie #10/2 bpf_cookie/multi_kprobe_link_api #10/3 bpf_cookie/multi_kprobe_attach_api #10/8 bpf_cookie/lsm #15 bpf_mod_race #15/1 bpf_mod_race/ksym (used_btfs UAF) #15/2 bpf_mod_race/kfunc (kfunc_btf_tab UAF) #36 cgroup_hierarchical_stats #61 deny_namespace #61/1 deny_namespace/unpriv_userns_create_no_bpf #73 fexit_stress #83 get_func_ip_test #99 kfunc_dynptr_param #99/1 kfunc_dynptr_param/dynptr_data_null #99/4 kfunc_dynptr_param/dynptr_data_null #100 kprobe_multi_bench_attach #100/1 kprobe_multi_bench_attach/kernel #100/2 kprobe_multi_bench_attach/modules #101 kprobe_multi_test #101/1 kprobe_multi_test/skel_api ``` Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317163256.3809328-1-chantr4@gmail.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/json_writer.c | 1 + tools/testing/selftests/bpf/json_writer.h | 1 + tools/testing/selftests/bpf/test_progs.c | 83 +++++++++++++++++++++-- tools/testing/selftests/bpf/test_progs.h | 1 + 5 files changed, 84 insertions(+), 6 deletions(-) create mode 120000 tools/testing/selftests/bpf/json_writer.c create mode 120000 tools/testing/selftests/bpf/json_writer.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 55811c448eb7..fc092582d16d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -234,6 +234,7 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o TESTING_HELPERS := $(OUTPUT)/testing_helpers.o TRACE_HELPERS := $(OUTPUT)/trace_helpers.o +JSON_WRITER := $(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o $(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) @@ -559,7 +560,8 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c disasm.c + cap_helpers.c test_loader.c xsk.c disasm.c \ + json_writer.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c new file mode 120000 index 000000000000..5effa31e2f39 --- /dev/null +++ b/tools/testing/selftests/bpf/json_writer.c @@ -0,0 +1 @@ +../../../bpf/bpftool/json_writer.c \ No newline at end of file diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h new file mode 120000 index 000000000000..e0a264c26752 --- /dev/null +++ b/tools/testing/selftests/bpf/json_writer.h @@ -0,0 +1 @@ +../../../bpf/bpftool/json_writer.h \ No newline at end of file diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6d5e3022c75f..d903e6a72a96 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -18,6 +18,7 @@ #include #include #include +#include "json_writer.h" static bool verbose(void) { @@ -269,10 +270,23 @@ static void print_subtest_name(int test_num, int subtest_num, fprintf(env.stdout, "\n"); } +static void jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt) +{ + /* open_memstream (from stdio_hijack_init) ensures that 
log_buf is terminated by a + * null byte. Yet in parallel mode, log_buf will be NULL if there is no message. + */ + if (log_cnt) { + jsonw_string_field(w, "message", log_buf); + } else { + jsonw_string_field(w, "message", ""); + } +} + static void dump_test_log(const struct prog_test_def *test, const struct test_state *test_state, bool skip_ok_subtests, - bool par_exec_result) + bool par_exec_result, + json_writer_t *w) { bool test_failed = test_state->error_cnt > 0; bool force_log = test_state->force_log; @@ -296,6 +310,16 @@ static void dump_test_log(const struct prog_test_def *test, if (test_state->log_cnt && print_test) print_test_log(test_state->log_buf, test_state->log_cnt); + if (w && print_test) { + jsonw_start_object(w); + jsonw_string_field(w, "name", test->test_name); + jsonw_uint_field(w, "number", test->test_num); + jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt); + jsonw_bool_field(w, "failed", test_failed); + jsonw_name(w, "subtests"); + jsonw_start_array(w); + } + for (i = 0; i < test_state->subtest_num; i++) { subtest_state = &test_state->subtest_states[i]; subtest_failed = subtest_state->error_cnt; @@ -314,6 +338,20 @@ static void dump_test_log(const struct prog_test_def *test, test->test_name, subtest_state->name, test_result(subtest_state->error_cnt, subtest_state->skipped)); + + if (w && print_subtest) { + jsonw_start_object(w); + jsonw_string_field(w, "name", subtest_state->name); + jsonw_uint_field(w, "number", i+1); + jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt); + jsonw_bool_field(w, "failed", subtest_failed); + jsonw_end_object(w); + } + } + + if (w && print_test) { + jsonw_end_array(w); + jsonw_end_object(w); } print_test_result(test, test_state); @@ -715,6 +753,7 @@ enum ARG_KEYS { ARG_TEST_NAME_GLOB_DENYLIST = 'd', ARG_NUM_WORKERS = 'j', ARG_DEBUG = -1, + ARG_JSON_SUMMARY = 'J' }; static const struct argp_option opts[] = { @@ -740,6 +779,7 @@ static const struct argp_option opts[] = { "Number of workers to run in parallel, default to number of cpus." }, { "debug", ARG_DEBUG, NULL, 0, "print extra debug information for test_progs."
}, + { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."}, {}, }; @@ -870,6 +910,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_DEBUG: env->debug = true; break; + case ARG_JSON_SUMMARY: + env->json = fopen(arg, "w"); + if (env->json == NULL) { + perror("Failed to open json summary file"); + return -errno; + } + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -1017,7 +1064,7 @@ void crash_handler(int signum) stdio_restore(); if (env.test) { env.test_state->error_cnt++; - dump_test_log(env.test, env.test_state, true, false); + dump_test_log(env.test, env.test_state, true, false, NULL); } if (env.worker_id != -1) fprintf(stderr, "[%d]: ", env.worker_id); @@ -1124,7 +1171,7 @@ static void run_one_test(int test_num) stdio_restore(); - dump_test_log(test, state, false, false); + dump_test_log(test, state, false, false, NULL); } struct dispatch_data { @@ -1283,7 +1330,7 @@ static void *dispatch_thread(void *ctx) } while (false); pthread_mutex_lock(&stdout_output_lock); - dump_test_log(test, state, false, true); + dump_test_log(test, state, false, true, NULL); pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: @@ -1308,6 +1355,7 @@ static void calculate_summary_and_print_errors(struct test_env *env) { int i; int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0; + json_writer_t *w = NULL; for (i = 0; i < prog_test_cnt; i++) { struct test_state *state = &test_states[i]; @@ -1324,6 +1372,22 @@ static void calculate_summary_and_print_errors(struct test_env *env) succ_cnt++; } + if (env->json) { + w = jsonw_new(env->json); + if (!w) + fprintf(env->stderr, "Failed to create new JSON stream."); + } + + if (w) { + jsonw_start_object(w); + jsonw_uint_field(w, "success", succ_cnt); + jsonw_uint_field(w, "success_subtest", sub_succ_cnt); + jsonw_uint_field(w, "skipped", skip_cnt); + jsonw_uint_field(w, "failed", fail_cnt); + jsonw_name(w, "results"); + jsonw_start_array(w); + } + /* * We only print error logs summary when there are failed tests and * verbose mode is not enabled. Otherwise, results may be incosistent. @@ -1340,10 +1404,19 @@ static void calculate_summary_and_print_errors(struct test_env *env) if (!state->tested || !state->error_cnt) continue; - dump_test_log(test, state, true, true); + dump_test_log(test, state, true, true, w); } } + if (w) { + jsonw_end_array(w); + jsonw_end_object(w); + jsonw_destroy(&w); + } + + if (env->json) + fclose(env->json); + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 3cbf005747ed..4b06b8347cd4 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -114,6 +114,7 @@ struct test_env { FILE *stdout; FILE *stderr; int nr_cpus; + FILE *json; int succ_cnt; /* successful tests */ int sub_succ_cnt; /* successful sub-tests */ From 08ff1c9f3e927ba3701c113dda70953a6f4afffa Mon Sep 17 00:00:00 2001 From: Sreevani Sreejith Date: Wed, 15 Mar 2023 12:54:05 -0700 Subject: [PATCH 077/260] bpf, docs: Libbpf overview documentation This patch documents overview of libbpf, including its features for developing BPF programs. 
Signed-off-by: Sreevani Sreejith Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230315195405.2051559-1-ssreevani@meta.com --- Documentation/bpf/libbpf/index.rst | 25 +- Documentation/bpf/libbpf/libbpf_overview.rst | 228 +++++++++++++++++++ 2 files changed, 245 insertions(+), 8 deletions(-) create mode 100644 Documentation/bpf/libbpf/libbpf_overview.rst diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst index f9b3b252e28f..7545a2049692 100644 --- a/Documentation/bpf/libbpf/index.rst +++ b/Documentation/bpf/libbpf/index.rst @@ -2,23 +2,32 @@ .. _libbpf: +====== libbpf ====== +If you are looking to develop BPF applications using the libbpf library, this +directory contains important documentation that you should read. + +To get started, it is recommended to begin with the :doc:`libbpf Overview +<libbpf_overview>` document, which provides a high-level understanding of the +libbpf APIs and their usage. This will give you a solid foundation to start +exploring and utilizing the various features of libbpf to develop your BPF +applications. + .. toctree:: :maxdepth: 1 + libbpf_overview API Documentation <https://libbpf.readthedocs.io/en/latest/api.html> program_types libbpf_naming_convention libbpf_build -This is documentation for libbpf, a userspace library for loading and -interacting with bpf programs. -All general BPF questions, including kernel functionality, libbpf APIs and -their application, should be sent to bpf@vger.kernel.org mailing list. -You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the -mailing list search its `archive <https://lore.kernel.org/bpf/>`_. -Please search the archive before asking new questions. It very well might -be that this was already addressed or answered before. +All general BPF questions, including kernel functionality, libbpf APIs and their +application, should be sent to the bpf@vger.kernel.org mailing list. You can +`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list +and search its `archive <https://lore.kernel.org/bpf/>`_. Please search the archive +before asking new questions. It may be that this was already addressed or +answered before. diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst new file mode 100644 index 000000000000..f36a2d4ffea2 --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf_overview.rst @@ -0,0 +1,228 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +libbpf Overview +=============== + +libbpf is a C-based library containing a BPF loader that takes compiled BPF +object files and prepares and loads them into the Linux kernel. libbpf takes on the +heavy lifting of loading, verifying, and attaching BPF programs to various +kernel hooks, allowing BPF application developers to focus only on BPF program +correctness and performance. + +The following are the high-level features supported by libbpf: + +* Provides high-level and low-level APIs for user space programs to interact + with BPF programs. The low-level APIs wrap all the bpf system call + functionality, which is useful when users need more fine-grained control + over the interactions between user space and BPF programs. +* Provides overall support for the BPF object skeleton generated by bpftool. + The skeleton file simplifies the process for the user space programs to access + global variables and work with BPF programs. +* Provides BPF-side APIs, including BPF helper definitions, BPF maps support, + and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports the BPF CO-RE mechanism, enabling BPF developers to write portable + BPF programs that can be compiled once and run across different kernel + versions. + +This document will delve into the above concepts in detail, providing a deeper +understanding of the capabilities and advantages of libbpf and how it can help +you develop BPF applications efficiently. + +BPF App Lifecycle and libbpf APIs +================================== + +A BPF application consists of one or more BPF programs (either cooperating or +completely independent), BPF maps, and global variables. The global +variables are shared between all BPF programs, which allows them to cooperate on +a common set of data. libbpf provides APIs that user space programs can use to +manipulate the BPF programs by triggering different phases of a BPF application +lifecycle. + +The following section provides a brief overview of each phase in the BPF life +cycle: + +* **Open phase**: In this phase, libbpf parses the BPF + object file and discovers BPF maps, BPF programs, and global variables. After + a BPF app is opened, user space apps can make additional adjustments + (setting BPF program types, if necessary; pre-setting initial values for + global variables, etc.) before all the entities are created and loaded. + +* **Load phase**: In the load phase, libbpf creates BPF + maps, resolves various relocations, and verifies and loads BPF programs into + the kernel. At this point, libbpf validates all the parts of a BPF application + and loads the BPF program into the kernel, but no BPF program has yet been + executed. After the load phase, it’s possible to set up the initial BPF map + state without racing with the BPF program code execution. + +* **Attachment phase**: In this phase, libbpf + attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes, + cgroup hooks, network packet processing pipeline, etc.). During this + phase, BPF programs perform useful work such as processing + packets, or updating BPF maps and global variables that can be read from user + space. + +* **Tear down phase**: In the tear down phase, + libbpf detaches BPF programs and unloads them from the kernel. BPF maps are + destroyed, and all the resources used by the BPF app are freed. + +BPF Object Skeleton File +======================== + +BPF skeleton is an alternative interface to libbpf APIs for working with BPF +objects. Skeleton code abstracts away generic libbpf APIs to significantly +simplify code for manipulating BPF programs from user space. Skeleton code +includes a bytecode representation of the BPF object file, simplifying the +process of distributing your BPF code. With BPF bytecode embedded, there are no +extra files to deploy along with your application binary. + +You can generate the skeleton header file ``(.skel.h)`` for a specific object +file by passing the BPF object to bpftool. The generated BPF skeleton +provides the following custom functions that correspond to the BPF lifecycle, +each of them prefixed with the specific object name: + +* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for + the specific bpf object name) +* ``<name>__load()`` – instantiates, loads, and verifies BPF application parts +* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s + optional, you can have more control by using libbpf APIs directly) +* ``<name>__destroy()`` – detaches all BPF programs and + frees up all used resources + +Using the skeleton code is the recommended way to work with BPF programs.
Keep +in mind, BPF skeleton provides access to the underlying BPF object, so whatever +was possible to do with generic libbpf APIs is still possible even when the BPF +skeleton is used. It's an additive convenience feature, with no syscalls and no +cumbersome code. + +Other Advantages of Using Skeleton File +--------------------------------------- + +* BPF skeleton provides an interface for user space programs to work with BPF + global variables. The skeleton code memory maps global variables as a struct + into user space. The struct interface allows user space programs to initialize + BPF programs before the BPF load phase and fetch and update data from user + space afterward. + +* The ``skel.h`` file reflects the object file structure by listing out the + available maps, programs, etc. BPF skeleton provides direct access to all the + BPF maps and BPF programs as struct fields. This eliminates the need for + string-based lookups with ``bpf_object_find_map_by_name()`` and + ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source + code and user-space code getting out of sync. + +* The embedded bytecode representation of the object file ensures that the + skeleton and the BPF object file are always in sync. + +BPF Helpers +=========== + +libbpf provides BPF-side APIs that BPF programs can use to interact with the +system. The BPF helpers definition allows developers to use them in BPF code as +any other plain C function. For example, there are helper functions to print +debugging messages, get the time since the system was booted, interact with BPF +maps, manipulate network packets, etc. + +For a complete description of what the helpers do, the arguments they take, and +the return value, see the `bpf-helpers +<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page. + +BPF CO-RE (Compile Once – Run Everywhere) +========================================= + +BPF programs work in the kernel space and have access to kernel memory and data +structures. One limitation that BPF applications come across is the lack of +portability across different kernel versions and configurations. `BCC +<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF +portability. However, it comes with runtime overhead and a large binary size +from embedding the compiler with the application. + +libbpf steps up the BPF program portability by supporting the BPF CO-RE concept. +BPF CO-RE brings together BTF type information, libbpf, and the compiler to +produce a single executable binary that you can run on multiple kernel versions +and configurations. + +To make BPF programs portable, libbpf relies on the BTF type information of the +running kernel. The kernel also exposes this self-describing authoritative BTF +information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``. + +You can generate the BTF information for the running kernel with the following +command: + +:: + + $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h + +The command generates a ``vmlinux.h`` header file with all kernel types +(:doc:`BTF types <../btf>`) that the running kernel uses. Including +``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel +headers. + +libbpf enables portability of BPF programs by looking at the BPF program’s +recorded BTF type and relocation information and matching them to BTF +information (vmlinux) provided by the running kernel.
libbpf then resolves and +matches all the types and fields, and updates necessary offsets and other +relocatable data to ensure that BPF program’s logic functions correctly for a +specific kernel on the host. The BPF CO-RE concept thus eliminates overhead +associated with BPF development and allows developers to write portable BPF +applications without modifications and runtime source code compilation on the +target machine. + +The following code snippet shows how to read the parent field of a kernel +``task_struct`` using BPF CO-RE and libbpf. The basic helper to read a field in a +CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read +``sz`` bytes from the field referenced by ``src`` into the memory pointed to by +``dst``. + +.. code-block:: C + :emphasize-lines: 6 + + //... + struct task_struct *task = (void *)bpf_get_current_task(); + struct task_struct *parent_task; + int err; + + err = bpf_core_read(&parent_task, sizeof(void *), &task->parent); + if (err) { + /* handle error */ + } + + /* parent_task contains the value of task->parent pointer */ + +In the code snippet, we first get a pointer to the current ``task_struct`` using +``bpf_get_current_task()``. We then use ``bpf_core_read()`` to read the parent +field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is +just like the ``bpf_probe_read_kernel()`` BPF helper, except it records information +about the field that should be relocated on the target kernel. That is, if the +``parent`` field gets shifted to a different offset within +``struct task_struct`` due to some new field added in front of it, libbpf will +automatically adjust the actual offset to the proper value. + +Getting Started with libbpf +=========================== + +Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_ +repository with simple examples of using libbpf to build various BPF +applications. + +See also the `libbpf API documentation +<https://libbpf.readthedocs.io/en/latest/api.html>`_. + +libbpf and Rust +=============== + +If you are building BPF applications in Rust, it is recommended to use the +`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of bindgen +bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in +Rust-idiomatic interfaces and provides the libbpf-cargo plugin to handle BPF code +compilation and skeleton generation. Using Libbpf-rs will make building the user +space part of the BPF application easier. Note that the BPF programs themselves +must still be written in plain C. + +Additional Documentation +======================== + +* `Program types and ELF Sections <program_types.html>`_ +* `API naming convention <libbpf_naming_convention.html>`_ +* `Building libbpf <libbpf_build.html>`_ +* `API documentation Convention <libbpf_naming_convention.html#api-documentation>`_ From a506d6ce1dd184051037dc9d26c3eb187c9fe625 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 19 Mar 2023 13:30:13 -0700 Subject: [PATCH 078/260] libbpf: Fix ld_imm64 copy logic for ksym in light skeleton. Unlike normal libbpf, the light skeleton 'loader' program does a btf_find_by_name_kind() call at run-time to find a ksym in the kernel and populate its {btf_id, btf_obj_fd} pair in the ld_imm64 insn. To avoid doing the search multiple times for the same ksym, it remembers the first patched ld_imm64 insn and copies {btf_id, btf_obj_fd} from it into subsequent ld_imm64 insns. Fix a bug in the copying logic, since it may incorrectly clear the BPF_PSEUDO_BTF_ID flag. Also replace the always-true if (btf_obj_fd >= 0) check with an unconditional JMP_JA to clarify the code.
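As background for the {btf_id, btf_obj_fd} pair discussed above, here is a rough sketch of how a resolved ksym ld_imm64 is laid out. This is illustrative only and not part of the patch; the field roles follow the BPF UAPI, but the concrete values are made up:

```c
#include <linux/bpf.h>

/* Sketch: a ksym ld_imm64 spans two struct bpf_insn slots. */
struct bpf_insn ksym_ld_imm64[2] = {
	{
		.code    = BPF_LD | BPF_DW | BPF_IMM,
		.dst_reg = BPF_REG_1,
		.src_reg = BPF_PSEUDO_BTF_ID, /* the flag the fix preserves */
		.imm     = 42,                /* btf_id found at run-time */
	},
	{
		.imm     = 3,                 /* btf_obj_fd; 0 means vmlinux BTF */
	},
};
```

The copy logic being fixed moves both imm fields between such two-insn pairs, so losing the BPF_PSEUDO_BTF_ID marker in src_reg would corrupt later ld_imm64 insns for the same ksym.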
Fixes: d995816b77eb ("libbpf: Avoid reload of imm for weak, unresolved, repeating ksym") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230319203014.55866-1-alexei.starovoitov@gmail.com --- tools/lib/bpf/gen_loader.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 23f5c46708f8..b74c82bb831e 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -804,11 +804,13 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, return; /* try to copy from existing ldimm64 insn */ if (kdesc->ref > 1) { - move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + offsetof(struct bpf_insn, imm)); move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + /* jump over src_reg adjustment if imm (btf_id) is not 0, reuse BPF_REG_0 from move_blob2blob + * If btf_id is zero, clear BPF_PSEUDO_BTF_ID flag in src_reg of ld_imm64 insn + */ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); goto clear_src_reg; } @@ -831,7 +833,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); /* skip src_reg adjustment */ - emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ reg_mask = src_reg_mask(); From bb4a6a9237293346cf1b3b7bc4ff4dfc1977a103 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 19 Mar 2023 13:30:14 -0700 Subject: [PATCH 079/260] selftest/bpf: Add a test case for ld_imm64 copy logic. Add a test case to exercise {btf_id, btf_obj_fd} copy logic between ld_imm64 insns. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230319203014.55866-2-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/progs/test_ksyms_weak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 5f8379aadb29..7003eef0c192 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -37,7 +37,7 @@ int pass_handler(const void *ctx) /* tests existing symbols. */ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); - if (rq) + if (rq && bpf_ksym_exists(&runqueues)) out__existing_typed = rq->cpu; out__existing_typeless = (__u64)&bpf_prog_active; From 01dc26c980b0a92d9ba94c28652b2675968727b6 Mon Sep 17 00:00:00 2001 From: Liu Pan Date: Mon, 20 Mar 2023 11:07:20 +0800 Subject: [PATCH 080/260] libbpf: Explicitly call write to append content to file append_to_file() currently writes data to the fd by calling "vdprintf"; in most implementations of the standard library, the data is ultimately written by the writev syscall. But "uprobe_events"/"kprobe_events" does not allow segmented writes, so switch the "append_to_file" function to an explicit write() call.
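The constraint driving this change can be illustrated with a hypothetical userspace snippet (the tracefs path and probe name are assumptions for illustration, not from the patch): tracefs parses each write() buffer as a complete command, so a record split across two writes, which a writev()-backed vdprintf can produce, is rejected.

```c
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *cmd = "p:my_probe do_sys_openat2";
	int fd = open("/sys/kernel/debug/tracing/kprobe_events",
		      O_WRONLY | O_APPEND | O_CLOEXEC);
	if (fd < 0)
		return 1;

	/* works: the whole command arrives in one write() */
	write(fd, cmd, strlen(cmd));

	/* likely rejected: each segment is parsed as its own,
	 * incomplete command */
	write(fd, "p:my_probe ", strlen("p:my_probe "));
	write(fd, "do_sys_openat2", strlen("do_sys_openat2"));

	close(fd);
	return 0;
}
```

Formatting into a local buffer first and issuing a single write(), as the patch below does, sidesteps this entirely.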
Signed-off-by: Liu Pan Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230320030720.650-1-patteliu@gmail.com --- tools/lib/bpf/libbpf.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4c34fbd7b5be..149864ea88d1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9918,16 +9918,20 @@ static int append_to_file(const char *file, const char *fmt, ...) { int fd, n, err = 0; va_list ap; + char buf[1024]; + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (n < 0 || n >= sizeof(buf)) + return -EINVAL; fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); if (fd < 0) return -errno; - va_start(ap, fmt); - n = vdprintf(fd, fmt, ap); - va_end(ap); - - if (n < 0) + if (write(fd, buf, n) < 0) err = -errno; close(fd); From 04aae213e719ec2bb310158c4025316ace50589b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Mar 2023 18:41:13 -0700 Subject: [PATCH 081/260] net: skbuff: rename __pkt_vlan_present_offset to __mono_tc_offset vlan_present is gone since commit 354259fa73e2 ("net: remove skb->vlan_present"), so rename the offset field to what BPF is currently looking for in this byte: mono_delivery_time and tc_at_ingress. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230321014115.997841-2-kuba@kernel.org Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 4 ++-- net/core/filter.c | 8 ++++---- tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3f3a2a82a86b..5a63878a4550 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -955,7 +955,7 @@ struct sk_buff { __u8 csum_valid:1; /* private: */ - __u8 __pkt_vlan_present_offset[0]; + __u8 __mono_tc_offset[0]; /* public: */ __u8 remcsum_offload:1; __u8 csum_complete_sw:1; @@ -1078,7 +1078,7 @@ struct sk_buff { #define TC_AT_INGRESS_MASK (1 << 7) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif -#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) #ifdef __KERNEL__ /* diff --git a/net/core/filter.c b/net/core/filter.c index 50f649f1b4a9..3370efad1dda 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9185,7 +9185,7 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - PKT_VLAN_PRESENT_OFFSET); + SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_MONO_DELIVERY_TIME_MASK, 2); *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); @@ -9232,7 +9232,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, /* AX is needed because src_reg and dst_reg could be the same */ __u8 tmp_reg = BPF_REG_AX; - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, @@ -9267,14 +9267,14 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
/* Writing __sk_buff->tstamp as ingress, goto <clear> */ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto <store> */ *insn++ = BPF_JMP_A(2); /* <clear>: mono_delivery_time */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index d5fe3d4b936c..ae7b6e50e405 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -68,17 +68,17 @@ static struct test_case test_cases[] = { #if defined(__x86_64__) || defined(__aarch64__) { N(SCHED_CLS, struct __sk_buff, tstamp), - .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" "w11 &= 160;" "if w11 != 0xa0 goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", - .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" "if w11 & 0x80 goto pc+1;" "goto pc+2;" "w11 &= -33;" - "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;" + "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, #endif From b94e032b7ad6318b36615f0e0cc3b0d61a5531e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Mar 2023 18:41:14 -0700 Subject: [PATCH 082/260] net: skbuff: reorder bytes 2 and 3 of the bitfield BPF needs to know the offsets of fields it tries to access. Zero-length fields are added to make offsetof() work. This unfortunately partitions the bitfield (fields across the zero-length members can't be coalesced). Reorder bytes 2 and 3: BPF needs to know the offset of fields previously in byte 3, and some fields in byte 2 should really be optional. The two bytes are always in the same cacheline so it should not matter. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230321014115.997841-3-kuba@kernel.org Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5a63878a4550..36d31e74db37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -944,16 +944,6 @@ struct sk_buff { __u8 ip_summed:2; __u8 ooo_okay:1; - __u8 l4_hash:1; - __u8 sw_hash:1; - __u8 wifi_acked_valid:1; - __u8 wifi_acked:1; - __u8 no_fcs:1; - /* Indicates the inner headers are valid in the skbuff. */ - __u8 encapsulation:1; - __u8 encap_hdr_csum:1; - __u8 csum_valid:1; - /* private: */ __u8 __mono_tc_offset[0]; /* public: */ @@ -966,6 +956,16 @@ struct sk_buff { __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ #endif + + __u8 l4_hash:1; + __u8 sw_hash:1; + __u8 wifi_acked_valid:1; + __u8 wifi_acked:1; + __u8 no_fcs:1; + /* Indicates the inner headers are valid in the skbuff.
*/ + __u8 encapsulation:1; + __u8 encap_hdr_csum:1; + __u8 csum_valid:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif From c0ba861117c3e8deb03855d7dc5a7717958bbb18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Mar 2023 18:41:15 -0700 Subject: [PATCH 083/260] net: skbuff: move the fields BPF cares about directly next to the offset marker To avoid more possible BPF dependencies with moving bitfields around, keep the fields BPF cares about right next to the offset marker. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230321014115.997841-4-kuba@kernel.org Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 +++++++++--------- .../selftests/bpf/prog_tests/ctx_rewrite.c | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 36d31e74db37..6aeb0e7b9511 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -947,15 +947,15 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ + __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ + __u8 tc_skip_classify:1; +#endif __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ -#endif __u8 l4_hash:1; __u8 sw_hash:1; @@ -1072,11 +1072,11 @@ struct sk_buff { * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define TC_AT_INGRESS_MASK (1 << 0) -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) +#define TC_AT_INGRESS_MASK (1 << 6) #else -#define TC_AT_INGRESS_MASK (1 << 7) -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) +#define TC_AT_INGRESS_MASK (1 << 1) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index ae7b6e50e405..4951aa978f33 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -69,15 +69,15 @@ static struct test_case test_cases[] = { { N(SCHED_CLS, struct __sk_buff, tstamp), .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "w11 &= 160;" - "if w11 != 0xa0 goto pc+2;" + "w11 &= 3;" + "if w11 != 0x3 goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x80 goto pc+1;" + "if w11 & 0x2 goto pc+1;" "goto pc+2;" - "w11 &= -33;" + "w11 &= -2;" "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, From 9a321fd3308e262f2a76761bea86dd0f311e3f86 Mon Sep 17 00:00:00 2001 From: Tushar Vyavahare Date: Mon, 20 Mar 2023 15:57:05 +0530 Subject: [PATCH 084/260] selftests/xsk: add xdp populate metadata test Add a new test in copy-mode for testing the copying of metadata from the buffer in kernel-space to user-space. This is accomplished by adding a new XDP program and using the bss map to store a counter that is written to the metadata field. This counter is incremented for every packet so that the number becomes unique and should be the same as the payload.
It is stored in the bss so the value can be reset between runs. The XDP program populates the metadata and the userspace program checks the value stored in the metadata field against the payload using the new is_metadata_correct() function. To turn this verification on or off, add a new parameter (use_metadata) to the ifobject structure. Signed-off-by: Tushar Vyavahare Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20230320102705.306187-1-tushar.vyavahare@intel.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/xsk_xdp_progs.c | 25 ++++++++++ .../testing/selftests/bpf/xsk_xdp_metadata.h | 5 ++ tools/testing/selftests/bpf/xskxceiver.c | 46 ++++++++++++++++++- tools/testing/selftests/bpf/xskxceiver.h | 2 + 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/xsk_xdp_metadata.h diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 744a01d0e57d..a630c95c7471 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "xsk_xdp_metadata.h" struct { __uint(type, BPF_MAP_TYPE_XSKMAP); @@ -12,6 +13,7 @@ } xsk SEC(".maps"); static unsigned int idx; +int count = 0; SEC("xdp") int xsk_def_prog(struct xdp_md *xdp) { @@ -27,4 +29,27 @@ SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp) return bpf_redirect_map(&xsk, 0, XDP_DROP); } +SEC("xdp") int xsk_xdp_populate_metadata(struct xdp_md *xdp) +{ + void *data, *data_meta; + struct xdp_info *meta; + int err; + + /* Reserve enough for all custom metadata. */ + err = bpf_xdp_adjust_meta(xdp, -(int)sizeof(struct xdp_info)); + if (err) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_meta = (void *)(long)xdp->data_meta; + + if (data_meta + sizeof(struct xdp_info) > data) + return XDP_DROP; + + meta = data_meta; + meta->count = count++; + + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h new file mode 100644 index 000000000000..943133da378a --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_xdp_metadata.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +struct xdp_info { + __u64 count; +} __attribute__((aligned(32))); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index a17655107a94..b65e0645b0cd 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -103,6 +103,7 @@ #include #include #include "../kselftest.h" +#include "xsk_xdp_metadata.h" static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; @@ -464,6 +465,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->use_fill_ring = true; ifobj->release_rx = true; ifobj->validation_func = NULL; + ifobj->use_metadata = false; if (i == 0) { ifobj->rx_on = false; @@ -798,6 +800,20 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt return false; } +static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) +{ + void *data = xsk_umem__get_data(buffer, addr); + struct xdp_info *meta = data - sizeof(struct xdp_info); + + if (meta->count != pkt->payload) { + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", + __func__, pkt->payload, meta->count); + return
false; + } + + return true; +} + static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) { void *data = xsk_umem__get_data(buffer, addr); @@ -959,7 +975,8 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) addr = xsk_umem__add_offset_to_addr(addr); if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || - !is_offset_correct(umem, pkt_stream, addr, pkt->addr)) + !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || + (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) return TEST_FAILURE; if (ifobj->use_fill_ring) @@ -1686,6 +1703,30 @@ static void testapp_xdp_drop(struct test_spec *test) testapp_validate_traffic(test); } +static void testapp_xdp_metadata_count(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + struct bpf_map *data_map; + int count = 0; + int key = 0; + + test_spec_set_name(test, "XDP_METADATA_COUNT"); + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, + skel_tx->progs.xsk_xdp_populate_metadata, + skel_rx->maps.xsk, skel_tx->maps.xsk); + test->ifobj_rx->use_metadata = true; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) + exit_with_error(ENOMEM); + + if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) + exit_with_error(errno); + + testapp_validate_traffic(test); +} + static void testapp_poll_txq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_TXQ_FULL"); @@ -1835,6 +1876,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ case TEST_TYPE_XDP_DROP_HALF: testapp_xdp_drop(test); break; + case TEST_TYPE_XDP_METADATA_COUNT: + testapp_xdp_metadata_count(test); + break; default: break; } diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 3e8ec7d8ec32..bdb4efedf3a9 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -88,6 +88,7 @@ enum test_type { TEST_TYPE_STATS_FILL_EMPTY, TEST_TYPE_BPF_RES, TEST_TYPE_XDP_DROP_HALF, + TEST_TYPE_XDP_METADATA_COUNT, TEST_TYPE_MAX }; @@ -158,6 +159,7 @@ struct ifobject { bool use_fill_ring; bool release_rx; bool shared_umem; + bool use_metadata; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; From a18f721415b4d4edea23cb8c7d6276330302af14 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2023 13:38:51 -0700 Subject: [PATCH 085/260] libbpf: Rename RELO_EXTERN_VAR/FUNC. RELO_EXTERN_VAR/FUNC names are not correct anymore. RELO_EXTERN_VAR represents a ksym symbol in an ld_imm64 insn. It can point to a kernel variable or a kfunc. Rename RELO_EXTERN_VAR->RELO_EXTERN_LD64 and RELO_EXTERN_FUNC->RELO_EXTERN_CALL to match what they actually represent.
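A sketch of the two forms on the BPF side (illustrative only; assumes vmlinux.h, bpf_helpers.h, and the bpf_ksym_exists() macro added earlier in this series):

```c
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern const struct rq runqueues __ksym __weak;        /* variable ksym */
void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;  /* kfunc */

SEC("raw_tp/sys_enter")
int ksym_relo_kinds(const void *ctx)
{
	/* taking a ksym address emits a ld_imm64 insn -> RELO_EXTERN_LD64 */
	const struct rq *rq = &runqueues;

	/* a BPF_JMP | BPF_CALL insn to a kfunc -> RELO_EXTERN_CALL */
	if (bpf_ksym_exists(bpf_testmod_test_mod_kfunc))
		bpf_testmod_test_mod_kfunc(0);

	return rq ? 1 : 0;
}

char _license[] SEC("license") = "GPL";
```

The distinction is thus purely about the instruction form that references the extern, not about whether the target is a variable or a function, which is exactly what the new names capture.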
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230321203854.3035-2-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 149864ea88d1..f8131f963803 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -315,8 +315,8 @@ enum reloc_type { RELO_LD64, RELO_CALL, RELO_DATA, - RELO_EXTERN_VAR, - RELO_EXTERN_FUNC, + RELO_EXTERN_LD64, + RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, }; @@ -4009,9 +4009,9 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", prog->name, i, ext->name, ext->sym_idx, insn_idx); if (insn->code == (BPF_JMP | BPF_CALL)) - reloc_desc->type = RELO_EXTERN_FUNC; + reloc_desc->type = RELO_EXTERN_CALL; else - reloc_desc->type = RELO_EXTERN_VAR; + reloc_desc->type = RELO_EXTERN_LD64; reloc_desc->insn_idx = insn_idx; reloc_desc->sym_off = i; /* sym_off stores extern index */ return 0; @@ -5855,7 +5855,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) relo->map_idx, map); } break; - case RELO_EXTERN_VAR: + case RELO_EXTERN_LD64: ext = &obj->externs[relo->sym_off]; if (ext->type == EXT_KCFG) { if (obj->gen_loader) { @@ -5877,7 +5877,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } } break; - case RELO_EXTERN_FUNC: + case RELO_EXTERN_CALL: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; if (ext->is_set) { @@ -6115,7 +6115,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, continue; relo = find_prog_insn_relo(prog, insn_idx); - if (relo && relo->type == RELO_EXTERN_FUNC) + if (relo && relo->type == RELO_EXTERN_CALL) /* kfunc relocations will be handled later * in bpf_object__relocate_data() */ @@ -7072,14 +7072,14 @@ static int bpf_program_record_relos(struct bpf_program *prog) struct extern_desc *ext = &obj->externs[relo->sym_off]; switch (relo->type) { - case RELO_EXTERN_VAR: + case RELO_EXTERN_LD64: if (ext->type != EXT_KSYM) continue; bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, !ext->ksym.type_id, BTF_KIND_VAR, relo->insn_idx); break; - case RELO_EXTERN_FUNC: + case RELO_EXTERN_CALL: bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, false, BTF_KIND_FUNC, relo->insn_idx); From 1057d299459657b85e593a4b6294a000f920672a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2023 13:38:52 -0700 Subject: [PATCH 086/260] bpf: Teach the verifier to recognize rdonly_mem as not null. Teach the verifier to recognize PTR_TO_MEM | MEM_RDONLY as not NULL; otherwise, if (!bpf_ksym_exists(known_kfunc)) doesn't go through dead code elimination.
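In practice the pattern looks like the sketch below, which is essentially what the light skeleton selftest added later in this series does (assumes bpf_helpers.h with the bpf_ksym_exists() macro):

```c
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;

SEC("raw_tp/sys_enter")
int rdonly_mem_prune(const void *ctx)
{
	if (!bpf_ksym_exists(bpf_task_acquire))
		/* When the kfunc is present, its address loads as
		 * PTR_TO_MEM | MEM_RDONLY; the verifier now treats that
		 * as non-NULL, marks this branch dead, and never has to
		 * verify the invalid zero-argument call below. */
		bpf_task_acquire(0);

	return 0;
}

char _license[] SEC("license") = "GPL";
```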
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230321203854.3035-3-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8bc44f5dc5b6..5693e4a92752 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -486,8 +486,17 @@ static bool type_is_sk_pointer(enum bpf_reg_type type) type == PTR_TO_XDP_SOCK; } +static bool type_may_be_null(u32 type) +{ + return type & PTR_MAYBE_NULL; +} + static bool reg_type_not_null(enum bpf_reg_type type) { + if (type_may_be_null(type)) + return false; + + type = base_type(type); return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || @@ -531,11 +540,6 @@ static bool type_is_rdonly_mem(u32 type) return type & MEM_RDONLY; } -static bool type_may_be_null(u32 type) -{ - return type & PTR_MAYBE_NULL; -} - static bool is_acquire_function(enum bpf_func_id func_id, const struct bpf_map *map) { From 708cdc5706a4701be9e5f81cb2e2b60b57f34c42 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2023 13:38:53 -0700 Subject: [PATCH 087/260] libbpf: Support kfunc detection in light skeleton. Teach gen_loader to find {btf_id, btf_obj_fd} of kernel variables and kfuncs and populate corresponding ld_imm64 and bpf_call insns. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230321203854.3035-4-alexei.starovoitov@gmail.com --- tools/lib/bpf/bpf_gen_internal.h | 4 +++- tools/lib/bpf/gen_loader.c | 38 ++++++++++++++++---------------- tools/lib/bpf/libbpf.c | 7 ++++-- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 223308931d55..fdf44403ff36 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -11,6 +11,7 @@ struct ksym_relo_desc { int insn_idx; bool is_weak; bool is_typeless; + bool is_ld64; }; struct ksym_desc { @@ -24,6 +25,7 @@ struct ksym_desc { bool typeless; }; int insn; + bool is_ld64; }; struct bpf_gen { @@ -65,7 +67,7 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx); + bool is_typeless, bool is_ld64, int kind, int insn_idx); void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index b74c82bb831e..83e8e3bfd8ff 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -560,7 +560,7 @@ static void emit_find_attach_target(struct bpf_gen *gen) } void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx) + bool is_typeless, bool is_ld64, int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -574,6 +574,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, relo->name = name; relo->is_weak = is_weak; relo->is_typeless = is_typeless; + relo->is_ld64 = is_ld64; relo->kind = kind; relo->insn_idx = insn_idx; 
gen->relo_cnt++; @@ -586,9 +587,11 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des int i; for (i = 0; i < gen->nr_ksyms; i++) { - if (!strcmp(gen->ksyms[i].name, relo->name)) { - gen->ksyms[i].ref++; - return &gen->ksyms[i]; + kdesc = &gen->ksyms[i]; + if (kdesc->kind == relo->kind && kdesc->is_ld64 == relo->is_ld64 && + !strcmp(kdesc->name, relo->name)) { + kdesc->ref++; + return kdesc; } } kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); @@ -603,6 +606,7 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des kdesc->ref = 1; kdesc->off = 0; kdesc->insn = 0; + kdesc->is_ld64 = relo->is_ld64; return kdesc; } @@ -864,23 +868,17 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn { int insn; - pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + pr_debug("gen: emit_relo (%d): %s at %d %s\n", + relo->kind, relo->name, relo->insn_idx, relo->is_ld64 ? "ld64" : "call"); insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); - switch (relo->kind) { - case BTF_KIND_VAR: + if (relo->is_ld64) { if (relo->is_typeless) emit_relo_ksym_typeless(gen, relo, insn); else emit_relo_ksym_btf(gen, relo, insn); - break; - case BTF_KIND_FUNC: + } else { emit_relo_kfunc_btf(gen, relo, insn); - break; - default: - pr_warn("Unknown relocation kind '%d'\n", relo->kind); - gen->error = -EDOM; - return; } } @@ -903,18 +901,20 @@ static void cleanup_core_relo(struct bpf_gen *gen) static void cleanup_relos(struct bpf_gen *gen, int insns) { + struct ksym_desc *kdesc; int i, insn; for (i = 0; i < gen->nr_ksyms; i++) { + kdesc = &gen->ksyms[i]; /* only close fds for typed ksyms and kfuncs */ - if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { + if (kdesc->is_ld64 && !kdesc->typeless) { /* close fd recorded in insn[insn_idx + 1].imm */ - insn = gen->ksyms[i].insn; + insn = kdesc->insn; insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); emit_sys_close_blob(gen, insn); - } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { - emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); - if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + } else if (!kdesc->is_ld64) { + emit_sys_close_blob(gen, blob_fd_array_off(gen, kdesc->off)); + if (kdesc->off < MAX_FD_ARRAY_SZ) gen->nr_fd_array--; } } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f8131f963803..5d32aa8ea38a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7070,18 +7070,21 @@ static int bpf_program_record_relos(struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct extern_desc *ext = &obj->externs[relo->sym_off]; + int kind; switch (relo->type) { case RELO_EXTERN_LD64: if (ext->type != EXT_KSYM) continue; + kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 
+ BTF_KIND_VAR : BTF_KIND_FUNC; bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, !ext->ksym.type_id, - BTF_KIND_VAR, relo->insn_idx); + true, kind, relo->insn_idx); break; case RELO_EXTERN_CALL: bpf_gen__record_extern(obj->gen_loader, ext->name, - ext->is_weak, false, BTF_KIND_FUNC, + ext->is_weak, false, false, BTF_KIND_FUNC, relo->insn_idx); break; case RELO_CORE: { From 3b2ec2140fa27febb21034943d656898b659dc02 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2023 13:38:54 -0700 Subject: [PATCH 088/260] selftests/bpf: Add light skeleton test for kfunc detection. Add light skeleton test for kfunc detection and denylist it for s390. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230321203854.3035-5-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../testing/selftests/bpf/progs/test_ksyms_weak.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 34cb8b2de8ca..c7463f3ec3c0 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -11,6 +11,7 @@ get_stack_raw_tp # user_stack corrupted user stack iters/testmod_seq* # s390x doesn't support kfuncs in modules yet kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test # relies on fentry +ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc) ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 7003eef0c192..d00268c91e19 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -20,6 +20,8 @@ __u64 out__non_existent_typed = -1; /* test existing weak symbols can be resolved. */ extern const struct rq runqueues __ksym __weak; /* typed */ extern const void bpf_prog_active __ksym __weak; /* typeless */ +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; +void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; /* non-existent weak symbols. */ @@ -29,6 +31,7 @@ extern const void bpf_link_fops1 __ksym __weak; /* typed symbols, default to zero. 
*/ +extern const int bpf_link_fops2 __ksym __weak; +void invalid_kfunc(void) __ksym __weak; SEC("raw_tp/sys_enter") int pass_handler(const void *ctx) @@ -50,6 +53,18 @@ int pass_handler(const void *ctx) if (&bpf_link_fops2) /* can't happen */ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0); + if (!bpf_ksym_exists(bpf_task_acquire)) + /* dead code won't be seen by the verifier */ + bpf_task_acquire(0); + + if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) + /* dead code won't be seen by the verifier */ + bpf_testmod_test_mod_kfunc(0); + + if (bpf_ksym_exists(invalid_kfunc)) + /* dead code won't be seen by the verifier */ + invalid_kfunc(); + return 0; } From 830154cdc57971b06f81d4ffc39b868e3d7693de Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 22 Mar 2023 12:47:53 -0700 Subject: [PATCH 089/260] bpf/selftests: coverage for bpf_map_ops errors These tests expose the issue of being unable to properly check for errors returned from inlined bpf map helpers that make calls to the bpf_map_ops functions. At best, a check for zero or non-zero can be done, but these tests show it is not possible to check for a negative value or for a specific error value. Signed-off-by: JP Kobryn Tested-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230322194754.185781-2-inwardvessel@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/map_ops.c | 162 ++++++++++++++++++ .../selftests/bpf/progs/test_map_ops.c | 138 +++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_ops.c create mode 100644 tools/testing/selftests/bpf/progs/test_map_ops.c diff --git a/tools/testing/selftests/bpf/prog_tests/map_ops.c b/tools/testing/selftests/bpf/prog_tests/map_ops.c new file mode 100644 index 000000000000..be5e42a413b4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_ops.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
*/ + +#include +#include +#include + +#include "test_map_ops.skel.h" +#include "test_progs.h" + +static void map_update(void) +{ + (void)syscall(__NR_getpid); +} + +static void map_delete(void) +{ + (void)syscall(__NR_getppid); +} + +static void map_push(void) +{ + (void)syscall(__NR_getuid); +} + +static void map_pop(void) +{ + (void)syscall(__NR_geteuid); +} + +static void map_peek(void) +{ + (void)syscall(__NR_getgid); +} + +static void map_for_each_pass(void) +{ + (void)syscall(__NR_gettid); +} + +static void map_for_each_fail(void) +{ + (void)syscall(__NR_getpgid); +} + +static int setup(struct test_map_ops **skel) +{ + int err = 0; + + if (!skel) + return -1; + + *skel = test_map_ops__open(); + if (!ASSERT_OK_PTR(*skel, "test_map_ops__open")) + return -1; + + (*skel)->rodata->pid = getpid(); + + err = test_map_ops__load(*skel); + if (!ASSERT_OK(err, "test_map_ops__load")) + return err; + + err = test_map_ops__attach(*skel); + if (!ASSERT_OK(err, "test_map_ops__attach")) + return err; + + return err; +} + +static void teardown(struct test_map_ops **skel) +{ + if (skel && *skel) + test_map_ops__destroy(*skel); +} + +static void map_ops_update_delete_subtest(void) +{ + struct test_map_ops *skel; + + if (setup(&skel)) + goto teardown; + + map_update(); + ASSERT_OK(skel->bss->err, "map_update_initial"); + + map_update(); + ASSERT_LT(skel->bss->err, 0, "map_update_existing"); + ASSERT_EQ(skel->bss->err, -EEXIST, "map_update_existing"); + + map_delete(); + ASSERT_OK(skel->bss->err, "map_delete_existing"); + + map_delete(); + ASSERT_LT(skel->bss->err, 0, "map_delete_non_existing"); + ASSERT_EQ(skel->bss->err, -ENOENT, "map_delete_non_existing"); + +teardown: + teardown(&skel); +} + +static void map_ops_push_peek_pop_subtest(void) +{ + struct test_map_ops *skel; + + if (setup(&skel)) + goto teardown; + + map_push(); + ASSERT_OK(skel->bss->err, "map_push_initial"); + + map_push(); + ASSERT_LT(skel->bss->err, 0, "map_push_when_full"); + ASSERT_EQ(skel->bss->err, -E2BIG, "map_push_when_full"); + + map_peek(); + ASSERT_OK(skel->bss->err, "map_peek"); + + map_pop(); + ASSERT_OK(skel->bss->err, "map_pop"); + + map_peek(); + ASSERT_LT(skel->bss->err, 0, "map_peek_when_empty"); + ASSERT_EQ(skel->bss->err, -ENOENT, "map_peek_when_empty"); + + map_pop(); + ASSERT_LT(skel->bss->err, 0, "map_pop_when_empty"); + ASSERT_EQ(skel->bss->err, -ENOENT, "map_pop_when_empty"); + +teardown: + teardown(&skel); +} + +static void map_ops_for_each_subtest(void) +{ + struct test_map_ops *skel; + + if (setup(&skel)) + goto teardown; + + map_for_each_pass(); + /* expect to iterate over 1 element */ + ASSERT_EQ(skel->bss->err, 1, "map_for_each_no_flags"); + + map_for_each_fail(); + ASSERT_LT(skel->bss->err, 0, "map_for_each_with_flags"); + ASSERT_EQ(skel->bss->err, -EINVAL, "map_for_each_with_flags"); + +teardown: + teardown(&skel); +} + +void test_map_ops(void) +{ + if (test__start_subtest("map_ops_update_delete")) + map_ops_update_delete_subtest(); + + if (test__start_subtest("map_ops_push_peek_pop")) + map_ops_push_peek_pop_subtest(); + + if (test__start_subtest("map_ops_for_each")) + map_ops_for_each_subtest(); +} diff --git a/tools/testing/selftests/bpf/progs/test_map_ops.c b/tools/testing/selftests/bpf/progs/test_map_ops.c new file mode 100644 index 000000000000..b53b46a090c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_ops.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK); + __uint(max_entries, 1); + __type(value, int); +} stack_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} array_map SEC(".maps"); + +const volatile pid_t pid; +long err = 0; + +static u64 callback(u64 map, u64 key, u64 val, u64 ctx, u64 flags) +{ + return 0; +} + +SEC("tp/syscalls/sys_enter_getpid") +int map_update(void *ctx) +{ + const int key = 0; + const int val = 1; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + err = bpf_map_update_elem(&hash_map, &key, &val, BPF_NOEXIST); + + return 0; +} + +SEC("tp/syscalls/sys_enter_getppid") +int map_delete(void *ctx) +{ + const int key = 0; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + err = bpf_map_delete_elem(&hash_map, &key); + + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int map_push(void *ctx) +{ + const int val = 1; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + err = bpf_map_push_elem(&stack_map, &val, 0); + + return 0; +} + +SEC("tp/syscalls/sys_enter_geteuid") +int map_pop(void *ctx) +{ + int val; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + err = bpf_map_pop_elem(&stack_map, &val); + + return 0; +} + +SEC("tp/syscalls/sys_enter_getgid") +int map_peek(void *ctx) +{ + int val; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + err = bpf_map_peek_elem(&stack_map, &val); + + return 0; +} + +SEC("tp/syscalls/sys_enter_gettid") +int map_for_each_pass(void *ctx) +{ + const int key = 0; + const int val = 1; + const u64 flags = 0; + int callback_ctx; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + bpf_map_update_elem(&array_map, &key, &val, flags); + + err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags); + + return 0; +} + +SEC("tp/syscalls/sys_enter_getpgid") +int map_for_each_fail(void *ctx) +{ + const int key = 0; + const int val = 1; + const u64 flags = BPF_NOEXIST; + int callback_ctx; + + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + bpf_map_update_elem(&array_map, &key, &val, flags); + + /* calling for_each with non-zero flags will return error */ + err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags); + + return 0; +} From d7ba4cc900bf1eea2d8c807c6b1fc6bd61f41237 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 22 Mar 2023 12:47:54 -0700 Subject: [PATCH 090/260] bpf: return long from bpf_map_ops funcs This patch changes the return types of bpf_map_ops functions to long, where previously int was returned. Using long allows for bpf programs to maintain the sign bit in the absence of sign extension during situations where inlined bpf helper funcs make calls to the bpf_map_ops funcs and a negative error is returned. The definitions of the helper funcs are generated from comments in the bpf uapi header at `include/uapi/linux/bpf.h`. The return type of these helpers was previously changed from int to long in commit bdb7b79b4ce8. For any case where one of the map helpers call the bpf_map_ops funcs that are still returning 32-bit int, a compiler might not include sign extension instructions to properly convert the 32-bit negative value a 64-bit negative value. 
For example: bpf assembly excerpt of an inlined helper calling a kernel function and checking for a specific error: ; err = bpf_map_update_elem(&mymap, &key, &val, BPF_NOEXIST); ... 46: call 0xffffffffe103291c ; htab_map_update_elem ; if (err && err != -EEXIST) { 4b: cmp $0xffffffffffffffef,%rax ; cmp -EEXIST,%rax kernel function assembly excerpt of return value from `htab_map_update_elem` returning 32-bit int: movl $0xffffffef, %r9d ... movl %r9d, %eax ...results in the comparison: cmp $0xffffffffffffffef, $0x00000000ffffffef Fixes: bdb7b79b4ce8 ("bpf: Switch most helper return values from 32-bit int to 64-bit long") Tested-by: Eduard Zingerman Signed-off-by: JP Kobryn Link: https://lore.kernel.org/r/20230322194754.185781-3-inwardvessel@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 14 ++++++------- include/linux/filter.h | 6 +++--- kernel/bpf/arraymap.c | 12 ++++++------ kernel/bpf/bloom_filter.c | 12 ++++++------ kernel/bpf/bpf_cgrp_storage.c | 6 +++--- kernel/bpf/bpf_inode_storage.c | 6 +++--- kernel/bpf/bpf_struct_ops.c | 6 +++--- kernel/bpf/bpf_task_storage.c | 6 +++--- kernel/bpf/cpumap.c | 8 ++++---- kernel/bpf/devmap.c | 24 +++++++++++------------ kernel/bpf/hashtab.c | 36 +++++++++++++++++----------------- kernel/bpf/local_storage.c | 6 +++--- kernel/bpf/lpm_trie.c | 6 +++--- kernel/bpf/queue_stack_maps.c | 22 ++++++++++----------- kernel/bpf/reuseport_array.c | 2 +- kernel/bpf/ringbuf.c | 6 +++--- kernel/bpf/stackmap.c | 6 +++--- kernel/bpf/verifier.c | 14 ++++++------- net/core/bpf_sk_storage.c | 6 +++--- net/core/sock_map.c | 8 ++++---- net/xdp/xskmap.c | 8 ++++---- 21 files changed, 110 insertions(+), 110 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3ef98fb92987..ec0df059f562 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,11 +96,11 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); - int (*map_delete_elem)(struct bpf_map *map, void *key); - int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); - int (*map_pop_elem)(struct bpf_map *map, void *value); - int (*map_peek_elem)(struct bpf_map *map, void *value); + long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); + long (*map_delete_elem)(struct bpf_map *map, void *key); + long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + long (*map_pop_elem)(struct bpf_map *map, void *value); + long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ @@ -139,7 +139,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); + long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. 
It is a runtime check to ensure @@ -157,7 +157,7 @@ struct bpf_map_ops { int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); - int (*map_for_each_callback)(struct bpf_map *map, + long (*map_for_each_callback)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags); diff --git a/include/linux/filter.h b/include/linux/filter.h index efa5d4a1677e..23c08c31bea9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1504,9 +1504,9 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, - u64 flags, const u64 flag_mask, - void *lookup_elem(struct bpf_map *map, u32 key)) +static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, + u64 flags, const u64 flag_mask, + void *lookup_elem(struct bpf_map *map, u32 key)) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1588c793a715..2058e89b5ddd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -307,8 +307,8 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key } /* Called from syscall or from eBPF program */ -static int array_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long array_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; @@ -386,7 +386,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, } /* Called from syscall or from eBPF program */ -static int array_map_delete_elem(struct bpf_map *map, void *key) +static long array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } @@ -686,8 +686,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; -static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, - void *callback_ctx, u64 flags) +static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, + void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; struct bpf_array *array; @@ -871,7 +871,7 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, return 0; } -static int fd_array_map_delete_elem(struct bpf_map *map, void *key) +static long fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 6350c5d35a9b..db19784601a7 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -41,7 +41,7 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value, return h & bloom->bitset_mask; } -static int bloom_map_peek_elem(struct bpf_map *map, void *value) +static long bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -56,7 +56,7 @@ static int bloom_map_peek_elem(struct bpf_map *map, void *value) return 0; } -static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) +static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct 
bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -73,12 +73,12 @@ static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) return 0; } -static int bloom_map_pop_elem(struct bpf_map *map, void *value) +static long bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } -static int bloom_map_delete_elem(struct bpf_map *map, void *value) +static long bloom_map_delete_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } @@ -177,8 +177,8 @@ static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(-EINVAL); } -static int bloom_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index c975cacdd16b..f5b016a5484d 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -93,8 +93,8 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? sdata->data : NULL; } -static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct cgroup *cgroup; @@ -125,7 +125,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) return 0; } -static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) { struct cgroup *cgroup; int err, fd; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index ad2ab0187e45..9a5f05151898 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -91,8 +91,8 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? 
sdata->data : NULL; } -static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct file *f; @@ -127,7 +127,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) return 0; } -static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) { struct file *f; int fd, err; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 38903fb52f98..ba7a94276e3b 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -349,8 +349,8 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, model, flags, tlinks, NULL); } -static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; const struct bpf_struct_ops *st_ops = st_map->st_ops; @@ -524,7 +524,7 @@ unlock: return err; } -static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) +static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) { enum bpf_struct_ops_state prev_state; struct bpf_struct_ops_map *st_map; diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index c88cc04c17c1..ab5bd1ef58c4 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -120,8 +120,8 @@ out: return ERR_PTR(err); } -static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct task_struct *task; @@ -173,7 +173,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, return 0; } -static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) { struct task_struct *task; unsigned int f_flags; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 871809e71b4e..8ec18faa74ac 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -540,7 +540,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, } } -static int cpu_map_delete_elem(struct bpf_map *map, void *key) +static long cpu_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); u32 key_cpu = *(u32 *)key; @@ -553,8 +553,8 @@ static int cpu_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long cpu_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); struct bpf_cpumap_val cpumap_value = {}; @@ -667,7 +667,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } -static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) +static long cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __cpu_map_lookup_elem); diff --git a/kernel/bpf/devmap.c 
b/kernel/bpf/devmap.c index 19b036a228f7..802692fa3905 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -809,7 +809,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu) kfree(dev); } -static int dev_map_delete_elem(struct bpf_map *map, void *key) +static long dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; @@ -826,7 +826,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) +static long dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; @@ -897,8 +897,8 @@ err_out: return ERR_PTR(-EINVAL); } -static int __dev_map_update_elem(struct net *net, struct bpf_map *map, - void *key, void *value, u64 map_flags) +static long __dev_map_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; @@ -939,15 +939,15 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, return 0; } -static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return __dev_map_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } -static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, - void *key, void *value, u64 map_flags) +static long __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; @@ -999,21 +999,21 @@ out_err: return err; } -static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return __dev_map_hash_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } -static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) +static long dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, __dev_map_lookup_elem); } -static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) +static long dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 0df4b0c10f59..96b645bba3a4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1073,8 +1073,8 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, } /* Called from syscall or from eBPF program */ -static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1175,8 +1175,8 @@ static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) bpf_lru_push_free(&htab->lru, &elem->lru_node); } -static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void 
*value, - u64 map_flags) +static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; @@ -1242,9 +1242,9 @@ err: return ret; } -static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags, - bool onallcpus) +static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1297,9 +1297,9 @@ err: return ret; } -static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags, - bool onallcpus) +static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1364,21 +1364,21 @@ err: return ret; } -static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { return __htab_percpu_map_update_elem(map, key, value, map_flags, false); } -static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, false); } /* Called from syscall or from eBPF program */ -static int htab_map_delete_elem(struct bpf_map *map, void *key) +static long htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; @@ -1414,7 +1414,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) return ret; } -static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) +static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; @@ -2134,8 +2134,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), }; -static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, - void *callback_ctx, u64 flags) +static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, + void *callback_ctx, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index a993560f200a..4c7bbec4a9e4 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -141,8 +141,8 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key) return &READ_ONCE(storage->buf)->data[0]; } -static int cgroup_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long cgroup_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; @@ -348,7 +348,7 @@ static void cgroup_storage_map_free(struct bpf_map *_map) bpf_map_area_free(map); } -static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +static long cgroup_storage_delete_elem(struct bpf_map *map, void 
*key) { return -EINVAL; } diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index dc23f2ac9cde..e0d3ddf2037a 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -300,8 +300,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, } /* Called from syscall or from eBPF program */ -static int trie_update_elem(struct bpf_map *map, - void *_key, void *value, u64 flags) +static long trie_update_elem(struct bpf_map *map, + void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; @@ -431,7 +431,7 @@ out: } /* Called from syscall or from eBPF program */ -static int trie_delete_elem(struct bpf_map *map, void *_key) +static long trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct bpf_lpm_trie_key *key = _key; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 63ecbbcb349d..601609164ef3 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -95,7 +95,7 @@ static void queue_stack_map_free(struct bpf_map *map) bpf_map_area_free(qs); } -static int __queue_map_get(struct bpf_map *map, void *value, bool delete) +static long __queue_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; @@ -124,7 +124,7 @@ out: } -static int __stack_map_get(struct bpf_map *map, void *value, bool delete) +static long __stack_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; @@ -156,32 +156,32 @@ out: } /* Called from syscall or from eBPF program */ -static int queue_map_peek_elem(struct bpf_map *map, void *value) +static long queue_map_peek_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, false); } /* Called from syscall or from eBPF program */ -static int stack_map_peek_elem(struct bpf_map *map, void *value) +static long stack_map_peek_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, false); } /* Called from syscall or from eBPF program */ -static int queue_map_pop_elem(struct bpf_map *map, void *value) +static long queue_map_pop_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, true); } /* Called from syscall or from eBPF program */ -static int stack_map_pop_elem(struct bpf_map *map, void *value) +static long stack_map_pop_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, true); } /* Called from syscall or from eBPF program */ -static int queue_stack_map_push_elem(struct bpf_map *map, void *value, - u64 flags) +static long queue_stack_map_push_elem(struct bpf_map *map, void *value, + u64 flags) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long irq_flags; @@ -227,14 +227,14 @@ static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) } /* Called from syscall or from eBPF program */ -static int queue_stack_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long queue_stack_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ -static int queue_stack_map_delete_elem(struct bpf_map *map, void *key) +static long queue_stack_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } diff --git a/kernel/bpf/reuseport_array.c 
b/kernel/bpf/reuseport_array.c index 71cb72f5b733..cbf2d8d784b8 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -59,7 +59,7 @@ static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key) } /* Called from syscall only */ -static int reuseport_array_delete_elem(struct bpf_map *map, void *key) +static long reuseport_array_delete_elem(struct bpf_map *map, void *key) { struct reuseport_array *array = reuseport_array(map); u32 index = *(u32 *)key; diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 0d2a45ff83f1..875ac9b698d9 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -242,13 +242,13 @@ static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(-ENOTSUPP); } -static int ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 flags) +static long ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 flags) { return -ENOTSUPP; } -static int ringbuf_map_delete_elem(struct bpf_map *map, void *key) +static long ringbuf_map_delete_elem(struct bpf_map *map, void *key) { return -ENOTSUPP; } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 0f1d8dced933..b25fce425b2c 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -618,14 +618,14 @@ static int stack_map_get_next_key(struct bpf_map *map, void *key, return 0; } -static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long stack_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ -static int stack_map_delete_elem(struct bpf_map *map, void *key) +static long stack_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *old_bucket; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5693e4a92752..50c995697f0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17692,21 +17692,21 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); BUILD_BUG_ON(!__same_type(ops->map_delete_elem, - (int (*)(struct bpf_map *map, void *key))NULL)); + (long (*)(struct bpf_map *map, void *key))NULL)); BUILD_BUG_ON(!__same_type(ops->map_update_elem, - (int (*)(struct bpf_map *map, void *key, void *value, + (long (*)(struct bpf_map *map, void *key, void *value, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_push_elem, - (int (*)(struct bpf_map *map, void *value, + (long (*)(struct bpf_map *map, void *value, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_pop_elem, - (int (*)(struct bpf_map *map, void *value))NULL)); + (long (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, - (int (*)(struct bpf_map *map, void *value))NULL)); + (long (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, - (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); + (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, - (int (*)(struct bpf_map *map, + (long (*)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 24c3dc0d62e5..cb0f5a105b89 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ 
-94,8 +94,8 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(err); } -static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct socket *sock; @@ -114,7 +114,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, return err; } -static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) { struct socket *sock; int fd, err; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9b854e236d23..7c189c2e2fbf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -437,7 +437,7 @@ static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk, __sock_map_delete(stab, sk, link_raw); } -static int sock_map_delete_elem(struct bpf_map *map, void *key) +static long sock_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); u32 i = *(u32 *)key; @@ -587,8 +587,8 @@ out: return ret; } -static int sock_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long sock_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct sock *sk = (struct sock *)value; int ret; @@ -925,7 +925,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, raw_spin_unlock_bh(&bucket->lock); } -static int sock_hash_delete_elem(struct bpf_map *map, void *key) +static long sock_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 0c38d7175922..2c1427074a3b 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -162,8 +162,8 @@ static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) return ERR_PTR(-EOPNOTSUPP); } -static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -223,7 +223,7 @@ out: return err; } -static int xsk_map_delete_elem(struct bpf_map *map, void *key) +static long xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -243,7 +243,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) +static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __xsk_map_lookup_elem); From 7be14c1c9030f73cc18b4ff23b78a0a081f16188 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Mar 2023 22:30:55 +0100 Subject: [PATCH 091/260] bpf: Fix __reg_bound_offset 64->32 var_off subreg propagation Xu reports that after commit 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking"), the following BPF program is rejected by the verifier: 0: (61) r2 = *(u32 *)(r1 +0) ; R2_w=pkt(off=0,r=0,imm=0) 1: (61) r3 = *(u32 *)(r1 +4) ; R3_w=pkt_end(off=0,imm=0) 2: (bf) r1 = r2 3: (07) r1 += 1 4: (2d) if r1 > r3 goto pc+8 5: (71) r1 = *(u8 *)(r2 +0) ; 
R1_w=scalar(umax=255,var_off=(0x0; 0xff)) 6: (18) r0 = 0x7fffffffffffff10 8: (0f) r1 += r0 ; R1_w=scalar(umin=0x7fffffffffffff10,umax=0x800000000000000f) 9: (18) r0 = 0x8000000000000000 11: (07) r0 += 1 12: (ad) if r0 < r1 goto pc-2 13: (b7) r0 = 0 14: (95) exit And the verifier log says: func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (61) r2 = *(u32 *)(r1 +0) ; R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=0,imm=0) 1: (61) r3 = *(u32 *)(r1 +4) ; R1=ctx(off=0,imm=0) R3_w=pkt_end(off=0,imm=0) 2: (bf) r1 = r2 ; R1_w=pkt(off=0,r=0,imm=0) R2_w=pkt(off=0,r=0,imm=0) 3: (07) r1 += 1 ; R1_w=pkt(off=1,r=0,imm=0) 4: (2d) if r1 > r3 goto pc+8 ; R1_w=pkt(off=1,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) 5: (71) r1 = *(u8 *)(r2 +0) ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) R2_w=pkt(off=0,r=1,imm=0) 6: (18) r0 = 0x7fffffffffffff10 ; R0_w=9223372036854775568 8: (0f) r1 += r0 ; R0_w=9223372036854775568 R1_w=scalar(umin=9223372036854775568,umax=9223372036854775823,s32_min=-240,s32_max=15) 9: (18) r0 = 0x8000000000000000 ; R0_w=-9223372036854775808 11: (07) r0 += 1 ; R0_w=-9223372036854775807 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775807 R1_w=scalar(umin=9223372036854775568,umax=9223372036854775809) 13: (b7) r0 = 0 ; R0_w=0 14: (95) exit from 12 to 11: R0_w=-9223372036854775807 R1_w=scalar(umin=9223372036854775810,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) R2_w=pkt(off=0,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775806 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775806 R1_w=scalar(umin=9223372036854775810,umax=9223372036854775810,var_off=(0x8000000000000000; 0xffffffff)) 13: safe [...] from 12 to 11: R0_w=-9223372036854775795 R1=scalar(umin=9223372036854775822,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775794 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775794 R1=scalar(umin=9223372036854775822,umax=9223372036854775822,var_off=(0x8000000000000000; 0xffffffff)) 13: safe from 12 to 11: R0_w=-9223372036854775794 R1=scalar(umin=9223372036854775823,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775793 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775793 R1=scalar(umin=9223372036854775823,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 13: safe from 12 to 11: R0_w=-9223372036854775793 R1=scalar(umin=9223372036854775824,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775792 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775792 R1=scalar(umin=9223372036854775824,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 13: safe [...] The 64bit umin=9223372036854775810 bound continuously bumps by +1 while umax=9223372036854775823 stays as-is until the verifier complexity limit is reached and the program gets finally rejected. During this simulation, the umin also eventually surpasses umax. 
Looking at the first 'from 12 to 11' output line from the loop, R1 has the following state: R1_w=scalar(umin=0x8000000000000002 (9223372036854775810), umax=0x800000000000000f (9223372036854775823), var_off=(0x8000000000000000; 0xffffffff)) The var_off is technically not in an inconsistent state, but it is very imprecise, admitting values far beyond the 64-bit umax bound, whereas the expected output with refined known bits in var_off should have been: R1_w=scalar(umin=0x8000000000000002 (9223372036854775810), umax=0x800000000000000f (9223372036854775823), var_off=(0x8000000000000000; 0xf)) In the above log, var_off stays at var_off=(0x8000000000000000; 0xffffffff) and does not converge into a narrower mask where more bits become known, which would eventually transform R1 into a constant in the umin=9223372036854775823, umax=9223372036854775823 case, where the verifier would have terminated and let the program pass. __reg_combine_64_into_32() marks the subregister unknown and propagates 64-bit {s,u}min/{s,u}max bounds to their 32-bit equivalents iff they are within the 32-bit universe. The question came up whether __reg_combine_64_into_32() should special-case the situation where the 64-bit {s,u}min bounds have the same value as the 64-bit {s,u}max bounds, and then assign the latter as well to the 32-bit reg->{s,u}32_{min,max}_value. As can be seen from the above example however, that is just /one/ special case and not a /generic/ solution, given that the above example would still not be addressed this way and would remain at an imprecise var_off=(0x8000000000000000; 0xffffffff). The improvement is needed in __reg_bound_offset() to refine var32_off with the updated var64_off instead of the prior reg->var_off. The reg_bounds_sync() code first refines information about the register's min/max bounds via __update_reg_bounds() from the current var_off, then __reg_deduce_bounds() refines them from the sign bit, and with the potentially learned bits from the bounds, __reg_bound_offset() updates the var_off tnum. For example, intersecting with the old var_off might have improved bounds slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), then the new var_off will result in (0; 0x7f...fc). The intersected var64_off then holds the universe, which is a superset of var32_off. The point for the latter is not to broaden, but to further refine known bits based on the intersection of var_off with the 32-bit bounds, so that we later construct the final var_off from the upper and lower 32 bits. The final __update_reg_bounds() can then potentially still slightly refine bounds if more bits became known from the new var_off.
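The mechanics can be checked with a small stand-alone model of the tnum helpers. This is a sketch mirroring tnum_range(), tnum_intersect() and tnum_subreg() from kernel/bpf/tnum.c; the u32 bounds are assumed to still be at their full default range here (as the log above suggests), so the 32-bit range intersection is left out:

#include <stdio.h>

/* value holds the known-1 bits, mask the unknown bits */
struct tnum { unsigned long long value, mask; };

static struct tnum tnum_range(unsigned long long min, unsigned long long max)
{
	unsigned long long chi = min ^ max, delta;
	int bits = chi ? 64 - __builtin_clzll(chi) : 0;

	if (bits > 63)                        /* 1ULL << 64 is undefined */
		return (struct tnum){ 0, ~0ULL };
	delta = (1ULL << bits) - 1;
	return (struct tnum){ min & ~delta, delta };
}

static struct tnum tnum_intersect(struct tnum a, struct tnum b)
{
	unsigned long long v = a.value | b.value, mu = a.mask & b.mask;

	return (struct tnum){ v & ~mu, mu };
}

static struct tnum tnum_subreg(struct tnum a)
{
	return (struct tnum){ (unsigned int)a.value, (unsigned int)a.mask };
}

int main(void)
{
	struct tnum var_off = { 0x8000000000000000ULL, 0xffffffffULL };
	struct tnum var64_off = tnum_intersect(var_off,
			tnum_range(0x8000000000000002ULL, 0x800000000000000fULL));
	struct tnum old32 = tnum_subreg(var_off);   /* stale: (0x0; 0xffffffff) */
	struct tnum new32 = tnum_subreg(var64_off); /* refined: (0x0; 0xf) */

	printf("var64_off = (%#llx; %#llx)\n", var64_off.value, var64_off.mask);
	printf("old var32_off = (%#llx; %#llx)\n", old32.value, old32.mask);
	printf("new var32_off = (%#llx; %#llx)\n", new32.value, new32.mask);
	return 0;
}

Recombining the upper half of var64_off with the refined 32-bit part via tnum_or(tnum_clear_subreg(var64_off), var32_off) then yields exactly the expected (0x8000000000000000; 0xf).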
After the improvement, we can see R1 converging successively: func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (61) r2 = *(u32 *)(r1 +0) ; R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=0,imm=0) 1: (61) r3 = *(u32 *)(r1 +4) ; R1=ctx(off=0,imm=0) R3_w=pkt_end(off=0,imm=0) 2: (bf) r1 = r2 ; R1_w=pkt(off=0,r=0,imm=0) R2_w=pkt(off=0,r=0,imm=0) 3: (07) r1 += 1 ; R1_w=pkt(off=1,r=0,imm=0) 4: (2d) if r1 > r3 goto pc+8 ; R1_w=pkt(off=1,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) 5: (71) r1 = *(u8 *)(r2 +0) ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) R2_w=pkt(off=0,r=1,imm=0) 6: (18) r0 = 0x7fffffffffffff10 ; R0_w=9223372036854775568 8: (0f) r1 += r0 ; R0_w=9223372036854775568 R1_w=scalar(umin=9223372036854775568,umax=9223372036854775823,s32_min=-240,s32_max=15) 9: (18) r0 = 0x8000000000000000 ; R0_w=-9223372036854775808 11: (07) r0 += 1 ; R0_w=-9223372036854775807 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775807 R1_w=scalar(umin=9223372036854775568,umax=9223372036854775809) 13: (b7) r0 = 0 ; R0_w=0 14: (95) exit from 12 to 11: R0_w=-9223372036854775807 R1_w=scalar(umin=9223372036854775810,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2_w=pkt(off=0,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775806 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775806 R1_w=-9223372036854775806 13: safe from 12 to 11: R0_w=-9223372036854775806 R1_w=scalar(umin=9223372036854775811,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2_w=pkt(off=0,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775805 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775805 R1_w=-9223372036854775805 13: safe [...] from 12 to 11: R0_w=-9223372036854775798 R1=scalar(umin=9223372036854775819,umax=9223372036854775823,var_off=(0x8000000000000008; 0x7),s32_min=8,s32_max=15,u32_min=8,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775797 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775797 R1=-9223372036854775797 13: safe from 12 to 11: R0_w=-9223372036854775797 R1=scalar(umin=9223372036854775820,umax=9223372036854775823,var_off=(0x800000000000000c; 0x3),s32_min=12,s32_max=15,u32_min=12,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775796 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775796 R1=-9223372036854775796 13: safe from 12 to 11: R0_w=-9223372036854775796 R1=scalar(umin=9223372036854775821,umax=9223372036854775823,var_off=(0x800000000000000c; 0x3),s32_min=12,s32_max=15,u32_min=12,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775795 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775795 R1=-9223372036854775795 13: safe from 12 to 11: R0_w=-9223372036854775795 R1=scalar(umin=9223372036854775822,umax=9223372036854775823,var_off=(0x800000000000000e; 0x1),s32_min=14,s32_max=15,u32_min=14,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775794 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775794 R1=-9223372036854775794 13: safe from 12 to 11: R0_w=-9223372036854775794 R1=-9223372036854775793 R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 11: (07) r0 += 1 ; R0_w=-9223372036854775793 12: (ad) if r0 < r1 goto pc-2 last_idx 12 first_idx 12 parent didn't have regs=1 stack=0 marks: R0_rw=P-9223372036854775801 
R1_r=scalar(umin=9223372036854775815,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 last_idx 11 first_idx 11 regs=1 stack=0 before 11: (07) r0 += 1 parent didn't have regs=1 stack=0 marks: R0_rw=P-9223372036854775805 R1_rw=scalar(umin=9223372036854775812,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2_w=pkt(off=0,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 last_idx 12 first_idx 0 regs=1 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=1 stack=0 before 11: (07) r0 += 1 regs=1 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=1 stack=0 before 11: (07) r0 += 1 regs=1 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=1 stack=0 before 11: (07) r0 += 1 regs=1 stack=0 before 9: (18) r0 = 0x8000000000000000 last_idx 12 first_idx 12 parent didn't have regs=2 stack=0 marks: R0_rw=P-9223372036854775801 R1_r=Pscalar(umin=9223372036854775815,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2=pkt(off=0,r=1,imm=0) R3=pkt_end(off=0,imm=0) R10=fp0 last_idx 11 first_idx 11 regs=2 stack=0 before 11: (07) r0 += 1 parent didn't have regs=2 stack=0 marks: R0_rw=P-9223372036854775805 R1_rw=Pscalar(umin=9223372036854775812,umax=9223372036854775823,var_off=(0x8000000000000000; 0xf),s32_min=0,s32_max=15,u32_max=15) R2_w=pkt(off=0,r=1,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 last_idx 12 first_idx 0 regs=2 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=2 stack=0 before 11: (07) r0 += 1 regs=2 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=2 stack=0 before 11: (07) r0 += 1 regs=2 stack=0 before 12: (ad) if r0 < r1 goto pc-2 regs=2 stack=0 before 11: (07) r0 += 1 regs=2 stack=0 before 9: (18) r0 = 0x8000000000000000 regs=2 stack=0 before 8: (0f) r1 += r0 regs=3 stack=0 before 6: (18) r0 = 0x7fffffffffffff10 regs=2 stack=0 before 5: (71) r1 = *(u8 *)(r2 +0) 13: safe from 4 to 13: safe verification time 322 usec stack depth 0 processed 56 insns (limit 1000000) max_states_per_insn 1 total_states 3 peak_states 3 mark_read 1 This also fixes up a test case along with this improvement where we match on the verifier log. The updated log now has a refined var_off, too. 
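As a worked check of the updated align.c expectations (computed here from the tnum definitions, not taken from the commit itself): R5 has umin=2, umax=1082 and previously var_off=(0x2; 0xfffffffc). tnum_range(2, 1082) gives (0x0; 0x7ff), since 2 ^ 1082 = 1080 fits into 11 bits, so delta = 2^11 - 1 = 0x7ff and 2 & ~0x7ff = 0. Intersecting that range with (0x2; 0xfffffffc) keeps value 0x2 and narrows the mask to 0x7ff & 0xfffffffc = 0x7fc, hence the refined var_off=(0x2; 0x7fc) that the test now matches on.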
Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Xu Kuohai Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20230314203424.4015351-2-xukuohai@huaweicloud.com Link: https://lore.kernel.org/bpf/20230322213056.2470-1-daniel@iogearbox.net --- kernel/bpf/verifier.c | 6 +++--- tools/testing/selftests/bpf/prog_tests/align.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 50c995697f0e..fd2f216de920 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2149,9 +2149,9 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) struct tnum var64_off = tnum_intersect(reg->var_off, tnum_range(reg->umin_value, reg->umax_value)); - struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off), - tnum_range(reg->u32_min_value, - reg->u32_max_value)); + struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off), + tnum_range(reg->u32_min_value, + reg->u32_max_value)); reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index c94fa8d6c4f6..b92770592563 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -575,14 +575,14 @@ static struct bpf_align_test tests[] = { /* New unknown value in R7 is (4n), >= 76 */ {14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"}, + {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"}, + {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, }, }, }; From 1a3148fc171f5cde11b4c24e808a953ff725a3e2 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Wed, 22 Mar 2023 22:30:56 +0100 Subject: [PATCH 092/260] selftests/bpf: Check when bounds are not in the 32-bit range Add cases to check whether the bounds are updated correctly when a 64-bit value is not in the 32-bit range.
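Rendered as rough C (an approximation of what the first JMP_JLT test added below encodes, with r1 pinned to one value from its range instead of being derived from a packet byte):

#include <stdio.h>

int main(void)
{
	/* r1 = packet byte + 0x7fffffffffffff10, i.e. somewhere in
	 * [0x7fffffffffffff10, 0x800000000000000f]; pick one value */
	unsigned long long r1 = 0x8000000000000005ULL;
	unsigned long long r0 = 0x8000000000000000ULL;

	do {
		r0 += 1;
	} while (r0 < r1); /* BPF_JLT: unsigned compare */

	printf("r0 = %#llx\n", r0);
	return 0;
}

With the unsigned JLT compare the loop is bounded (at most 15 iterations across the whole r1 range), so the verifier can accept it; the JSLT variants compare signed values whose range spans the signed boundary, the loop cannot be bounded, and those programs are expected to be rejected with "BPF program is too large".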
Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230322213056.2470-2-daniel@iogearbox.net --- tools/testing/selftests/bpf/verifier/bounds.c | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 33125d5f6772..74b1917d4208 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -753,3 +753,124 @@ .result_unpriv = REJECT, .result = ACCEPT, }, +{ + "bound check with JMP_JLT for crossing 64-bit signed boundary", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8), + + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_0, 0x7fffffffffffff10), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + + BPF_LD_IMM64(BPF_REG_0, 0x8000000000000000), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + /* r1 unsigned range is [0x7fffffffffffff10, 0x800000000000000f] */ + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "bound check with JMP_JSLT for crossing 64-bit signed boundary", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8), + + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_0, 0x7fffffffffffff10), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + + BPF_LD_IMM64(BPF_REG_0, 0x8000000000000000), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + /* r1 signed range is [S64_MIN, S64_MAX] */ + BPF_JMP_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF program is too large", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "bound check for loop upper bound greater than U32_MAX", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8), + + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_0, 0x100000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + + BPF_LD_IMM64(BPF_REG_0, 0x100000000), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "bound check with JMP32_JLT for crossing 32-bit signed boundary", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 6), + + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_0, 0x7fffff10), + BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + + 
BPF_MOV32_IMM(BPF_REG_0, 0x80000000), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 1), + /* r1 unsigned range is [0, 0x8000000f] */ + BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "bound check with JMP32_JSLT for crossing 32-bit signed boundary", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 6), + + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_0, 0x7fffff10), + BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + + BPF_MOV32_IMM(BPF_REG_0, 0x80000000), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 1), + /* r1 signed range is [S32_MIN, S32_MAX] */ + BPF_JMP32_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF program is too large", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, +}, From b63cbc490e18d893632929b8faa55bb28da3fcd4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 22 Mar 2023 16:25:02 -0700 Subject: [PATCH 093/260] bpf: remember meta->iter info only for initialized iters For iter_new() functions iterator state's slot might not be yet initialized, in which case iter_get_spi() will return -ERANGE. This is expected and is handled properly. But for iter_next() and iter_destroy() cases iter slot is supposed to be initialized and correct, so -ERANGE is not possible. Move meta->iter.{spi,frameno} initialization into iter_next/iter_destroy handling branch to make it more explicit that valid information will be remembered in meta->iter block for subsequent use in process_iter_next_call(), avoiding confusingly looking -ERANGE assignment for meta->iter.spi. Reported-by: Dan Carpenter Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230322232502.836171-1-andrii@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fd2f216de920..64f06f6e16bf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6778,13 +6778,6 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ nr_slots = t->size / BPF_REG_SIZE; - spi = iter_get_spi(env, reg, nr_slots); - if (spi < 0 && spi != -ERANGE) - return spi; - - meta->iter.spi = spi; - meta->iter.frameno = reg->frameno; - if (is_iter_new_kfunc(meta)) { /* bpf_iter__new() expects pointer to uninit iter state */ if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { @@ -6811,10 +6804,17 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id return -EINVAL; } + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + err = mark_iter_read(env, reg, spi, nr_slots); if (err) return err; + /* remember meta->iter info for process_iter_next_call() */ + meta->iter.spi = spi; + meta->iter.frameno = reg->frameno; meta->ref_obj_id = iter_ref_obj_id(env, reg, spi); if (is_iter_destroy_kfunc(meta)) { From b671c2067a04c0668df174ff5dfdb573d1f9b074 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:23:58 -0700 Subject: [PATCH 094/260] bpf: Retire the struct_ops map kvalue->refcnt. 
We have replaced kvalue->refcnt with synchronize_rcu() to wait for an RCU grace period. Maintenance of kvalue->refcnt was a complicated task, as we had to simultaneously keep track of two reference counts: kvalue->refcnt and the reference count of bpf_map. When kvalue->refcnt reaches zero, we also have to decrease the reference count on bpf_map - yet these steps are not performed in an atomic manner and require us to be vigilant when managing them. By eliminating kvalue->refcnt, the maintenance becomes more straightforward, as the refcount of bpf_map is now the only one managed. To prevent the trampoline image of a struct_ops from being released while it is still in use, we wait for an RCU grace period. The setsockopt(TCP_CONGESTION, "...") command allows changing a socket's congestion control algorithm and can result in releasing the old struct_ops implementation. That by itself is fine; however, since this function is exposed through bpf_setsockopt(), it may be invoked by BPF programs as well. To ensure that the trampoline image belonging to a struct_ops can be safely called while its method is in use, the trampoline safeguards the BPF program with rcu_read_lock(). Doing so prevents any destruction of the associated images before returning from the trampoline, and requires us to wait for an RCU grace period. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230323032405.3735486-2-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 75 +++++++++++++++++++++---------------- kernel/bpf/syscall.c | 6 ++- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ec0df059f562..f04098468d7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1945,6 +1945,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 38903fb52f98..2f3c4a0e03ee 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -11,6 +11,7 @@ #include #include #include +#include <linux/rcupdate_wait.h> enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, @@ -249,6 +250,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; struct bpf_struct_ops_value *uvalue, *kvalue; enum bpf_struct_ops_state state; + s64 refcnt; if (unlikely(*(u32 *)key != 0)) return -ENOENT; @@ -267,7 +269,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, uvalue = value; memcpy(uvalue, st_map->uvalue, map->value_size); uvalue->state = state; - refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); + + /* This value offers the user space a general estimate of how + * many sockets are still utilizing this struct_ops for TCP + * congestion control. The number might not be exact, but it + * should sufficiently meet our present goals.
+	 */
+	refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
+	refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0));
 
 	return 0;
 }
@@ -491,7 +500,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	refcount_set(&kvalue->refcnt, 1);
 	bpf_map_inc(map);
 
 	set_memory_rox((long)st_map->image, 1);
@@ -536,8 +544,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
 	switch (prev_state) {
 	case BPF_STRUCT_OPS_STATE_INUSE:
 		st_map->st_ops->unreg(&st_map->kvalue.data);
-		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
-			bpf_map_put(map);
+		bpf_map_put(map);
 		return 0;
 	case BPF_STRUCT_OPS_STATE_TOBEFREE:
 		return -EINPROGRESS;
@@ -570,7 +577,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
 	kfree(value);
 }
 
-static void bpf_struct_ops_map_free(struct bpf_map *map)
+static void __bpf_struct_ops_map_free(struct bpf_map *map)
 {
 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
 
@@ -582,6 +589,28 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
 	bpf_map_area_free(st_map);
 }
 
+static void bpf_struct_ops_map_free(struct bpf_map *map)
+{
+	/* The struct_ops's function may switch to another struct_ops.
+	 *
+	 * For example, bpf_tcp_cc_x->init() may switch to
+	 * another tcp_cc_y by calling
+	 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+	 * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
+	 * and its refcount may reach 0 which then frees its
+	 * trampoline image while tcp_cc_x is still running.
+	 *
+	 * A vanilla rcu gp is to wait for all bpf-tcp-cc prog
+	 * to finish. bpf-tcp-cc prog is non sleepable.
+	 * A rcu_tasks gp is to wait for the last few insn
+	 * in the trampoline image to finish before releasing
+	 * the trampoline image.
+	 */
+	synchronize_rcu_mult(call_rcu, call_rcu_tasks);
+
+	__bpf_struct_ops_map_free(map);
+}
+
 static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
@@ -630,7 +659,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 				   NUMA_NO_NODE);
 	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
 	if (!st_map->uvalue || !st_map->links || !st_map->image) {
-		bpf_struct_ops_map_free(map);
+		__bpf_struct_ops_map_free(map);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -676,41 +705,23 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
 bool bpf_struct_ops_get(const void *kdata)
 {
 	struct bpf_struct_ops_value *kvalue;
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
 
 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
 
-	return refcount_inc_not_zero(&kvalue->refcnt);
-}
-
-static void bpf_struct_ops_put_rcu(struct rcu_head *head)
-{
-	struct bpf_struct_ops_map *st_map;
-
-	st_map = container_of(head, struct bpf_struct_ops_map, rcu);
-	bpf_map_put(&st_map->map);
+	map = __bpf_map_inc_not_zero(&st_map->map, false);
+	return !IS_ERR(map);
 }
 
 void bpf_struct_ops_put(const void *kdata)
 {
 	struct bpf_struct_ops_value *kvalue;
+	struct bpf_struct_ops_map *st_map;
 
 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
-	if (refcount_dec_and_test(&kvalue->refcnt)) {
-		struct bpf_struct_ops_map *st_map;
+	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
 
-		st_map = container_of(kvalue, struct bpf_struct_ops_map,
-				      kvalue);
-		/* The struct_ops's function may switch to another struct_ops.
-		 *
-		 * For example, bpf_tcp_cc_x->init() may switch to
-		 * another tcp_cc_y by calling
-		 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
-		 * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
-		 * and its map->refcnt may reach 0 which then free its
-		 * trampoline image while tcp_cc_x is still running.
-		 *
-		 * Thus, a rcu grace period is needed here.
-		 */
-		call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
-	}
+	bpf_map_put(&st_map->map);
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 099e9068bcdd..cff0348a2871 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1303,8 +1303,10 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 	return map;
 }
 
-/* map_idr_lock should have been held */
-static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+/* map_idr_lock should have been held or the map should have been
+ * protected by rcu read lock.
+ */
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
 {
 	int refold;

From 8fb1a76a0f35c45a424c9eb84b0f97ffd51e6052 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:23:59 -0700
Subject: [PATCH 095/260] net: Update an existing TCP congestion control
 algorithm.

This feature lets you immediately transition to another congestion
control algorithm or implementation with the same name. Once an
algorithm is updated, new connections will use it. The purpose is to
allow updating a customized congestion control algorithm implemented
in BPF struct_ops with a new version on the fly.

The following is an example of using the userspace API implemented in
later BPF patches.

   link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
   .......
   err = bpf_link__update_map(link, skel->maps.ca_update_2);

We first load and register an algorithm implemented in BPF struct_ops,
then swap it out for a new one with the same name. After that, newly
created connections will use the updated algorithm, while existing
connections keep the version that was already applied.

This patch also takes the chance to refactor the ca validation into
the new tcp_validate_congestion_control() function.

Cc: netdev@vger.kernel.org, Eric Dumazet
Signed-off-by: Kui-Feng Lee
Link: https://lore.kernel.org/r/20230323032405.3735486-3-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau
---
 include/net/tcp.h   |  3 +++
 net/ipv4/tcp_cong.c | 66 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1e..2abb755e6a3a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
 
 int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+				  struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
 
 void tcp_assign_congestion_control(struct sock *sk);
 void tcp_init_congestion_control(struct sock *sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db8b4b488c31..1b34050a7538 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
 	return NULL;
 }
 
-/*
- * Attach new congestion control algorithm to the list
- * of available options.
- */
-int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
 {
-	int ret = 0;
-
 	/* all algorithms must implement these */
 	if (!ca->ssthresh || !ca->undo_cwnd ||
 	    !(ca->cong_avoid || ca->cong_control)) {
@@ -90,6 +84,20 @@
 		return -EINVAL;
 	}
 
+	return 0;
+}
+
+/* Attach new congestion control algorithm to the list
+ * of available options.
+ */
+int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+{
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
 	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
 
 	spin_lock(&tcp_cong_list_lock);
@@ -130,6 +138,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
+/* Replace a registered old ca with a new one.
+ *
+ * The new ca must have the same name as the old one that has been
+ * registered.
+ */
+int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
+{
+	struct tcp_congestion_ops *existing;
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
+	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
+	spin_lock(&tcp_cong_list_lock);
+	existing = tcp_ca_find_key(old_ca->key);
+	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
+		pr_notice("%s not registered or non-unique key\n",
+			  ca->name);
+		ret = -EINVAL;
+	} else if (existing != old_ca) {
+		pr_notice("invalid old congestion control algorithm to replace\n");
+		ret = -EINVAL;
+	} else {
+		/* Add the new one before removing the old one to keep
+		 * one implementation available all the time.
+		 */
+		list_add_tail_rcu(&ca->list, &tcp_cong_list);
+		list_del_rcu(&existing->list);
+		pr_debug("%s updated\n", ca->name);
+	}
+	spin_unlock(&tcp_cong_list_lock);
+
+	/* Wait for outstanding readers to complete before the
+	 * module or struct_ops gets removed entirely.
+	 */
+	if (!ret)
+		synchronize_rcu();
+
+	return ret;
+}
+
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;

From 68b04864ca425d1894c96b8141d4fba1181f11cb Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:00 -0700
Subject: [PATCH 096/260] bpf: Create links for BPF struct_ops maps.

Make bpf_link support struct_ops. Previously, struct_ops were always
used alone, without any associated links: upon updating its value, a
struct_ops would be activated automatically. Yet other BPF program
types require a bpf_link to be created for their instances before they
can become active. Now you can create an inactive struct_ops and
create a link to activate it later.

With bpf_links, struct_ops behaves similarly to other BPF program
types. You can pin/unpin them through their links, and a struct_ops is
deactivated when its link is removed, whereas previously its value had
to be deleted for it to be deactivated.

bpf_links are responsible for registering their associated struct_ops.
Only a struct_ops that has the BPF_F_LINK flag set can be used to
create a bpf_link, while a struct_ops without this flag behaves in the
same manner as before and is registered upon updating its value.

The BPF_LINK_TYPE_STRUCT_OPS link type serves a dual purpose. Not only
is it used to craft the links for BPF struct_ops programs, but also to
create links for BPF struct_ops maps themselves, as sketched below.
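A struct_ops link is created with the same BPF_LINK_CREATE command as
other link types, passing the map fd where a program fd would normally
go. A minimal sketch using the libbpf low-level wrapper (the map_fd
variable is illustrative; error handling is omitted):

   /* map must have been created with BPF_F_LINK and its value set */
   int link_fd = bpf_link_create(map_fd, 0 /* target_fd */,
				 BPF_STRUCT_OPS, NULL);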
Since the links of BPF struct_ops programs are only used to create
trampolines internally, they are never seen in other contexts. Thus,
they can be reused for struct_ops maps themselves.

To maintain a reference to the map backing this link, we add
bpf_struct_ops_link as an additional type. The map pointer is annotated
__rcu; the RCU protection will not actually be needed until later in
the patchset.

Signed-off-by: Kui-Feng Lee
Link: https://lore.kernel.org/r/20230323032405.3735486-4-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau
---
 include/linux/bpf.h            |   7 ++
 include/uapi/linux/bpf.h       |  12 ++-
 kernel/bpf/bpf_struct_ops.c    | 143 ++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           |  23 ++++--
 net/ipv4/bpf_tcp_ca.c          |   8 +-
 tools/include/uapi/linux/bpf.h |  12 ++-
 6 files changed, 190 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f04098468d7a..8552279efe46 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1518,6 +1518,7 @@ struct bpf_struct_ops {
 			 void *kdata, const void *udata);
 	int (*reg)(void *kdata);
 	void (*unreg)(void *kdata);
+	int (*validate)(void *kdata);
 	const struct btf_type *type;
 	const struct btf_type *value_type;
 	const char *name;
@@ -1552,6 +1553,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
 	else
 		module_put(owner);
 }
+int bpf_struct_ops_link_create(union bpf_attr *attr);
 
 #ifdef CONFIG_NET
 /* Define it here to avoid the use of forward declaration */
@@ -1592,6 +1594,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
 	return -EINVAL;
 }
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 13129df937cd..42f40ee083bf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@ enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backed bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY.
*/ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -6379,6 +6386,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 2f3c4a0e03ee..3d6b5240c25a 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -17,6 +17,7 @@ enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, }; #define BPF_STRUCT_OPS_COMMON_VALUE \ @@ -59,6 +60,11 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_value kvalue; }; +struct bpf_struct_ops_link { + struct bpf_link link; + struct bpf_map __rcu *map; +}; + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -500,11 +506,29 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, *(unsigned long *)(udata + moff) = prog->aux->id; } - bpf_map_inc(map); + if (st_map->map.map_flags & BPF_F_LINK) { + err = st_ops->validate(kdata); + if (err) + goto reset_unlock; + set_memory_rox((long)st_map->image, 1); + /* Let bpf_link handle registration & unregistration. + * + * Pair with smp_load_acquire() during lookup_elem(). + */ + smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY); + goto unlock; + } set_memory_rox((long)st_map->image, 1); err = st_ops->reg(kdata); if (likely(!err)) { + /* This refcnt increment on the map here after + * 'st_ops->reg()' is secure since the state of the + * map must be set to INIT at this moment, and thus + * bpf_struct_ops_map_delete_elem() can't unregister + * or transition it to TOBEFREE concurrently. + */ + bpf_map_inc(map); /* Pair with smp_load_acquire() during lookup_elem(). * It ensures the above udata updates (e.g. prog->aux->id) * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. 
@@ -520,7 +544,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, */ set_memory_nx((long)st_map->image, 1); set_memory_rw((long)st_map->image, 1); - bpf_map_put(map); reset_unlock: bpf_struct_ops_map_put_progs(st_map); @@ -538,6 +561,9 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) struct bpf_struct_ops_map *st_map; st_map = (struct bpf_struct_ops_map *)map; + if (st_map->map.map_flags & BPF_F_LINK) + return -EOPNOTSUPP; + prev_state = cmpxchg(&st_map->kvalue.state, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE); @@ -614,7 +640,7 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) { if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || - attr->map_flags || !attr->btf_vmlinux_value_type_id) + (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id) return -EINVAL; return 0; } @@ -638,6 +664,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); + if (attr->map_flags & BPF_F_LINK && !st_ops->validate) + return ERR_PTR(-EOPNOTSUPP); + t = st_ops->type; st_map_size = sizeof(*st_map) + @@ -725,3 +754,111 @@ void bpf_struct_ops_put(const void *kdata) bpf_map_put(&st_map->map); } + +static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map) +{ + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; + + return map->map_type == BPF_MAP_TYPE_STRUCT_OPS && + map->map_flags & BPF_F_LINK && + /* Pair with smp_store_release() during map_update */ + smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY; +} + +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_struct_ops_map *st_map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = (struct bpf_struct_ops_map *) + rcu_dereference_protected(st_link->map, true); + if (st_map) { + /* st_link->map can be NULL if + * bpf_struct_ops_link_create() fails to register. 
+ */ + st_map->st_ops->unreg(&st_map->kvalue.data); + bpf_map_put(&st_map->map); + } + kfree(st_link); +} + +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + seq_printf(seq, "map_id:\t%d\n", map->id); + rcu_read_unlock(); +} + +static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + info->struct_ops.map_id = map->id; + rcu_read_unlock(); + return 0; +} + +static const struct bpf_link_ops bpf_struct_ops_map_lops = { + .dealloc = bpf_struct_ops_map_link_dealloc, + .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, + .fill_link_info = bpf_struct_ops_map_link_fill_link_info, +}; + +int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + struct bpf_struct_ops_link *link = NULL; + struct bpf_link_primer link_primer; + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; + int err; + + map = bpf_map_get(attr->link_create.map_fd); + if (!map) + return -EINVAL; + + st_map = (struct bpf_struct_ops_map *)map; + + if (!bpf_struct_ops_valid_to_reg(map)) { + err = -EINVAL; + goto err_out; + } + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto err_out; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto err_out; + + err = st_map->st_ops->reg(st_map->kvalue.data); + if (err) { + bpf_link_cleanup(&link_primer); + link = NULL; + goto err_out; + } + RCU_INIT_POINTER(link->map, map); + + return bpf_link_settle(&link_primer); + +err_out: + bpf_map_put(map); + kfree(link); + return err; +} + diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cff0348a2871..21f76698875c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2825,16 +2825,19 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) const struct bpf_prog *prog = link->prog; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "link_type:\t%s\n" - "link_id:\t%u\n" - "prog_tag:\t%s\n" - "prog_id:\t%u\n", + "link_id:\t%u\n", bpf_link_type_strs[link->type], - link->id, - prog_tag, - prog->aux->id); + link->id); + if (prog) { + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + seq_printf(m, + "prog_tag:\t%s\n" + "prog_id:\t%u\n", + prog_tag, + prog->aux->id); + } if (link->ops->show_fdinfo) link->ops->show_fdinfo(link, m); } @@ -4314,7 +4317,8 @@ static int bpf_link_get_info_by_fd(struct file *file, info.type = link->type; info.id = link->id; - info.prog_id = link->prog->aux->id; + if (link->prog) + info.prog_id = link->prog->aux->id; if (link->ops->fill_link_info) { err = link->ops->fill_link_info(link, &info); @@ -4577,6 +4581,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) if (CHECK_ATTR(BPF_LINK_CREATE)) return -EINVAL; + if (attr->link_create.attach_type == BPF_STRUCT_OPS) + return bpf_struct_ops_link_create(attr); + prog = bpf_prog_get(attr->link_create.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 13fc0c185cd9..bbbd5eb94db2 100644 
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -239,8 +239,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 		if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
 				     sizeof(tcp_ca->name)) <= 0)
 			return -EINVAL;
-		if (tcp_ca_find(utcp_ca->name))
-			return -EEXIST;
 		return 1;
 	}
 
@@ -266,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata)
 	tcp_unregister_congestion_control(kdata);
 }
 
+static int bpf_tcp_ca_validate(void *kdata)
+{
+	return tcp_validate_congestion_control(kdata);
+}
+
 struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.verifier_ops = &bpf_tcp_ca_verifier_ops,
 	.reg = bpf_tcp_ca_reg,
@@ -273,6 +276,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.check_member = bpf_tcp_ca_check_member,
 	.init_member = bpf_tcp_ca_init_member,
 	.init = bpf_tcp_ca_init,
+	.validate = bpf_tcp_ca_validate,
 	.name = "tcp_congestion_ops",
 };
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 13129df937cd..9cf1deaf21f2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@ enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backed bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1507,7 +1511,10 @@ union bpf_attr {
 	} task_fd_query;
 
 	struct { /* struct used by BPF_LINK_CREATE command */
-		__u32		prog_fd;	/* eBPF program to attach */
+		union {
+			__u32	prog_fd;	/* eBPF program to attach */
+			__u32	map_fd;		/* eBPF struct_ops to attach */
+		};
 		union {
 			__u32	target_fd;	/* object to attach to */
 			__u32	target_ifindex; /* target ifindex */
@@ -6379,6 +6386,9 @@ struct bpf_link_info {
 		struct {
 			__u32 ifindex;
 		} xdp;
+		struct {
+			__u32 map_id;
+		} struct_ops;
 	};
 } __attribute__((aligned(8)));

From 8d1608d70927747da9c1a8770edf7b6ee68f8ebc Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:01 -0700
Subject: [PATCH 097/260] libbpf: Create a bpf_link in
 bpf_map__attach_struct_ops().

bpf_map__attach_struct_ops() was creating a dummy bpf_link as a
placeholder, but now it constructs a real one by calling
bpf_link_create() if the map has the BPF_F_LINK flag set. You can flag
a struct_ops map with BPF_F_LINK by calling bpf_map__set_map_flags().
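For example, a minimal sketch (error handling omitted; the object file
and map names are illustrative):

   struct bpf_object *obj = bpf_object__open_file("ca.bpf.o", NULL);
   struct bpf_map *map = bpf_object__find_map_by_name(obj, "ca_update_1");

   /* must be set before load so the kernel map gets BPF_F_LINK */
   bpf_map__set_map_flags(map, bpf_map__map_flags(map) | BPF_F_LINK);

   bpf_object__load(obj);
   struct bpf_link *link = bpf_map__attach_struct_ops(map);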
Signed-off-by: Kui-Feng Lee Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230323032405.3735486-5-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/libbpf.c | 101 ++++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d32aa8ea38a..4ff82baba0ea 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -116,6 +116,7 @@ static const char * const attach_type_name[] = { [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", [BPF_PERF_EVENT] = "perf_event", [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", + [BPF_STRUCT_OPS] = "struct_ops", }; static const char * const link_type_name[] = { @@ -7686,6 +7687,37 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } +static void bpf_map_prepare_vdata(const struct bpf_map *map) +{ + struct bpf_struct_ops *st_ops; + __u32 i; + + st_ops = map->st_ops; + for (i = 0; i < btf_vlen(st_ops->type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; + + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; + } +} + +static int bpf_object_prepare_struct_ops(struct bpf_object *obj) +{ + int i; + + for (i = 0; i < obj->nr_maps; i++) + if (bpf_map__is_struct_ops(&obj->maps[i])) + bpf_map_prepare_vdata(&obj->maps[i]); + + return 0; +} + static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { int err, i; @@ -7711,6 +7743,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); err = err ? : bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); + err = err ? : bpf_object_prepare_struct_ops(obj); if (obj->gen_loader) { /* reset FDs */ @@ -11579,22 +11612,30 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog) return link; } +struct bpf_link_struct_ops { + struct bpf_link link; + int map_fd; +}; + static int bpf_link__detach_struct_ops(struct bpf_link *link) { + struct bpf_link_struct_ops *st_link; __u32 zero = 0; - if (bpf_map_delete_elem(link->fd, &zero)) - return -errno; + st_link = container_of(link, struct bpf_link_struct_ops, link); - return 0; + if (st_link->map_fd < 0) + /* w/o a real link */ + return bpf_map_delete_elem(link->fd, &zero); + + return close(link->fd); } struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { - struct bpf_struct_ops *st_ops; - struct bpf_link *link; - __u32 i, zero = 0; - int err; + struct bpf_link_struct_ops *link; + __u32 zero = 0; + int err, fd; if (!bpf_map__is_struct_ops(map) || map->fd == -1) return libbpf_err_ptr(-EINVAL); @@ -11603,31 +11644,37 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) if (!link) return libbpf_err_ptr(-EINVAL); - st_ops = map->st_ops; - for (i = 0; i < btf_vlen(st_ops->type); i++) { - struct bpf_program *prog = st_ops->progs[i]; - void *kern_data; - int prog_fd; - - if (!prog) - continue; - - prog_fd = bpf_program__fd(prog); - kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; - *(unsigned long *)kern_data = prog_fd; - } - - err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); - if (err) { - err = -errno; + /* kern_vdata should be prepared during the loading phase. 
 */
+	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+	/* It can be EBUSY if the map has been used to create or
+	 * update a link before. We don't allow updating the value of
+	 * a struct_ops once it is set. That ensures that the value
+	 * never changes. So, it is safe to skip EBUSY.
+	 */
+	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
 		free(link);
 		return libbpf_err_ptr(err);
 	}
 
-	link->detach = bpf_link__detach_struct_ops;
-	link->fd = map->fd;
+	link->link.detach = bpf_link__detach_struct_ops;
 
-	return link;
+	if (!(map->def.map_flags & BPF_F_LINK)) {
+		/* w/o a real link */
+		link->link.fd = map->fd;
+		link->map_fd = -1;
+		return &link->link;
+	}
+
+	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
+	if (fd < 0) {
+		free(link);
+		return libbpf_err_ptr(fd);
+	}
+
+	link->link.fd = fd;
+	link->map_fd = map->fd;
+
+	return &link->link;
 }

From aef56f2e918bf8fc8de25f0b36e8c2aba44116ec Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:02 -0700
Subject: [PATCH 098/260] bpf: Update the struct_ops of a bpf_link.

Improve the BPF_LINK_UPDATE command of bpf() to allow conveniently
switching between different struct_ops on a single bpf_link. This
enables smooth transitions from one struct_ops to another.

The struct_ops maps passed along with BPF_LINK_UPDATE must have the
BPF_F_LINK flag.

Signed-off-by: Kui-Feng Lee
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230323032405.3735486-6-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau
---
 include/linux/bpf.h            |  3 +++
 include/uapi/linux/bpf.h       | 21 +++++++++++----
 kernel/bpf/bpf_struct_ops.c    | 48 +++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           | 34 ++++++++++++++++++++++++
 net/ipv4/bpf_tcp_ca.c          |  6 +++++
 tools/include/uapi/linux/bpf.h | 21 +++++++++++----
 6 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8552279efe46..2d8f3f639e68 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1476,6 +1476,8 @@ struct bpf_link_ops {
 	void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
 	int (*fill_link_info)(const struct bpf_link *link,
 			      struct bpf_link_info *info);
+	int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
+			  struct bpf_map *old_map);
 };
 
 struct bpf_tramp_link {
@@ -1518,6 +1520,7 @@ struct bpf_struct_ops {
 			 void *kdata, const void *udata);
 	int (*reg)(void *kdata);
 	void (*unreg)(void *kdata);
+	int (*update)(void *kdata, void *old_kdata);
 	int (*validate)(void *kdata);
 	const struct btf_type *type;
 	const struct btf_type *value_type;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42f40ee083bf..e3d3b5160d26 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1555,12 +1555,23 @@ union bpf_attr {
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
 		__u32		link_fd;	/* link fd */
-		/* new program fd to update link with */
-		__u32		new_prog_fd;
+		union {
+			/* new program fd to update link with */
+			__u32	new_prog_fd;
+			/* new struct_ops map fd to update link with */
+			__u32	new_map_fd;
+		};
 		__u32		flags;		/* extra flags */
-		/* expected link's program fd; is specified only if
-		 * BPF_F_REPLACE flag is set in flags */
-		__u32		old_prog_fd;
+		union {
+			/* expected link's program fd; is specified only if
+			 * BPF_F_REPLACE flag is set in flags.
+ */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3d6b5240c25a..6401deca3b56 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -65,6 +65,8 @@ struct bpf_struct_ops_link { struct bpf_map __rcu *map; }; +static DEFINE_MUTEX(update_mutex); + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -664,7 +666,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); - if (attr->map_flags & BPF_F_LINK && !st_ops->validate) + if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update)) return ERR_PTR(-EOPNOTSUPP); t = st_ops->type; @@ -810,10 +812,54 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, return 0; } +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *expected_old_map) +{ + struct bpf_struct_ops_map *st_map, *old_st_map; + struct bpf_map *old_map; + struct bpf_struct_ops_link *st_link; + int err = 0; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = container_of(new_map, struct bpf_struct_ops_map, map); + + if (!bpf_struct_ops_valid_to_reg(new_map)) + return -EINVAL; + + mutex_lock(&update_mutex); + + old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (expected_old_map && old_map != expected_old_map) { + err = -EPERM; + goto err_out; + } + + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); + /* The new and old struct_ops must be the same type. 
*/ + if (st_map->st_ops != old_st_map->st_ops) { + err = -EINVAL; + goto err_out; + } + + err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + if (err) + goto err_out; + + bpf_map_inc(new_map); + rcu_assign_pointer(st_link->map, new_map); + bpf_map_put(old_map); + +err_out: + mutex_unlock(&update_mutex); + + return err; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, + .update_map = bpf_struct_ops_map_link_update, }; int bpf_struct_ops_link_create(union bpf_attr *attr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 21f76698875c..b4d758fa5981 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4682,6 +4682,35 @@ out: return ret; } +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) +{ + struct bpf_map *new_map, *old_map = NULL; + int ret; + + new_map = bpf_map_get(attr->link_update.new_map_fd); + if (IS_ERR(new_map)) + return -EINVAL; + + if (attr->link_update.flags & BPF_F_REPLACE) { + old_map = bpf_map_get(attr->link_update.old_map_fd); + if (IS_ERR(old_map)) { + ret = -EINVAL; + goto out_put; + } + } else if (attr->link_update.old_map_fd) { + ret = -EINVAL; + goto out_put; + } + + ret = link->ops->update_map(link, new_map, old_map); + + if (old_map) + bpf_map_put(old_map); +out_put: + bpf_map_put(new_map); + return ret; +} + #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd static int link_update(union bpf_attr *attr) @@ -4702,6 +4731,11 @@ static int link_update(union bpf_attr *attr) if (IS_ERR(link)) return PTR_ERR(link); + if (link->ops->update_map) { + ret = link_update_map(link, attr); + goto out_put_link; + } + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); if (IS_ERR(new_prog)) { ret = PTR_ERR(new_prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index bbbd5eb94db2..e8b27826283e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -264,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + static int bpf_tcp_ca_validate(void *kdata) { return tcp_validate_congestion_control(kdata); @@ -273,6 +278,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9cf1deaf21f2..d6c5a022ae28 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. 
+			 */
+			__u32	old_prog_fd;
+			/* expected link's map fd; is specified only
+			 * if BPF_F_REPLACE flag is set.
+			 */
+			__u32	old_map_fd;
+		};
 	} link_update;
 
 	struct {

From 912dd4b0c2a50a839301520badb241f36664ae07 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:03 -0700
Subject: [PATCH 099/260] libbpf: Update a bpf_link with another struct_ops.

Introduce bpf_link__update_map(), which allows atomically updating the
underlying struct_ops implementation for a given struct_ops BPF link.

Also add old_map_fd to struct bpf_link_update_opts to handle the
BPF_F_REPLACE feature.

Signed-off-by: Kui-Feng Lee
Link: https://lore.kernel.org/r/20230323032405.3735486-7-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau
---
 tools/lib/bpf/bpf.c      |  8 +++++++-
 tools/lib/bpf/bpf.h      |  3 ++-
 tools/lib/bpf/libbpf.c   | 35 +++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 5 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e750b6f5fcc3..767035900354 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -794,11 +794,17 @@ int bpf_link_update(int link_fd, int new_prog_fd,
 	if (!OPTS_VALID(opts, bpf_link_update_opts))
 		return libbpf_err(-EINVAL);
 
+	if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0))
+		return libbpf_err(-EINVAL);
+
 	memset(&attr, 0, attr_sz);
 	attr.link_update.link_fd = link_fd;
 	attr.link_update.new_prog_fd = new_prog_fd;
 	attr.link_update.flags = OPTS_GET(opts, flags, 0);
-	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+	if (OPTS_GET(opts, old_prog_fd, 0))
+		attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+	else if (OPTS_GET(opts, old_map_fd, 0))
+		attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0);
 
 	ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
 	return libbpf_err_errno(ret);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index f0f786373238..b073e73439ef 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -336,8 +336,9 @@ struct bpf_link_update_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;	   /* extra flags */
 	__u32 old_prog_fd; /* expected old program FD */
+	__u32 old_map_fd;  /* expected old map FD */
 };
-#define bpf_link_update_opts__last_field old_prog_fd
+#define bpf_link_update_opts__last_field old_map_fd
 
 LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
 			       const struct bpf_link_update_opts *opts);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4ff82baba0ea..2b5ea2500b80 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -11677,6 +11677,41 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 	return &link->link;
 }
 
+/*
+ * Swap the struct_ops backing a link with a new struct_ops map.
+ */
+int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
+{
+	struct bpf_link_struct_ops *st_ops_link;
+	__u32 zero = 0;
+	int err;
+
+	if (!bpf_map__is_struct_ops(map) || map->fd < 0)
+		return -EINVAL;
+
+	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
+	/* Ensure the type of a link is correct */
+	if (st_ops_link->map_fd < 0)
+		return -EINVAL;
+
+	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+	/* It can be EBUSY if the map has been used to create or
+	 * update a link before. We don't allow updating the value of
+	 * a struct_ops once it is set. That ensures that the value
+	 * never changes. So, it is safe to skip EBUSY.
+	 */
+	if (err && err != -EBUSY)
+		return err;
+
+	err = bpf_link_update(link->fd, map->fd, NULL);
+	if (err < 0)
+		return err;
+
+	st_ops_link->map_fd = map->fd;
+
+	return 0;
+}
+
 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 							   void *private_data);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index db4992a036f8..1615e55e2e79 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -719,6 +719,7 @@ bpf_program__attach_freplace(const struct bpf_program *prog,
 struct bpf_map;
 
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
+LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 50dde1f6521e..a5aa3a383d69 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -386,6 +386,7 @@ LIBBPF_1.1.0 {
 LIBBPF_1.2.0 {
 	global:
 		bpf_btf_get_info_by_fd;
+		bpf_link__update_map;
 		bpf_link_get_info_by_fd;
 		bpf_map_get_info_by_fd;
 		bpf_prog_get_info_by_fd;

From 809a69d61899f6b79a8e9de474cd93249580fdaf Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:04 -0700
Subject: [PATCH 100/260] libbpf: Use .struct_ops.link section to indicate a
 struct_ops with a link.

Flag that a struct_ops is to back a bpf_link by placing it in the
".struct_ops.link" section. Once it is flagged, the created struct_ops
can be used to create a bpf_link or to update a bpf_link that is
already backed by another struct_ops.

Signed-off-by: Kui-Feng Lee
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230323032405.3735486-8-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau
---
 tools/lib/bpf/libbpf.c | 60 +++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2b5ea2500b80..f6a071db5c6e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -468,6 +468,7 @@ struct bpf_struct_ops {
 #define KCONFIG_SEC ".kconfig"
 #define KSYMS_SEC ".ksyms"
 #define STRUCT_OPS_SEC ".struct_ops"
+#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
 
 enum libbpf_map_type {
 	LIBBPF_MAP_UNSPEC,
@@ -597,6 +598,7 @@ struct elf_state {
 	Elf64_Ehdr *ehdr;
 	Elf_Data *symbols;
 	Elf_Data *st_ops_data;
+	Elf_Data *st_ops_link_data;
 	size_t shstrndx; /* section index for section name strings */
 	size_t strtabidx;
 	struct elf_sec_desc *secs;
@@ -606,6 +608,7 @@ struct elf_state {
 	int text_shndx;
 	int symbols_shndx;
 	int st_ops_shndx;
+	int st_ops_link_shndx;
 };
 
 struct usdt_manager;
@@ -1119,7 +1122,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
 	return 0;
 }
 
-static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
+				int shndx, Elf_Data *data, __u32 map_flags)
 {
 	const struct btf_type *type, *datasec;
 	const struct btf_var_secinfo *vsi;
@@ -1130,15 +1134,15 @@
 	struct bpf_map *map;
 	__u32 i;
 
-	if (obj->efile.st_ops_shndx == -1)
+	if (shndx == -1)
 		return 0;
 
 	btf = obj->btf;
-	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+	datasec_id = btf__find_by_name_kind(btf, sec_name,
 					    BTF_KIND_DATASEC);
 	if (datasec_id < 0) {
 		pr_warn("struct_ops init: DATASEC %s not found\n",
-			STRUCT_OPS_SEC);
+			sec_name);
 		return -EINVAL;
 	}
 
@@ -1151,7 +1155,7 @@ static int
bpf_object__init_struct_ops_maps(struct bpf_object *obj) type_id = btf__resolve_type(obj->btf, vsi->type); if (type_id < 0) { pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", - vsi->type, STRUCT_OPS_SEC); + vsi->type, sec_name); return -EINVAL; } @@ -1170,7 +1174,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (IS_ERR(map)) return PTR_ERR(map); - map->sec_idx = obj->efile.st_ops_shndx; + map->sec_idx = shndx; map->sec_offset = vsi->offset; map->name = strdup(var_name); if (!map->name) @@ -1180,6 +1184,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; + map->def.map_flags = map_flags; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1192,14 +1197,14 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) return -ENOMEM; - if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + if (vsi->offset + type->size > data->d_size) { pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", - var_name, STRUCT_OPS_SEC); + var_name, sec_name); return -EINVAL; } memcpy(st_ops->data, - obj->efile.st_ops_data->d_buf + vsi->offset, + data->d_buf + vsi->offset, type->size); st_ops->tname = tname; st_ops->type = type; @@ -1212,6 +1217,19 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) return 0; } +static int bpf_object_init_struct_ops(struct bpf_object *obj) +{ + int err; + + err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx, + obj->efile.st_ops_data, 0); + err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC, + obj->efile.st_ops_link_shndx, + obj->efile.st_ops_link_data, + BPF_F_LINK); + return err; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, @@ -1248,6 +1266,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->efile.st_ops_shndx = -1; + obj->efile.st_ops_link_shndx = -1; obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); @@ -1265,6 +1284,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; obj->efile.symbols = NULL; obj->efile.st_ops_data = NULL; + obj->efile.st_ops_link_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -2619,7 +2639,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); - err = err ?: bpf_object__init_struct_ops_maps(obj); + err = err ?: bpf_object_init_struct_ops(obj); return err; } @@ -2753,12 +2773,13 @@ static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || obj->efile.st_ops_shndx >= 0 || + obj->efile.st_ops_link_shndx >= 0 || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0; + return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -3451,6 +3472,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + 
obj->efile.st_ops_link_data = data;
+			obj->efile.st_ops_link_shndx = idx;
 		} else {
 			pr_info("elf: skipping unrecognized data section(%d) %s\n",
 				idx, name);
@@ -3465,6 +3489,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			/* Only do relo for section with exec instructions */
 			if (!section_have_execinstr(obj, targ_sec_idx) &&
 			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+			    strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
 			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
 					idx, name, targ_sec_idx,
@@ -6611,7 +6636,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
 			return -LIBBPF_ERRNO__INTERNAL;
 		}
 
-		if (idx == obj->efile.st_ops_shndx)
+		if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
 			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
 		else if (idx == obj->efile.btf_maps_shndx)
 			err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -8853,6 +8878,7 @@ const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
 }
 
 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+						     int sec_idx,
 						     size_t offset)
 {
 	struct bpf_map *map;
@@ -8862,7 +8888,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 		map = &obj->maps[i];
 		if (!bpf_map__is_struct_ops(map))
 			continue;
-		if (map->sec_offset <= offset &&
+		if (map->sec_idx == sec_idx &&
+		    map->sec_offset <= offset &&
 		    offset - map->sec_offset < map->def.value_size)
 			return map;
 	}
@@ -8904,7 +8931,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 	}
 
 	name = elf_sym_str(obj, sym->st_name) ?: "<?>";
-	map = find_struct_ops_map_by_offset(obj, rel->r_offset);
+	map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
 	if (!map) {
 		pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
 			(size_t)rel->r_offset);
@@ -8971,8 +8998,9 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 	}
 
 	/* struct_ops BPF prog can be re-used between multiple
-	 * .struct_ops as long as it's the same struct_ops struct
-	 * definition and the same function pointer field
+	 * .struct_ops & .struct_ops.link as long as it's the
+	 * same struct_ops struct definition and the same
+	 * function pointer field
 	 */
 	if (prog->attach_btf_id != st_ops->type_id ||
 	    prog->expected_attach_type != member_idx) {

From 06da9f3bd6418e06719f15340202996f7a4c258d Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee
Date: Wed, 22 Mar 2023 20:24:05 -0700
Subject: [PATCH 101/260] selftests/bpf: Test switching TCP Congestion Control
 algorithms.

Create a pair of sockets that utilize the congestion control algorithm
under a particular name. Then switch this congestion control algorithm
to another implementation and check whether newly created connections
using the same cc name now run the new implementation.

Also try to update a link with a struct_ops that lacks BPF_F_LINK or
that has a wrong or different name. These cases should fail because
they violate the assumptions: to update a bpf_link of a struct_ops, it
must be replaced with another struct_ops that is identical in type and
name and has the BPF_F_LINK flag.

The other test case creates links from the same struct_ops more than
once, making sure a struct_ops can be used repeatedly.
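The BPF_F_REPLACE case exercised by the link_replace subtest looks
roughly like this (a sketch; the skeleton and map names follow the
test below):

   DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);

   /* succeeds only if old_map_fd matches the map backing the link */
   opts.flags = BPF_F_REPLACE;
   opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
   err = bpf_link_update(bpf_link__fd(link),
			 bpf_map__fd(skel->maps.ca_update_1), &opts);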
Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230323032405.3735486-9-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 160 ++++++++++++++++++ .../selftests/bpf/progs/tcp_ca_update.c | 80 +++++++++ 2 files changed, 240 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e980188d4124..a53c254c6058 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -8,6 +8,7 @@ #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" +#include "tcp_ca_update.skel.h" #include "bpf_dctcp_release.skel.h" #include "tcp_ca_write_sk_pacing.skel.h" #include "tcp_ca_incompl_cong_ops.skel.h" @@ -381,6 +382,155 @@ static void test_unsupp_cong_op(void) libbpf_set_print(old_print_fn); } +static void test_update_ca(void) +{ + struct tcp_ca_update *skel; + struct bpf_link *link; + int saved_ca1_cnt; + int err; + + skel = tcp_ca_update__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + do_test("tcp_ca_update", NULL); + saved_ca1_cnt = skel->bss->ca1_cnt; + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); + + err = bpf_link__update_map(link, skel->maps.ca_update_2); + ASSERT_OK(err, "update_map"); + + do_test("tcp_ca_update", NULL); + ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); + ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); + + bpf_link__destroy(link); + tcp_ca_update__destroy(skel); +} + +static void test_update_wrong(void) +{ + struct tcp_ca_update *skel; + struct bpf_link *link; + int saved_ca1_cnt; + int err; + + skel = tcp_ca_update__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + do_test("tcp_ca_update", NULL); + saved_ca1_cnt = skel->bss->ca1_cnt; + ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); + + err = bpf_link__update_map(link, skel->maps.ca_wrong); + ASSERT_ERR(err, "update_map"); + + do_test("tcp_ca_update", NULL); + ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); + + bpf_link__destroy(link); + tcp_ca_update__destroy(skel); +} + +static void test_mixed_links(void) +{ + struct tcp_ca_update *skel; + struct bpf_link *link, *link_nl; + int err; + + skel = tcp_ca_update__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link); + ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"); + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + do_test("tcp_ca_update", NULL); + ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); + + err = bpf_link__update_map(link, skel->maps.ca_no_link); + ASSERT_ERR(err, "update_map"); + + bpf_link__destroy(link); + bpf_link__destroy(link_nl); + tcp_ca_update__destroy(skel); +} + +static void test_multi_links(void) +{ + struct tcp_ca_update *skel; + struct bpf_link *link; + + skel = tcp_ca_update__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops_1st"); + bpf_link__destroy(link); + + /* A map should be able to be used to create links multiple + * times. 
+ */ + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + bpf_link__destroy(link); + + tcp_ca_update__destroy(skel); +} + +static void test_link_replace(void) +{ + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts); + struct tcp_ca_update *skel; + struct bpf_link *link; + int err; + + skel = tcp_ca_update__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); + ASSERT_OK_PTR(link, "attach_struct_ops_1st"); + bpf_link__destroy(link); + + link = bpf_map__attach_struct_ops(skel->maps.ca_update_2); + ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + + /* BPF_F_REPLACE with a wrong old map Fd. It should fail! + * + * With BPF_F_REPLACE, the link should be updated only if the + * old map fd given here matches the map backing the link. + */ + opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1); + opts.flags = BPF_F_REPLACE; + err = bpf_link_update(bpf_link__fd(link), + bpf_map__fd(skel->maps.ca_update_1), + &opts); + ASSERT_ERR(err, "bpf_link_update_fail"); + + /* BPF_F_REPLACE with a correct old map Fd. It should success! */ + opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2); + err = bpf_link_update(bpf_link__fd(link), + bpf_map__fd(skel->maps.ca_update_1), + &opts); + ASSERT_OK(err, "bpf_link_update_success"); + + bpf_link__destroy(link); + + tcp_ca_update__destroy(skel); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -399,4 +549,14 @@ void test_bpf_tcp_ca(void) test_incompl_cong_ops(); if (test__start_subtest("unsupp_cong_op")) test_unsupp_cong_op(); + if (test__start_subtest("update_ca")) + test_update_ca(); + if (test__start_subtest("update_wrong")) + test_update_wrong(); + if (test__start_subtest("mixed_links")) + test_mixed_links(); + if (test__start_subtest("multi_links")) + test_multi_links(); + if (test__start_subtest("link_replace")) + test_link_replace(); } diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c new file mode 100644 index 000000000000..b93a0ed33057 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include +#include + +char _license[] SEC("license") = "GPL"; + +int ca1_cnt = 0; +int ca2_cnt = 0; + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/ca_update_1_init") +void BPF_PROG(ca_update_1_init, struct sock *sk) +{ + ca1_cnt++; +} + +SEC("struct_ops/ca_update_2_init") +void BPF_PROG(ca_update_2_init, struct sock *sk) +{ + ca2_cnt++; +} + +SEC("struct_ops/ca_update_cong_control") +void BPF_PROG(ca_update_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ +} + +SEC("struct_ops/ca_update_ssthresh") +__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/ca_update_undo_cwnd") +__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops.link") +struct tcp_congestion_ops ca_update_1 = { + .init = (void *)ca_update_1_init, + .cong_control = (void *)ca_update_cong_control, + .ssthresh = (void *)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_update", +}; + +SEC(".struct_ops.link") +struct tcp_congestion_ops ca_update_2 = { + .init = (void *)ca_update_2_init, + .cong_control = (void *)ca_update_cong_control, + .ssthresh = (void 
*)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_update", +}; + +SEC(".struct_ops.link") +struct tcp_congestion_ops ca_wrong = { + .cong_control = (void *)ca_update_cong_control, + .ssthresh = (void *)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_wrong", +}; + +SEC(".struct_ops") +struct tcp_congestion_ops ca_no_link = { + .cong_control = (void *)ca_update_cong_control, + .ssthresh = (void *)ca_update_ssthresh, + .undo_cwnd = (void *)ca_update_undo_cwnd, + .name = "tcp_ca_no_link", +}; From 55fbae05476df65e5eee8be54f61d0257af0240b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 24 Mar 2023 11:42:41 -0700 Subject: [PATCH 102/260] bpf: Check IS_ERR for the bpf_map_get() return value This patch fixes a mistake in checking NULL instead of checking IS_ERR for the bpf_map_get() return value. It also fixes the return value in link_update_map() from -EINVAL to PTR_ERR(*_map). Reported-by: syzbot+71ccc0fe37abb458406b@syzkaller.appspotmail.com Fixes: 68b04864ca42 ("bpf: Create links for BPF struct_ops maps.") Fixes: aef56f2e918b ("bpf: Update the struct_ops of a bpf_link.") Signed-off-by: Martin KaFai Lau Acked-by: Kui-Feng Lee Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230324184241.1387437-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_struct_ops.c | 4 ++-- kernel/bpf/syscall.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 6401deca3b56..d3f0a4825fa6 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -871,8 +871,8 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) int err; map = bpf_map_get(attr->link_create.map_fd); - if (!map) - return -EINVAL; + if (IS_ERR(map)) + return PTR_ERR(map); st_map = (struct bpf_struct_ops_map *)map; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b4d758fa5981..a09597c95029 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4689,12 +4689,12 @@ static int link_update_map(struct bpf_link *link, union bpf_attr *attr) new_map = bpf_map_get(attr->link_update.new_map_fd); if (IS_ERR(new_map)) - return -EINVAL; + return PTR_ERR(new_map); if (attr->link_update.flags & BPF_F_REPLACE) { old_map = bpf_map_get(attr->link_update.old_map_fd); if (IS_ERR(old_map)) { - ret = -EINVAL; + ret = PTR_ERR(old_map); goto out_put; } } else if (attr->link_update.old_map_fd) { From 1431d0b584a673ea690c88a5f7e1aedd9caf0e84 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 25 Mar 2023 16:31:42 -0500 Subject: [PATCH 103/260] bpf: Only invoke kptr dtor following non-NULL xchg When a map value is being freed, we loop over all of the fields of the corresponding BPF object and issue the appropriate cleanup calls corresponding to the field's type. If the field is a referenced kptr, we atomically xchg the value out of the map, and invoke the kptr's destructor on whatever was there before (or bpf_obj_drop() it if it was a local kptr). Currently, we always invoke the destructor (either bpf_obj_drop() or the kptr's registered destructor) on any KPTR_REF-type field in a map, even if there wasn't a value in the map. 
This means that any function serving as the kptr's KF_RELEASE destructor
must always treat the argument as possibly NULL, as the following can and
regularly does happen:

void *xchgd_field;

/* No value was in the map, so xchgd_field is NULL */
xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
field->kptr.dtor(xchgd_field);

These are odd semantics to impose on KF_RELEASE kfuncs -- BPF programs are
prohibited by the verifier from passing NULL pointers to KF_RELEASE kfuncs,
so it doesn't make sense to require this of BPF programs, but not the main
kernel destructor path. It's also unnecessary to invoke any cleanup logic
for local kptrs. If there is no object there, there's nothing to drop.

So as to allow KF_RELEASE kfuncs to fully assume that an argument is
non-NULL, this patch updates a KPTR_REF's destructor to only be invoked
when a non-NULL value is xchg'd out of the kptr map field.

Signed-off-by: David Vernet
Link: https://lore.kernel.org/r/20230325213144.486885-2-void@manifault.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/syscall.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a09597c95029..e18ac7fdc210 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -677,6 +677,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 			break;
 		case BPF_KPTR_REF:
 			xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
+			if (!xchgd_field)
+				break;
+
 			if (!btf_is_kernel(field->kptr.btf)) {
 				pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
									   field->kptr.btf_id);

From fb2211a57c110b4ced3cb7f8570bd7246acf2d04 Mon Sep 17 00:00:00 2001
From: David Vernet
Date: Sat, 25 Mar 2023 16:31:45 -0500
Subject: [PATCH 104/260] bpf: Remove now-unnecessary NULL checks for KF_RELEASE kfuncs

Now that we're not invoking kfunc destructors when the kptr in a map was
NULL, we no longer require NULL checks in many of our KF_RELEASE kfuncs.
This patch removes those NULL checks.
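To illustrate the resulting contract (a hedged sketch, not code from this
patch; the type, field and function names below are hypothetical):

  /* Hypothetical release kfunc, for illustration only. With the
   * previous patch applied, both callers of a KF_RELEASE kfunc --
   * the verifier-checked BPF program path and the
   * bpf_obj_free_fields() map-teardown path -- guarantee a non-NULL
   * argument, so no defensive "if (!f) return;" check is needed.
   */
  __bpf_kfunc void bpf_foo_release(struct foo *f)
  {
          refcount_dec(&f->refcnt);
  }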
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230325213144.486885-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- drivers/hid/bpf/hid_bpf_dispatch.c | 3 --- kernel/bpf/cpumask.c | 3 --- kernel/bpf/helpers.c | 6 ------ net/bpf/test_run.c | 3 --- net/netfilter/nf_conntrack_bpf.c | 2 -- 5 files changed, 17 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 8a034a555d4c..d9ef45fcaeab 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -342,9 +342,6 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; - if (!ctx) - return; - ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx); kfree(ctx_kern); diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index db9da2194c1a..e991af7dc13c 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -102,9 +102,6 @@ static void cpumask_free_cb(struct rcu_head *head) */ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) { - if (!cpumask) - return; - if (refcount_dec_and_test(&cpumask->usage)) call_rcu(&cpumask->rcu, cpumask_free_cb); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f753676ef652..8980f6859443 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2089,9 +2089,6 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) */ __bpf_kfunc void bpf_task_release(struct task_struct *p) { - if (!p) - return; - put_task_struct(p); } @@ -2148,9 +2145,6 @@ __bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) */ __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) { - if (!cgrp) - return; - cgroup_put(cgrp); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8d6b31209bd6..27587f1c5f36 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -615,9 +615,6 @@ bpf_kfunc_call_memb_acquire(void) __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { - if (!p) - return; - refcount_dec(&p->cnt); } diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index cd99e6dc1f35..002e9d24a1e9 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -401,8 +401,6 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) */ __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct) { - if (!nfct) - return; nf_ct_put(nfct); } From 6c831c4684124a544f73f7c9b83bc7b2eb0b23d3 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 25 Mar 2023 16:31:46 -0500 Subject: [PATCH 105/260] bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS, even though they have a superset of the requirements of KF_TRUSTED_ARGS. Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require _either_ an argument with ref_obj_id > 0, _or_ (ref->type & BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't automatically imply KF_TRUSTED_ARGS, some of these requirements are enforced in different ways that can make the behavior of the verifier feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able argument will currently fail in the verifier with a message like, "arg#0 is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL pointer passed to trusted arg0". 
Our intention is the same, but the semantics are different due to
implementation details that kfunc authors and BPF program writers should
not need to care about.

Let's make the behavior of the verifier more consistent and intuitive by
having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our
eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default
anyways, so this takes us a step in that direction.

Note that it does not make sense to assume KF_TRUSTED_ARGS for all
KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than
KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have KF_ACQUIRE
imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that can be left
to another patch set, and there are no such subtleties to address for
KF_RELEASE.

Signed-off-by: David Vernet
Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com
Signed-off-by: Alexei Starovoitov
---
 Documentation/bpf/kfuncs.rst                           |  7 ++++---
 kernel/bpf/cpumask.c                                   |  2 +-
 kernel/bpf/verifier.c                                  |  2 +-
 net/bpf/test_run.c                                     |  6 ++++++
 tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c |  4 ++--
 tools/testing/selftests/bpf/progs/task_kfunc_failure.c |  6 +++---
 tools/testing/selftests/bpf/verifier/calls.c           | 10 +++++++---
 tools/testing/selftests/bpf/verifier/ref_tracking.c    |  6 +++---
 8 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 69eccf6f98ef..bf1b85941452 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -179,9 +179,10 @@ both are orthogonal to each other.
 ---------------------
 The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
-passed in to it. There can be only one referenced pointer that can be passed in.
-All copies of the pointer being released are invalidated as a result of invoking
-kfunc with this flag.
+passed in to it. There can be only one referenced pointer that can be passed
+in. All copies of the pointer being released are invalidated as a result of
+invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
+protection afforded by the KF_TRUSTED_ARGS flag described below.
2.4.4 KF_KPTR_GET flag ---------------------- diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index e991af7dc13c..7efdf5d770ca 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -402,7 +402,7 @@ __diag_pop(); BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 64f06f6e16bf..20eb2015842f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9307,7 +9307,7 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta) { - return meta->kfunc_flags & KF_TRUSTED_ARGS; + return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta); } static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 27587f1c5f36..f1652f5fbd2e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -606,6 +606,11 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) return &prog_test_struct; } +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + __bpf_kfunc struct prog_test_member * bpf_kfunc_call_memb_acquire(void) { @@ -800,6 +805,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 807fb0ac41e9..48b2034cadb3 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -206,7 +206,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("expects refcounted") +__failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value *v; @@ -234,7 +234,7 @@ int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +__failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value local, *v; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 27994d6b2914..2c374a7ffece 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -206,7 +206,7 @@ int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flag } SEC("tp_btf/task_newtask") -__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +__failure __msg("Possibly NULL pointer passed to trusted 
arg0") int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) { struct __tasks_kfunc_map_value *v; @@ -234,7 +234,7 @@ int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") -__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +__failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) { struct __tasks_kfunc_map_value local, *v; @@ -277,7 +277,7 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_ } SEC("tp_btf/task_newtask") -__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +__failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 5702fc9761ef..1bdf2b43e49e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", + .errstr = "Possibly NULL pointer passed to trusted arg0", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, @@ -165,19 +165,23 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 16), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, - { "bpf_kfunc_call_test_release", 9 }, + { "bpf_kfunc_call_test_offset", 9 }, + { "bpf_kfunc_call_test_release", 12 }, }, .result_unpriv = REJECT, .result = REJECT, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 9540164712b7..5a2e154dd1e0 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", + .errstr = "Possibly NULL pointer passed to trusted arg0", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", + .errstr = "Possibly NULL pointer passed to trusted arg0", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, @@ -182,7 +182,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "Possibly NULL pointer passed to trusted arg0", .fixup_kfunc_btf_id = { { "bpf_key_put", 1 }, }, From 
3e5329e193f463e6aaf98c33f7cb1308160880ab Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:42 +0200
Subject: [PATCH 106/260] selftests/bpf: Report program name on parse_test_spec error

Change test_loader.c:run_subtest() behavior to show BPF program name
when test spec for that program can't be parsed.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/test_loader.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index bf41390157bf..8ca5121b5329 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -232,8 +232,11 @@ void run_subtest(struct test_loader *tester,
 
 		/* if we can't derive test specification, go to the next test */
 		err = parse_test_spec(tester, obj, prog, &spec);
-		if (!ASSERT_OK(err, "parse_test_spec"))
+		if (err) {
+			PRINT_FAIL("Can't parse test spec for program '%s'\n",
+				   bpf_program__name(prog));
 			continue;
+		}
 
 		tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
 		if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */

From 207b1ba3019100d862931e97b49f76ff1e0a89f2 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:43 +0200
Subject: [PATCH 107/260] selftests/bpf: __imm_insn & __imm_const macro for bpf_misc.h

Add two convenience macros for BPF test cases,
allowing the following usage:

  #include "bpf_misc.h"

  ...

  asm volatile (
          ...
          ".8byte %[raw_insn];"
          ...
          "r1 += %[st_foo_offset];"
          ...
          :
          : __imm_insn(raw_insn, BPF_RAW_INSN(...)),
            __imm_const(st_foo_offset, offsetof(struct st, foo))
          : __clobber_all);

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/progs/bpf_misc.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 3c03ec8056ce..8b4681a96f89 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -35,8 +35,10 @@
 #define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
 #define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
 #define __imm(name) [name]"i"(name)
+#define __imm_const(name, expr) [name]"i"(expr)
 #define __imm_addr(name) [name]"i"(&name)
 #define __imm_ptr(name) [name]"p"(&name)
+#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
 
 #if defined(__TARGET_ARCH_x86)
 #define SYSCALL_WRAPPER 1

From 1d56ade032a49b2042f43b3f6bdf116928064267 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:44 +0200
Subject: [PATCH 108/260] selftests/bpf: Unprivileged tests for test_loader.c

Extends test_loader.c:test_loader__run_subtests() by allowing tests to
be executed in unprivileged mode, similar to test_verifier.c.

Adds the following new attributes controlling test_loader behavior:

  __msg_unpriv
  __success_unpriv
  __failure_unpriv

* If any of these attributes is present the test would be loaded in
  unprivileged mode.
* If only "privileged" attributes are present the test would be loaded
  only in privileged mode.
* If both "privileged" and "unprivileged" attributes are present the
  test would be loaded in both modes.
* If test has to be executed in both modes, __msg(text) is specified and __msg_unpriv is not specified the behavior is the same as if __msg_unpriv(text) is specified. * For test filtering purposes the name of the program loaded in unprivileged mode is derived from the usual program name by adding `@unpriv' suffix. Also adds attribute '__description'. This attribute specifies text to be used instead of a program name for display and filtering purposes. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 10 +- tools/testing/selftests/bpf/autoconf_helper.h | 9 + tools/testing/selftests/bpf/progs/bpf_misc.h | 25 ++ tools/testing/selftests/bpf/test_loader.c | 394 ++++++++++++++---- tools/testing/selftests/bpf/test_verifier.c | 25 +- tools/testing/selftests/bpf/unpriv_helpers.c | 26 ++ tools/testing/selftests/bpf/unpriv_helpers.h | 7 + 7 files changed, 395 insertions(+), 101 deletions(-) create mode 100644 tools/testing/selftests/bpf/autoconf_helper.h create mode 100644 tools/testing/selftests/bpf/unpriv_helpers.c create mode 100644 tools/testing/selftests/bpf/unpriv_helpers.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fc092582d16d..4a8ef118fd9d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -231,8 +231,9 @@ TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL) $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) -CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o TESTING_HELPERS := $(OUTPUT)/testing_helpers.o +CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o +UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o TRACE_HELPERS := $(OUTPUT)/trace_helpers.o JSON_WRITER := $(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o @@ -252,7 +253,7 @@ $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) -$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) +$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) BPFTOOL ?= $(DEFAULT_BPFTOOL) @@ -560,8 +561,9 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c disasm.c \ - json_writer.c + cap_helpers.c test_loader.c xsk.c disasm.c \ + json_writer.c unpriv_helpers.c + TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/autoconf_helper.h b/tools/testing/selftests/bpf/autoconf_helper.h new file mode 100644 index 000000000000..5b243b9cdf8c --- /dev/null +++ b/tools/testing/selftests/bpf/autoconf_helper.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#ifdef HAVE_GENHDR +# include "autoconf.h" +#else +# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) +# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +# endif +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 8b4681a96f89..9defc217a5bd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -5,12 +5,33 @@ /* This set of attributes controls behavior of the * 
test_loader.c:test_loader__run_subtests(). * + * The test_loader sequentially loads each program in a skeleton. + * Programs could be loaded in privileged and unprivileged modes. + * - __success, __failure, __msg imply privileged mode; + * - __success_unpriv, __failure_unpriv, __msg_unpriv imply + * unprivileged mode. + * If combination of privileged and unprivileged attributes is present + * both modes are used. If none are present privileged mode is implied. + * + * See test_loader.c:drop_capabilities() for exact set of capabilities + * that differ between privileged and unprivileged modes. + * + * For test filtering purposes the name of the program loaded in + * unprivileged mode is derived from the usual program name by adding + * `@unpriv' suffix. + * * __msg Message expected to be found in the verifier log. * Multiple __msg attributes could be specified. + * __msg_unpriv Same as __msg but for unprivileged mode. * * __success Expect program load success in privileged mode. + * __success_unpriv Expect program load success in unprivileged mode. * * __failure Expect program load failure in privileged mode. + * __failure_unpriv Expect program load failure in unprivileged mode. + * + * __description Text to be used instead of a program name for display + * and filtering purposes. * * __log_level Log level to use for the program, numeric value expected. * @@ -27,6 +48,10 @@ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) +#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc))) +#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg))) +#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv"))) +#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 8ca5121b5329..41cddb303885 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1,9 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include #include #include #include +#include "autoconf_helper.h" +#include "unpriv_helpers.h" +#include "cap_helpers.h" + #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? 
sizeof(pfx) - 1 : strlen(pfx)) == 0) @@ -12,16 +17,40 @@ #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv" +#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv" +#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" +#define TEST_TAG_DESCRIPTION_PFX "comment:test_description=" -struct test_spec { - const char *name; +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#define EFFICIENT_UNALIGNED_ACCESS 1 +#else +#define EFFICIENT_UNALIGNED_ACCESS 0 +#endif + +static int sysctl_unpriv_disabled = -1; + +enum mode { + PRIV = 1, + UNPRIV = 2 +}; + +struct test_subspec { + char *name; bool expect_failure; const char **expect_msgs; size_t expect_msg_cnt; +}; + +struct test_spec { + const char *prog_name; + struct test_subspec priv; + struct test_subspec unpriv; int log_level; int prog_flags; + int mode_mask; }; static int tester_init(struct test_loader *tester) @@ -44,17 +73,46 @@ void test_loader_fini(struct test_loader *tester) free(tester->log_buf); } +static void free_test_spec(struct test_spec *spec) +{ + free(spec->priv.name); + free(spec->unpriv.name); + free(spec->priv.expect_msgs); + free(spec->unpriv.expect_msgs); +} + +static int push_msg(const char *msg, struct test_subspec *subspec) +{ + void *tmp; + + tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *)); + if (!tmp) { + ASSERT_FAIL("failed to realloc memory for messages\n"); + return -ENOMEM; + } + subspec->expect_msgs = tmp; + subspec->expect_msgs[subspec->expect_msg_cnt++] = msg; + + return 0; +} + +/* Uses btf_decl_tag attributes to describe the expected test + * behavior, see bpf_misc.h for detailed description of each attribute + * and attribute combinations. 
+ */ static int parse_test_spec(struct test_loader *tester, struct bpf_object *obj, struct bpf_program *prog, struct test_spec *spec) { + const char *description = NULL; + bool has_unpriv_result = false; + int func_id, i, err = 0; struct btf *btf; - int func_id, i; memset(spec, 0, sizeof(*spec)); - spec->name = bpf_program__name(prog); + spec->prog_name = bpf_program__name(prog); btf = bpf_object__btf(obj); if (!btf) { @@ -62,15 +120,15 @@ static int parse_test_spec(struct test_loader *tester, return -EINVAL; } - func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC); + func_id = btf__find_by_name_kind(btf, spec->prog_name, BTF_KIND_FUNC); if (func_id < 0) { - ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name); + ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->prog_name); return -EINVAL; } for (i = 1; i < btf__type_cnt(btf); i++) { + const char *s, *val, *msg; const struct btf_type *t; - const char *s, *val; char *e; t = btf__type_by_id(btf, i); @@ -81,30 +139,42 @@ static int parse_test_spec(struct test_loader *tester, continue; s = btf__str_by_offset(btf, t->name_off); - if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) { - spec->expect_failure = true; + if (str_has_pfx(s, TEST_TAG_DESCRIPTION_PFX)) { + description = s + sizeof(TEST_TAG_DESCRIPTION_PFX) - 1; + } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) { + spec->priv.expect_failure = true; + spec->mode_mask |= PRIV; } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { - spec->expect_failure = false; + spec->priv.expect_failure = false; + spec->mode_mask |= PRIV; + } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE_UNPRIV) == 0) { + spec->unpriv.expect_failure = true; + spec->mode_mask |= UNPRIV; + has_unpriv_result = true; + } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS_UNPRIV) == 0) { + spec->unpriv.expect_failure = false; + spec->mode_mask |= UNPRIV; + has_unpriv_result = true; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { - void *tmp; - const char **msg; - - tmp = realloc(spec->expect_msgs, - (1 + spec->expect_msg_cnt) * sizeof(void *)); - if (!tmp) { - ASSERT_FAIL("failed to realloc memory for messages\n"); - return -ENOMEM; - } - spec->expect_msgs = tmp; - msg = &spec->expect_msgs[spec->expect_msg_cnt++]; - *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + err = push_msg(msg, &spec->priv); + if (err) + goto cleanup; + spec->mode_mask |= PRIV; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) { + msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1; + err = push_msg(msg, &spec->unpriv); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1; errno = 0; spec->log_level = strtol(val, &e, 0); if (errno || e[0] != '\0') { - ASSERT_FAIL("failed to parse test log level from '%s'", s); - return -EINVAL; + PRINT_FAIL("failed to parse test log level from '%s'\n", s); + err = -EINVAL; + goto cleanup; } } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; @@ -124,14 +194,70 @@ static int parse_test_spec(struct test_loader *tester, errno = 0; spec->prog_flags |= strtol(val, &e, 0); if (errno || e[0] != '\0') { - ASSERT_FAIL("failed to parse test prog flags from '%s'", s); - return -EINVAL; + PRINT_FAIL("failed to parse test prog flags from '%s'\n", + val); + err = -EINVAL; + goto cleanup; } } } } + if (spec->mode_mask == 0) + spec->mode_mask = PRIV; + + if (!description) + description = 
spec->prog_name; + + if (spec->mode_mask & PRIV) { + spec->priv.name = strdup(description); + if (!spec->priv.name) { + PRINT_FAIL("failed to allocate memory for priv.name\n"); + err = -ENOMEM; + goto cleanup; + } + } + + if (spec->mode_mask & UNPRIV) { + int descr_len = strlen(description); + const char *suffix = " @unpriv"; + char *name; + + name = malloc(descr_len + strlen(suffix) + 1); + if (!name) { + PRINT_FAIL("failed to allocate memory for unpriv.name\n"); + err = -ENOMEM; + goto cleanup; + } + + strcpy(name, description); + strcpy(&name[descr_len], suffix); + spec->unpriv.name = name; + } + + if (spec->mode_mask & (PRIV | UNPRIV)) { + if (!has_unpriv_result) + spec->unpriv.expect_failure = spec->priv.expect_failure; + + if (!spec->unpriv.expect_msgs) { + size_t sz = spec->priv.expect_msg_cnt * sizeof(void *); + + spec->unpriv.expect_msgs = malloc(sz); + if (!spec->unpriv.expect_msgs) { + PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n"); + err = -ENOMEM; + goto cleanup; + } + memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz); + spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt; + } + } + return 0; + +cleanup: + free_test_spec(spec); + return err; } static void prepare_case(struct test_loader *tester, @@ -148,7 +274,7 @@ static void prepare_case(struct test_loader *tester, bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz); - /* Make sure we set at least minimal log level, unless test requirest + /* Make sure we set at least minimal log level, unless test requires * even higher level already. Make sure to preserve independent log * level 4 (verifier stats), though. */ @@ -172,18 +298,18 @@ static void emit_verifier_log(const char *log_buf, bool force) } static void validate_case(struct test_loader *tester, - struct test_spec *spec, + struct test_subspec *subspec, struct bpf_object *obj, struct bpf_program *prog, int load_err) { int i, j; - for (i = 0; i < spec->expect_msg_cnt; i++) { + for (i = 0; i < subspec->expect_msg_cnt; i++) { char *match; const char *expect_msg; - expect_msg = spec->expect_msgs[i]; + expect_msg = subspec->expect_msgs[i]; match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { @@ -191,7 +317,8 @@ static void validate_case(struct test_loader *tester, if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); for (j = 0; j < i; j++) - fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]); + fprintf(stderr, + "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]); fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); return; } @@ -200,17 +327,169 @@ static void validate_case(struct test_loader *tester, } } +struct cap_state { + __u64 old_caps; + bool initialized; +}; + +static int drop_capabilities(struct cap_state *caps) +{ + const __u64 caps_to_drop = (1ULL << CAP_SYS_ADMIN | 1ULL << CAP_NET_ADMIN | + 1ULL << CAP_PERFMON | 1ULL << CAP_BPF); + int err; + + err = cap_disable_effective(caps_to_drop, &caps->old_caps); + if (err) { + PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err)); + return err; + } + + caps->initialized = true; + return 0; +} + +static int restore_capabilities(struct cap_state *caps) +{ + int err; + + if (!caps->initialized) + return 0; + + err = cap_enable_effective(caps->old_caps, NULL); + if (err) + PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err)); + caps->initialized = false; + return err; +} + +static bool can_execute_unpriv(struct test_loader *tester, struct 
test_spec *spec) +{ + if (sysctl_unpriv_disabled < 0) + sysctl_unpriv_disabled = get_unpriv_disabled() ? 1 : 0; + if (sysctl_unpriv_disabled) + return false; + if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS) + return false; + return true; +} + +static bool is_unpriv_capable_map(struct bpf_map *map) +{ + enum bpf_map_type type; + __u32 flags; + + type = bpf_map__type(map); + + switch (type) { + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_HASH_OF_MAPS: + flags = bpf_map__map_flags(map); + return !(flags & BPF_F_ZERO_SEED); + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + return true; + default: + return false; + } +} + /* this function is forced noinline and has short generic name to look better * in test_progs output (in case of a failure) */ static noinline void run_subtest(struct test_loader *tester, - const char *skel_name, - skel_elf_bytes_fn elf_bytes_factory) + struct bpf_object_open_opts *open_opts, + const void *obj_bytes, + size_t obj_byte_cnt, + struct test_spec *spec, + bool unpriv) +{ + struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; + struct cap_state caps = {}; + struct bpf_program *tprog; + struct bpf_object *tobj; + struct bpf_map *map; + int err; + + if (!test__start_subtest(subspec->name)) + return; + + if (unpriv) { + if (!can_execute_unpriv(tester, spec)) { + test__skip(); + test__end_subtest(); + return; + } + if (drop_capabilities(&caps)) { + test__end_subtest(); + return; + } + } + + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); + if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ + goto subtest_cleanup; + + bpf_object__for_each_program(tprog, tobj) + bpf_program__set_autoload(tprog, false); + + bpf_object__for_each_program(tprog, tobj) { + /* only load specified program */ + if (strcmp(bpf_program__name(tprog), spec->prog_name) == 0) { + bpf_program__set_autoload(tprog, true); + break; + } + } + + prepare_case(tester, spec, tobj, tprog); + + /* By default bpf_object__load() automatically creates all + * maps declared in the skeleton. Some map types are only + * allowed in priv mode. Disable autoload for such maps in + * unpriv mode. 
+ */ + bpf_object__for_each_map(map, tobj) + bpf_map__set_autocreate(map, !unpriv || is_unpriv_capable_map(map)); + + err = bpf_object__load(tobj); + if (subspec->expect_failure) { + if (!ASSERT_ERR(err, "unexpected_load_success")) { + emit_verifier_log(tester->log_buf, false /*force*/); + goto tobj_cleanup; + } + } else { + if (!ASSERT_OK(err, "unexpected_load_failure")) { + emit_verifier_log(tester->log_buf, true /*force*/); + goto tobj_cleanup; + } + } + + emit_verifier_log(tester->log_buf, false /*force*/); + validate_case(tester, subspec, tobj, tprog, err); + +tobj_cleanup: + bpf_object__close(tobj); +subtest_cleanup: + test__end_subtest(); + restore_capabilities(&caps); +} + +static void process_subtest(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) { LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name); - struct bpf_object *obj = NULL, *tobj; - struct bpf_program *prog, *tprog; + struct bpf_object *obj = NULL; + struct bpf_program *prog; const void *obj_bytes; size_t obj_byte_cnt; int err; @@ -224,12 +503,8 @@ void run_subtest(struct test_loader *tester, return; bpf_object__for_each_program(prog, obj) { - const char *prog_name = bpf_program__name(prog); struct test_spec spec; - if (!test__start_subtest(prog_name)) - continue; - /* if we can't derive test specification, go to the next test */ err = parse_test_spec(tester, obj, prog, &spec); if (err) { @@ -238,41 +513,12 @@ void run_subtest(struct test_loader *tester, continue; } - tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); - if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ - continue; + if (spec.mode_mask & PRIV) + run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt, &spec, false); + if (spec.mode_mask & UNPRIV) + run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt, &spec, true); - bpf_object__for_each_program(tprog, tobj) - bpf_program__set_autoload(tprog, false); - - bpf_object__for_each_program(tprog, tobj) { - /* only load specified program */ - if (strcmp(bpf_program__name(tprog), prog_name) == 0) { - bpf_program__set_autoload(tprog, true); - break; - } - } - - prepare_case(tester, &spec, tobj, tprog); - - err = bpf_object__load(tobj); - if (spec.expect_failure) { - if (!ASSERT_ERR(err, "unexpected_load_success")) { - emit_verifier_log(tester->log_buf, false /*force*/); - goto tobj_cleanup; - } - } else { - if (!ASSERT_OK(err, "unexpected_load_failure")) { - emit_verifier_log(tester->log_buf, true /*force*/); - goto tobj_cleanup; - } - } - - emit_verifier_log(tester->log_buf, false /*force*/); - validate_case(tester, &spec, tobj, tprog, err); - -tobj_cleanup: - bpf_object__close(tobj); + free_test_spec(&spec); } bpf_object__close(obj); @@ -283,5 +529,5 @@ void test_loader__run_subtests(struct test_loader *tester, skel_elf_bytes_fn elf_bytes_factory) { /* see comment in run_subtest() for why we do this function nesting */ - run_subtest(tester, skel_name, elf_bytes_factory); + process_subtest(tester, skel_name, elf_bytes_factory); } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 49a70d9beb0b..5b90eef09ade 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -33,13 +33,8 @@ #include #include -#ifdef HAVE_GENHDR -# include "autoconf.h" -#else -# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) -# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 -# endif -#endif +#include 
"autoconf_helper.h" +#include "unpriv_helpers.h" #include "cap_helpers.h" #include "bpf_rand.h" #include "bpf_util.h" @@ -1665,22 +1660,6 @@ static bool is_admin(void) return (caps & ADMIN_CAPS) == ADMIN_CAPS; } -static void get_unpriv_disabled() -{ - char buf[2]; - FILE *fd; - - fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); - if (!fd) { - perror("fopen /proc/sys/"UNPRIV_SYSCTL); - unpriv_disabled = true; - return; - } - if (fgets(buf, 2, fd) == buf && atoi(buf)) - unpriv_disabled = true; - fclose(fd); -} - static bool test_as_unpriv(struct bpf_test *test) { #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c new file mode 100644 index 000000000000..2a6efbd0401e --- /dev/null +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include "unpriv_helpers.h" + +bool get_unpriv_disabled(void) +{ + bool disabled; + char buf[2]; + FILE *fd; + + fd = fopen("/proc/sys/" UNPRIV_SYSCTL, "r"); + if (fd) { + disabled = (fgets(buf, 2, fd) == buf && atoi(buf)); + fclose(fd); + } else { + perror("fopen /proc/sys/" UNPRIV_SYSCTL); + disabled = true; + } + + return disabled; +} diff --git a/tools/testing/selftests/bpf/unpriv_helpers.h b/tools/testing/selftests/bpf/unpriv_helpers.h new file mode 100644 index 000000000000..151f67329665 --- /dev/null +++ b/tools/testing/selftests/bpf/unpriv_helpers.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include + +#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" + +bool get_unpriv_disabled(void); From 19a8e06f5f9155caf1a5577a0f7969eee13d0cbb Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:45 +0200 Subject: [PATCH 109/260] selftests/bpf: Tests execution support for test_loader.c Extends test_loader.c:test_loader__run_subtests() by allowing to execute BPF_PROG_TEST_RUN bpf command for selected programs. This is similar to functionality provided by test_verifier. Adds the following new attributes controlling test_loader behavior: __retval(...) __retval_unpriv(...) * If any of these attributes is present, the annotated program would be executed using libbpf's bpf_prog_test_run_opts() function. * If __retval is present, the test run would be done for program loaded in privileged mode. * If __retval_unpriv is present, the test run would be done for program loaded in unprivileged mode. * To mimic test_verifier behavior, the actual run is initiated in privileged mode. * The value returned by a test run is compared against retval parameter. 
The retval attribute takes one of the following parameters:
- a decimal number
- a hexadecimal number (must start with '0x')
- any of three special literals (provided for compatibility with
  test_verifier):
  - INT_MIN
  - POINTER_VALUE
  - TEST_DATA_LEN

An example of the attribute usage:

  SEC("socket")
  __description("return 42")
  __success __success_unpriv __retval(42)
  __naked void the_42_test(void)
  {
          asm volatile ("        \
          r0 = 42;               \
          exit;                  \
  "       ::: __clobber_all);
  }

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-5-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/progs/bpf_misc.h |  15 ++
 tools/testing/selftests/bpf/test_loader.c    | 149 +++++++++++++++++--
 2 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 9defc217a5bd..6e3b4903c541 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -30,6 +30,15 @@
 * __failure            Expect program load failure in privileged mode.
 * __failure_unpriv     Expect program load failure in unprivileged mode.
 *
+ * __retval             Execute the program using BPF_PROG_TEST_RUN command,
+ *                      expect return value to match passed parameter:
+ *                      - a decimal number
+ *                      - a hexadecimal number, when it starts with 0x
+ *                      - literal INT_MIN
+ *                      - literal POINTER_VALUE (see definition below)
+ *                      - literal TEST_DATA_LEN (see definition below)
+ * __retval_unpriv      Same, but load program in unprivileged mode.
+ *
 * __description         Text to be used instead of a program name for display
 *                       and filtering purposes.
 *
@@ -54,6 +63,8 @@
 #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
 #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
 #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
 
 /* Convenience macro for use with 'asm volatile' blocks */
 #define __naked __attribute__((naked))
@@ -65,6 +76,10 @@
 #define __imm_ptr(name) [name]"p"(&name)
 #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
 
+/* Magic constants used with __retval() */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
+
 #if defined(__TARGET_ARCH_x86)
 #define SYSCALL_WRAPPER 1
 #define SYS_PREFIX "__x64_"
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 41cddb303885..47e9e076bc8f 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -23,6 +23,12 @@
 #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
 #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
 #define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
+#define TEST_TAG_RETVAL_PFX "comment:test_retval="
+#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
+
+/* Warning: duplicated in bpf_misc.h */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #define EFFICIENT_UNALIGNED_ACCESS 1
@@ -42,6 +48,8 @@ struct test_subspec {
 	bool expect_failure;
 	const char **expect_msgs;
 	size_t expect_msg_cnt;
+	int retval;
+	bool execute;
 };
 
 struct test_spec {
@@ -96,6 +104,46 @@ static int push_msg(const char *msg, struct test_subspec *subspec)
 	return 0;
 }
 
+static int parse_int(const char *str,
int *val, const char *name) +{ + char *end; + long tmp; + + errno = 0; + if (str_has_pfx(str, "0x")) + tmp = strtol(str + 2, &end, 16); + else + tmp = strtol(str, &end, 10); + if (errno || end[0] != '\0') { + PRINT_FAIL("failed to parse %s from '%s'\n", name, str); + return -EINVAL; + } + *val = tmp; + return 0; +} + +static int parse_retval(const char *str, int *val, const char *name) +{ + struct { + char *name; + int val; + } named_values[] = { + { "INT_MIN" , INT_MIN }, + { "POINTER_VALUE", POINTER_VALUE }, + { "TEST_DATA_LEN", TEST_DATA_LEN }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(named_values); ++i) { + if (strcmp(str, named_values[i].name) != 0) + continue; + *val = named_values[i].val; + return 0; + } + + return parse_int(str, val, name); +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. @@ -107,6 +155,7 @@ static int parse_test_spec(struct test_loader *tester, { const char *description = NULL; bool has_unpriv_result = false; + bool has_unpriv_retval = false; int func_id, i, err = 0; struct btf *btf; @@ -129,7 +178,7 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - char *e; + int tmp; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -167,15 +216,26 @@ static int parse_test_spec(struct test_loader *tester, if (err) goto cleanup; spec->mode_mask |= UNPRIV; + } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX)) { + val = s + sizeof(TEST_TAG_RETVAL_PFX) - 1; + err = parse_retval(val, &spec->priv.retval, "__retval"); + if (err) + goto cleanup; + spec->priv.execute = true; + spec->mode_mask |= PRIV; + } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX_UNPRIV)) { + val = s + sizeof(TEST_TAG_RETVAL_PFX_UNPRIV) - 1; + err = parse_retval(val, &spec->unpriv.retval, "__retval_unpriv"); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; + spec->unpriv.execute = true; + has_unpriv_retval = true; } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1; - errno = 0; - spec->log_level = strtol(val, &e, 0); - if (errno || e[0] != '\0') { - PRINT_FAIL("failed to parse test log level from '%s'\n", s); - err = -EINVAL; + err = parse_int(val, &spec->log_level, "test log level"); + if (err) goto cleanup; - } } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { @@ -191,14 +251,10 @@ static int parse_test_spec(struct test_loader *tester, } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; } else /* assume numeric value */ { - errno = 0; - spec->prog_flags |= strtol(val, &e, 0); - if (errno || e[0] != '\0') { - PRINT_FAIL("failed to parse test prog flags from '%s'\n", - val); - err = -EINVAL; + err = parse_int(val, &tmp, "test prog flags"); + if (err) goto cleanup; - } + spec->prog_flags |= tmp; } } } @@ -239,6 +295,11 @@ static int parse_test_spec(struct test_loader *tester, if (!has_unpriv_result) spec->unpriv.expect_failure = spec->priv.expect_failure; + if (!has_unpriv_retval) { + spec->unpriv.retval = spec->priv.retval; + spec->unpriv.execute = spec->priv.execute; + } + if (!spec->unpriv.expect_msgs) { size_t sz = spec->priv.expect_msg_cnt * sizeof(void *); @@ -402,6 +463,51 @@ static bool is_unpriv_capable_map(struct bpf_map *map) } } +static int do_prog_test_run(int fd_prog, int *retval) +{ + 
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
+	__u8 tmp_in[TEST_DATA_LEN] = {};
+	int err, saved_errno;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		    .data_in = tmp_in,
+		    .data_size_in = sizeof(tmp_in),
+		    .data_out = tmp_out,
+		    .data_size_out = sizeof(tmp_out),
+		    .repeat = 1,
+	);
+
+	err = bpf_prog_test_run_opts(fd_prog, &topts);
+	saved_errno = errno;
+
+	if (err) {
+		PRINT_FAIL("FAIL: Unexpected bpf_prog_test_run error: %d (%s) ",
+			   saved_errno, strerror(saved_errno));
+		return err;
+	}
+
+	ASSERT_OK(0, "bpf_prog_test_run");
+	*retval = topts.retval;
+
+	return 0;
+}
+
+static bool should_do_test_run(struct test_spec *spec, struct test_subspec *subspec)
+{
+	if (!subspec->execute)
+		return false;
+
+	if (subspec->expect_failure)
+		return false;
+
+	if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS) {
+		if (env.verbosity != VERBOSE_NONE)
+			printf("alignment prevents execution\n");
+		return false;
+	}
+
+	return true;
+}
+
 /* this function is forced noinline and has short generic name to look better
  * in test_progs output (in case of a failure)
  */
@@ -418,6 +524,7 @@ void run_subtest(struct test_loader *tester,
 	struct bpf_program *tprog;
 	struct bpf_object *tobj;
 	struct bpf_map *map;
+	int retval;
 	int err;
 
@@ -476,6 +583,20 @@ void run_subtest(struct test_loader *tester,
 	emit_verifier_log(tester->log_buf, false /*force*/);
 	validate_case(tester, subspec, tobj, tprog, err);
 
+	if (should_do_test_run(spec, subspec)) {
+		/* For some reason test_verifier executes programs
+		 * with all capabilities restored. Do the same here.
+		 */
+		if (restore_capabilities(&caps))
+			goto tobj_cleanup;
+
+		do_prog_test_run(bpf_program__fd(tprog), &retval);
+		if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+			PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
+			goto tobj_cleanup;
+		}
+	}
+
 tobj_cleanup:
 	bpf_object__close(tobj);
 subtest_cleanup:

From 55108621a35e42f773de5d4b20cf7a14d6d53503 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:46 +0200
Subject: [PATCH 110/260] selftests/bpf: prog_tests entry point for migrated test_verifier tests

prog_tests/verifier.c would be used as a host for verifier/*.c tests
migrated to use inline assembly and run from test_progs.

The run_tests_aux() function mimics the test_verifier behavior
dropping CAP_SYS_ADMIN upon entry.
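As a sketch of the intended call pattern (mirroring the RUN() macro added
by this patch; the entry points of exactly this shape are added by the
following patches in the series):

  /* Each migrated test file gets a one-line test_progs entry point,
   * e.g. for the verifier_and.skel.h skeleton converted below:
   */
  #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes)

  void test_verifier_and(void) { RUN(verifier_and); }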
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-6-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/verifier.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c new file mode 100644 index 000000000000..aa63f5d84d97 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include + +#include "cap_helpers.h" + +__maybe_unused +static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) +{ + struct test_loader tester = {}; + __u64 old_caps; + int err; + + /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */ + err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps); + if (err) { + PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + return; + } + + test_loader__run_subtests(&tester, skel_name, elf_bytes_factory); + test_loader_fini(&tester); + + err = cap_enable_effective(old_caps, NULL); + if (err) + PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); +} + +#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes) From 9d0f1568ad5ba29feddc0897e2ccc7d6de6713c8 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:47 +0200 Subject: [PATCH 111/260] selftests/bpf: verifier/and.c converted to inline assembly Test verifier/and.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 3 + .../selftests/bpf/progs/verifier_and.c | 107 ++++++++++++++++++ tools/testing/selftests/bpf/verifier/and.c | 68 ----------- 3 files changed, 110 insertions(+), 68 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_and.c delete mode 100644 tools/testing/selftests/bpf/verifier/and.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index aa63f5d84d97..34526f6d5ab1 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -3,6 +3,7 @@ #include #include "cap_helpers.h" +#include "verifier_and.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -27,3 +28,5 @@ static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_fac } #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes) + +void test_verifier_and(void) { RUN(verifier_and); } diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c new file mode 100644 index 000000000000..e97e518516b6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_and.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/and.c */ + +#include +#include +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +SEC("socket") +__description("invalid and of negative number") +__failure 
__msg("R0 max value is outside of the allowed memory range") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void invalid_and_of_negative_number(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u8*)(r0 + 0); \ + r1 &= -4; \ + r1 <<= 2; \ + r0 += r1; \ +l0_%=: r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid range check") +__failure __msg("R0 max value is outside of the allowed memory range") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void invalid_range_check(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + r9 = 1; \ + w1 %%= 2; \ + w1 += 1; \ + w9 &= w1; \ + w9 += 1; \ + w9 >>= 1; \ + w3 = 1; \ + w3 -= w9; \ + w3 *= 0x10000000; \ + r0 += r3; \ + *(u32*)(r0 + 0) = r3; \ +l0_%=: r0 = r0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("socket") +__description("check known subreg with unknown reg") +__success __failure_unpriv __msg_unpriv("R1 !read_ok") +__retval(0) +__naked void known_subreg_with_unknown_reg(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r0 <<= 32; \ + r0 += 1; \ + r0 &= 0xFFFF1234; \ + /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */\ + if w0 < 1 goto l0_%=; \ + r1 = *(u32*)(r1 + 512); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c deleted file mode 100644 index 7d7ebee5cc7a..000000000000 --- a/tools/testing/selftests/bpf/verifier/and.c +++ /dev/null @@ -1,68 +0,0 @@ -{ - "invalid and of negative number", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 max value is outside of the allowed memory range", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid range check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_9, 1), - BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2), - BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1), - BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1), - BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1), - BPF_MOV32_IMM(BPF_REG_3, 1), - BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9), - 
From a3c830ae02093315a4526fa74fb7d1f66989d895 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:48 +0200
Subject: [PATCH 112/260] selftests/bpf: verifier/array_access.c converted to
 inline assembly

Test verifier/array_access.c automatically converted to use inline assembly.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-8-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c      |   2 +
 .../bpf/progs/verifier_array_access.c        | 529 ++++++++++++++++++
 .../selftests/bpf/verifier/array_access.c    | 379 -------------
 3 files changed, 531 insertions(+), 379 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_array_access.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/array_access.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 34526f6d5ab1..60eb0f38ed92 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -4,6 +4,7 @@
 
 #include "cap_helpers.h"
 #include "verifier_and.skel.h"
+#include "verifier_array_access.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -30,3 +31,4 @@ static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_fac
 #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes)
 
 void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_array_access(void) { RUN(verifier_array_access); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
new file mode 100644
index 000000000000..95d7ecc12963
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/array_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+	unsigned int index;
+	int foo[MAX_ENTRIES];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct test_val);
+	__uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct test_val);
+	__uint(map_flags, BPF_F_WRONLY_PROG);
+} map_array_wo SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, long long);
+ __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +SEC("socket") +__description("valid map access into an array with a constant") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) +__naked void an_array_with_a_constant_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("valid map access into an array with a register") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void an_array_with_a_register_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 4; \ + r1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("valid map access into an array with a variable") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void an_array_with_a_variable_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + if r1 >= %[max_entries] goto l0_%=; \ + r1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(max_entries, MAX_ENTRIES), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("valid map access into an array with a signed variable") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void array_with_a_signed_variable(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + if w1 s> 0xffffffff goto l1_%=; \ + w1 = 0; \ +l1_%=: w2 = %[max_entries]; \ + if r2 s> r1 goto l2_%=; \ + w1 = 0; \ +l2_%=: w1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(max_entries, MAX_ENTRIES), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with a constant") +__failure __msg("invalid access to map value, value_size=48 off=48 size=8") +__failure_unpriv +__naked void an_array_with_a_constant_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + %[__imm_0]) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, (MAX_ENTRIES + 1) << 2), + __imm_const(test_val_foo, 
offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with a register") +__failure __msg("R0 min value is outside of the allowed memory range") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void an_array_with_a_register_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = %[__imm_0]; \ + r1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, MAX_ENTRIES + 1), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with a variable") +__failure +__msg("R0 unbounded memory access, make sure to bounds check any such access") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void an_array_with_a_variable_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + r1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with no floor check") +__failure __msg("R0 unbounded memory access") +__failure_unpriv __msg_unpriv("R0 leaks addr") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void array_with_no_floor_check(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r0 + 0); \ + w2 = %[max_entries]; \ + if r2 s> r1 goto l1_%=; \ + w1 = 0; \ +l1_%=: w1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(max_entries, MAX_ENTRIES), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with a invalid max check") +__failure __msg("invalid access to map value, value_size=48 off=44 size=8") +__failure_unpriv __msg_unpriv("R0 leaks addr") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void with_a_invalid_max_check_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + w2 = %[__imm_0]; \ + if r2 > r1 goto l1_%=; \ + w1 = 0; \ +l1_%=: w1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, MAX_ENTRIES + 1), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access into an array with a invalid max check") +__failure __msg("R0 pointer += pointer") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void with_a_invalid_max_check_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call 
%[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r8 = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r0 += r8; \ + r0 = *(u32*)(r0 + %[test_val_foo]); \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("socket") +__description("valid read map access into a read-only array 1") +__success __success_unpriv __retval(28) +__naked void a_read_only_array_1_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_ro] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r0 = *(u32*)(r0 + 0); \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array_ro) + : __clobber_all); +} + +SEC("tc") +__description("valid read map access into a read-only array 2") +__success __retval(65507) +__naked void a_read_only_array_2_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_ro] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 &= 0xffff; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm(bpf_map_lookup_elem), + __imm_addr(map_array_ro) + : __clobber_all); +} + +SEC("socket") +__description("invalid write map access into a read-only array 1") +__failure __msg("write into map forbidden") +__failure_unpriv +__naked void a_read_only_array_1_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_ro] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 42; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array_ro) + : __clobber_all); +} + +SEC("tc") +__description("invalid write map access into a read-only array 2") +__failure __msg("write into map forbidden") +__naked void a_read_only_array_2_2(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_ro] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r0; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_skb_load_bytes), + __imm_addr(map_array_ro) + : __clobber_all); +} + +SEC("socket") +__description("valid write map access into a write-only array 1") +__success __success_unpriv __retval(1) +__naked void a_write_only_array_1_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_wo] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 42; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array_wo) + : __clobber_all); +} + +SEC("tc") +__description("valid write map access into a write-only array 2") +__success __retval(0) +__naked void a_write_only_array_2_1(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_wo] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r0; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + 
__imm(bpf_skb_load_bytes), + __imm_addr(map_array_wo) + : __clobber_all); +} + +SEC("socket") +__description("invalid read map access into a write-only array 1") +__failure __msg("read from map forbidden") +__failure_unpriv +__naked void a_write_only_array_1_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_wo] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r0 = *(u64*)(r0 + 0); \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array_wo) + : __clobber_all); +} + +SEC("tc") +__description("invalid read map access into a write-only array 2") +__failure __msg("read from map forbidden") +__naked void a_write_only_array_2_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_array_wo] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm(bpf_map_lookup_elem), + __imm_addr(map_array_wo) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c deleted file mode 100644 index 1b138cd2b187..000000000000 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ /dev/null @@ -1,379 +0,0 @@ -{ - "valid map access into an array with a constant", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, -}, -{ - "valid map access into an array with a register", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "valid map access into an array with a variable", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "valid map access into an array with a signed variable", - .insns = { - 
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), - BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid map access into an array with a constant", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=48 size=8", - .result = REJECT, -}, -{ - "invalid map access into an array with a register", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 min value is outside of the allowed memory range", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid map access into an array with a variable", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 unbounded memory access, make sure to bounds check any such access", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid map access into an array with no floor check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), - BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 
offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", - .result_unpriv = REJECT, - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid map access into an array with a invalid max check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .errstr = "invalid access to map value, value_size=48 off=44 size=8", - .result_unpriv = REJECT, - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid map access into an array with a invalid max check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3, 11 }, - .errstr = "R0 pointer += pointer", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "valid read map access into a read-only array 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_ro = { 3 }, - .result = ACCEPT, - .retval = 28, -}, -{ - "valid read map access into a read-only array 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_array_ro = { 3 }, - .result = ACCEPT, - .retval = 65507, -}, -{ - "invalid write map access into a read-only array 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), 
- BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_array_ro = { 3 }, - .result = REJECT, - .errstr = "write into map forbidden", -}, -{ - "invalid write map access into a read-only array 2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_array_ro = { 4 }, - .result = REJECT, - .errstr = "write into map forbidden", -}, -{ - "valid write map access into a write-only array 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_wo = { 3 }, - .result = ACCEPT, - .retval = 1, -}, -{ - "valid write map access into a write-only array 2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_array_wo = { 4 }, - .result = ACCEPT, - .retval = 0, -}, -{ - "invalid read map access into a write-only array 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_wo = { 3 }, - .result = REJECT, - .errstr = "read from map forbidden", -}, -{ - "invalid read map access into a write-only array 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_array_wo = { 3 }, - .result = REJECT, - .errstr = "read from map forbidden", -}, From 0ccbe4956d6c20fa0a09a72d2033c49f0976ed6c Mon Sep 17 
00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:49 +0200
Subject: [PATCH 113/260] selftests/bpf: verifier/basic_stack.c converted to
 inline assembly

Test verifier/basic_stack.c automatically converted to use inline assembly.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-9-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c      |   2 +
 .../bpf/progs/verifier_basic_stack.c         | 100 ++++++++++++++++++
 .../selftests/bpf/verifier/basic_stack.c     |  64 -----------
 3 files changed, 102 insertions(+), 64 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_basic_stack.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/basic_stack.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 60eb0f38ed92..95a3151db052 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -5,6 +5,7 @@
 #include "cap_helpers.h"
 #include "verifier_and.skel.h"
 #include "verifier_array_access.skel.h"
+#include "verifier_basic_stack.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -32,3 +33,4 @@ static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_fac
 
 void test_verifier_and(void) { RUN(verifier_and); }
 void test_verifier_array_access(void) { RUN(verifier_array_access); }
+void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
new file mode 100644
index 000000000000..359df865a8f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/basic_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, long long);
+	__type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("stack out of bounds")
+__failure __msg("invalid write to stack")
+__failure_unpriv
+__naked void stack_out_of_bounds(void)
+{
+	asm volatile ("					\
+	r1 = 0;						\
+	*(u64*)(r10 + 8) = r1;				\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack1")
+__failure __msg("invalid indirect read from stack")
+__failure_unpriv
+__naked void uninitialized_stack1(void)
+{
+	asm volatile ("					\
+	r2 = r10;					\
+	r2 += -8;					\
+	r1 = %[map_hash_8b] ll;				\
+	call %[bpf_map_lookup_elem];			\
+	exit;						\
+"	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map_hash_8b)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack2")
+__failure __msg("invalid read from stack")
+__failure_unpriv
+__naked void uninitialized_stack2(void)
+{
+	asm volatile ("					\
+	r2 = r10;					\
+	r0 = *(u64*)(r2 - 8);				\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("invalid fp arithmetic")
+__failure __msg("R1 subtraction from stack pointer")
+__failure_unpriv
+__naked void invalid_fp_arithmetic(void)
+{
+	/* If this gets ever changed, make sure JITs can deal with it.
*/ + asm volatile (" \ + r0 = 0; \ + r1 = r10; \ + r1 -= 8; \ + *(u64*)(r1 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("non-invalid fp arithmetic") +__success __success_unpriv __retval(0) +__naked void non_invalid_fp_arithmetic(void) +{ + asm volatile (" \ + r0 = 0; \ + *(u64*)(r10 - 8) = r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("misaligned read from stack") +__failure __msg("misaligned stack access") +__failure_unpriv +__naked void misaligned_read_from_stack(void) +{ + asm volatile (" \ + r2 = r10; \ + r0 = *(u64*)(r2 - 4); \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c deleted file mode 100644 index f995777dddb3..000000000000 --- a/tools/testing/selftests/bpf/verifier/basic_stack.c +++ /dev/null @@ -1,64 +0,0 @@ -{ - "stack out of bounds", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid write to stack", - .result = REJECT, -}, -{ - "uninitialized stack1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 2 }, - .errstr = "invalid indirect read from stack", - .result = REJECT, -}, -{ - "uninitialized stack2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid read from stack", - .result = REJECT, -}, -{ - "invalid fp arithmetic", - /* If this gets ever changed, make sure JITs can deal with it. */ - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 subtraction from stack pointer", - .result = REJECT, -}, -{ - "non-invalid fp arithmetic", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, -}, -{ - "misaligned read from stack", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned stack access", - .result = REJECT, -}, From 7605f94b3492328f37815c9b5749ffba5c76da84 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:50 +0200 Subject: [PATCH 114/260] selftests/bpf: verifier/bounds_deduction.c converted to inline assembly Test verifier/bounds_deduction.c automatically converted to use inline assembly. 
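
As in the other conversions in this series, the old test_verifier table
fields map one-for-one onto bpf_misc.h annotations: .errstr with
.result = REJECT becomes __failure __msg(), the _unpriv fields become
__failure_unpriv and __msg_unpriv(), .retval becomes __retval(), and
F_NEEDS_EFFICIENT_UNALIGNED_ACCESS becomes __flag(BPF_F_ANY_ALIGNMENT).
A minimal illustration of the resulting shape (an invented trivial test,
not one of the converted cases):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"

	SEC("socket")
	__description("example: trivially accepted program")
	__success __success_unpriv	/* old: .result = .result_unpriv = ACCEPT */
	__retval(0)			/* old: .retval = 0 */
	__naked void example_trivially_accepted(void)
	{
		asm volatile ("			\
		r0 = 0;				\
		exit;				\
	"	::: __clobber_all);
	}

	char _license[] SEC("license") = "GPL";
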
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-10-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c      |   2 +
 .../bpf/progs/verifier_bounds_deduction.c    | 171 ++++++++++++++++++
 .../selftests/bpf/verifier/bounds_deduction.c | 136 --------------
 3 files changed, 173 insertions(+), 136 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/bounds_deduction.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 95a3151db052..a8cfef92ed64 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -6,6 +6,7 @@
 #include "verifier_and.skel.h"
 #include "verifier_array_access.skel.h"
 #include "verifier_basic_stack.skel.h"
+#include "verifier_bounds_deduction.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -34,3 +35,4 @@ static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_fac
 void test_verifier_and(void) { RUN(verifier_and); }
 void test_verifier_array_access(void) { RUN(verifier_array_access); }
 void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
+void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
new file mode 100644
index 000000000000..c506afbdd936
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_deduction.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from const, 1")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_1(void)
+{
+	asm volatile ("					\
+	r0 = 1;						\
+	if r0 s>= 1 goto l0_%=;				\
+l0_%=:	r0 -= r1;					\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 2")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(1)
+__naked void deducing_bounds_from_const_2(void)
+{
+	asm volatile ("					\
+	r0 = 1;						\
+	if r0 s>= 1 goto l0_%=;				\
+	exit;						\
+l0_%=:	if r0 s<= 1 goto l1_%=;				\
+	exit;						\
+l1_%=:	r1 -= r0;					\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 3")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_3(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	if r0 s<= 0 goto l0_%=;				\
+l0_%=:	r0 -= r1;					\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 4")
+__success __failure_unpriv
+__msg_unpriv("R6 has pointer with unsupported alu operation")
+__retval(0)
+__naked void deducing_bounds_from_const_4(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r0 = 0;						\
+	if r0 s<= 0 goto l0_%=;				\
+	exit;						\
+l0_%=:	if r0 s>= 0 goto l1_%=;				\
+	exit;						\
+l1_%=:	r6 -= r0;					\
+	exit;						\
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 5")
+__failure __msg("R0 tried to subtract pointer from
scalar") +__msg_unpriv("R1 has pointer with unsupported alu operation") +__naked void deducing_bounds_from_const_5(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 s>= 1 goto l0_%=; \ + r0 -= r1; \ +l0_%=: exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from const, 6") +__failure __msg("R0 tried to subtract pointer from scalar") +__msg_unpriv("R1 has pointer with unsupported alu operation") +__naked void deducing_bounds_from_const_6(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 s>= 0 goto l0_%=; \ + exit; \ +l0_%=: r0 -= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from const, 7") +__failure __msg("dereference of modified ctx ptr") +__msg_unpriv("R1 has pointer with unsupported alu operation") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void deducing_bounds_from_const_7(void) +{ + asm volatile (" \ + r0 = %[__imm_0]; \ + if r0 s>= 0 goto l0_%=; \ +l0_%=: r1 -= r0; \ + r0 = *(u32*)(r1 + %[__sk_buff_mark]); \ + exit; \ +" : + : __imm_const(__imm_0, ~0), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from const, 8") +__failure __msg("negative offset ctx ptr R1 off=-1 disallowed") +__msg_unpriv("R1 has pointer with unsupported alu operation") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void deducing_bounds_from_const_8(void) +{ + asm volatile (" \ + r0 = %[__imm_0]; \ + if r0 s>= 0 goto l0_%=; \ + r1 += r0; \ +l0_%=: r0 = *(u32*)(r1 + %[__sk_buff_mark]); \ + exit; \ +" : + : __imm_const(__imm_0, ~0), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from const, 9") +__failure __msg("R0 tried to subtract pointer from scalar") +__msg_unpriv("R1 has pointer with unsupported alu operation") +__naked void deducing_bounds_from_const_9(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 s>= 0 goto l0_%=; \ +l0_%=: r0 -= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from const, 10") +__failure +__msg("math between ctx pointer and register with unbounded min value is not allowed") +__failure_unpriv +__naked void deducing_bounds_from_const_10(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 s<= 0 goto l0_%=; \ +l0_%=: /* Marks reg as unknown. 
*/ \ + r0 = -r0; \ + r0 -= r1; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c deleted file mode 100644 index 3931c481e30c..000000000000 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ /dev/null @@ -1,136 +0,0 @@ -{ - "check deducing bounds from const, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "R0 tried to subtract pointer from scalar", - .result = REJECT, -}, -{ - "check deducing bounds from const, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = 1, -}, -{ - "check deducing bounds from const, 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "R0 tried to subtract pointer from scalar", - .result = REJECT, -}, -{ - "check deducing bounds from const, 4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R6 has pointer with unsupported alu operation", - .result_unpriv = REJECT, - .result = ACCEPT, -}, -{ - "check deducing bounds from const, 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "R0 tried to subtract pointer from scalar", - .result = REJECT, -}, -{ - "check deducing bounds from const, 6", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "R0 tried to subtract pointer from scalar", - .result = REJECT, -}, -{ - "check deducing bounds from const, 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, ~0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "dereference of modified ctx ptr", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "check deducing bounds from const, 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, ~0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 has pointer with unsupported alu operation", - .errstr = "negative offset ctx ptr R1 off=-1 disallowed", - 
.result = REJECT,
-	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
-	"check deducing bounds from const, 9",
-	.insns = {
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
-	BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
-	BPF_EXIT_INSN(),
-	},
-	.errstr_unpriv = "R1 has pointer with unsupported alu operation",
-	.errstr = "R0 tried to subtract pointer from scalar",
-	.result = REJECT,
-},
-{
-	"check deducing bounds from const, 10",
-	.insns = {
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
-	/* Marks reg as unknown. */
-	BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
-	BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
-	BPF_EXIT_INSN(),
-	},
-	.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
-	.result = REJECT,
-},

From b14a702afd0d2da746294ed6070668b839a77793 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:51 +0200
Subject: [PATCH 115/260] selftests/bpf: verifier/bounds_mix_sign_unsign.c
 converted to inline assembly

Test verifier/bounds_mix_sign_unsign.c automatically converted to use inline
assembly.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-11-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c      |   2 +
 .../progs/verifier_bounds_mix_sign_unsign.c  | 554 ++++++++++++++++++
 .../bpf/verifier/bounds_mix_sign_unsign.c    | 411 ------------
 3 files changed, 556 insertions(+), 411 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index a8cfef92ed64..bbc39412fcd1 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -7,6 +7,7 @@
 #include "verifier_array_access.skel.h"
 #include "verifier_basic_stack.skel.h"
 #include "verifier_bounds_deduction.skel.h"
+#include "verifier_bounds_mix_sign_unsign.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -36,3 +37,4 @@ void test_verifier_and(void) { RUN(verifier_and); }
 void test_verifier_array_access(void) { RUN(verifier_array_access); }
 void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
 void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
+void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
new file mode 100644
index 000000000000..91a66357896a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, long long);
+	__type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, positive bounds")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_positive_bounds(void)
+{
+	asm volatile ("					\
+	call %[bpf_ktime_get_ns];			\
+	*(u64*)(r10 - 16) = r0;				\
+	r1 = 0;						\
*(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = 2; \ + if r2 >= r1 goto l0_%=; \ + if r1 s> 4 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void checks_mixing_signed_and_unsigned(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r1 > r2 goto l0_%=; \ + if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 2") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_2(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r1 > r2 goto l0_%=; \ + r8 = 0; \ + r8 += r1; \ + if r8 s> 1 goto l0_%=; \ + r0 += r8; \ + r0 = 0; \ + *(u8*)(r8 + 0) = r0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 3") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_3(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r1 > r2 goto l0_%=; \ + r8 = r1; \ + if r8 s> 1 goto l0_%=; \ + r0 += r8; \ + r0 = 0; \ + *(u8*)(r8 + 0) = r0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 4") +__success __success_unpriv __retval(0) +__naked void signed_and_unsigned_variant_4(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = 1; \ + r1 &= r2; \ + if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 5") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_5(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + 
*(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r1 > r2 goto l0_%=; \ + if r1 s> 1 goto l0_%=; \ + r0 += 4; \ + r0 -= r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ + r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 6") +__failure __msg("R4 min value is negative, either use unsigned") +__failure_unpriv +__naked void signed_and_unsigned_variant_6(void) +{ + asm volatile (" \ + r9 = r1; \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = r9; \ + r2 = 0; \ + r3 = r10; \ + r3 += -512; \ + r4 = *(u64*)(r10 - 16); \ + r6 = -1; \ + if r4 > r6 goto l0_%=; \ + if r4 s> 1 goto l0_%=; \ + r4 += 1; \ + r5 = 0; \ + r6 = 0; \ + *(u16*)(r10 - 512) = r6; \ + call %[bpf_skb_load_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 7") +__success __success_unpriv __retval(0) +__naked void signed_and_unsigned_variant_7(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = %[__imm_0]; \ + if r1 > r2 goto l0_%=; \ + if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__imm_0, 1024 * 1024 * 1024) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 8") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_8(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r2 > r1 goto l1_%=; \ + r0 = 0; \ + exit; \ +l1_%=: if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 9") +__success __success_unpriv __retval(0) +__naked void signed_and_unsigned_variant_9(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -9223372036854775808ULL ll; \ + if r2 > r1 goto l1_%=; \ + r0 = 0; \ + exit; \ +l1_%=: if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 10") +__failure __msg("unbounded min value") +__failure_unpriv 
+__naked void signed_and_unsigned_variant_10(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = 0; \ + if r2 > r1 goto l1_%=; \ + r0 = 0; \ + exit; \ +l1_%=: if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 11") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_11(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + if r2 >= r1 goto l1_%=; \ + /* Dead branch. */ \ + r0 = 0; \ + exit; \ +l1_%=: if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 12") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_12(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -6; \ + if r2 >= r1 goto l1_%=; \ + r0 = 0; \ + exit; \ +l1_%=: if r1 s> 1 goto l0_%=; \ + r0 += r1; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 13") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_13(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = 2; \ + if r2 >= r1 goto l0_%=; \ + r7 = 1; \ + if r7 s> 0 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r7 += r1; \ + if r7 s> 4 goto l2_%=; \ + r0 += r7; \ + r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l2_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 14") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_14(void) +{ + asm volatile (" \ + r9 = *(u32*)(r1 + %[__sk_buff_mark]); \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -1; \ + r8 = 2; \ + if r9 == 42 goto l1_%=; \ + if r8 s> r1 goto l2_%=; \ +l3_%=: if r1 s> 1 goto l2_%=; \ + r0 += 
r1; \ +l0_%=: r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ +l2_%=: r0 = 0; \ + exit; \ +l1_%=: if r1 > r2 goto l2_%=; \ + goto l3_%=; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)) + : __clobber_all); +} + +SEC("socket") +__description("bounds checks mixing signed and unsigned, variant 15") +__failure __msg("unbounded min value") +__failure_unpriv +__naked void signed_and_unsigned_variant_15(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + *(u64*)(r10 - 16) = r0; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u64*)(r10 - 16); \ + r2 = -6; \ + if r2 >= r1 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 += r1; \ + if r0 > 1 goto l2_%=; \ + r0 = 0; \ + exit; \ +l2_%=: r1 = 0; \ + *(u8*)(r0 + 0) = r1; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c deleted file mode 100644 index bf82b923c5fe..000000000000 --- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c +++ /dev/null @@ -1,411 +0,0 @@ -{ - "bounds checks mixing signed and unsigned, positive bounds", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 2), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 2", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, 
-16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 3", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 4", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result = ACCEPT, -}, -{ - "bounds checks mixing signed and unsigned, variant 5", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 6", - .insns = { - BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_6, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), - 
BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R4 min value is negative, either use unsigned", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 7", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result = ACCEPT, -}, -{ - "bounds checks mixing signed and unsigned, variant 8", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 9", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result = ACCEPT, -}, -{ - "bounds checks mixing signed and unsigned, variant 10", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - 
BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 11", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - /* Dead branch. */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 12", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -6), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 13", - .insns = { - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 2), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "unbounded min value", - .result = REJECT, -}, -{ - "bounds checks mixing signed and unsigned, variant 14", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - 
BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_8, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
- BPF_JMP_IMM(BPF_JA, 0, 0, -7),
- },
- .fixup_map_hash_8b = { 6 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 15",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
From 2f2047c22cda4fbbe6bb889cc6c5450cd90688f8 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:54:52 +0200
Subject: [PATCH 116/260] selftests/bpf: verifier/cfg.c converted to inline assembly
Test verifier/cfg.c automatically converted to use inline assembly.
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-12-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
--- .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../selftests/bpf/progs/verifier_cfg.c | 100 ++++++++++++++++++
 tools/testing/selftests/bpf/verifier/cfg.c | 73 -------------
 3 files changed, 102 insertions(+), 73 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_cfg.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/cfg.c
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index bbc39412fcd1..46182abecabb 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -8,6 +8,7 @@ #include "verifier_basic_stack.skel.h"
 #include "verifier_bounds_deduction.skel.h"
 #include "verifier_bounds_mix_sign_unsign.skel.h"
+#include "verifier_cfg.skel.h"
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -38,3 +39,4 @@ void test_verifier_array_access(void) { RUN(verifier_array_access); }
 void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
 void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
 void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
+void test_verifier_cfg(void) { RUN(verifier_cfg); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
new file mode 100644
index 000000000000..df7697b94007
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cfg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket") +__description("unreachable") +__failure __msg("unreachable") +__failure_unpriv +__naked void unreachable(void) +{ + asm volatile (" \ + exit; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unreachable2") +__failure __msg("unreachable") +__failure_unpriv +__naked void unreachable2(void) +{ + asm volatile (" \ + goto l0_%=; \ + goto l0_%=; \ +l0_%=: exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("out of range jump") +__failure __msg("jump out of range") +__failure_unpriv +__naked void out_of_range_jump(void) +{ + asm volatile (" \ + goto l0_%=; \ + exit; \ +l0_%=: \ +" ::: __clobber_all); +} + +SEC("socket") +__description("out of range jump2") +__failure __msg("jump out of range") +__failure_unpriv +__naked void out_of_range_jump2(void) +{ + asm volatile (" \ + goto -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("loop (back-edge)") +__failure __msg("unreachable insn 1") +__msg_unpriv("back-edge") +__naked void loop_back_edge(void) +{ + asm volatile (" \ +l0_%=: goto l0_%=; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("loop2 (back-edge)") +__failure __msg("unreachable insn 4") +__msg_unpriv("back-edge") +__naked void loop2_back_edge(void) +{ + asm volatile (" \ +l0_%=: r1 = r0; \ + r2 = r0; \ + r3 = r0; \ + goto l0_%=; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("conditional loop") +__failure __msg("infinite loop detected") +__msg_unpriv("back-edge") +__naked void conditional_loop(void) +{ + asm volatile (" \ + r0 = r1; \ +l0_%=: r2 = r0; \ + r3 = r0; \ + if r1 == 0 goto l0_%=; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c deleted file mode 100644 index 4eb76ed739ce..000000000000 --- a/tools/testing/selftests/bpf/verifier/cfg.c +++ /dev/null @@ -1,73 +0,0 @@ -{ - "unreachable", - .insns = { - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable", - .result = REJECT, -}, -{ - "unreachable2", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable", - .result = REJECT, -}, -{ - "out of range jump", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "jump out of range", - .result = REJECT, -}, -{ - "out of range jump2", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, -2), - BPF_EXIT_INSN(), - }, - .errstr = "jump out of range", - .result = REJECT, -}, -{ - "loop (back-edge)", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, -1), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable insn 1", - .errstr_unpriv = "back-edge", - .result = REJECT, -}, -{ - "loop2 (back-edge)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_JMP_IMM(BPF_JA, 0, 0, -4), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable insn 4", - .errstr_unpriv = "back-edge", - .result = REJECT, -}, -{ - "conditional loop", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), - BPF_EXIT_INSN(), - }, - .errstr = "infinite loop detected", - .errstr_unpriv = "back-edge", - .result = REJECT, -}, From 047687a7f494d45198f112b51e72228aa054732c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:53 +0200 Subject: [PATCH 117/260] selftests/bpf: 
verifier/cgroup_inv_retcode.c converted to inline assembly
Test verifier/cgroup_inv_retcode.c automatically converted to use inline assembly.
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-13-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
--- .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../bpf/progs/verifier_cgroup_inv_retcode.c | 89 +++++++++++++++++++
 .../bpf/verifier/cgroup_inv_retcode.c | 72 ---------------
 3 files changed, 91 insertions(+), 72 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 46182abecabb..b138c9894abb 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -9,6 +9,7 @@ #include "verifier_bounds_deduction.skel.h"
 #include "verifier_bounds_mix_sign_unsign.skel.h"
 #include "verifier_cfg.skel.h"
+#include "verifier_cgroup_inv_retcode.skel.h"
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -40,3 +41,4 @@ void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
 void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
 void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
 void test_verifier_cfg(void) { RUN(verifier_cfg); }
+void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
new file mode 100644
index 000000000000..d6c4a7f3f790
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("R0 has value (0x0; 0xffffffff)")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test2")
+__success
+__naked void with_invalid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test3")
+__failure __msg("R0 has value (0x0; 0x3)")
+__naked void with_invalid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test4")
+__success
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test5")
+__failure __msg("R0 has value (0x2; 0x0)")
+__naked void with_invalid_return_code_test5(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. 
test6") +__failure __msg("R0 is not a known value (ctx)") +__naked void with_invalid_return_code_test6(void) +{ + asm volatile (" \ + r0 = r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("cgroup/sock") +__description("bpf_exit with invalid return code. test7") +__failure __msg("R0 has unknown scalar value") +__naked void with_invalid_return_code_test7(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + 0); \ + r2 = *(u32*)(r1 + 4); \ + r0 *= r2; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c deleted file mode 100644 index 6d65fe3e7321..000000000000 --- a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c +++ /dev/null @@ -1,72 +0,0 @@ -{ - "bpf_exit with invalid return code. test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0xffffffff)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0x3)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test5", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x2; 0x0)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test6", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R0 is not a known value (ctx)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, -{ - "bpf_exit with invalid return code. test7", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), - BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has unknown scalar value", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, -}, From b1b6372535c0cc0cce4870b07a0938309f3a5d37 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:54 +0200 Subject: [PATCH 118/260] selftests/bpf: verifier/cgroup_skb.c converted to inline assembly Test verifier/cgroup_skb.c automatically converted to use inline assembly. 
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-14-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
--- .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../selftests/bpf/progs/verifier_cgroup_skb.c | 227 ++++++++++++++++++
 .../selftests/bpf/verifier/cgroup_skb.c | 197 ---------------
 3 files changed, 229 insertions(+), 197 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/cgroup_skb.c
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index b138c9894abb..53e41af90821 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -10,6 +10,7 @@ #include "verifier_bounds_mix_sign_unsign.skel.h"
 #include "verifier_cfg.skel.h"
 #include "verifier_cgroup_inv_retcode.skel.h"
+#include "verifier_cgroup_skb.skel.h"
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -42,3 +43,4 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); 
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
 void test_verifier_cfg(void) { RUN(verifier_cfg); }
 void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
+void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
new file mode 100644
index 000000000000..5ee3d349d6d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_skb.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/skb")
+__description("direct packet read test#1 for CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=76 size=4")
+__retval(0)
+__naked void test_1_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_pkt_type]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_queue_mapping]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_protocol]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_vlan_present]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_protocol, offsetof(struct __sk_buff, protocol)),
+ __imm_const(__sk_buff_queue_mapping, offsetof(struct __sk_buff, queue_mapping)),
+ __imm_const(__sk_buff_vlan_present, offsetof(struct __sk_buff, vlan_present))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#2 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_2_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_vlan_tci]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_vlan_proto]); \
+ r6 = *(u32*)(r1 + 
%[__sk_buff_priority]); \ + *(u32*)(r1 + %[__sk_buff_priority]) = r6; \ + r7 = *(u32*)(r1 + %[__sk_buff_ingress_ifindex]);\ + r8 = *(u32*)(r1 + %[__sk_buff_tc_index]); \ + r9 = *(u32*)(r1 + %[__sk_buff_hash]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_hash, offsetof(struct __sk_buff, hash)), + __imm_const(__sk_buff_ingress_ifindex, offsetof(struct __sk_buff, ingress_ifindex)), + __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)), + __imm_const(__sk_buff_tc_index, offsetof(struct __sk_buff, tc_index)), + __imm_const(__sk_buff_vlan_proto, offsetof(struct __sk_buff, vlan_proto)), + __imm_const(__sk_buff_vlan_tci, offsetof(struct __sk_buff, vlan_tci)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("direct packet read test#3 for CGROUP_SKB") +__success __success_unpriv __retval(0) +__naked void test_3_for_cgroup_skb(void) +{ + asm volatile (" \ + r4 = *(u32*)(r1 + %[__sk_buff_cb_0]); \ + r5 = *(u32*)(r1 + %[__sk_buff_cb_1]); \ + r6 = *(u32*)(r1 + %[__sk_buff_cb_2]); \ + r7 = *(u32*)(r1 + %[__sk_buff_cb_3]); \ + r8 = *(u32*)(r1 + %[__sk_buff_cb_4]); \ + r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \ + *(u32*)(r1 + %[__sk_buff_cb_0]) = r4; \ + *(u32*)(r1 + %[__sk_buff_cb_1]) = r5; \ + *(u32*)(r1 + %[__sk_buff_cb_2]) = r6; \ + *(u32*)(r1 + %[__sk_buff_cb_3]) = r7; \ + *(u32*)(r1 + %[__sk_buff_cb_4]) = r8; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0])), + __imm_const(__sk_buff_cb_1, offsetof(struct __sk_buff, cb[1])), + __imm_const(__sk_buff_cb_2, offsetof(struct __sk_buff, cb[2])), + __imm_const(__sk_buff_cb_3, offsetof(struct __sk_buff, cb[3])), + __imm_const(__sk_buff_cb_4, offsetof(struct __sk_buff, cb[4])), + __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("direct packet read test#4 for CGROUP_SKB") +__success __success_unpriv __retval(0) +__naked void test_4_for_cgroup_skb(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_family]); \ + r3 = *(u32*)(r1 + %[__sk_buff_remote_ip4]); \ + r4 = *(u32*)(r1 + %[__sk_buff_local_ip4]); \ + r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_0]); \ + r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_1]); \ + r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_2]); \ + r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_3]); \ + r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_0]); \ + r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_1]); \ + r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_2]); \ + r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_3]); \ + r7 = *(u32*)(r1 + %[__sk_buff_remote_port]); \ + r8 = *(u32*)(r1 + %[__sk_buff_local_port]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_family, offsetof(struct __sk_buff, family)), + __imm_const(__sk_buff_local_ip4, offsetof(struct __sk_buff, local_ip4)), + __imm_const(__sk_buff_local_ip6_0, offsetof(struct __sk_buff, local_ip6[0])), + __imm_const(__sk_buff_local_ip6_1, offsetof(struct __sk_buff, local_ip6[1])), + __imm_const(__sk_buff_local_ip6_2, offsetof(struct __sk_buff, local_ip6[2])), + __imm_const(__sk_buff_local_ip6_3, offsetof(struct __sk_buff, local_ip6[3])), + __imm_const(__sk_buff_local_port, offsetof(struct __sk_buff, local_port)), + __imm_const(__sk_buff_remote_ip4, offsetof(struct __sk_buff, remote_ip4)), + __imm_const(__sk_buff_remote_ip6_0, offsetof(struct __sk_buff, remote_ip6[0])), + __imm_const(__sk_buff_remote_ip6_1, offsetof(struct __sk_buff, remote_ip6[1])), + __imm_const(__sk_buff_remote_ip6_2, offsetof(struct __sk_buff, remote_ip6[2])), + 
__imm_const(__sk_buff_remote_ip6_3, offsetof(struct __sk_buff, remote_ip6[3])), + __imm_const(__sk_buff_remote_port, offsetof(struct __sk_buff, remote_port)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid access of tc_classid for CGROUP_SKB") +__failure __msg("invalid bpf_context access") +__failure_unpriv +__naked void tc_classid_for_cgroup_skb(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid access of data_meta for CGROUP_SKB") +__failure __msg("invalid bpf_context access") +__failure_unpriv +__naked void data_meta_for_cgroup_skb(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[__sk_buff_data_meta]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data_meta, offsetof(struct __sk_buff, data_meta)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid access of flow_keys for CGROUP_SKB") +__failure __msg("invalid bpf_context access") +__failure_unpriv +__naked void flow_keys_for_cgroup_skb(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[__sk_buff_flow_keys]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_flow_keys, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid write access to napi_id for CGROUP_SKB") +__failure __msg("invalid bpf_context access") +__failure_unpriv +__naked void napi_id_for_cgroup_skb(void) +{ + asm volatile (" \ + r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \ + *(u32*)(r1 + %[__sk_buff_napi_id]) = r9; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("write tstamp from CGROUP_SKB") +__success __failure_unpriv +__msg_unpriv("invalid bpf_context access off=152 size=8") +__retval(0) +__naked void write_tstamp_from_cgroup_skb(void) +{ + asm volatile (" \ + r0 = 0; \ + *(u64*)(r1 + %[__sk_buff_tstamp]) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("read tstamp from CGROUP_SKB") +__success __success_unpriv __retval(0) +__naked void read_tstamp_from_cgroup_skb(void) +{ + asm volatile (" \ + r0 = *(u64*)(r1 + %[__sk_buff_tstamp]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/cgroup_skb.c b/tools/testing/selftests/bpf/verifier/cgroup_skb.c deleted file mode 100644 index 52e4c03b076b..000000000000 --- a/tools/testing/selftests/bpf/verifier/cgroup_skb.c +++ /dev/null @@ -1,197 +0,0 @@ -{ - "direct packet read test#1 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, pkt_type)), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, queue_mapping)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, 
protocol)), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, vlan_present)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid bpf_context access off=76 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "direct packet read test#2 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, vlan_tci)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, vlan_proto)), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, priority)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, priority)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, ingress_ifindex)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, tc_index)), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, hash)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "direct packet read test#3 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, cb[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, napi_id)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, - offsetof(struct __sk_buff, cb[1])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, cb[2])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, - offsetof(struct __sk_buff, cb[3])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, - offsetof(struct __sk_buff, cb[4])), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "direct packet read test#4 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, family)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip4)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, local_ip4)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, remote_port)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, local_port)), - 
BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid access of tc_classid for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid access of data_meta for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data_meta)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid access of flow_keys for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, flow_keys)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid write access to napi_id for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, napi_id)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, - offsetof(struct __sk_buff, napi_id)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "write tstamp from CGROUP_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid bpf_context access off=152 size=8", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "read tstamp from CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, From 8f16f3c07e460f81ff6f4d673c7edd413db19ffe Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:55 +0200 Subject: [PATCH 119/260] selftests/bpf: verifier/cgroup_storage.c converted to inline assembly Test verifier/cgroup_storage.c automatically converted to use inline assembly. 
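Worth noting for this conversion: the old tests loaded a placeholder fd with BPF_LD_MAP_FD(BPF_REG_1, 0) and relied on the .fixup_cgroup_storage / .fixup_percpu_cgroup_storage tables to patch a real map in at load time. The converted tests instead declare the maps inside the program object and take their address with the 64-bit immediate load ("ll") pseudo-instruction, which libbpf relocates. A minimal sketch, reusing the declarations from the generated file below:

struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__uint(max_entries, 0);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, char[TEST_DATA_LEN]);
} cgroup_storage SEC(".maps");

/* then, inside a __naked test body, with __imm_addr(cgroup_storage)
 * listed as an input operand:
 *	r1 = %[cgroup_storage] ll;
 */
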
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-15-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
--- .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../bpf/progs/verifier_cgroup_storage.c | 308 ++++++++++++++++++
 .../selftests/bpf/verifier/cgroup_storage.c | 220 -------------
 3 files changed, 310 insertions(+), 220 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/cgroup_storage.c
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 53e41af90821..3b47620a1f42 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -11,6 +11,7 @@ #include "verifier_cfg.skel.h"
 #include "verifier_cgroup_inv_retcode.skel.h"
 #include "verifier_cgroup_skb.skel.h"
+#include "verifier_cgroup_storage.skel.h"
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -44,3 +45,4 @@ void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_u 
void test_verifier_cfg(void) { RUN(verifier_cfg); }
 void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
 void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
+void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
new file mode 100644
index 000000000000..9a13f5c11ac7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_storage.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[TEST_DATA_LEN]);
+} cgroup_storage SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[64]);
+} percpu_cgroup_storage SEC(".maps");
+
+SEC("cgroup/skb")
+__description("valid cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void valid_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void 
invalid_cgroup_storage_access_2(void) +{ + asm volatile (" \ + r2 = 0; \ + .8byte %[ld_map_fd]; \ + .8byte 0; \ + call %[bpf_get_local_storage]; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid cgroup storage access 3") +__failure __msg("invalid access to map value, value_size=64 off=256 size=4") +__failure_unpriv +__naked void invalid_cgroup_storage_access_3(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 256); \ + r1 += 1; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid cgroup storage access 4") +__failure __msg("invalid access to map value, value_size=64 off=-2 size=4") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void invalid_cgroup_storage_access_4(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 - 2); \ + r0 = r1; \ + r1 += 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid cgroup storage access 5") +__failure __msg("get_local_storage() doesn't support non-zero flags") +__failure_unpriv +__naked void invalid_cgroup_storage_access_5(void) +{ + asm volatile (" \ + r2 = 7; \ + r1 = %[cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid cgroup storage access 6") +__failure __msg("get_local_storage() doesn't support non-zero flags") +__msg_unpriv("R2 leaks addr into helper function") +__naked void invalid_cgroup_storage_access_6(void) +{ + asm volatile (" \ + r2 = r1; \ + r1 = %[cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("valid per-cpu cgroup storage access") +__success __success_unpriv __retval(0) +__naked void per_cpu_cgroup_storage_access(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[percpu_cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(percpu_cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 1") +__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage") +__failure_unpriv +__naked void cpu_cgroup_storage_access_1(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 2") +__failure __msg("fd 1 is not pointing to valid bpf_map") +__failure_unpriv +__naked void cpu_cgroup_storage_access_2(void) +{ + asm volatile (" \ + r2 = 0; \ + .8byte %[ld_map_fd]; \ + .8byte 0; \ + call %[bpf_get_local_storage]; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + 
__imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1)) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 3") +__failure __msg("invalid access to map value, value_size=64 off=256 size=4") +__failure_unpriv +__naked void cpu_cgroup_storage_access_3(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[percpu_cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 256); \ + r1 += 1; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(percpu_cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 4") +__failure __msg("invalid access to map value, value_size=64 off=-2 size=4") +__failure_unpriv +__flag(BPF_F_ANY_ALIGNMENT) +__naked void cpu_cgroup_storage_access_4(void) +{ + asm volatile (" \ + r2 = 0; \ + r1 = %[cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 - 2); \ + r0 = r1; \ + r1 += 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 5") +__failure __msg("get_local_storage() doesn't support non-zero flags") +__failure_unpriv +__naked void cpu_cgroup_storage_access_5(void) +{ + asm volatile (" \ + r2 = 7; \ + r1 = %[percpu_cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(percpu_cgroup_storage) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("invalid per-cpu cgroup storage access 6") +__failure __msg("get_local_storage() doesn't support non-zero flags") +__msg_unpriv("R2 leaks addr into helper function") +__naked void cpu_cgroup_storage_access_6(void) +{ + asm volatile (" \ + r2 = r1; \ + r1 = %[percpu_cgroup_storage] ll; \ + call %[bpf_get_local_storage]; \ + r1 = *(u32*)(r0 + 0); \ + r0 = r1; \ + r0 &= 1; \ + exit; \ +" : + : __imm(bpf_get_local_storage), + __imm_addr(percpu_cgroup_storage) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/cgroup_storage.c b/tools/testing/selftests/bpf/verifier/cgroup_storage.c deleted file mode 100644 index 97057c0a1b8a..000000000000 --- a/tools/testing/selftests/bpf/verifier/cgroup_storage.c +++ /dev/null @@ -1,220 +0,0 @@ -{ - "valid cgroup storage access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid cgroup storage access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid cgroup storage access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_get_local_storage), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "fd 1 is not pointing to valid bpf_map", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid cgroup storage access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=256 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid cgroup storage access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=-2 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid cgroup storage access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 7), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid cgroup storage access 6", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .errstr_unpriv = "R2 leaks addr into helper function", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "valid per-cpu cgroup storage access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid per-cpu cgroup storage access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid per-cpu cgroup storage access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - 
}, - .result = REJECT, - .errstr = "fd 1 is not pointing to valid bpf_map", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid per-cpu cgroup storage access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=256 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid per-cpu cgroup storage access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=-2 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid per-cpu cgroup storage access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 7), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "invalid per-cpu cgroup storage access 6", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .errstr_unpriv = "R2 leaks addr into helper function", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, From a2777eaad5d9b7b06917d5bd3e786a1733e9dc3c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:56 +0200 Subject: [PATCH 120/260] selftests/bpf: verifier/const_or.c converted to inline assembly Test verifier/const_or.c automatically converted to use inline assembly. 
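The conversion pattern, for readers following the series, is mechanical: each struct bpf_test entry becomes a standalone __naked C function whose body is BPF inline assembly, while the .prog_type/.result/.errstr fields become SEC()/__success/__failure/__msg() annotations (defined in bpf_misc.h and interpreted by the selftests' test_loader machinery). A minimal before/after sketch, lifted from the first case of this file (the full patch below is authoritative):

    /* old macro form (verifier/const_or.c): */
    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
    BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),

    /* new inline-assembly form (progs/verifier_const_or.c): */
    r1 = r10;
    r1 += -48;
    r2 |= 13;

with .prog_type = BPF_PROG_TYPE_TRACEPOINT expressed as SEC("tracepoint") and .result = ACCEPT expressed as __success.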
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-16-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_const_or.c | 82 +++++++++++++++++++ .../testing/selftests/bpf/verifier/const_or.c | 60 -------------- 3 files changed, 84 insertions(+), 60 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_const_or.c delete mode 100644 tools/testing/selftests/bpf/verifier/const_or.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 3b47620a1f42..36fdede7dcab 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -12,6 +12,7 @@ #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" #include "verifier_cgroup_storage.skel.h" +#include "verifier_const_or.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -46,3 +47,4 @@ void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); } +void test_verifier_const_or(void) { RUN(verifier_const_or); } diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c new file mode 100644 index 000000000000..ba8922b2eebd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/const_or.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("tracepoint") +__description("constant register |= constant should keep constant type") +__success +__naked void constant_should_keep_constant_type(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -48; \ + r2 = 34; \ + r2 |= 13; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("constant register |= constant should not bypass stack boundary checks") +__failure __msg("invalid indirect access to stack R1 off=-48 size=58") +__naked void not_bypass_stack_boundary_checks_1(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -48; \ + r2 = 34; \ + r2 |= 24; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("constant register |= constant register should keep constant type") +__success +__naked void register_should_keep_constant_type(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -48; \ + r2 = 34; \ + r4 = 13; \ + r2 |= r4; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("constant register |= constant register should not bypass stack boundary checks") +__failure __msg("invalid indirect access to stack R1 off=-48 size=58") +__naked void not_bypass_stack_boundary_checks_2(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -48; \ + r2 = 34; \ + r4 = 24; \ + r2 |= r4; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git
a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c deleted file mode 100644 index 0719b0ddec04..000000000000 --- a/tools/testing/selftests/bpf/verifier/const_or.c +++ /dev/null @@ -1,60 +0,0 @@ -{ - "constant register |= constant should keep constant type", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "constant register |= constant should not bypass stack boundary checks", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect access to stack R1 off=-48 size=58", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "constant register |= constant register should keep constant type", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_MOV64_IMM(BPF_REG_4, 13), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "constant register |= constant register should not bypass stack boundary checks", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_MOV64_IMM(BPF_REG_4, 24), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect access to stack R1 off=-48 size=58", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, From a58475a98903c756a7f731cffdf20242ed17b9b0 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:57 +0200 Subject: [PATCH 121/260] selftests/bpf: verifier/ctx_sk_msg.c converted to inline assembly Test verifier/ctx_sk_msg.c automatically converted to use inline assembly. 
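One detail of this conversion worth calling out: C expressions such as offsetof() cannot be evaluated inside the assembly string itself, so context-field offsets are passed in as named immediates via __imm_const. Roughly, the old

    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
                offsetof(struct sk_msg_md, family)),

becomes

    asm volatile ("                                 \
        r0 = *(u32*)(r1 + %[sk_msg_md_family]);     \
        exit;                                       \
    " :
      : __imm_const(sk_msg_md_family, offsetof(struct sk_msg_md, family))
      : __clobber_all);

so the offset is still computed from the UAPI struct at compile time rather than hard-coded into the assembly.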
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-17-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_ctx_sk_msg.c | 228 ++++++++++++++++++ .../selftests/bpf/verifier/ctx_sk_msg.c | 181 -------------- 3 files changed, 230 insertions(+), 181 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c delete mode 100644 tools/testing/selftests/bpf/verifier/ctx_sk_msg.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 36fdede7dcab..29351c774ee2 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -13,6 +13,7 @@ #include "verifier_cgroup_skb.skel.h" #include "verifier_cgroup_storage.skel.h" #include "verifier_const_or.skel.h" +#include "verifier_ctx_sk_msg.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -48,3 +49,4 @@ void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode) void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); } void test_verifier_const_or(void) { RUN(verifier_const_or); } +void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c new file mode 100644 index 000000000000..65edc89799f9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/ctx_sk_msg.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("sk_msg") +__description("valid access family in SK_MSG") +__success +__naked void access_family_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_family]); \ + exit; \ +" : + : __imm_const(sk_msg_md_family, offsetof(struct sk_msg_md, family)) + : __clobber_all); +} + +SEC("sk_msg") +__description("valid access remote_ip4 in SK_MSG") +__success +__naked void remote_ip4_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip4]); \ + exit; \ +" : + : __imm_const(sk_msg_md_remote_ip4, offsetof(struct sk_msg_md, remote_ip4)) + : __clobber_all); +} + +SEC("sk_msg") +__description("valid access local_ip4 in SK_MSG") +__success +__naked void local_ip4_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_ip4]); \ + exit; \ +" : + : __imm_const(sk_msg_md_local_ip4, offsetof(struct sk_msg_md, local_ip4)) + : __clobber_all); +} + +SEC("sk_msg") +__description("valid access remote_port in SK_MSG") +__success +__naked void remote_port_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_remote_port]); \ + exit; \ +" : + : __imm_const(sk_msg_md_remote_port, offsetof(struct sk_msg_md, remote_port)) + : __clobber_all); +} + +SEC("sk_msg") +__description("valid access local_port in SK_MSG") +__success +__naked void local_port_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_port]); \ + exit; \ +" : + : __imm_const(sk_msg_md_local_port, offsetof(struct sk_msg_md, local_port)) + : __clobber_all); +} + +SEC("sk_skb") +__description("valid access remote_ip6 in SK_MSG") +__success +__naked void remote_ip6_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 +
%[sk_msg_md_remote_ip6_0]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_1]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_2]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_3]); \ + exit; \ +" : + : __imm_const(sk_msg_md_remote_ip6_0, offsetof(struct sk_msg_md, remote_ip6[0])), + __imm_const(sk_msg_md_remote_ip6_1, offsetof(struct sk_msg_md, remote_ip6[1])), + __imm_const(sk_msg_md_remote_ip6_2, offsetof(struct sk_msg_md, remote_ip6[2])), + __imm_const(sk_msg_md_remote_ip6_3, offsetof(struct sk_msg_md, remote_ip6[3])) + : __clobber_all); +} + +SEC("sk_skb") +__description("valid access local_ip6 in SK_MSG") +__success +__naked void local_ip6_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_0]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_1]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_2]); \ + r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_3]); \ + exit; \ +" : + : __imm_const(sk_msg_md_local_ip6_0, offsetof(struct sk_msg_md, local_ip6[0])), + __imm_const(sk_msg_md_local_ip6_1, offsetof(struct sk_msg_md, local_ip6[1])), + __imm_const(sk_msg_md_local_ip6_2, offsetof(struct sk_msg_md, local_ip6[2])), + __imm_const(sk_msg_md_local_ip6_3, offsetof(struct sk_msg_md, local_ip6[3])) + : __clobber_all); +} + +SEC("sk_msg") +__description("valid access size in SK_MSG") +__success +__naked void access_size_in_sk_msg(void) +{ + asm volatile (" \ + r0 = *(u32*)(r1 + %[sk_msg_md_size]); \ + exit; \ +" : + : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size)) + : __clobber_all); +} + +SEC("sk_msg") +__description("invalid 64B read of size in SK_MSG") +__failure __msg("invalid bpf_context access") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void of_size_in_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + %[sk_msg_md_size]); \ + exit; \ +" : + : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size)) + : __clobber_all); +} + +SEC("sk_msg") +__description("invalid read past end of SK_MSG") +__failure __msg("invalid bpf_context access") +__naked void past_end_of_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__imm_0]); \ + exit; \ +" : + : __imm_const(__imm_0, offsetof(struct sk_msg_md, size) + 4) + : __clobber_all); +} + +SEC("sk_msg") +__description("invalid read offset in SK_MSG") +__failure __msg("invalid bpf_context access") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void read_offset_in_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__imm_0]); \ + exit; \ +" : + : __imm_const(__imm_0, offsetof(struct sk_msg_md, family) + 1) + : __clobber_all); +} + +SEC("sk_msg") +__description("direct packet read for SK_MSG") +__success +__naked void packet_read_for_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + %[sk_msg_md_data]); \ + r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \ + r0 = r2; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)), + __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end)) + : __clobber_all); +} + +SEC("sk_msg") +__description("direct packet write for SK_MSG") +__success +__naked void packet_write_for_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + %[sk_msg_md_data]); \ + r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \ + r0 = r2; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + *(u8*)(r2 + 0) = r2; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)), + __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end)) + : __clobber_all); +} + 
+SEC("sk_msg") +__description("overlapping checks for direct packet access SK_MSG") +__success +__naked void direct_packet_access_sk_msg(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + %[sk_msg_md_data]); \ + r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \ + r0 = r2; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r1 = r2; \ + r1 += 6; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u16*)(r2 + 6); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)), + __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c deleted file mode 100644 index c6c69220a569..000000000000 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c +++ /dev/null @@ -1,181 +0,0 @@ -{ - "valid access family in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, family)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "valid access remote_ip4 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "valid access local_ip4 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "valid access remote_port in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "valid access local_port in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "valid access remote_ip6 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, -}, -{ - "valid access local_ip6 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, -}, -{ - "valid access size in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, size)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "invalid 64B read of size in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, size)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "invalid read past end of 
SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, size) + 4), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "invalid read offset in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, family) + 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "direct packet read for SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "direct packet write for SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, -{ - "overlapping checks for direct packet access SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, -}, From 84988478fb2c9068c6adf107eb630c48c00ff690 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:58 +0200 Subject: [PATCH 122/260] selftests/bpf: verifier/direct_stack_access_wraparound.c converted to inline assembly Test verifier/direct_stack_access_wraparound.c automatically converted to use inline assembly. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-18-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../verifier_direct_stack_access_wraparound.c | 56 +++++++++++++++++++ .../verifier/direct_stack_access_wraparound.c | 40 ------------- 3 files changed, 58 insertions(+), 40 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c delete mode 100644 tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 29351c774ee2..8c33b8792a0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_cgroup_storage.skel.h" #include "verifier_const_or.skel.h" #include "verifier_ctx_sk_msg.skel.h" +#include "verifier_direct_stack_access_wraparound.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -50,3 +51,4 @@ void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); } void test_verifier_const_or(void) { RUN(verifier_const_or); } void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } +void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c new file mode 100644 index 000000000000..c538c6893552 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("direct stack access with 32-bit wraparound. test1") +__failure __msg("fp pointer and 2147483647") +__failure_unpriv +__naked void with_32_bit_wraparound_test1(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += 0x7fffffff; \ + r1 += 0x7fffffff; \ + w0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("direct stack access with 32-bit wraparound. test2") +__failure __msg("fp pointer and 1073741823") +__failure_unpriv +__naked void with_32_bit_wraparound_test2(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += 0x3fffffff; \ + r1 += 0x3fffffff; \ + w0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("direct stack access with 32-bit wraparound. test3") +__failure __msg("fp pointer offset 1073741822") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void with_32_bit_wraparound_test3(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += 0x1fffffff; \ + r1 += 0x1fffffff; \ + w0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c deleted file mode 100644 index 698e3779fdd2..000000000000 --- a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c +++ /dev/null @@ -1,40 +0,0 @@ -{ - "direct stack access with 32-bit wraparound.
test1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "fp pointer and 2147483647", - .result = REJECT -}, -{ - "direct stack access with 32-bit wraparound. test2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "fp pointer and 1073741823", - .result = REJECT -}, -{ - "direct stack access with 32-bit wraparound. test3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "fp pointer offset 1073741822", - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result = REJECT -}, From 01a0925531a4ec962c88ceccb464c1c1178e9d81 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:54:59 +0200 Subject: [PATCH 123/260] selftests/bpf: verifier/div0.c converted to inline assembly Test verifier/div0.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-19-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_div0.c | 213 ++++++++++++++++++ tools/testing/selftests/bpf/verifier/div0.c | 184 --------------- 3 files changed, 215 insertions(+), 184 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_div0.c delete mode 100644 tools/testing/selftests/bpf/verifier/div0.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8c33b8792a0a..b172c41cdc61 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -15,6 +15,7 @@ #include "verifier_const_or.skel.h" #include "verifier_ctx_sk_msg.skel.h" #include "verifier_direct_stack_access_wraparound.skel.h" +#include "verifier_div0.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -52,3 +53,4 @@ void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); } void test_verifier_const_or(void) { RUN(verifier_const_or); } void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } +void test_verifier_div0(void) { RUN(verifier_div0); } diff --git a/tools/testing/selftests/bpf/progs/verifier_div0.c b/tools/testing/selftests/bpf/progs/verifier_div0.c new file mode 100644 index 000000000000..cca5ea18fc28 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_div0.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/div0.c */ + +#include +#include +#include "bpf_misc.h" + +SEC("socket") +__description("DIV32 by 0, zero check 1") +__success __success_unpriv __retval(42) +__naked void by_0_zero_check_1_1(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = 1; \ + w2 /= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") 
+__description("DIV32 by 0, zero check 2") +__success __success_unpriv __retval(42) +__naked void by_0_zero_check_2_1(void) +{ + asm volatile (" \ + w0 = 42; \ + r1 = 0xffffffff00000000LL ll; \ + w2 = 1; \ + w2 /= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("DIV64 by 0, zero check") +__success __success_unpriv __retval(42) +__naked void div64_by_0_zero_check(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = 1; \ + r2 /= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOD32 by 0, zero check 1") +__success __success_unpriv __retval(42) +__naked void by_0_zero_check_1_2(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = 1; \ + w2 %%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOD32 by 0, zero check 2") +__success __success_unpriv __retval(42) +__naked void by_0_zero_check_2_2(void) +{ + asm volatile (" \ + w0 = 42; \ + r1 = 0xffffffff00000000LL ll; \ + w2 = 1; \ + w2 %%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOD64 by 0, zero check") +__success __success_unpriv __retval(42) +__naked void mod64_by_0_zero_check(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = 1; \ + r2 %%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("DIV32 by 0, zero check ok, cls") +__success __retval(8) +__naked void _0_zero_check_ok_cls_1(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 2; \ + w2 = 16; \ + w2 /= w1; \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("DIV32 by 0, zero check 1, cls") +__success __retval(0) +__naked void _0_zero_check_1_cls_1(void) +{ + asm volatile (" \ + w1 = 0; \ + w0 = 1; \ + w0 /= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("DIV32 by 0, zero check 2, cls") +__success __retval(0) +__naked void _0_zero_check_2_cls_1(void) +{ + asm volatile (" \ + r1 = 0xffffffff00000000LL ll; \ + w0 = 1; \ + w0 /= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("DIV64 by 0, zero check, cls") +__success __retval(0) +__naked void by_0_zero_check_cls(void) +{ + asm volatile (" \ + w1 = 0; \ + w0 = 1; \ + r0 /= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("MOD32 by 0, zero check ok, cls") +__success __retval(2) +__naked void _0_zero_check_ok_cls_2(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 3; \ + w2 = 5; \ + w2 %%= w1; \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("MOD32 by 0, zero check 1, cls") +__success __retval(1) +__naked void _0_zero_check_1_cls_2(void) +{ + asm volatile (" \ + w1 = 0; \ + w0 = 1; \ + w0 %%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("MOD32 by 0, zero check 2, cls") +__success __retval(1) +__naked void _0_zero_check_2_cls_2(void) +{ + asm volatile (" \ + r1 = 0xffffffff00000000LL ll; \ + w0 = 1; \ + w0 %%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("MOD64 by 0, zero check 1, cls") +__success __retval(2) +__naked void _0_zero_check_1_cls_3(void) +{ + asm volatile (" \ + w1 = 0; \ + w0 = 2; \ + r0 %%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("MOD64 by 0, zero check 2, cls") +__success __retval(-1) +__naked void _0_zero_check_2_cls_3(void) +{ + asm volatile (" \ + w1 = 0; \ + w0 = -1; \ + r0 %%= r1; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/div0.c b/tools/testing/selftests/bpf/verifier/div0.c deleted file 
mode 100644 index 7685edfbcf71..000000000000 --- a/tools/testing/selftests/bpf/verifier/div0.c +++ /dev/null @@ -1,184 +0,0 @@ -{ - "DIV32 by 0, zero check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "DIV32 by 0, zero check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "DIV64 by 0, zero check", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "MOD32 by 0, zero check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "MOD32 by 0, zero check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "MOD64 by 0, zero check", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "DIV32 by 0, zero check ok, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 2), - BPF_MOV32_IMM(BPF_REG_2, 16), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 8, -}, -{ - "DIV32 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "DIV32 by 0, zero check 2, cls", - .insns = { - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "DIV64 by 0, zero check, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "MOD32 by 0, zero check ok, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 3), - BPF_MOV32_IMM(BPF_REG_2, 5), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2, -}, -{ - "MOD32 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, -}, -{ - "MOD32 by 0, zero check 2, cls", - .insns = { - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_0, 1), - 
BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, -}, -{ - "MOD64 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 2), - BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2, -}, -{ - "MOD64 by 0, zero check 2, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, -1), - BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = -1, -}, From 9553de70a8412a07b16703449b4b4c4e5d37c388 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:00 +0200 Subject: [PATCH 124/260] selftests/bpf: verifier/div_overflow.c converted to inline assembly Test verifier/div_overflow.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-20-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_div_overflow.c | 144 ++++++++++++++++++ .../selftests/bpf/verifier/div_overflow.c | 110 ------------- 3 files changed, 146 insertions(+), 110 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_div_overflow.c delete mode 100644 tools/testing/selftests/bpf/verifier/div_overflow.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index b172c41cdc61..d92211b4c3af 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -16,6 +16,7 @@ #include "verifier_ctx_sk_msg.skel.h" #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" +#include "verifier_div_overflow.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -54,3 +55,4 @@ void test_verifier_const_or(void) { RUN(verifier_const_or); } void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } +void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c new file mode 100644 index 000000000000..458984da804c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/div_overflow.c */ + +#include <linux/bpf.h> +#include <limits.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero.
+ */ + +SEC("tc") +__description("DIV32 overflow, check 1") +__success __retval(0) +__naked void div32_overflow_check_1(void) +{ + asm volatile (" \ + w1 = -1; \ + w0 = %[int_min]; \ + w0 /= w1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("tc") +__description("DIV32 overflow, check 2") +__success __retval(0) +__naked void div32_overflow_check_2(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 /= -1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("tc") +__description("DIV64 overflow, check 1") +__success __retval(0) +__naked void div64_overflow_check_1(void) +{ + asm volatile (" \ + r1 = -1; \ + r2 = %[llong_min] ll; \ + r2 /= r1; \ + w0 = 0; \ + if r0 == r2 goto l0_%=; \ + w0 = 1; \ +l0_%=: exit; \ +" : + : __imm_const(llong_min, LLONG_MIN) + : __clobber_all); +} + +SEC("tc") +__description("DIV64 overflow, check 2") +__success __retval(0) +__naked void div64_overflow_check_2(void) +{ + asm volatile (" \ + r1 = %[llong_min] ll; \ + r1 /= -1; \ + w0 = 0; \ + if r0 == r1 goto l0_%=; \ + w0 = 1; \ +l0_%=: exit; \ +" : + : __imm_const(llong_min, LLONG_MIN) + : __clobber_all); +} + +SEC("tc") +__description("MOD32 overflow, check 1") +__success __retval(INT_MIN) +__naked void mod32_overflow_check_1(void) +{ + asm volatile (" \ + w1 = -1; \ + w0 = %[int_min]; \ + w0 %%= w1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("tc") +__description("MOD32 overflow, check 2") +__success __retval(INT_MIN) +__naked void mod32_overflow_check_2(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 %%= -1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("tc") +__description("MOD64 overflow, check 1") +__success __retval(1) +__naked void mod64_overflow_check_1(void) +{ + asm volatile (" \ + r1 = -1; \ + r2 = %[llong_min] ll; \ + r3 = r2; \ + r2 %%= r1; \ + w0 = 0; \ + if r3 != r2 goto l0_%=; \ + w0 = 1; \ +l0_%=: exit; \ +" : + : __imm_const(llong_min, LLONG_MIN) + : __clobber_all); +} + +SEC("tc") +__description("MOD64 overflow, check 2") +__success __retval(1) +__naked void mod64_overflow_check_2(void) +{ + asm volatile (" \ + r2 = %[llong_min] ll; \ + r3 = r2; \ + r2 %%= -1; \ + w0 = 0; \ + if r3 != r2 goto l0_%=; \ + w0 = 1; \ +l0_%=: exit; \ +" : + : __imm_const(llong_min, LLONG_MIN) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c deleted file mode 100644 index acab4f00819f..000000000000 --- a/tools/testing/selftests/bpf/verifier/div_overflow.c +++ /dev/null @@ -1,110 +0,0 @@ -/* Just make sure that JITs used udiv/umod as otherwise we get - * an exception from INT_MIN/-1 overflow similarly as with div - * by zero. 
- */ -{ - "DIV32 overflow, check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "DIV32 overflow, check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "DIV64 overflow, check 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), - BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "DIV64 overflow, check 2", - .insns = { - BPF_LD_IMM64(BPF_REG_1, LLONG_MIN), - BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, -}, -{ - "MOD32 overflow, check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = INT_MIN, -}, -{ - "MOD32 overflow, check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = INT_MIN, -}, -{ - "MOD64 overflow, check 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, -}, -{ - "MOD64 overflow, check 2", - .insns = { - BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, -}, From b37d776b431ec056075feeeaddcdee4512c522f6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:01 +0200 Subject: [PATCH 125/260] selftests/bpf: verifier/helper_access_var_len.c converted to inline assembly Test verifier/helper_access_var_len.c automatically converted to use inline assembly. 
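The larger conversions also replace the old runner's fd-fixup mechanism: where a struct bpf_test loaded a map with BPF_LD_MAP_FD(BPF_REG_1, 0) and relied on a .fixup_map_hash_48b-style array to patch in the real fd at load time, the converted program declares the map itself and takes its address directly in the assembly. The pattern, as it appears in this file:

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 1);
            __type(key, long long);
            __type(value, struct test_val);
    } map_hash_48b SEC(".maps");

    /* in the asm body: */
    r1 = %[map_hash_48b] ll;        /* 'll' marks a 64-bit immediate load */
    call %[bpf_map_lookup_elem];

    /* in the constraint list: */
    : __imm_addr(map_hash_48b),
      __imm(bpf_map_lookup_elem)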
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-21-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../progs/verifier_helper_access_var_len.c | 825 ++++++++++++++++++ .../bpf/verifier/helper_access_var_len.c | 650 -------------- 3 files changed, 827 insertions(+), 650 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c delete mode 100644 tools/testing/selftests/bpf/verifier/helper_access_var_len.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d92211b4c3af..22d7e152c05e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -17,6 +17,7 @@ #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_helper_access_var_len.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -56,3 +57,4 @@ void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c new file mode 100644 index 000000000000..50c6b22606f6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c @@ -0,0 +1,825 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/helper_access_var_len.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); +} map_ringbuf SEC(".maps"); + +SEC("tracepoint") +__description("helper access to variable memory: stack, bitwise AND + JMP, correct bounds") +__success +__naked void bitwise_and_jmp_correct_bounds(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -64; \ + r0 = 0; \ + *(u64*)(r10 - 64) = r0; \ + *(u64*)(r10 - 56) = r0; \ + *(u64*)(r10 - 48) = r0; \ + *(u64*)(r10 - 40) = r0; \ + *(u64*)(r10 - 32) = r0; \ + *(u64*)(r10 - 24) = r0; \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + r2 = 16; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + r2 &= 64; \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("socket") +__description("helper access to variable memory: stack, bitwise AND, zero included") +/* in privileged mode reads from uninitialized stack locations are permitted */ +__success __failure_unpriv +__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64") +__retval(0) +__naked void
stack_bitwise_and_zero_included(void) +{ + asm volatile (" \ + /* set max stack size */ \ + r6 = 0; \ + *(u64*)(r10 - 128) = r6; \ + /* set r3 to a random value */ \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + /* use bitwise AND to limit r3 range to [0, 64] */\ + r3 &= 64; \ + r1 = %[map_ringbuf] ll; \ + r2 = r10; \ + r2 += -64; \ + r4 = 0; \ + /* Call bpf_ringbuf_output(), it is one of a few helper functions with\ + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\ + * For unpriv this should signal an error, because memory at &fp[-64] is\ + * not initialized. \ + */ \ + call %[bpf_ringbuf_output]; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_ringbuf_output), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, bitwise AND + JMP, wrong max") +__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__naked void bitwise_and_jmp_wrong_max(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + 8); \ + r1 = r10; \ + r1 += -64; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + r2 &= 65; \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP, correct bounds") +__success +__naked void memory_stack_jmp_correct_bounds(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -64; \ + r0 = 0; \ + *(u64*)(r10 - 64) = r0; \ + *(u64*)(r10 - 56) = r0; \ + *(u64*)(r10 - 48) = r0; \ + *(u64*)(r10 - 40) = r0; \ + *(u64*)(r10 - 32) = r0; \ + *(u64*)(r10 - 24) = r0; \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + r2 = 16; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + if r2 > 64 goto l0_%=; \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP (signed), correct bounds") +__success +__naked void stack_jmp_signed_correct_bounds(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -64; \ + r0 = 0; \ + *(u64*)(r10 - 64) = r0; \ + *(u64*)(r10 - 56) = r0; \ + *(u64*)(r10 - 48) = r0; \ + *(u64*)(r10 - 40) = r0; \ + *(u64*)(r10 - 32) = r0; \ + *(u64*)(r10 - 24) = r0; \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + r2 = 16; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + if r2 s> 64 goto l0_%=; \ + r4 = 0; \ + if r4 s>= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP, bounds + offset") +__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__naked void memory_stack_jmp_bounds_offset(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + 8); \ + r1 = r10; \ + r1 += -64; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + if r2 > 64 goto l0_%=; \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r2 += 1; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP, wrong max") +__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__naked void memory_stack_jmp_wrong_max(void) +{ + asm 
volatile (" \ + r2 = *(u64*)(r1 + 8); \ + r1 = r10; \ + r1 += -64; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + if r2 > 65 goto l0_%=; \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP, no max check") +__failure +/* because max wasn't checked, signed min is negative */ +__msg("R2 min value is negative, either use unsigned or 'var &= const'") +__naked void stack_jmp_no_max_check(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + 8); \ + r1 = r10; \ + r1 += -64; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + r4 = 0; \ + if r4 >= r2 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("socket") +__description("helper access to variable memory: stack, JMP, no min check") +/* in privileged mode reads from uninitialized stack locations are permitted */ +__success __failure_unpriv +__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64") +__retval(0) +__naked void stack_jmp_no_min_check(void) +{ + asm volatile (" \ + /* set max stack size */ \ + r6 = 0; \ + *(u64*)(r10 - 128) = r6; \ + /* set r3 to a random value */ \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + /* use JMP to limit r3 range to [0, 64] */ \ + if r3 > 64 goto l0_%=; \ + r1 = %[map_ringbuf] ll; \ + r2 = r10; \ + r2 += -64; \ + r4 = 0; \ + /* Call bpf_ringbuf_output(), it is one of a few helper functions with\ + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\ + * For unpriv this should signal an error, because memory at &fp[-64] is\ + * not initialized. 
\ + */ \ + call %[bpf_ringbuf_output]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_ringbuf_output), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: stack, JMP (signed), no min check") +__failure __msg("R2 min value is negative") +__naked void jmp_signed_no_min_check(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + 8); \ + r1 = r10; \ + r1 += -64; \ + *(u64*)(r1 - 128) = r2; \ + r2 = *(u64*)(r1 - 128); \ + if r2 s> 64 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: map, JMP, correct bounds") +__success +__naked void memory_map_jmp_correct_bounds(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = %[sizeof_test_val]; \ + *(u64*)(r10 - 128) = r2; \ + r2 = *(u64*)(r10 - 128); \ + if r2 s> %[sizeof_test_val] goto l1_%=; \ + r4 = 0; \ + if r4 s>= r2 goto l1_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l1_%=: r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(sizeof_test_val, sizeof(struct test_val)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: map, JMP, wrong max") +__failure __msg("invalid access to map value, value_size=48 off=0 size=49") +__naked void memory_map_jmp_wrong_max(void) +{ + asm volatile (" \ + r6 = *(u64*)(r1 + 8); \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = r6; \ + *(u64*)(r10 - 128) = r2; \ + r2 = *(u64*)(r10 - 128); \ + if r2 s> %[__imm_0] goto l1_%=; \ + r4 = 0; \ + if r4 s>= r2 goto l1_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l1_%=: r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) + 1) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: map adjusted, JMP, correct bounds") +__success +__naked void map_adjusted_jmp_correct_bounds(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += 20; \ + r2 = %[sizeof_test_val]; \ + *(u64*)(r10 - 128) = r2; \ + r2 = *(u64*)(r10 - 128); \ + if r2 s> %[__imm_0] goto l1_%=; \ + r4 = 0; \ + if r4 s>= r2 goto l1_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l1_%=: r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - 20), + __imm_const(sizeof_test_val, sizeof(struct test_val)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: map adjusted, JMP, wrong max") +__failure __msg("R1 min value is outside of the allowed memory range") +__naked void map_adjusted_jmp_wrong_max(void) +{ + asm volatile (" \ + r6 = *(u64*)(r1 + 8); \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 
goto l0_%=; \ + r1 = r0; \ + r1 += 20; \ + r2 = r6; \ + *(u64*)(r10 - 128) = r2; \ + r2 = *(u64*)(r10 - 128); \ + if r2 s> %[__imm_0] goto l1_%=; \ + r4 = 0; \ + if r4 s>= r2 goto l1_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l1_%=: r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - 19) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +__naked void ptr_to_mem_or_null_1(void) +{ + asm volatile (" \ + r1 = 0; \ + r2 = 0; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ + exit; \ +" : + : __imm(bpf_csum_diff) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)") +__failure __msg("R1 type=scalar expected=fp") +__naked void ptr_to_mem_or_null_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + 0); \ + r1 = 0; \ + *(u64*)(r10 - 128) = r2; \ + r2 = *(u64*)(r10 - 128); \ + r2 &= 64; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ + exit; \ +" : + : __imm(bpf_csum_diff) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +__naked void ptr_to_mem_or_null_3(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -8; \ + r2 = 0; \ + *(u64*)(r1 + 0) = r2; \ + r2 &= 8; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ + exit; \ +" : + : __imm(bpf_csum_diff) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +__naked void ptr_to_mem_or_null_4(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = 0; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +__naked void ptr_to_mem_or_null_5(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = *(u64*)(r0 + 0); \ + if r2 > 8 goto l0_%=; \ + r1 = r10; \ + r1 += -8; \ + *(u64*)(r1 + 0) = r2; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +__naked void ptr_to_mem_or_null_6(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = *(u64*)(r0 + 0); \ + if r2 > 8 goto l0_%=; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm(bpf_map_lookup_elem), + 
__imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)") +__success __retval(0) +/* csum_diff of 64-byte packet */ +__flag(BPF_F_ANY_ALIGNMENT) +__naked void ptr_to_mem_or_null_7(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r0 = r6; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r1 = r6; \ + r2 = *(u64*)(r6 + 0); \ + if r2 > 8 goto l0_%=; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)") +__failure __msg("R1 type=scalar expected=fp") +__naked void ptr_to_mem_or_null_8(void) +{ + asm volatile (" \ + r1 = 0; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)") +__failure __msg("R1 type=scalar expected=fp") +__naked void ptr_to_mem_or_null_9(void) +{ + asm volatile (" \ + r1 = 0; \ + r2 = 1; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)") +__success +__naked void ptr_to_mem_or_null_10(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -8; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)") +__success +__naked void ptr_to_mem_or_null_11(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)") +__success +__naked void ptr_to_mem_or_null_12(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = *(u64*)(r0 + 0); \ + if r2 > 8 goto l0_%=; \ + r1 = r10; \ + r1 += -8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)") +__success +__naked void ptr_to_mem_or_null_13(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 
= r0; \ + r2 = *(u64*)(r0 + 0); \ + if r2 > 8 goto l0_%=; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("helper access to variable memory: 8 bytes leak") +/* in privileged mode reads from uninitialized stack locations are permitted */ +__success __failure_unpriv +__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64") +__retval(0) +__naked void variable_memory_8_bytes_leak(void) +{ + asm volatile (" \ + /* set max stack size */ \ + r6 = 0; \ + *(u64*)(r10 - 128) = r6; \ + /* set r3 to a random value */ \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r1 = %[map_ringbuf] ll; \ + r2 = r10; \ + r2 += -64; \ + r0 = 0; \ + *(u64*)(r10 - 64) = r0; \ + *(u64*)(r10 - 56) = r0; \ + *(u64*)(r10 - 48) = r0; \ + *(u64*)(r10 - 40) = r0; \ + /* Note: fp[-32] left uninitialized */ \ + *(u64*)(r10 - 24) = r0; \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + /* Limit r3 range to [1, 64] */ \ + r3 &= 63; \ + r3 += 1; \ + r4 = 0; \ + /* Call bpf_ringbuf_output(), it is one of a few helper functions with\ + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\ + * For unpriv this should signal an error, because memory region [1, 64]\ + * at &fp[-64] is not fully initialized. \ + */ \ + call %[bpf_ringbuf_output]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_ringbuf_output), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to variable memory: 8 bytes no leak (init memory)") +__success +__naked void bytes_no_leak_init_memory(void) +{ + asm volatile (" \ + r1 = r10; \ + r0 = 0; \ + r0 = 0; \ + *(u64*)(r10 - 64) = r0; \ + *(u64*)(r10 - 56) = r0; \ + *(u64*)(r10 - 48) = r0; \ + *(u64*)(r10 - 40) = r0; \ + *(u64*)(r10 - 32) = r0; \ + *(u64*)(r10 - 24) = r0; \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + r1 += -64; \ + r2 = 0; \ + r2 &= 32; \ + r2 += 32; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + r1 = *(u64*)(r10 - 16); \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c deleted file mode 100644 index 9c4885885aba..000000000000 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ /dev/null @@ -1,650 +0,0 @@ -{ - "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = 
ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, bitwise AND, zero included", - .insns = { - /* set max stack size */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), - /* set r3 to a random value */ - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - /* use bitwise AND to limit r3 range to [0, 64] */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_MOV64_IMM(BPF_REG_4, 0), - /* Call bpf_ringbuf_output(), it is one of a few helper functions with - * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. - * For unpriv this should signal an error, because memory at &fp[-64] is - * not initialized. - */ - BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 4 }, - .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", - .result_unpriv = REJECT, - /* in privileged mode reads from uninitialized stack locations are permitted */ - .result = ACCEPT, -}, -{ - "helper access to variable memory: stack, bitwise AND + JMP, wrong max", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect access to stack R1 off=-64 size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP (signed), correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 
-128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP, bounds + offset", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect access to stack R1 off=-64 size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP, wrong max", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect access to stack R1 off=-64 size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP, no max check", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - /* because max wasn't checked, signed min is negative */ - .errstr = "R2 min value is negative, either use unsigned or 'var &= const'", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: stack, JMP, no min check", - .insns = { - /* set max stack size */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), - /* set r3 to a random value */ - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - /* use JMP to limit r3 range to [0, 64] */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_MOV64_IMM(BPF_REG_4, 0), - /* Call bpf_ringbuf_output(), it is one of a few helper functions with - * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. - * For unpriv this should signal an error, because memory at &fp[-64] is - * not initialized. 
- */ - BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 4 }, - .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", - .result_unpriv = REJECT, - /* in privileged mode reads from uninitialized stack locations are permitted */ - .result = ACCEPT, -}, -{ - "helper access to variable memory: stack, JMP (signed), no min check", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: map, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: map, JMP, wrong max", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 4 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=49", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: map adjusted, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 
0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: map adjusted, JMP, wrong max", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 4 }, - .errstr = "R1 min value is outside of the allowed memory range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=scalar expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer 
(ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 0 /* csum_diff of 64-byte packet */, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=scalar expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=scalar expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = 
BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to variable memory: 8 bytes leak", - .insns = { - /* set max stack size */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), - /* set r3 to a random value */ - BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - /* Note: fp[-32] left uninitialized */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - /* Limit r3 range to [1, 64] */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), - BPF_MOV64_IMM(BPF_REG_4, 0), - /* Call bpf_ringbuf_output(), it is one of a few helper functions with - * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. - * For unpriv this should signal an error, because memory region [1, 64] - * at &fp[-64] is not fully initialized. 
- */ - BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 3 }, - .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64", - .result_unpriv = REJECT, - /* in privileged mode reads from uninitialized stack locations are permitted */ - .result = ACCEPT, -}, -{ - "helper access to variable memory: 8 bytes no leak (init memory)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, From fb179fe69e6a7bf66232d72a77f53ec10c026ee7 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:02 +0200 Subject: [PATCH 126/260] selftests/bpf: verifier/helper_packet_access.c converted to inline assembly Test verifier/helper_packet_access.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-22-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_helper_packet_access.c | 550 ++++++++++++++++++ .../bpf/verifier/helper_packet_access.c | 460 --------------- 3 files changed, 552 insertions(+), 460 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c delete mode 100644 tools/testing/selftests/bpf/verifier/helper_packet_access.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 22d7e152c05e..1cd162daf150 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -18,6 +18,7 @@ #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" #include "verifier_helper_access_var_len.skel.h" +#include "verifier_helper_packet_access.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -58,3 +59,4 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } +void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c new file mode 100644 index 000000000000..74f5f9cd153d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/helper_packet_access.c */ + 
+#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("xdp") +__description("helper access to packet: test1, valid packet_ptr range") +__success __retval(0) +__naked void test1_valid_packet_ptr_range(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + r3 = r2; \ + r4 = 0; \ + call %[bpf_map_update_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_update_elem), + __imm_addr(map_hash_8b), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("helper access to packet: test2, unchecked packet_ptr") +__failure __msg("invalid access to packet") +__naked void packet_test2_unchecked_packet_ptr(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)) + : __clobber_all); +} + +SEC("xdp") +__description("helper access to packet: test3, variable add") +__success __retval(0) +__naked void to_packet_test3_variable_add(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r4 = r2; \ + r4 += 8; \ + if r4 > r3 goto l0_%=; \ + r5 = *(u8*)(r2 + 0); \ + r4 = r2; \ + r4 += r5; \ + r5 = r4; \ + r5 += 8; \ + if r5 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + r2 = r4; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("helper access to packet: test4, packet_ptr with bad range") +__failure __msg("invalid access to packet") +__naked void packet_ptr_with_bad_range_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r4 = r2; \ + r4 += 4; \ + if r4 > r3 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("helper access to packet: test5, packet_ptr with too short range") +__failure __msg("invalid access to packet") +__naked void ptr_with_too_short_range_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r2 += 1; \ + r4 = r2; \ + r4 += 7; \ + if r4 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test6, cls valid packet_ptr range") +__success __retval(0) +__naked void cls_valid_packet_ptr_range(void) +{ + asm volatile (" \ + r2
= *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + r3 = r2; \ + r4 = 0; \ + call %[bpf_map_update_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_update_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test7, cls unchecked packet_ptr") +__failure __msg("invalid access to packet") +__naked void test7_cls_unchecked_packet_ptr(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test8, cls variable add") +__success __retval(0) +__naked void packet_test8_cls_variable_add(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r4 = r2; \ + r4 += 8; \ + if r4 > r3 goto l0_%=; \ + r5 = *(u8*)(r2 + 0); \ + r4 = r2; \ + r4 += r5; \ + r5 = r4; \ + r5 += 8; \ + if r5 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + r2 = r4; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test9, cls packet_ptr with bad range") +__failure __msg("invalid access to packet") +__naked void packet_ptr_with_bad_range_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r4 = r2; \ + r4 += 4; \ + if r4 > r3 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test10, cls packet_ptr with too short range") +__failure __msg("invalid access to packet") +__naked void ptr_with_too_short_range_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r2 += 1; \ + r4 = r2; \ + r4 += 7; \ + if r4 > r3 goto l0_%=; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test11, cls unsuitable helper 1") +__failure __msg("helper access to the packet") +__naked void test11_cls_unsuitable_helper_1(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r3 = r6; \ + r3 += 7; \ + if r3 > r7 goto l0_%=; \ + r2 = 0; \ + r4 = 42; \ + r5 = 0; \ + call %[bpf_skb_store_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : 
__imm(bpf_skb_store_bytes), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test12, cls unsuitable helper 2") +__failure __msg("helper access to the packet") +__naked void test12_cls_unsuitable_helper_2(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r3 = r6; \ + r6 += 8; \ + if r6 > r7 goto l0_%=; \ + r2 = 0; \ + r4 = 4; \ + call %[bpf_skb_load_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test13, cls helper ok") +__success __retval(0) +__naked void packet_test13_cls_helper_ok(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r6; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test14, cls helper ok sub") +__success __retval(0) +__naked void test14_cls_helper_ok_sub(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 -= 4; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test15, cls helper fail sub") +__failure __msg("invalid access to packet") +__naked void test15_cls_helper_fail_sub(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 -= 12; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test16, cls helper fail range 1") +__failure __msg("invalid access to packet") +__naked void cls_helper_fail_range_1(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r6; \ + r2 = 8; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test17, cls helper fail range 2") +__failure __msg("R2 min value is negative") +__naked void 
cls_helper_fail_range_2(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r6; \ + r2 = -9; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test18, cls helper fail range 3") +__failure __msg("R2 min value is negative") +__naked void cls_helper_fail_range_3(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r6; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__imm_0, ~0), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test19, cls helper range zero") +__success __retval(0) +__naked void test19_cls_helper_range_zero(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test20, pkt end as input") +__failure __msg("R1 type=pkt_end expected=fp") +__naked void test20_pkt_end_as_input(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r1 = r7; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("helper access to packet: test21, wrong reg") +__failure __msg("invalid access to packet") +__naked void to_packet_test21_wrong_reg(void) +{ + asm volatile (" \ + r6 = *(u32*)(r1 + %[__sk_buff_data]); \ + r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r6 += 1; \ + r1 = r6; \ + r1 += 7; \ + if r1 > r7 goto l0_%=; \ + r2 = 4; \ + r3 = 0; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_csum_diff]; \ + r0 = 0; \ +l0_%=: exit; \ +" : + : __imm(bpf_csum_diff), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/helper_packet_access.c b/tools/testing/selftests/bpf/verifier/helper_packet_access.c deleted file mode 100644 index ae54587e9829..000000000000 --- a/tools/testing/selftests/bpf/verifier/helper_packet_access.c +++ /dev/null @@ -1,460 +0,0 @@ -{ - "helper access to packet: test1, valid packet_ptr range", - .insns 
= { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result_unpriv = ACCEPT, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "helper access to packet: test2, unchecked packet_ptr", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "helper access to packet: test3, variable add", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "helper access to packet: test4, packet_ptr with bad range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 7 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "helper access to packet: test5, packet_ptr with too short range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "helper access to packet: test6, cls valid packet_ptr range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, 
BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test7, cls unchecked packet_ptr", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test8, cls variable add", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test9, cls packet_ptr with bad range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 7 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test10, cls packet_ptr with too short range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test11, cls unsuitable helper 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_4, 42), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "helper access to the packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test12, cls unsuitable helper 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_4, 4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "helper access to the packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test13, cls helper ok", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test14, cls helper ok sub", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test15, cls helper fail sub", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test16, cls helper fail range 1", - .insns = { - BPF_LDX_MEM(BPF_W, 
BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test17, cls helper fail range 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, -9), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R2 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test18, cls helper fail range 3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, ~0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R2 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test19, cls helper range zero", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "helper access to packet: test20, pkt end as input", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - 
BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=pkt_end expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test21, wrong reg",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},

From 77aa2563cb44a6241990cf4f082b55ee6f0a0623 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:55:03 +0200
Subject: [PATCH 127/260] selftests/bpf: verifier/helper_restricted.c converted to inline assembly

Test verifier/helper_restricted.c automatically converted to use inline assembly.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-23-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../bpf/progs/verifier_helper_restricted.c | 279 ++++++++++++++++++
 .../bpf/verifier/helper_restricted.c | 196 ------------
 3 files changed, 281 insertions(+), 196 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/helper_restricted.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 1cd162daf150..02983d1de218 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -19,6 +19,7 @@
 #include "verifier_div_overflow.skel.h"
 #include "verifier_helper_access_var_len.skel.h"
 #include "verifier_helper_packet_access.skel.h"
+#include "verifier_helper_restricted.skel.h"

 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -60,3 +61,4 @@ void test_verifier_div0(void) { RUN(verifier_div0); }
 void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
 void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
 void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
+void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }

diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
new file mode 100644
index 000000000000..0ede0ccd090c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_restricted.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+struct timer {
+ struct bpf_timer t;
+};
+
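+/* Array map whose value embeds a struct bpf_timer; the bpf_timer_init
+ * tests below look up an element here and pass it to bpf_timer_init().
+ */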
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer);
+} map_timer SEC(".maps");
+
+SEC("kprobe")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_kprobe_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_perf_event_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_raw_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_kprobe_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_perf_event_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_raw_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_timer] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = %[map_timer] ll; \ + r3 = 1; \ +l0_%=: call %[bpf_timer_init]; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_timer_init), + __imm_addr(map_timer) + : __clobber_all); +} + +SEC("kprobe") +__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE") +__failure __msg("tracing progs cannot use bpf_spin_lock yet") +__naked void in_bpf_prog_type_kprobe_3(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_spin_lock] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + call %[bpf_spin_lock]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_spin_lock), + __imm_addr(map_spin_lock) + : __clobber_all); +} + +SEC("tracepoint") +__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT") +__failure __msg("tracing progs cannot use bpf_spin_lock yet") +__naked void in_bpf_prog_type_tracepoint_3(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_spin_lock] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + call %[bpf_spin_lock]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_spin_lock), + __imm_addr(map_spin_lock) + : __clobber_all); +} + +SEC("perf_event") +__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT") +__failure __msg("tracing progs cannot use bpf_spin_lock yet") +__naked void bpf_prog_type_perf_event_3(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_spin_lock] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + call %[bpf_spin_lock]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_spin_lock), + __imm_addr(map_spin_lock) + : __clobber_all); +} + +SEC("raw_tracepoint") +__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT") +__failure __msg("tracing progs cannot use bpf_spin_lock yet") +__naked void bpf_prog_type_raw_tracepoint_3(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_spin_lock] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + call %[bpf_spin_lock]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_spin_lock), + __imm_addr(map_spin_lock) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c deleted file mode 100644 index a067b7098b97..000000000000 --- a/tools/testing/selftests/bpf/verifier/helper_restricted.c +++ /dev/null @@ -1,196 +0,0 @@ -{ - "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown func bpf_ktime_get_coarse_ns", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_KPROBE, -}, -{ - "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown func bpf_ktime_get_coarse_ns", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - 
"bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown func bpf_ktime_get_coarse_ns", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, -}, -{ - "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown func bpf_ktime_get_coarse_ns", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, -}, -{ - "bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_EMIT_CALL(BPF_FUNC_timer_init), - BPF_EXIT_INSN(), - }, - .fixup_map_timer = { 3, 8 }, - .errstr = "tracing progs cannot use bpf_timer yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_KPROBE, -}, -{ - "bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_EMIT_CALL(BPF_FUNC_timer_init), - BPF_EXIT_INSN(), - }, - .fixup_map_timer = { 3, 8 }, - .errstr = "tracing progs cannot use bpf_timer yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, -}, -{ - "bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_EMIT_CALL(BPF_FUNC_timer_init), - BPF_EXIT_INSN(), - }, - .fixup_map_timer = { 3, 8 }, - .errstr = "tracing progs cannot use bpf_timer yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_EMIT_CALL(BPF_FUNC_timer_init), - BPF_EXIT_INSN(), - }, - .fixup_map_timer = { 3, 8 }, - .errstr = "tracing progs cannot use bpf_timer yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, -}, -{ - "bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_spin_lock), - BPF_EXIT_INSN(), - }, - .fixup_map_spin_lock = { 3 }, - .errstr = "tracing progs cannot use bpf_spin_lock yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_KPROBE, -}, -{ - "bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_spin_lock), - BPF_EXIT_INSN(), - }, - .fixup_map_spin_lock = { 3 }, - .errstr = "tracing progs cannot use bpf_spin_lock yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_spin_lock), - BPF_EXIT_INSN(), - }, - .fixup_map_spin_lock = { 3 }, - .errstr = "tracing progs cannot use bpf_spin_lock yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, -}, -{ - "bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_spin_lock), - BPF_EXIT_INSN(), - }, - .fixup_map_spin_lock = { 3 }, - .errstr = "tracing progs cannot use bpf_spin_lock yet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, -}, From ecc424827b775860119f5a5e2c521d7485bcc74f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:04 +0200 Subject: [PATCH 128/260] selftests/bpf: verifier/helper_value_access.c converted to inline assembly Test verifier/helper_value_access.c automatically converted to use inline assembly. 
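As in the previous patch, each macro-encoded instruction is rewritten into the
inline assembly dialect used by these tests. Abridged from the "full range"
test below, a lookup prologue that used to read

  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
  BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
  BPF_LD_MAP_FD(BPF_REG_1, 0),
  BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),

is now written as

  r2 = r10;
  r2 += -8;
  r1 = 0;
  *(u64*)(r2 + 0) = r1;
  r1 = %[map_hash_48b] ll;
  call %[bpf_map_lookup_elem];

with map references supplied through __imm_addr() constraints rather than
.fixup_map_* instruction offsets.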
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-24-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c | 2 +
 .../bpf/progs/verifier_helper_value_access.c | 1245 +++++++++++++++++
 .../bpf/verifier/helper_value_access.c | 953 -------------
 3 files changed, 1247 insertions(+), 953 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/helper_value_access.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 02983d1de218..2c3745a1fdcb 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -20,6 +20,7 @@
 #include "verifier_helper_access_var_len.skel.h"
 #include "verifier_helper_packet_access.skel.h"
 #include "verifier_helper_restricted.skel.h"
+#include "verifier_helper_value_access.skel.h"

 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -62,3 +63,4 @@ void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
 void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
 void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
 void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
+void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }

diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
new file mode 100644
index 000000000000..692216c0ad3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -0,0 +1,1245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_value_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to map: full range")
+__success
+__naked void access_to_map_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: partial range")
+__success
+__naked void access_to_map_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0;
\ + r2 = 8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: empty range") +__failure __msg("invalid access to map value, value_size=48 off=0 size=0") +__naked void access_to_map_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = 0; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: out-of-bound range") +__failure __msg("invalid access to map value, value_size=48 off=0 size=56") +__naked void map_out_of_bound_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) + 8) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: negative range") +__failure __msg("R2 min value is negative") +__naked void access_to_map_negative_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r2 = -8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): full range") +__success +__naked void via_const_imm_full_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): partial range") +__success +__naked void via_const_imm_partial_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): empty range") +__failure __msg("invalid access to map value, value_size=48 off=4 size=0") +__naked void via_const_imm_empty_range(void) +{ + asm volatile (" \ + r2 
= r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = 0; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): out-of-bound range") +__failure __msg("invalid access to map value, value_size=48 off=4 size=52") +__naked void imm_out_of_bound_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): negative range (> adjustment)") +__failure __msg("R2 min value is negative") +__naked void const_imm_negative_range_adjustment_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = -8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const imm): negative range (< adjustment)") +__failure __msg("R2 min value is negative") +__naked void const_imm_negative_range_adjustment_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r1 += %[test_val_foo]; \ + r2 = -1; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): full range") +__success +__naked void via_const_reg_full_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = %[test_val_foo]; \ + r1 += r3; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): partial range") +__success +__naked void via_const_reg_partial_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ 
+ *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = %[test_val_foo]; \ + r1 += r3; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): empty range") +__failure __msg("R1 min value is outside of the allowed memory range") +__naked void via_const_reg_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = 0; \ + r1 += r3; \ + r2 = 0; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): out-of-bound range") +__failure __msg("invalid access to map value, value_size=48 off=4 size=52") +__naked void reg_out_of_bound_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = %[test_val_foo]; \ + r1 += r3; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): negative range (> adjustment)") +__failure __msg("R2 min value is negative") +__naked void const_reg_negative_range_adjustment_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = %[test_val_foo]; \ + r1 += r3; \ + r2 = -8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via const reg): negative range (< adjustment)") +__failure __msg("R2 min value is negative") +__naked void const_reg_negative_range_adjustment_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = %[test_val_foo]; \ + r1 += r3; \ + r2 = -1; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via variable): full range") +__success +__naked void map_via_variable_full_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + 
if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[test_val_foo] goto l0_%=; \ + r1 += r3; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via variable): partial range") +__success +__naked void map_via_variable_partial_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[test_val_foo] goto l0_%=; \ + r1 += r3; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via variable): empty range") +__failure __msg("R1 min value is outside of the allowed memory range") +__naked void map_via_variable_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[test_val_foo] goto l0_%=; \ + r1 += r3; \ + r2 = 0; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via variable): no max check") +__failure __msg("R1 unbounded memory access") +__naked void via_variable_no_max_check_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + r1 += r3; \ + r2 = 1; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to adjusted map (via variable): wrong max check") +__failure __msg("invalid access to map value, value_size=48 off=4 size=45") +__naked void via_variable_wrong_max_check_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[test_val_foo] goto l0_%=; \ + r1 += r3; \ + r2 = %[__imm_0]; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_probe_read_kernel), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 1), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using <, good access") +__success +__naked void bounds_check_using_good_access_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + 
r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 < 32 goto l1_%=; \ + r0 = 0; \ +l0_%=: exit; \ +l1_%=: r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using <, bad access") +__failure __msg("R1 unbounded memory access") +__naked void bounds_check_using_bad_access_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 < 32 goto l1_%=; \ + r1 += r3; \ +l0_%=: r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using <=, good access") +__success +__naked void bounds_check_using_good_access_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 <= 32 goto l1_%=; \ + r0 = 0; \ +l0_%=: exit; \ +l1_%=: r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using <=, bad access") +__failure __msg("R1 unbounded memory access") +__naked void bounds_check_using_bad_access_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 <= 32 goto l1_%=; \ + r1 += r3; \ +l0_%=: r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using s<, good access") +__success +__naked void check_using_s_good_access_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 s< 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s< 0 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using s<, good access 2") +__success +__naked void using_s_good_access_2_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 s< 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s< -3 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to 
map: bounds check using s<, bad access") +__failure __msg("R1 min value is negative") +__naked void check_using_s_bad_access_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u64*)(r0 + 0); \ + if r3 s< 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s< -3 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using s<=, good access") +__success +__naked void check_using_s_good_access_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 s<= 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s<= 0 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using s<=, good access 2") +__success +__naked void using_s_good_access_2_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 s<= 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s<= -3 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("helper access to map: bounds check using s<=, bad access") +__failure __msg("R1 min value is negative") +__naked void check_using_s_bad_access_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + r3 = *(u64*)(r0 + 0); \ + if r3 s<= 32 goto l1_%=; \ +l2_%=: r0 = 0; \ +l0_%=: exit; \ +l1_%=: if r3 s<= -3 goto l2_%=; \ + r1 += r3; \ + r0 = 0; \ + *(u8*)(r1 + 0) = r0; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map lookup helper access to map") +__success +__naked void lookup_helper_access_to_map(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map update helper access to map") +__success +__naked void update_helper_access_to_map(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r4 = 0; \ + r3 = r0; \ + r2 = r0; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_update_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_map_update_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} 
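+/* The "wrong size" test below looks up a value in the 8-byte map but passes
+ * it to an update on the 16-byte map, so the verifier sees a 16-byte access
+ * to an 8-byte value.
+ */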
+ +SEC("tracepoint") +__description("map update helper access to map: wrong size") +__failure __msg("invalid access to map value, value_size=8 off=0 size=16") +__naked void access_to_map_wrong_size(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r4 = 0; \ + r3 = r0; \ + r2 = r0; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_update_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_map_update_elem), + __imm_addr(map_hash_16b), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const imm)") +__success +__naked void adjusted_map_via_const_imm(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r2 += %[other_val_bar]; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(other_val_bar, offsetof(struct other_val, bar)) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const imm): out-of-bound 1") +__failure __msg("invalid access to map value, value_size=16 off=12 size=8") +__naked void imm_out_of_bound_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r2 += %[__imm_0]; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(__imm_0, sizeof(struct other_val) - 4) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const imm): out-of-bound 2") +__failure __msg("invalid access to map value, value_size=16 off=-4 size=8") +__naked void imm_out_of_bound_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r2 += -4; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const reg)") +__success +__naked void adjusted_map_via_const_reg(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = %[other_val_bar]; \ + r2 += r3; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(other_val_bar, offsetof(struct other_val, bar)) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const reg): out-of-bound 1") +__failure __msg("invalid access to map value, value_size=16 off=12 size=8") +__naked void reg_out_of_bound_1(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = %[__imm_0]; \ + r2 += r3; \ + r1 = 
%[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(__imm_0, sizeof(struct other_val) - 4) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via const reg): out-of-bound 2") +__failure __msg("invalid access to map value, value_size=16 off=-4 size=8") +__naked void reg_out_of_bound_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = -4; \ + r2 += r3; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via variable)") +__success +__naked void to_adjusted_map_via_variable(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[other_val_bar] goto l0_%=; \ + r2 += r3; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(other_val_bar, offsetof(struct other_val, bar)) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via variable): no max check") +__failure +__msg("R2 unbounded memory access, make sure to bounds check any such access") +__naked void via_variable_no_max_check_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = *(u32*)(r0 + 0); \ + r2 += r3; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +SEC("tracepoint") +__description("map helper access to adjusted map (via variable): wrong max check") +__failure __msg("invalid access to map value, value_size=16 off=9 size=8") +__naked void via_variable_wrong_max_check_2(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r2 = r0; \ + r3 = *(u32*)(r0 + 0); \ + if r3 > %[__imm_0] goto l0_%=; \ + r2 += r3; \ + r1 = %[map_hash_16b] ll; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b), + __imm_const(__imm_0, offsetof(struct other_val, bar) + 1) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c deleted file mode 100644 index 1c7882ddfa63..000000000000 --- a/tools/testing/selftests/bpf/verifier/helper_value_access.c +++ /dev/null @@ -1,953 +0,0 @@ -{ - "helper access to map: full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - 
BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=0", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=56", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: negative range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=0", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=52", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): negative range (> adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const imm): negative range (< adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the allowed memory range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=52", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): negative range (> adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via const reg): negative range (< 
adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via variable): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via variable): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via variable): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the allowed memory range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via variable): no max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - 
BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 unbounded memory access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to adjusted map (via variable): wrong max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo) + 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=45", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using <, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using <, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 unbounded memory access", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using <=, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using <=, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 unbounded memory access", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<, good access 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 min value is negative", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<=, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - 
BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<=, good access 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "helper access to map: bounds check using s<=, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 min value is negative", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map lookup helper access to map", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 8 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map update helper access to map", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_update_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map update helper access to map: wrong size", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_update_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .fixup_map_hash_16b = { 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=8 off=0 size=16", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper 
access to adjusted map (via const imm)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, offsetof(struct other_val, bar)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via const imm): out-of-bound 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, sizeof(struct other_val) - 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=12 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via const imm): out-of-bound 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=-4 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via const reg)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, offsetof(struct other_val, bar)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via const reg): out-of-bound 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, sizeof(struct other_val) - 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=12 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via const reg): out-of-bound 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 
0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, -4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=-4 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via variable)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via variable): no max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "R2 unbounded memory access, make sure to bounds check any such access", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, -{ - "map helper access to adjusted map (via variable): wrong max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar) + 1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 11 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=9 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, -}, From 01481e67dd4d1c2c62eb6a506a5f4803ee50f8a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:05 +0200 Subject: [PATCH 129/260] selftests/bpf: verifier/int_ptr.c converted to inline assembly Test verifier/int_ptr.c automatically converted to use inline assembly. 
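For reference, the converted format replaces each struct entry (an insns array of BPF_*() macros plus .result/.errstr/.prog_type fields) with a __naked C function whose body is a BPF assembly template, annotated with the expectation macros from bpf_misc.h. An illustrative sketch of the shape (editor's example, not one of the converted tests):

	SEC("socket")
	__description("example: shape of a converted test")
	__success __retval(0)
	__naked void example_converted_shape(void)
	{
		asm volatile ("					\
		r0 = 0;						\
		exit;						\
	"	::: __clobber_all);
	}

The macro-to-assembly mapping visible in the diff below: BPF_MOV64_REG(BPF_REG_7, BPF_REG_10) becomes r7 = r10; BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8) becomes r7 += -8; BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0) becomes *(u64*)(r7 + 0) = r0; and BPF_EMIT_CALL(BPF_FUNC_strtoul) becomes call %[bpf_strtoul] with a matching __imm(bpf_strtoul) input constraint.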
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-25-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../selftests/bpf/progs/verifier_int_ptr.c    | 157 +++++++++++++++++
 .../testing/selftests/bpf/verifier/int_ptr.c  | 161 ------------------
 3 files changed, 159 insertions(+), 161 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_int_ptr.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/int_ptr.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 2c3745a1fdcb..d9180da30f1b 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -21,6 +21,7 @@
 #include "verifier_helper_packet_access.skel.h"
 #include "verifier_helper_restricted.skel.h"
 #include "verifier_helper_value_access.skel.h"
+#include "verifier_int_ptr.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -64,3 +65,4 @@ void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_
 void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
 void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
 void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
+void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
new file mode 100644
index 000000000000..b054f9c48143
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/int_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG uninitialized")
+__failure __msg("invalid indirect read from stack R4 off -16+0 size 8")
+__naked void arg_ptr_to_long_uninitialized(void)
+{
+	asm volatile ("					\
+	/* bpf_strtoul arg1 (buf) */			\
+	r7 = r10;					\
+	r7 += -8;					\
+	r0 = 0x00303036;				\
+	*(u64*)(r7 + 0) = r0;				\
+	r1 = r7;					\
+	/* bpf_strtoul arg2 (buf_len) */		\
+	r2 = 4;						\
+	/* bpf_strtoul arg3 (flags) */			\
+	r3 = 0;						\
+	/* bpf_strtoul arg4 (res) */			\
+	r7 += -8;					\
+	r4 = r7;					\
+	/* bpf_strtoul() */				\
+	call %[bpf_strtoul];				\
+	r0 = 1;						\
+	exit;						\
+"	:
+	: __imm(bpf_strtoul)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("ARG_PTR_TO_LONG half-uninitialized")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R4 off -16+4 size 8")
+__retval(0)
+__naked void ptr_to_long_half_uninitialized(void)
+{
+	asm volatile ("					\
+	/* bpf_strtoul arg1 (buf) */			\
+	r7 = r10;					\
+	r7 += -8;					\
+	r0 = 0x00303036;				\
+	*(u64*)(r7 + 0) = r0;				\
+	r1 = r7;					\
+	/* bpf_strtoul arg2 (buf_len) */		\
+	r2 = 4;						\
+	/* bpf_strtoul arg3 (flags) */			\
+	r3 = 0;						\
+	/* bpf_strtoul arg4 (res) */			\
+	r7 += -8;					\
+	*(u32*)(r7 + 0) = r0;				\
+	r4 = r7;					\
+	/* bpf_strtoul() */				\
+	call %[bpf_strtoul];				\
+	r0 = 0;						\
+	exit;						\
+"	:
+	: __imm(bpf_strtoul)
+	: __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG misaligned")
+__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8")
+__naked void arg_ptr_to_long_misaligned(void)
+{
+	asm volatile ("					\
+	/* bpf_strtoul arg1 (buf) */			\
+	r7 = r10;					\
+	r7 += 
-8; \ + r0 = 0x00303036; \ + *(u64*)(r7 + 0) = r0; \ + r1 = r7; \ + /* bpf_strtoul arg2 (buf_len) */ \ + r2 = 4; \ + /* bpf_strtoul arg3 (flags) */ \ + r3 = 0; \ + /* bpf_strtoul arg4 (res) */ \ + r7 += -12; \ + r0 = 0; \ + *(u32*)(r7 + 0) = r0; \ + *(u64*)(r7 + 4) = r0; \ + r4 = r7; \ + /* bpf_strtoul() */ \ + call %[bpf_strtoul]; \ + r0 = 1; \ + exit; \ +" : + : __imm(bpf_strtoul) + : __clobber_all); +} + +SEC("cgroup/sysctl") +__description("ARG_PTR_TO_LONG size < sizeof(long)") +__failure __msg("invalid indirect access to stack R4 off=-4 size=8") +__naked void to_long_size_sizeof_long(void) +{ + asm volatile (" \ + /* bpf_strtoul arg1 (buf) */ \ + r7 = r10; \ + r7 += -16; \ + r0 = 0x00303036; \ + *(u64*)(r7 + 0) = r0; \ + r1 = r7; \ + /* bpf_strtoul arg2 (buf_len) */ \ + r2 = 4; \ + /* bpf_strtoul arg3 (flags) */ \ + r3 = 0; \ + /* bpf_strtoul arg4 (res) */ \ + r7 += 12; \ + *(u32*)(r7 + 0) = r0; \ + r4 = r7; \ + /* bpf_strtoul() */ \ + call %[bpf_strtoul]; \ + r0 = 1; \ + exit; \ +" : + : __imm(bpf_strtoul) + : __clobber_all); +} + +SEC("cgroup/sysctl") +__description("ARG_PTR_TO_LONG initialized") +__success +__naked void arg_ptr_to_long_initialized(void) +{ + asm volatile (" \ + /* bpf_strtoul arg1 (buf) */ \ + r7 = r10; \ + r7 += -8; \ + r0 = 0x00303036; \ + *(u64*)(r7 + 0) = r0; \ + r1 = r7; \ + /* bpf_strtoul arg2 (buf_len) */ \ + r2 = 4; \ + /* bpf_strtoul arg3 (flags) */ \ + r3 = 0; \ + /* bpf_strtoul arg4 (res) */ \ + r7 += -8; \ + *(u64*)(r7 + 0) = r0; \ + r4 = r7; \ + /* bpf_strtoul() */ \ + call %[bpf_strtoul]; \ + r0 = 1; \ + exit; \ +" : + : __imm(bpf_strtoul) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c deleted file mode 100644 index 02d9e004260b..000000000000 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ /dev/null @@ -1,161 +0,0 @@ -{ - "ARG_PTR_TO_LONG uninitialized", - .insns = { - /* bpf_strtoul arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* bpf_strtoul arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* bpf_strtoul arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* bpf_strtoul arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - /* bpf_strtoul() */ - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+0 size 8", -}, -{ - "ARG_PTR_TO_LONG half-uninitialized", - .insns = { - /* bpf_strtoul arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* bpf_strtoul arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* bpf_strtoul arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* bpf_strtoul arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - /* bpf_strtoul() */ - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8", - /* in privileged mode reads 
from uninitialized stack locations are permitted */ - .result = ACCEPT, -}, -{ - "ARG_PTR_TO_LONG misaligned", - .insns = { - /* bpf_strtoul arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* bpf_strtoul arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* bpf_strtoul arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* bpf_strtoul arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - /* bpf_strtoul() */ - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8", -}, -{ - "ARG_PTR_TO_LONG size < sizeof(long)", - .insns = { - /* bpf_strtoul arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* bpf_strtoul arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* bpf_strtoul arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* bpf_strtoul arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12), - BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - /* bpf_strtoul() */ - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect access to stack R4 off=-4 size=8", -}, -{ - "ARG_PTR_TO_LONG initialized", - .insns = { - /* bpf_strtoul arg1 (buf) */ - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - - /* bpf_strtoul arg2 (buf_len) */ - BPF_MOV64_IMM(BPF_REG_2, 4), - - /* bpf_strtoul arg3 (flags) */ - BPF_MOV64_IMM(BPF_REG_3, 0), - - /* bpf_strtoul arg4 (res) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), - - /* bpf_strtoul() */ - BPF_EMIT_CALL(BPF_FUNC_strtoul), - - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, -}, From e297875580662f2fdcde1cba4ea28bf70ea2ca49 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:06 +0200 Subject: [PATCH 130/260] selftests/bpf: verifier/ld_ind.c converted to inline assembly Test verifier/ld_ind.c automatically converted to use inline assembly. 
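One idiom worth calling out: BPF_LD_IND has no BPF assembly mnemonic, so the converted tests splice the raw 64-bit instruction encoding into the template with a .8byte directive, built by the BPF_LD_IND() macro from the in-tree linux/filter.h copy and passed in through bpf_misc.h's __imm_insn() helper. An annotated form of the accepted r7 case from the diff below (comments added for illustration only):

	SEC("socket")
	__description("example: annotated ld_ind splice")
	__success __success_unpriv __retval(1)
	__naked void example_annotated_ld_ind(void)
	{
		asm volatile ("							\
		r6 = r1;	/* ld_ind implicitly reads the skb via r6 */	\
		r7 = 1;		/* the index register must be initialized */	\
		.8byte %[ld_ind]; /* raw BPF_LD_IND(BPF_W, r7, -0x200000) */	\
		r0 = r7;	/* r7 must still be readable afterwards */	\
		exit;							\
	"	:
		: __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
		: __clobber_all);
	}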
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-26-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../selftests/bpf/progs/verifier_ld_ind.c     | 110 ++++++++++++++++++
 tools/testing/selftests/bpf/verifier/ld_ind.c |  72 ------------
 3 files changed, 112 insertions(+), 72 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_ld_ind.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/ld_ind.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d9180da30f1b..d8d4464b6112 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -22,6 +22,7 @@
 #include "verifier_helper_restricted.skel.h"
 #include "verifier_helper_value_access.skel.h"
 #include "verifier_int_ptr.skel.h"
+#include "verifier_ld_ind.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -66,3 +67,4 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
 void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
 void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
 void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
new file mode 100644
index 000000000000..c925ba9a2e74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ld_ind.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("ld_ind: check calling conv, r1")
+__failure __msg("R1 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r1(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r1 = 1;						\
+	.8byte %[ld_ind];				\
+	r0 = r1;					\
+	exit;						\
+"	:
+	: __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r2")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r2(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r2 = 1;						\
+	.8byte %[ld_ind];				\
+	r0 = r2;					\
+	exit;						\
+"	:
+	: __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r3")
+__failure __msg("R3 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r3(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r3 = 1;						\
+	.8byte %[ld_ind];				\
+	r0 = r3;					\
+	exit;						\
+"	:
+	: __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r4")
+__failure __msg("R4 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r4(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r4 = 1;						\
+	.8byte %[ld_ind];				\
+	r0 = r4;					\
+	exit;						\
+"	:
+	: __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r5")
+__failure __msg("R5 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r5(void)
+{
+	asm volatile ("					\
+	r6 = r1;					\
+	r5 = 1;						\
+	.8byte %[ld_ind];				\
+	r0 = r5;					\
+	exit;						\
+"	:
+	: 
__imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000)) + : __clobber_all); +} + +SEC("socket") +__description("ld_ind: check calling conv, r7") +__success __success_unpriv __retval(1) +__naked void ind_check_calling_conv_r7(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = 1; \ + .8byte %[ld_ind]; \ + r0 = r7; \ + exit; \ +" : + : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/ld_ind.c b/tools/testing/selftests/bpf/verifier/ld_ind.c deleted file mode 100644 index 079734227538..000000000000 --- a/tools/testing/selftests/bpf/verifier/ld_ind.c +++ /dev/null @@ -1,72 +0,0 @@ -{ - "ld_ind: check calling conv, r1", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R1 !read_ok", - .result = REJECT, -}, -{ - "ld_ind: check calling conv, r2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R2 !read_ok", - .result = REJECT, -}, -{ - "ld_ind: check calling conv, r3", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .errstr = "R3 !read_ok", - .result = REJECT, -}, -{ - "ld_ind: check calling conv, r4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_EXIT_INSN(), - }, - .errstr = "R4 !read_ok", - .result = REJECT, -}, -{ - "ld_ind: check calling conv, r5", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_5, 1), - BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .errstr = "R5 !read_ok", - .result = REJECT, -}, -{ - "ld_ind: check calling conv, r7", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 1, -}, From 583c7ce5be093277ef1691f35d9bec5ef9934e24 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:07 +0200 Subject: [PATCH 131/260] selftests/bpf: verifier/leak_ptr.c converted to inline assembly Test verifier/leak_ptr.c automatically converted to use inline assembly. 
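Two further conversion idioms appear in this patch: the BPF_LD_MAP_FD + .fixup_map_hash_8b indirection of the old format becomes an ordinary BTF-defined map loaded with the 64-bit 'll' pseudo instruction through __imm_addr(), and ctx field offsets are passed in as named constants through __imm_const(). A minimal sketch of both, as this editor understands them (example_map and cb0_off are stand-in names, not from the patch):

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1);
		__type(key, long long);
		__type(value, long long);
	} example_map SEC(".maps");

	SEC("socket")
	__description("example: map address and ctx offset constants")
	__success
	__naked void example_map_and_const(void)
	{
		asm volatile ("						\
		r2 = %[example_map] ll;	/* was BPF_LD_MAP_FD + fixup */	\
		r0 = *(u32*)(r1 + %[cb0_off]);				\
		r0 = 0;							\
		exit;							\
	"	:
		: __imm_addr(example_map),
		  __imm_const(cb0_off, offsetof(struct __sk_buff, cb[0]))
		: __clobber_all);
	}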
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-27-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c       |  2 +
 .../selftests/bpf/progs/verifier_leak_ptr.c   | 92 +++++++++++++++++++
 .../testing/selftests/bpf/verifier/leak_ptr.c | 67 --------------
 3 files changed, 94 insertions(+), 67 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/leak_ptr.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d8d4464b6112..f8b3b6beba3f 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -23,6 +23,7 @@
 #include "verifier_helper_value_access.skel.h"
 #include "verifier_int_ptr.skel.h"
 #include "verifier_ld_ind.skel.h"
+#include "verifier_leak_ptr.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -68,3 +69,4 @@ void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); 
 void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
 void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
 void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
+void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
new file mode 100644
index 000000000000..d153fbe50055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/leak_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, long long);
+	__type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("leak pointer into ctx 1")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R2 leaks addr into mem")
+__naked void leak_pointer_into_ctx_1(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	*(u64*)(r1 + %[__sk_buff_cb_0]) = r0;		\
+	r2 = %[map_hash_8b] ll;				\
+	lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r2;	\
+	exit;						\
+"	:
+	: __imm_addr(map_hash_8b),
+	  __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 2")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R10 leaks addr into mem")
+__naked void leak_pointer_into_ctx_2(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	*(u64*)(r1 + %[__sk_buff_cb_0]) = r0;		\
+	lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r10;	\
+	exit;						\
+"	:
+	: __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 3")
+__success __failure_unpriv __msg_unpriv("R2 leaks addr into ctx")
+__retval(0)
+__naked void leak_pointer_into_ctx_3(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	r2 = %[map_hash_8b] ll;				\
+	*(u64*)(r1 + %[__sk_buff_cb_0]) = r2;		\
+	exit;						\
+"	:
+	: __imm_addr(map_hash_8b),
+	  __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into map val")
+__success __failure_unpriv __msg_unpriv("R6 leaks addr into mem")
+__retval(0)
+__naked void 
leak_pointer_into_map_val(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r3 = 0; \ + *(u64*)(r0 + 0) = r3; \ + lock *(u64 *)(r0 + 0) += r6; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c deleted file mode 100644 index 73f0dea95546..000000000000 --- a/tools/testing/selftests/bpf/verifier/leak_ptr.c +++ /dev/null @@ -1,67 +0,0 @@ -{ - "leak pointer into ctx 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 2 }, - .errstr_unpriv = "R2 leaks addr into mem", - .result_unpriv = REJECT, - .result = REJECT, - .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", -}, -{ - "leak pointer into ctx 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_10, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R10 leaks addr into mem", - .result_unpriv = REJECT, - .result = REJECT, - .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", -}, -{ - "leak pointer into ctx 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .errstr_unpriv = "R2 leaks addr into ctx", - .result_unpriv = REJECT, - .result = ACCEPT, -}, -{ - "leak pointer into map val", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_6, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr_unpriv = "R6 leaks addr into mem", - .result_unpriv = REJECT, - .result = ACCEPT, -}, From caf345cf12073eb5905b03c3c5f905cc0964dc6c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:08 +0200 Subject: [PATCH 132/260] selftests/bpf: verifier/map_ptr.c converted to inline assembly Test verifier/map_ptr.c automatically converted to use inline assembly. 
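For readers following the series, the old table fields map onto bpf_misc.h annotations roughly as follows (paraphrased from the conversions; see the diff below for concrete instances of each):

	/*
	 * "test name"                          ->  __description("test name")
	 * .result = ACCEPT                     ->  __success
	 * .result = REJECT, .errstr = "msg"    ->  __failure __msg("msg")
	 * .result_unpriv = REJECT,
	 *   .errstr_unpriv = "msg"             ->  __failure_unpriv __msg_unpriv("msg")
	 * .retval = 1                          ->  __retval(1)
	 * .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS -> __flag(BPF_F_ANY_ALIGNMENT)
	 * (default socket filter prog type)    ->  SEC("socket")
	 */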
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-28-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../selftests/bpf/progs/verifier_map_ptr.c    | 159 ++++++++++++++++++
 .../testing/selftests/bpf/verifier/map_ptr.c  |  99 -----------
 3 files changed, 161 insertions(+), 99 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_map_ptr.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/map_ptr.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index f8b3b6beba3f..d2f3bff0e942 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -24,6 +24,7 @@
 #include "verifier_int_ptr.skel.h"
 #include "verifier_ld_ind.skel.h"
 #include "verifier_leak_ptr.skel.h"
+#include "verifier_map_ptr.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -70,3 +71,4 @@ void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access
 void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
 void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
 void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
+void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
new file mode 100644
index 000000000000..11a079145966
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+	unsigned int index;
+	int foo[MAX_ENTRIES];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+	long long foo;
+	long long bar;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, long long);
+	__type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+SEC("socket")
+__description("bpf_map_ptr: read with negative offset rejected")
+__failure __msg("R1 is bpf_array invalid negative access: off=-8")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void read_with_negative_offset_rejected(void)
+{
+	asm volatile ("					\
+	r1 = r10;					\
+	r1 = %[map_array_48b] ll;			\
+	r6 = *(u64*)(r1 - 8);				\
+	r0 = 1;						\
+	exit;						\
+"	:
+	: __imm_addr(map_array_48b)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: write rejected")
+__failure __msg("only read from bpf_array is supported")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void bpf_map_ptr_write_rejected(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	*(u64*)(r10 - 8) = r0;				\
+	r2 = r10;					\
+	r2 += -8;					\
+	r1 = %[map_array_48b] ll;			\
+	*(u64*)(r1 + 0) = r2;				\
+	r0 = 1;						\
+	exit;						\
+"	:
+	: __imm_addr(map_array_48b)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read non-existent field rejected")
+__failure
+__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void 
read_non_existent_field_rejected(void) +{ + asm volatile (" \ + r6 = 0; \ + r1 = %[map_array_48b] ll; \ + r6 = *(u32*)(r1 + 1); \ + r0 = 1; \ + exit; \ +" : + : __imm_addr(map_array_48b) + : __clobber_all); +} + +SEC("socket") +__description("bpf_map_ptr: read ops field accepted") +__success __failure_unpriv +__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") +__retval(1) +__naked void ptr_read_ops_field_accepted(void) +{ + asm volatile (" \ + r6 = 0; \ + r1 = %[map_array_48b] ll; \ + r6 = *(u64*)(r1 + 0); \ + r0 = 1; \ + exit; \ +" : + : __imm_addr(map_array_48b) + : __clobber_all); +} + +SEC("socket") +__description("bpf_map_ptr: r = 0, map_ptr = map_ptr + r") +__success __failure_unpriv +__msg_unpriv("R1 has pointer with unsupported alu operation") +__retval(0) +__naked void map_ptr_map_ptr_r(void) +{ + asm volatile (" \ + r0 = 0; \ + *(u64*)(r10 - 8) = r0; \ + r2 = r10; \ + r2 += -8; \ + r0 = 0; \ + r1 = %[map_hash_16b] ll; \ + r1 += r0; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +SEC("socket") +__description("bpf_map_ptr: r = 0, r = r + map_ptr") +__success __failure_unpriv +__msg_unpriv("R0 has pointer with unsupported alu operation") +__retval(0) +__naked void _0_r_r_map_ptr(void) +{ + asm volatile (" \ + r0 = 0; \ + *(u64*)(r10 - 8) = r0; \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + r0 = %[map_hash_16b] ll; \ + r1 += r0; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_16b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c deleted file mode 100644 index 17ee84dc7766..000000000000 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ /dev/null @@ -1,99 +0,0 @@ -{ - "bpf_map_ptr: read with negative offset rejected", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 1 }, - .result_unpriv = REJECT, - .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", - .result = REJECT, - .errstr = "R1 is bpf_array invalid negative access: off=-8", -}, -{ - "bpf_map_ptr: write rejected", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result_unpriv = REJECT, - .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", - .result = REJECT, - .errstr = "only read from bpf_array is supported", -}, -{ - "bpf_map_ptr: read non-existent field rejected", - .insns = { - BPF_MOV64_IMM(BPF_REG_6, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 1 }, - .result_unpriv = REJECT, - .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", - .result = REJECT, - .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "bpf_map_ptr: read ops field accepted", - .insns = { - BPF_MOV64_IMM(BPF_REG_6, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - 
BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
-	BPF_MOV64_IMM(BPF_REG_0, 1),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_array_48b = { 1 },
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
-	.result = ACCEPT,
-	.retval = 1,
-},
-{
-	"bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_hash_16b = { 4 },
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R1 has pointer with unsupported alu operation",
-	.result = ACCEPT,
-},
-{
-	"bpf_map_ptr: r = 0, r = r + map_ptr",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_MOV64_IMM(BPF_REG_1, 0),
-	BPF_LD_MAP_FD(BPF_REG_0, 0),
-	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_hash_16b = { 4 },
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 has pointer with unsupported alu operation",
-	.result = ACCEPT,
-},

From 05e474ecbb56a5fadea6b36b7aa7d8d237aea064 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 25 Mar 2023 04:55:09 +0200
Subject: [PATCH 133/260] selftests/bpf: verifier/map_ret_val.c converted to inline assembly

Test verifier/map_ret_val.c automatically converted to use inline assembly.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20230325025524.144043-29-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../bpf/progs/verifier_map_ret_val.c          | 110 ++++++++++++++++++
 .../selftests/bpf/verifier/map_ret_val.c      |  65 ----------
 3 files changed, 112 insertions(+), 65 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
 delete mode 100644 tools/testing/selftests/bpf/verifier/map_ret_val.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d2f3bff0e942..5131a73fd225 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -25,6 +25,7 @@
 #include "verifier_ld_ind.skel.h"
 #include "verifier_leak_ptr.skel.h"
 #include "verifier_map_ptr.skel.h"
+#include "verifier_map_ret_val.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -72,3 +73,4 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
 void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
 void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
 void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
+void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
new file mode 100644
index 000000000000..1639628b832d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ret_val.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include 
"bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("socket") +__description("invalid map_fd for function call") +__failure __msg("fd 0 is not pointing to valid bpf_map") +__failure_unpriv +__naked void map_fd_for_function_call(void) +{ + asm volatile (" \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + r2 = r10; \ + r2 += -8; \ + .8byte %[ld_map_fd]; \ + .8byte 0; \ + call %[bpf_map_delete_elem]; \ + exit; \ +" : + : __imm(bpf_map_delete_elem), + __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0)) + : __clobber_all); +} + +SEC("socket") +__description("don't check return value before access") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +__failure_unpriv +__naked void check_return_value_before_access(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r1 = 0; \ + *(u64*)(r0 + 0) = r1; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("access memory with incorrect alignment") +__failure __msg("misaligned value access") +__failure_unpriv +__flag(BPF_F_STRICT_ALIGNMENT) +__naked void access_memory_with_incorrect_alignment_1(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r0 + 4) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("sometimes access memory with incorrect alignment") +__failure __msg("R0 invalid mem access") +__msg_unpriv("R0 leaks addr") +__flag(BPF_F_STRICT_ALIGNMENT) +__naked void access_memory_with_incorrect_alignment_2(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r0 + 0) = r1; \ + exit; \ +l0_%=: r1 = 1; \ + *(u64*)(r0 + 0) = r1; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/map_ret_val.c b/tools/testing/selftests/bpf/verifier/map_ret_val.c deleted file mode 100644 index bdd0e8d18333..000000000000 --- a/tools/testing/selftests/bpf/verifier/map_ret_val.c +++ /dev/null @@ -1,65 +0,0 @@ -{ - "invalid map_fd for function call", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), - BPF_EXIT_INSN(), - }, - .errstr = "fd 0 is not pointing to valid bpf_map", - .result = REJECT, -}, -{ - "don't check return value before access", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access 'map_value_or_null'", - .result = REJECT, -}, -{ - "access memory with incorrect alignment", - .insns 
= { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "misaligned value access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, -}, -{ - "sometimes access memory with incorrect alignment", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", - .errstr_unpriv = "R0 leaks addr", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, -}, From ade3f08fc236ce8a5f00c1ffaf85ba42aa18ded4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:10 +0200 Subject: [PATCH 134/260] selftests/bpf: verifier/masking.c converted to inline assembly Test verifier/masking.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-30-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_masking.c | 410 ++++++++++++++++++ .../testing/selftests/bpf/verifier/masking.c | 322 -------------- 3 files changed, 412 insertions(+), 322 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_masking.c delete mode 100644 tools/testing/selftests/bpf/verifier/masking.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 5131a73fd225..b23fcbe4f83b 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -26,6 +26,7 @@ #include "verifier_leak_ptr.skel.h" #include "verifier_map_ptr.skel.h" #include "verifier_map_ret_val.skel.h" +#include "verifier_masking.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -74,3 +75,4 @@ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } +void test_verifier_masking(void) { RUN(verifier_masking); } diff --git a/tools/testing/selftests/bpf/progs/verifier_masking.c b/tools/testing/selftests/bpf/progs/verifier_masking.c new file mode 100644 index 000000000000..5732cc1b4c47 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_masking.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/masking.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("masking, test out of bounds 1") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_1(void) +{ + asm volatile (" \ + w1 = 5; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 5 - 1) + : __clobber_all); +} + +SEC("socket")
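+/* All tests in this file repeat one bounds-masking idiom: with an index in r1 (r3 in the last two in-bounds tests) and bound - 1 in r2, the sequence r2 -= r1; r2 |= r1; r2 = -r2; r2 s>>= 63; r1 &= r2; turns r2 into an all-ones mask when 0 <= r1 <= bound - 1 (keeping r1 intact) and into 0 otherwise (forcing r1 to 0). */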
+__description("masking, test out of bounds 2") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_2(void) +{ + asm volatile (" \ + w1 = 1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 3") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_3(void) +{ + asm volatile (" \ + w1 = 0xffffffff; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 4") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_4(void) +{ + asm volatile (" \ + w1 = 0xffffffff; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 5") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_5(void) +{ + asm volatile (" \ + w1 = -1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 6") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_6(void) +{ + asm volatile (" \ + w1 = -1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 7") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_7(void) +{ + asm volatile (" \ + r1 = 5; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 5 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 8") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_8(void) +{ + asm volatile (" \ + r1 = 1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 9") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_9(void) +{ + asm volatile (" \ + r1 = 0xffffffff; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 10") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_10(void) +{ + asm volatile (" \ + r1 = 0xffffffff; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 11") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_11(void) +{ + asm volatile (" \ + r1 = -1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + 
exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test out of bounds 12") +__success __success_unpriv __retval(0) +__naked void test_out_of_bounds_12(void) +{ + asm volatile (" \ + r1 = -1; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 1") +__success __success_unpriv __retval(4) +__naked void masking_test_in_bounds_1(void) +{ + asm volatile (" \ + w1 = 4; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 5 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 2") +__success __success_unpriv __retval(0) +__naked void masking_test_in_bounds_2(void) +{ + asm volatile (" \ + w1 = 0; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 3") +__success __success_unpriv __retval(0xfffffffe) +__naked void masking_test_in_bounds_3(void) +{ + asm volatile (" \ + w1 = 0xfffffffe; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xffffffff - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 4") +__success __success_unpriv __retval(0xabcde) +__naked void masking_test_in_bounds_4(void) +{ + asm volatile (" \ + w1 = 0xabcde; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 0xabcdef - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 5") +__success __success_unpriv __retval(0) +__naked void masking_test_in_bounds_5(void) +{ + asm volatile (" \ + w1 = 0; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 1 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 6") +__success __success_unpriv __retval(46) +__naked void masking_test_in_bounds_6(void) +{ + asm volatile (" \ + w1 = 46; \ + w2 = %[__imm_0]; \ + r2 -= r1; \ + r2 |= r1; \ + r2 = -r2; \ + r2 s>>= 63; \ + r1 &= r2; \ + r0 = r1; \ + exit; \ +" : + : __imm_const(__imm_0, 47 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 7") +__success __success_unpriv __retval(46) +__naked void masking_test_in_bounds_7(void) +{ + asm volatile (" \ + r3 = -46; \ + r3 *= -1; \ + w2 = %[__imm_0]; \ + r2 -= r3; \ + r2 |= r3; \ + r2 = -r2; \ + r2 s>>= 63; \ + r3 &= r2; \ + r0 = r3; \ + exit; \ +" : + : __imm_const(__imm_0, 47 - 1) + : __clobber_all); +} + +SEC("socket") +__description("masking, test in bounds 8") +__success __success_unpriv __retval(0) +__naked void masking_test_in_bounds_8(void) +{ + asm volatile (" \ + r3 = -47; \ + r3 *= -1; \ + w2 = %[__imm_0]; \ + r2 -= r3; \ + r2 |= r3; \ + r2 = -r2; \ + r2 s>>= 63; \ + r3 &= r2; \ + r0 = r3; \ + exit; \ +" : + : __imm_const(__imm_0, 47 - 1) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/masking.c b/tools/testing/selftests/bpf/verifier/masking.c deleted file 
mode 100644 index 6e1358c544fd..000000000000 --- a/tools/testing/selftests/bpf/verifier/masking.c +++ /dev/null @@ -1,322 +0,0 @@ -{ - "masking, test out of bounds 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 5), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 3", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 4", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 5", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 6", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 5), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, 
BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 9", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 10", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 11", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test out of bounds 12", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test in bounds 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 4), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 4, -}, -{ - "masking, test in bounds 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test in bounds 3", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xfffffffe, -}, -{ - "masking, test in bounds 4", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xabcde), - BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, 
BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xabcde, -}, -{ - "masking, test in bounds 5", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, -{ - "masking, test in bounds 6", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 46), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 46, -}, -{ - "masking, test in bounds 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, -46), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 46, -}, -{ - "masking, test in bounds 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, -47), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, -}, From 65428312e38d896ce101ee3489403cff320d9b74 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:11 +0200 Subject: [PATCH 135/260] selftests/bpf: verifier/meta_access.c converted to inline assembly Test verifier/meta_access.c automatically converted to use inline assembly. 
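As a reading aid for the asm form: these tests encode the usual XDP metadata bounds-check pattern. A rough C equivalent of what test1 verifies (an illustrative sketch only, not part of the converted file; the function name is made up, and <linux/bpf.h> plus <bpf/bpf_helpers.h> are assumed) would be:

    SEC("xdp")
    int meta_bounds_sketch(struct xdp_md *ctx)
    {
            __u8 *data = (__u8 *)(long)ctx->data;
            __u8 *meta = (__u8 *)(long)ctx->data_meta;

            /* the verifier insists on this check before any meta access */
            if (meta + 8 > data)
                    return XDP_PASS;
            return meta[0] ? XDP_PASS : XDP_DROP;
    }

Broadly, the failing variants check the wrong pointer pair or touch bytes the check did not cover, which is exactly what the verifier must reject.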
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-31-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_meta_access.c | 284 ++++++++++++++++++ .../selftests/bpf/verifier/meta_access.c | 235 --------------- 3 files changed, 286 insertions(+), 235 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_meta_access.c delete mode 100644 tools/testing/selftests/bpf/verifier/meta_access.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index b23fcbe4f83b..bd48a584a356 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -27,6 +27,7 @@ #include "verifier_map_ptr.skel.h" #include "verifier_map_ret_val.skel.h" #include "verifier_masking.skel.h" +#include "verifier_meta_access.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -76,3 +77,4 @@ void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } +void test_verifier_meta_access(void) { RUN(verifier_meta_access); } diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c new file mode 100644 index 000000000000..d81722fb5f19 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/meta_access.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("xdp") +__description("meta access, test1") +__success __retval(0) +__naked void meta_access_test1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r2; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test2") +__failure __msg("invalid access to packet, off=-8") +__naked void meta_access_test2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r2; \ + r0 -= 8; \ + r4 = r2; \ + r4 += 8; \ + if r4 > r3 goto l0_%=; \ + r0 = *(u8*)(r0 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test3") +__failure __msg("invalid access to packet") +__naked void meta_access_test3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r0 = r2; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test4") +__failure __msg("invalid access to packet") +__naked void meta_access_test4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
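+ /* The range check below validates [data, data + 8) against data_end, but the byte load goes through the still-unchecked data_meta pointer in r2, which is why this test expects a rejection. */ \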
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r4 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r4; \ + r0 += 8; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test5") +__failure __msg("R3 !read_ok") +__naked void meta_access_test5(void) +{ + asm volatile (" \ + r3 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r4 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r3; \ + r0 += 8; \ + if r0 > r4 goto l0_%=; \ + r2 = -8; \ + call %[bpf_xdp_adjust_meta]; \ + r0 = *(u8*)(r3 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_xdp_adjust_meta), + __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test6") +__failure __msg("invalid access to packet") +__naked void meta_access_test6(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r3; \ + r0 += 8; \ + r4 = r2; \ + r4 += 8; \ + if r4 > r0 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test7") +__success __retval(0) +__naked void meta_access_test7(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r0 = r3; \ + r0 += 8; \ + r4 = r2; \ + r4 += 8; \ + if r4 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test8") +__success __retval(0) +__naked void meta_access_test8(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r4 = r2; \ + r4 += 0xFFFF; \ + if r4 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test9") +__failure __msg("invalid access to packet") +__naked void meta_access_test9(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r4 = r2; \ + r4 += 0xFFFF; \ + r4 += 1; \ + if r4 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test10") +__failure __msg("invalid access to packet") +__naked void meta_access_test10(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r4 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r5 = 42; \ + r6 = 24; \ + *(u64*)(r10 - 8) = r5; \ + lock *(u64 *)(r10 - 8) += r6; \ + r5 = *(u64*)(r10 - 8); \ + if r5 > 100 goto l0_%=; \ + r3 += r5; \ + r5 = r3; \ + r6 = r2; \ + r6 += 8; \ + if r6 > r5 goto l0_%=; \ + r2 = *(u8*)(r2 + 0); \ 
+l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test11") +__success __retval(0) +__naked void meta_access_test11(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r5 = 42; \ + r6 = 24; \ + *(u64*)(r10 - 8) = r5; \ + lock *(u64 *)(r10 - 8) += r6; \ + r5 = *(u64*)(r10 - 8); \ + if r5 > 100 goto l0_%=; \ + r2 += r5; \ + r5 = r2; \ + r6 = r2; \ + r6 += 8; \ + if r6 > r3 goto l0_%=; \ + r5 = *(u8*)(r5 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("meta access, test12") +__success __retval(0) +__naked void meta_access_test12(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r4 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r5 = r3; \ + r5 += 16; \ + if r5 > r4 goto l0_%=; \ + r0 = *(u8*)(r3 + 0); \ + r5 = r2; \ + r5 += 16; \ + if r5 > r3 goto l0_%=; \ + r0 = *(u8*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c deleted file mode 100644 index b45e8af41420..000000000000 --- a/tools/testing/selftests/bpf/verifier/meta_access.c +++ /dev/null @@ -1,235 +0,0 @@ -{ - "meta access, test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet, off=-8", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test4", - .insns = { - 
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test5", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_meta), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R3 !read_ok", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test7", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test8", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test9", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test10", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, 
BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_IMM(BPF_REG_5, 42), - BPF_MOV64_IMM(BPF_REG_6, 24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), - BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test11", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_IMM(BPF_REG_5, 42), - BPF_MOV64_IMM(BPF_REG_6, 24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "meta access, test12", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, -}, From 5a77a01f3320562db51dfdd24b2921768637daf2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:12 +0200 Subject: [PATCH 136/260] selftests/bpf: verifier/raw_stack.c converted to inline assembly Test verifier/raw_stack.c automatically converted to use inline assembly. 
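As a reading aid: the accepted cases here pass a raw, possibly uninitialized stack buffer to bpf_skb_load_bytes(), which in C corresponds roughly to the following (an illustrative sketch only; the function name is made up, and <linux/bpf.h> plus <bpf/bpf_helpers.h> are assumed):

    SEC("tc")
    int raw_stack_sketch(struct __sk_buff *skb)
    {
            __u64 buf;      /* no init required: the helper writes it */

            if (bpf_skb_load_bytes(skb, 4, &buf, sizeof(buf)) < 0)
                    return 0;
            return buf != 0;
    }

The rejected cases shrink, misplace, or leave unbounded the buffer/length pair until the verifier can no longer prove the helper's write stays inside the stack.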
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-32-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_raw_stack.c | 371 ++++++++++++++++++ .../selftests/bpf/verifier/raw_stack.c | 305 -------------- 3 files changed, 373 insertions(+), 305 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_raw_stack.c delete mode 100644 tools/testing/selftests/bpf/verifier/raw_stack.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index bd48a584a356..4a73cac3f9ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -28,6 +28,7 @@ #include "verifier_map_ret_val.skel.h" #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" +#include "verifier_raw_stack.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -78,3 +79,4 @@ void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } +void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c new file mode 100644 index 000000000000..efbfc3a4ad6a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/raw_stack.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("tc") +__description("raw_stack: no skb_load_bytes") +__failure __msg("invalid read from stack R6 off=-8 size=8") +__naked void stack_no_skb_load_bytes(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = r6; \ + r4 = 8; \ + /* Call to skb_load_bytes() omitted.
*/ \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, negative len") +__failure __msg("R4 min value is negative") +__naked void skb_load_bytes_negative_len(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = r6; \ + r4 = -8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, negative len 2") +__failure __msg("R4 min value is negative") +__naked void load_bytes_negative_len_2(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = r6; \ + r4 = %[__imm_0]; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__imm_0, ~0) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, zero len") +__failure __msg("invalid zero-sized read") +__naked void skb_load_bytes_zero_len(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = r6; \ + r4 = 0; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, no init") +__success __retval(0) +__naked void skb_load_bytes_no_init(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, init") +__success __retval(0) +__naked void stack_skb_load_bytes_init(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + r3 = 0xcafe; \ + *(u64*)(r6 + 0) = r3; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, spilled regs around bounds") +__success __retval(0) +__naked void bytes_spilled_regs_around_bounds(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -16; \ + *(u64*)(r6 - 8) = r1; \ + *(u64*)(r6 + 8) = r1; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 - 8); \ + r2 = *(u64*)(r6 + 8); \ + r0 = *(u32*)(r0 + %[__sk_buff_mark]); \ + r2 = *(u32*)(r2 + %[__sk_buff_priority]); \ + r0 += r2; \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)), + __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, spilled regs corruption") +__failure __msg("R0 invalid mem access 'scalar'") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void load_bytes_spilled_regs_corruption(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -8; \ + *(u64*)(r6 + 0) = r1; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + r0 = *(u32*)(r0 + %[__sk_buff_mark]); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, spilled regs corruption 2") +__failure __msg("R3 invalid mem access 'scalar'") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void bytes_spilled_regs_corruption_2(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -16; \ + *(u64*)(r6 - 8) = r1; \ + *(u64*)(r6 + 0) = r1; \ 
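+ /* This middle slot overlaps the 8-byte buffer that bpf_skb_load_bytes() fills below, so the spilled pointer is clobbered and the later load into r3 produces a plain scalar. */ \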
+ *(u64*)(r6 + 8) = r1; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 - 8); \ + r2 = *(u64*)(r6 + 8); \ + r3 = *(u64*)(r6 + 0); \ + r0 = *(u32*)(r0 + %[__sk_buff_mark]); \ + r2 = *(u32*)(r2 + %[__sk_buff_priority]); \ + r0 += r2; \ + r3 = *(u32*)(r3 + %[__sk_buff_pkt_type]); \ + r0 += r3; \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)), + __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)), + __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, spilled regs + data") +__success __retval(0) +__naked void load_bytes_spilled_regs_data(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -16; \ + *(u64*)(r6 - 8) = r1; \ + *(u64*)(r6 + 0) = r1; \ + *(u64*)(r6 + 8) = r1; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 - 8); \ + r2 = *(u64*)(r6 + 8); \ + r3 = *(u64*)(r6 + 0); \ + r0 = *(u32*)(r0 + %[__sk_buff_mark]); \ + r2 = *(u32*)(r2 + %[__sk_buff_priority]); \ + r0 += r2; \ + r0 += r3; \ + exit; \ +" : + : __imm(bpf_skb_load_bytes), + __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)), + __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 1") +__failure __msg("invalid indirect access to stack R3 off=-513 size=8") +__naked void load_bytes_invalid_access_1(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -513; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 2") +__failure __msg("invalid indirect access to stack R3 off=-1 size=8") +__naked void load_bytes_invalid_access_2(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -1; \ + r3 = r6; \ + r4 = 8; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 3") +__failure __msg("R4 min value is negative") +__naked void load_bytes_invalid_access_3(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += 0xffffffff; \ + r3 = r6; \ + r4 = 0xffffffff; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 4") +__failure +__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'") +__naked void load_bytes_invalid_access_4(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -1; \ + r3 = r6; \ + r4 = 0x7fffffff; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 5") +__failure +__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'") +__naked void load_bytes_invalid_access_5(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -512; \ + r3 = r6; \ + r4 = 0x7fffffff; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, invalid access 6") +__failure __msg("invalid zero-sized 
read") +__naked void load_bytes_invalid_access_6(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -512; \ + r3 = r6; \ + r4 = 0; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +SEC("tc") +__description("raw_stack: skb_load_bytes, large access") +__success __retval(0) +__naked void skb_load_bytes_large_access(void) +{ + asm volatile (" \ + r2 = 4; \ + r6 = r10; \ + r6 += -512; \ + r3 = r6; \ + r4 = 512; \ + call %[bpf_skb_load_bytes]; \ + r0 = *(u64*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_skb_load_bytes) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c deleted file mode 100644 index eb5ed936580b..000000000000 --- a/tools/testing/selftests/bpf/verifier/raw_stack.c +++ /dev/null @@ -1,305 +0,0 @@ -{ - "raw_stack: no skb_load_bytes", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - /* Call to skb_load_bytes() omitted. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack R6 off=-8 size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, negative len", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, negative len 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, ~0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, zero len", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid zero-sized read", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, no init", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, init", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_ST_MEM(BPF_DW, 
BPF_REG_6, 0, 0xcafe), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, spilled regs around bounds", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, spilled regs corruption", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 invalid mem access 'scalar'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "raw_stack: skb_load_bytes, spilled regs corruption 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3, - offsetof(struct __sk_buff, pkt_type)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R3 invalid mem access 'scalar'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "raw_stack: skb_load_bytes, spilled regs + data", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - 
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid indirect access to stack R3 off=-513 size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid indirect access to stack R3 off=-1 size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, invalid access 6", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid zero-sized read", - .prog_type 
= BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "raw_stack: skb_load_bytes, large access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 512), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, From 18cdc2b531fbe13450c51132a5b511c670c77585 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:13 +0200 Subject: [PATCH 137/260] selftests/bpf: verifier/raw_tp_writable.c converted to inline assembly Test verifier/raw_tp_writable.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-33-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_raw_tp_writable.c | 50 +++++++++++++++++++ .../selftests/bpf/verifier/raw_tp_writable.c | 35 ------------- 3 files changed, 52 insertions(+), 35 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c delete mode 100644 tools/testing/selftests/bpf/verifier/raw_tp_writable.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 4a73cac3f9ba..f7488904f26e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -29,6 +29,7 @@ #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" #include "verifier_raw_stack.skel.h" +#include "verifier_raw_tp_writable.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -80,3 +81,4 @@ void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } +void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c new file mode 100644 index 000000000000..14a0172e2141 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/raw_tp_writable.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("raw_tracepoint.w") +__description("raw_tracepoint_writable: reject variable offset") +__failure +__msg("R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void tracepoint_writable_reject_variable_offset(void) +{ + asm volatile (" \ + /* r6 is our tp buffer */ \ + r6 = *(u64*)(r1 + 0); \ + r1 = %[map_hash_8b] ll; \ + /* move the key (== 0) to r10-8 */ \ + w0 = 0; \ + r2 = r10; \ + r2 += -8; \ + *(u64*)(r2 + 0) = r0; \ + /* lookup in the map */ \ + call %[bpf_map_lookup_elem]; \ + /* exit clean if null */ \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: /* shift the buffer pointer to a variable location */\ + r0 = *(u32*)(r0 + 0); \ + r6 += r0; \ + /* 
clobber whatever's there */ \ + r7 = 4242; \ + *(u64*)(r6 + 0) = r7; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c deleted file mode 100644 index 2978fb5a769d..000000000000 --- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c +++ /dev/null @@ -1,35 +0,0 @@ -{ - "raw_tracepoint_writable: reject variable offset", - .insns = { - /* r6 is our tp buffer */ - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - - BPF_LD_MAP_FD(BPF_REG_1, 0), - /* move the key (== 0) to r10-8 */ - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - /* lookup in the map */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - - /* exit clean if null */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - - /* shift the buffer pointer to a variable location */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0), - /* clobber whatever's there */ - BPF_MOV64_IMM(BPF_REG_7, 4242), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0), - - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1, }, - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, - .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, From b7e4203086eb4d85aa7bccd7f33c2835b34778d6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:14 +0200 Subject: [PATCH 138/260] selftests/bpf: verifier/ringbuf.c converted to inline assembly Test verifier/ringbuf.c automatically converted to use inline assembly. 
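For readers comparing the two encodings: each old struct bpf_test entry becomes a standalone __naked function whose expectations are carried by the bpf_misc.h annotations (__description, __success/__failure, __msg, __retval) instead of .result/.errstr fields, and whose body is BPF inline assembly. A minimal sketch of the shape every converted test follows (the function name here is illustrative only, not taken from the conversion):

	SEC("socket")
	__description("original test name")
	__success __retval(0)
	__naked void example_converted_test(void)
	{
		asm volatile ("r0 = 0; exit;" ::: __clobber_all);
	}

Helper calls and maps are bound into the template as named operands via __imm(), __imm_addr() and __imm_const() rather than being patched in by the old fixup machinery.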
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-34-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_ringbuf.c | 131 ++++++++++++++++++ .../testing/selftests/bpf/verifier/ringbuf.c | 95 ------------- 3 files changed, 133 insertions(+), 95 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_ringbuf.c delete mode 100644 tools/testing/selftests/bpf/verifier/ringbuf.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index f7488904f26e..df5fc6fe1647 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -30,6 +30,7 @@ #include "verifier_meta_access.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" +#include "verifier_ringbuf.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -82,3 +83,4 @@ void test_verifier_masking(void) { RUN(verifier_masking); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } +void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } diff --git a/tools/testing/selftests/bpf/progs/verifier_ringbuf.c b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c new file mode 100644 index 000000000000..ae1d521f326c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/ringbuf.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); +} map_ringbuf SEC(".maps"); + +SEC("socket") +__description("ringbuf: invalid reservation offset 1") +__failure __msg("R1 must have zero offset when passed to release func") +__failure_unpriv +__naked void ringbuf_invalid_reservation_offset_1(void) +{ + asm volatile (" \ + /* reserve 8 byte ringbuf memory */ \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r1 = %[map_ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_ringbuf_reserve]; \ + /* store a pointer to the reserved memory in R6 */\ + r6 = r0; \ + /* check whether the reservation was successful */\ + if r0 == 0 goto l0_%=; \ + /* spill R6(mem) into the stack */ \ + *(u64*)(r10 - 8) = r6; \ + /* fill it back in R7 */ \ + r7 = *(u64*)(r10 - 8); \ + /* should be able to access *(R7) = 0 */ \ + r1 = 0; \ + *(u64*)(r7 + 0) = r1; \ + /* submit the reserved ringbuf memory */ \ + r1 = r7; \ + /* add invalid offset to reserved ringbuf memory */\ + r1 += 0xcafe; \ + r2 = 0; \ + call %[bpf_ringbuf_submit]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ringbuf_reserve), + __imm(bpf_ringbuf_submit), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("socket") +__description("ringbuf: invalid reservation offset 2") +__failure __msg("R7 min value is outside of the allowed memory range") +__failure_unpriv +__naked void ringbuf_invalid_reservation_offset_2(void) +{ + asm volatile (" \ + /* reserve 8 byte ringbuf memory */ \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r1 = %[map_ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_ringbuf_reserve]; \ + /* store a pointer to the reserved memory in R6 */\ + r6 = r0; \ + /* check whether the reservation was successful */\ + if r0 == 0 goto 
l0_%=; \ + /* spill R6(mem) into the stack */ \ + *(u64*)(r10 - 8) = r6; \ + /* fill it back in R7 */ \ + r7 = *(u64*)(r10 - 8); \ + /* add invalid offset to reserved ringbuf memory */\ + r7 += 0xcafe; \ + /* should be able to access *(R7) = 0 */ \ + r1 = 0; \ + *(u64*)(r7 + 0) = r1; \ + /* submit the reserved ringbuf memory */ \ + r1 = r7; \ + r2 = 0; \ + call %[bpf_ringbuf_submit]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ringbuf_reserve), + __imm(bpf_ringbuf_submit), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("xdp") +__description("ringbuf: check passing rb mem to helpers") +__success __retval(0) +__naked void passing_rb_mem_to_helpers(void) +{ + asm volatile (" \ + r6 = r1; \ + /* reserve 8 byte ringbuf memory */ \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r1 = %[map_ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_ringbuf_reserve]; \ + r7 = r0; \ + /* check whether the reservation was successful */\ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: /* pass allocated ring buffer memory to fib lookup */\ + r1 = r6; \ + r2 = r0; \ + r3 = 8; \ + r4 = 0; \ + call %[bpf_fib_lookup]; \ + /* submit the ringbuf memory */ \ + r1 = r7; \ + r2 = 0; \ + call %[bpf_ringbuf_submit]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_fib_lookup), + __imm(bpf_ringbuf_reserve), + __imm(bpf_ringbuf_submit), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c deleted file mode 100644 index 92e3f6a61a79..000000000000 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ /dev/null @@ -1,95 +0,0 @@ -{ - "ringbuf: invalid reservation offset 1", - .insns = { - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* store a pointer to the reserved memory in R6 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - /* spill R6(mem) into the stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - /* fill it back in R7 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), - /* should be able to access *(R7) = 0 */ - BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), - /* submit the reserved ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - /* add invalid offset to reserved ringbuf memory */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 1 }, - .result = REJECT, - .errstr = "R1 must have zero offset when passed to release func", -}, -{ - "ringbuf: invalid reservation offset 2", - .insns = { - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* store a pointer to the reserved memory in R6 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - /* spill R6(mem) into the stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - /* fill it back in R7 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), - /* add invalid offset to reserved 
ringbuf memory */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe), - /* should be able to access *(R7) = 0 */ - BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), - /* submit the reserved ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 1 }, - .result = REJECT, - .errstr = "R7 min value is outside of the allowed memory range", -}, -{ - "ringbuf: check passing rb mem to helpers", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - /* pass allocated ring buffer memory to fib lookup */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 8), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup), - /* submit the ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 2 }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = ACCEPT, -}, From f4fe3cfe6c3ac60a6ec086a291b1c0f59e7daaa2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:15 +0200 Subject: [PATCH 139/260] selftests/bpf: verifier/spill_fill.c converted to inline assembly Test verifier/spill_fill.c automatically converted to use inline assembly. 
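One detail that keeps the converted templates readable: compile-time constants such as __sk_buff field offsets are not hard-coded into the assembly but passed as named immediates with __imm_const(). A rough sketch of the idiom (snippet for illustration, not copied verbatim from the converted file):

	asm volatile ("                                 \
	r0 = *(u32*)(r1 + %[__sk_buff_mark]);           \
	exit;                                           \
"	:
	: __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
	: __clobber_all);

This way the same template survives structure layout changes and reads close to the verifier's own log output.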
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-35-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_spill_fill.c | 374 ++++++++++++++++++ .../selftests/bpf/verifier/spill_fill.c | 345 ---------------- 3 files changed, 376 insertions(+), 345 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_spill_fill.c delete mode 100644 tools/testing/selftests/bpf/verifier/spill_fill.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index df5fc6fe1647..e2b131d2ba94 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" #include "verifier_ringbuf.skel.h" +#include "verifier_spill_fill.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -84,3 +85,4 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } +void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c new file mode 100644 index 000000000000..136e5530b72c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/spill_fill.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); +} map_ringbuf SEC(".maps"); + +SEC("socket") +__description("check valid spill/fill") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(POINTER_VALUE) +__naked void check_valid_spill_fill(void) +{ + asm volatile (" \ + /* spill R1(ctx) into stack */ \ + *(u64*)(r10 - 8) = r1; \ + /* fill it back into R2 */ \ + r2 = *(u64*)(r10 - 8); \ + /* should be able to access R0 = *(R2 + 8) */ \ + /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */\ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check valid spill/fill, skb mark") +__success __success_unpriv __retval(0) +__naked void valid_spill_fill_skb_mark(void) +{ + asm volatile (" \ + r6 = r1; \ + *(u64*)(r10 - 8) = r6; \ + r0 = *(u64*)(r10 - 8); \ + r0 = *(u32*)(r0 + %[__sk_buff_mark]); \ + exit; \ +" : + : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)) + : __clobber_all); +} + +SEC("socket") +__description("check valid spill/fill, ptr to mem") +__success __success_unpriv __retval(0) +__naked void spill_fill_ptr_to_mem(void) +{ + asm volatile (" \ + /* reserve 8 byte ringbuf memory */ \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r1 = %[map_ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_ringbuf_reserve]; \ + /* store a pointer to the reserved memory in R6 */\ + r6 = r0; \ + /* check whether the reservation was successful */\ + if r0 == 0 goto l0_%=; \ + /* spill R6(mem) into the stack */ \ + *(u64*)(r10 - 8) = r6; \ + /* fill it back in R7 */ \ + r7 = *(u64*)(r10 - 8); \ + /* should be able to access *(R7) = 0 */ \ + r1 = 0; \ + *(u64*)(r7 + 0) = r1; \ + /* submit the reserved ringbuf memory 
*/ \ + r1 = r7; \ + r2 = 0; \ + call %[bpf_ringbuf_submit]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ringbuf_reserve), + __imm(bpf_ringbuf_submit), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("socket") +__description("check with invalid reg offset 0") +__failure __msg("R0 pointer arithmetic on ringbuf_mem_or_null prohibited") +__failure_unpriv +__naked void with_invalid_reg_offset_0(void) +{ + asm volatile (" \ + /* reserve 8 byte ringbuf memory */ \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r1 = %[map_ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + call %[bpf_ringbuf_reserve]; \ + /* store a pointer to the reserved memory in R6 */\ + r6 = r0; \ + /* add invalid offset to memory or NULL */ \ + r0 += 1; \ + /* check whether the reservation was successful */\ + if r0 == 0 goto l0_%=; \ + /* should not be able to access *(R7) = 0 */ \ + r1 = 0; \ + *(u32*)(r6 + 0) = r1; \ + /* submit the reserved ringbuf memory */ \ + r1 = r6; \ + r2 = 0; \ + call %[bpf_ringbuf_submit]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ringbuf_reserve), + __imm(bpf_ringbuf_submit), + __imm_addr(map_ringbuf) + : __clobber_all); +} + +SEC("socket") +__description("check corrupted spill/fill") +__failure __msg("R0 invalid mem access 'scalar'") +__msg_unpriv("attempt to corrupt spilled") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void check_corrupted_spill_fill(void) +{ + asm volatile (" \ + /* spill R1(ctx) into stack */ \ + *(u64*)(r10 - 8) = r1; \ + /* mess up with R1 pointer on stack */ \ + r0 = 0x23; \ + *(u8*)(r10 - 7) = r0; \ + /* fill back into R0 is fine for priv. \ + * R0 now becomes SCALAR_VALUE. \ + */ \ + r0 = *(u64*)(r10 - 8); \ + /* Load from R0 should fail. */ \ + r0 = *(u64*)(r0 + 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check corrupted spill/fill, LSB") +__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled") +__retval(POINTER_VALUE) +__naked void check_corrupted_spill_fill_lsb(void) +{ + asm volatile (" \ + *(u64*)(r10 - 8) = r1; \ + r0 = 0xcafe; \ + *(u16*)(r10 - 8) = r0; \ + r0 = *(u64*)(r10 - 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("check corrupted spill/fill, MSB") +__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled") +__retval(POINTER_VALUE) +__naked void check_corrupted_spill_fill_msb(void) +{ + asm volatile (" \ + *(u64*)(r10 - 8) = r1; \ + r0 = 0x12345678; \ + *(u32*)(r10 - 4) = r0; \ + r0 = *(u64*)(r10 - 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("Spill and refill a u32 const scalar. 
Offset to skb->data") +__success __retval(0) +__naked void scalar_offset_to_skb_data_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + w4 = 20; \ + *(u32*)(r10 - 8) = r4; \ + r4 = *(u32*)(r10 - 8); \ + r0 = r2; \ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */ \ + r0 += r4; \ + /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\ + if r0 > r3 goto l0_%=; \ + /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */\ + r0 = *(u32*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("socket") +__description("Spill a u32 const, refill from another half of the uninit u32 from the stack") +/* in privileged mode reads from uninitialized stack locations are permitted */ +__success __failure_unpriv +__msg_unpriv("invalid read from stack off -4+0 size 4") +__retval(0) +__naked void uninit_u32_from_the_stack(void) +{ + asm volatile (" \ + w4 = 20; \ + *(u32*)(r10 - 8) = r4; \ + /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ \ + r4 = *(u32*)(r10 - 4); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data") +__failure __msg("invalid access to packet") +__naked void u16_offset_to_skb_data(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + w4 = 20; \ + *(u32*)(r10 - 8) = r4; \ + r4 = *(u16*)(r10 - 8); \ + r0 = r2; \ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + r0 += r4; \ + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ + if r0 > r3 goto l0_%=; \ + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ + r0 = *(u32*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("Spill u32 const scalars. Refill as u64. Offset to skb->data") +__failure __msg("invalid access to packet") +__naked void u64_offset_to_skb_data(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + w6 = 0; \ + w7 = 20; \ + *(u32*)(r10 - 4) = r6; \ + *(u32*)(r10 - 8) = r7; \ + r4 = *(u16*)(r10 - 8); \ + r0 = r2; \ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + r0 += r4; \ + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ + if r0 > r3 goto l0_%=; \ + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ + r0 = *(u32*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("Spill a u32 const scalar. Refill as u16 from fp-6. 
Offset to skb->data") +__failure __msg("invalid access to packet") +__naked void _6_offset_to_skb_data(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + w4 = 20; \ + *(u32*)(r10 - 8) = r4; \ + r4 = *(u16*)(r10 - 6); \ + r0 = r2; \ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + r0 += r4; \ + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ + if r0 > r3 goto l0_%=; \ + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ + r0 = *(u32*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data") +__failure __msg("invalid access to packet") +__naked void addr_offset_to_skb_data(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + w4 = 20; \ + *(u32*)(r10 - 8) = r4; \ + *(u32*)(r10 - 4) = r4; \ + r4 = *(u32*)(r10 - 4); \ + r0 = r2; \ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */\ + r0 += r4; \ + /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\ + if r0 > r3 goto l0_%=; \ + /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\ + r0 = *(u32*)(r2 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("Spill and refill a umax=40 bounded scalar. Offset to skb->data") +__success __retval(0) +__naked void scalar_offset_to_skb_data_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[__sk_buff_data]); \ + r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ + r4 = *(u64*)(r1 + %[__sk_buff_tstamp]); \ + if r4 <= 40 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: /* *(u32 *)(r10 -8) = r4 R4=umax=40 */ \ + *(u32*)(r10 - 8) = r4; \ + /* r4 = (*u32 *)(r10 - 8) */ \ + r4 = *(u32*)(r10 - 8); \ + /* r2 += r4 R2=pkt R4=umax=40 */ \ + r2 += r4; \ + /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */ \ + r0 = r2; \ + /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ \ + r2 += 20; \ + /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */\ + if r2 > r3 goto l1_%=; \ + /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */\ + r0 = *(u32*)(r0 + 0); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)), + __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp)) + : __clobber_all); +} + +SEC("tc") +__description("Spill a u32 scalar at fp-4 and then at fp-8") +__success __retval(0) +__naked void and_then_at_fp_8(void) +{ + asm volatile (" \ + w4 = 4321; \ + *(u32*)(r10 - 4) = r4; \ + *(u32*)(r10 - 8) = r4; \ + r4 = *(u64*)(r10 - 8); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c deleted file mode 100644 index d1463bf4949a..000000000000 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ /dev/null @@ -1,345 +0,0 @@ -{ - "check valid spill/fill", - .insns = { - /* spill R1(ctx) into stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* fill it back into R2 */ - BPF_LDX_MEM(BPF_DW, 
BPF_REG_2, BPF_REG_10, -8), - /* should be able to access R0 = *(R2 + 8) */ - /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .retval = POINTER_VALUE, -}, -{ - "check valid spill/fill, skb mark", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = ACCEPT, -}, -{ - "check valid spill/fill, ptr to mem", - .insns = { - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* store a pointer to the reserved memory in R6 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - /* spill R6(mem) into the stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - /* fill it back in R7 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), - /* should be able to access *(R7) = 0 */ - BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), - /* submit the reserved ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 1 }, - .result = ACCEPT, - .result_unpriv = ACCEPT, -}, -{ - "check with invalid reg offset 0", - .insns = { - /* reserve 8 byte ringbuf memory */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* store a pointer to the reserved memory in R6 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* add invalid offset to memory or NULL */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - /* check whether the reservation was successful */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - /* should not be able to access *(R7) = 0 */ - BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0), - /* submit the reserved ringbuf memory */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 1 }, - .result = REJECT, - .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited", -}, -{ - "check corrupted spill/fill", - .insns = { - /* spill R1(ctx) into stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* mess up with R1 pointer on stack */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 is fine for priv. - * R0 now becomes SCALAR_VALUE. - */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - /* Load from R0 should fail. 
*/ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .errstr = "R0 invalid mem access 'scalar'", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "check corrupted spill/fill, LSB", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = POINTER_VALUE, -}, -{ - "check corrupted spill/fill, MSB", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = POINTER_VALUE, -}, -{ - "Spill and refill a u32 const scalar. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u32 *)(r10 -8) */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 const, refill from another half of the uninit u32 from the stack", - .insns = { - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid read from stack off -4+0 size 4", - /* in privileged mode reads from uninitialized stack locations are permitted */ - .result = ACCEPT, -}, -{ - "Spill a u32 const scalar. Refill as u16. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u16 *)(r10 -8) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill u32 const scalars. Refill as u64. 
Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r6 = 0 */ - BPF_MOV32_IMM(BPF_REG_6, 0), - /* r7 = 20 */ - BPF_MOV32_IMM(BPF_REG_7, 20), - /* *(u32 *)(r10 -4) = r6 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), - /* *(u32 *)(r10 -8) = r7 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), - /* r4 = *(u64 *)(r10 -8) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u16 *)(r10 -6) */ - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - /* r4 = 20 */ - BPF_MOV32_IMM(BPF_REG_4, 20), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* *(u32 *)(r10 -4) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), - /* r4 = *(u32 *)(r10 -4), */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), - /* r0 = r2 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */ - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill and refill a umax=40 bounded scalar. 
Offset to skb->data", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, tstamp)), - BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* *(u32 *)(r10 -8) = r4 R4=umax=40 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = (*u32 *)(r10 - 8) */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), - /* r2 += r4 R2=pkt R4=umax=40 */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4), - /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20), - /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */ - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 1), - /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "Spill a u32 scalar at fp-4 and then at fp-8", - .insns = { - /* r4 = 4321 */ - BPF_MOV32_IMM(BPF_REG_4, 4321), - /* *(u32 *)(r10 -4) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), - /* *(u32 *)(r10 -8) = r4 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), - /* r4 = *(u64 *)(r10 -8) */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, From edff37b2f28f5c24f628981d0f26ca2fcd7e2ad5 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:16 +0200 Subject: [PATCH 140/260] selftests/bpf: verifier/stack_ptr.c converted to inline assembly Test verifier/stack_ptr.c automatically converted to use inline assembly. 
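Worth noting for this conversion in particular: the old .fixup_map_array_48b = { 3 } patching disappears, because the converted program declares the map itself and loads its address with a 64-bit immediate. The correspondence is roughly as follows (new form sketched here for illustration; the old form is visible in the removed hunk below):

	r2 = r10;
	r2 += -8;                       /* key at fp-8 */
	r1 = %[map_array_48b] ll;       /* was BPF_LD_MAP_FD(BPF_REG_1, 0) + fixup */
	call %[bpf_map_lookup_elem];

with map_array_48b defined as a BPF_MAP_TYPE_ARRAY in the program's .maps section and bound through __imm_addr()/__imm().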
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-36-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_stack_ptr.c | 484 ++++++++++++++++++ .../selftests/bpf/verifier/stack_ptr.c | 359 ------------- 3 files changed, 486 insertions(+), 359 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_stack_ptr.c delete mode 100644 tools/testing/selftests/bpf/verifier/stack_ptr.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e2b131d2ba94..ce1ca8c0c02e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -32,6 +32,7 @@ #include "verifier_raw_tp_writable.skel.h" #include "verifier_ringbuf.skel.h" #include "verifier_spill_fill.skel.h" +#include "verifier_stack_ptr.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -86,3 +87,4 @@ void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } +void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c new file mode 100644 index 000000000000..e0f77e3e7869 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/stack_ptr.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <limits.h> +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct test_val); +} map_array_48b SEC(".maps"); + +SEC("socket") +__description("PTR_TO_STACK store/load") +__success __success_unpriv __retval(0xfaceb00c) +__naked void ptr_to_stack_store_load(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -10; \ + r0 = 0xfaceb00c; \ + *(u64*)(r1 + 2) = r0; \ + r0 = *(u64*)(r1 + 2); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK store/load - bad alignment on off") +__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8") +__failure_unpriv +__naked void load_bad_alignment_on_off(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -8; \ + r0 = 0xfaceb00c; \ + *(u64*)(r1 + 2) = r0; \ + r0 = *(u64*)(r1 + 2); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK store/load - bad alignment on reg") +__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8") +__failure_unpriv +__naked void load_bad_alignment_on_reg(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -10; \ + r0 = 0xfaceb00c; \ + *(u64*)(r1 + 8) = r0; \ + r0 = *(u64*)(r1 + 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK store/load - out of bounds low") +__failure __msg("invalid write to stack R1 off=-79992 size=8") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void load_out_of_bounds_low(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -80000; \ + r0 = 0xfaceb00c; \ + *(u64*)(r1 + 8) = r0; \ + r0 = *(u64*)(r1 + 8); \ + exit; \ +" 
::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK store/load - out of bounds high") +__failure __msg("invalid write to stack R1 off=0 size=8") +__failure_unpriv +__naked void load_out_of_bounds_high(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -8; \ + r0 = 0xfaceb00c; \ + *(u64*)(r1 + 8) = r0; \ + r0 = *(u64*)(r1 + 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 1") +__success __success_unpriv __retval(42) +__naked void to_stack_check_high_1(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -1; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 2") +__success __success_unpriv __retval(42) +__naked void to_stack_check_high_2(void) +{ + asm volatile (" \ + r1 = r10; \ + r0 = 42; \ + *(u8*)(r1 - 1) = r0; \ + r0 = *(u8*)(r1 - 1); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 3") +__success __failure_unpriv +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__retval(42) +__naked void to_stack_check_high_3(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += 0; \ + r0 = 42; \ + *(u8*)(r1 - 1) = r0; \ + r0 = *(u8*)(r1 - 1); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 4") +__failure __msg("invalid write to stack R1 off=0 size=1") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_high_4(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += 0; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 5") +__failure __msg("invalid write to stack R1") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_high_5(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" : + : __imm_const(__imm_0, (1 << 29) - 1) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 6") +__failure __msg("invalid write to stack") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_high_6(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 + %[shrt_max]) = r0; \ + r0 = *(u8*)(r1 + %[shrt_max]); \ + exit; \ +" : + : __imm_const(__imm_0, (1 << 29) - 1), + __imm_const(shrt_max, SHRT_MAX) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check high 7") +__failure __msg("fp pointer offset") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_high_7(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 + %[shrt_max]) = r0; \ + r0 = *(u8*)(r1 + %[shrt_max]); \ + exit; \ +" : + : __imm_const(__imm_0, (1 << 29) - 1), + __imm_const(shrt_max, SHRT_MAX) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 1") +__success __success_unpriv __retval(42) +__naked void to_stack_check_low_1(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -512; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 2") +__success __failure_unpriv +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__retval(42) +__naked void to_stack_check_low_2(void) +{ + asm 
volatile (" \ + r1 = r10; \ + r1 += -513; \ + r0 = 42; \ + *(u8*)(r1 + 1) = r0; \ + r0 = *(u8*)(r1 + 1); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 3") +__failure __msg("invalid write to stack R1 off=-513 size=1") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_low_3(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -513; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 4") +__failure __msg("math between fp pointer") +__failure_unpriv +__naked void to_stack_check_low_4(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[int_min]; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 5") +__failure __msg("invalid write to stack") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_low_5(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" : + : __imm_const(__imm_0, -((1 << 29) - 1)) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 6") +__failure __msg("invalid write to stack") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_low_6(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 %[shrt_min]) = r0; \ + r0 = *(u8*)(r1 %[shrt_min]); \ + exit; \ +" : + : __imm_const(__imm_0, -((1 << 29) - 1)), + __imm_const(shrt_min, SHRT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK check low 7") +__failure __msg("fp pointer offset") +__msg_unpriv("R1 stack pointer arithmetic goes out of range") +__naked void to_stack_check_low_7(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += %[__imm_0]; \ + r1 += %[__imm_0]; \ + r0 = 42; \ + *(u8*)(r1 %[shrt_min]) = r0; \ + r0 = *(u8*)(r1 %[shrt_min]); \ + exit; \ +" : + : __imm_const(__imm_0, -((1 << 29) - 1)), + __imm_const(shrt_min, SHRT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK mixed reg/k, 1") +__success __success_unpriv __retval(42) +__naked void stack_mixed_reg_k_1(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -3; \ + r2 = -3; \ + r1 += r2; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK mixed reg/k, 2") +__success __success_unpriv __retval(42) +__naked void stack_mixed_reg_k_2(void) +{ + asm volatile (" \ + r0 = 0; \ + *(u64*)(r10 - 8) = r0; \ + r0 = 0; \ + *(u64*)(r10 - 16) = r0; \ + r1 = r10; \ + r1 += -3; \ + r2 = -3; \ + r1 += r2; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r5 = r10; \ + r0 = *(u8*)(r5 - 6); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK mixed reg/k, 3") +__success __success_unpriv __retval(-3) +__naked void stack_mixed_reg_k_3(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -3; \ + r2 = -3; \ + r1 += r2; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK reg") +__success __success_unpriv __retval(42) +__naked void ptr_to_stack_reg(void) +{ + asm volatile (" \ + r1 = r10; \ + r2 = -3; \ + r1 += r2; \ + r0 = 42; \ + *(u8*)(r1 + 0) = r0; \ + r0 = *(u8*)(r1 + 0); \ + exit; \ +" ::: 
__clobber_all); +} + +SEC("socket") +__description("stack pointer arithmetic") +__success __success_unpriv __retval(0) +__naked void stack_pointer_arithmetic(void) +{ + asm volatile (" \ + r1 = 4; \ + goto l0_%=; \ +l0_%=: r7 = r10; \ + r7 += -10; \ + r7 += -10; \ + r2 = r7; \ + r2 += r1; \ + r0 = 0; \ + *(u32*)(r2 + 4) = r0; \ + r2 = r7; \ + r2 += 8; \ + r0 = 0; \ + *(u32*)(r2 + 4) = r0; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("store PTR_TO_STACK in R10 to array map using BPF_B") +__success __retval(42) +__naked void array_map_using_bpf_b(void) +{ + asm volatile (" \ + /* Load pointer to map. */ \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_array_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + r0 = 2; \ + exit; \ +l0_%=: r1 = r0; \ + /* Copy R10 to R9. */ \ + r9 = r10; \ + /* Pollute other registers with unaligned values. */\ + r2 = -1; \ + r3 = -1; \ + r4 = -1; \ + r5 = -1; \ + r6 = -1; \ + r7 = -1; \ + r8 = -1; \ + /* Store both R9 and R10 with BPF_B and read back. */\ + *(u8*)(r1 + 0) = r10; \ + r2 = *(u8*)(r1 + 0); \ + *(u8*)(r1 + 0) = r9; \ + r3 = *(u8*)(r1 + 0); \ + /* Should read back as same value. */ \ + if r2 == r3 goto l1_%=; \ + r0 = 1; \ + exit; \ +l1_%=: r0 = 42; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array_48b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c deleted file mode 100644 index 8ab94d65f3d5..000000000000 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ /dev/null @@ -1,359 +0,0 @@ -{ - "PTR_TO_STACK store/load", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xfaceb00c, -}, -{ - "PTR_TO_STACK store/load - bad alignment on off", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", -}, -{ - "PTR_TO_STACK store/load - bad alignment on reg", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", -}, -{ - "PTR_TO_STACK store/load - out of bounds low", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid write to stack R1 off=-79992 size=8", - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", -}, -{ - "PTR_TO_STACK store/load - out of bounds high", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid write to stack R1 off=0 size=8", -}, -{ - "PTR_TO_STACK check high 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, 
BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK check high 2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK check high 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), - BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK check high 4", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid write to stack R1 off=0 size=1", - .result = REJECT, -}, -{ - "PTR_TO_STACK check high 5", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid write to stack R1", -}, -{ - "PTR_TO_STACK check high 6", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid write to stack", -}, -{ - "PTR_TO_STACK check high 7", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "fp pointer offset", -}, -{ - "PTR_TO_STACK check low 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK check low 2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), - BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK check low 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid write to stack R1 off=-513 size=1", - .result = REJECT, -}, -{ - "PTR_TO_STACK check low 4", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "math between fp pointer", -}, -{ - "PTR_TO_STACK check low 5", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid write to stack", -}, -{ - "PTR_TO_STACK check low 6", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid write to stack", - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", -}, -{ - "PTR_TO_STACK check low 7", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "fp pointer offset", -}, -{ - "PTR_TO_STACK mixed reg/k, 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK mixed reg/k, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "PTR_TO_STACK mixed reg/k, 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = -3, -}, -{ - "PTR_TO_STACK reg", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, -}, -{ - "stack pointer arithmetic", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), - BPF_ST_MEM(0, BPF_REG_2, 4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_ST_MEM(0, BPF_REG_2, 4, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, -}, -{ - "store PTR_TO_STACK in R10 to array map using BPF_B", - 
.insns = { - /* Load pointer to map. */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - /* Copy R10 to R9. */ - BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), - /* Pollute other registers with unaligned values. */ - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_3, -1), - BPF_MOV64_IMM(BPF_REG_4, -1), - BPF_MOV64_IMM(BPF_REG_5, -1), - BPF_MOV64_IMM(BPF_REG_6, -1), - BPF_MOV64_IMM(BPF_REG_7, -1), - BPF_MOV64_IMM(BPF_REG_8, -1), - /* Store both R9 and R10 with BPF_B and read back. */ - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0), - /* Should read back as same value. */ - BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 42, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, From ab839a58194633bd1c5ff42e6a9de87a746a67aa Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:17 +0200 Subject: [PATCH 141/260] selftests/bpf: verifier/uninit.c converted to inline assembly Test verifier/uninit.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-37-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_uninit.c | 61 +++++++++++++++++++ tools/testing/selftests/bpf/verifier/uninit.c | 39 ------------ 3 files changed, 63 insertions(+), 39 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_uninit.c delete mode 100644 tools/testing/selftests/bpf/verifier/uninit.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index ce1ca8c0c02e..c6e69b3827dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -33,6 +33,7 @@ #include "verifier_ringbuf.skel.h" #include "verifier_spill_fill.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_uninit.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -88,3 +89,4 @@ void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_uninit(void) { RUN(verifier_uninit); } diff --git a/tools/testing/selftests/bpf/progs/verifier_uninit.c b/tools/testing/selftests/bpf/progs/verifier_uninit.c new file mode 100644 index 000000000000..7718cd7d19ce --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_uninit.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/uninit.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +SEC("socket") +__description("read uninitialized register") +__failure __msg("R2 !read_ok") +__failure_unpriv +__naked void read_uninitialized_register(void) +{ +
asm volatile (" \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("read invalid register") +__failure __msg("R15 is invalid") +__failure_unpriv +__naked void read_invalid_register(void) +{ + asm volatile (" \ + .8byte %[mov64_reg]; \ + exit; \ +" : + : __imm_insn(mov64_reg, BPF_MOV64_REG(BPF_REG_0, -1)) + : __clobber_all); +} + +SEC("socket") +__description("program doesn't init R0 before exit") +__failure __msg("R0 !read_ok") +__failure_unpriv +__naked void t_init_r0_before_exit(void) +{ + asm volatile (" \ + r2 = r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("program doesn't init R0 before exit in all branches") +__failure __msg("R0 !read_ok") +__msg_unpriv("R1 pointer comparison") +__naked void before_exit_in_all_branches(void) +{ + asm volatile (" \ + if r1 >= 0 goto l0_%=; \ + r0 = 1; \ + r0 += 2; \ +l0_%=: exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/uninit.c b/tools/testing/selftests/bpf/verifier/uninit.c deleted file mode 100644 index 987a5871ff1d..000000000000 --- a/tools/testing/selftests/bpf/verifier/uninit.c +++ /dev/null @@ -1,39 +0,0 @@ -{ - "read uninitialized register", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R2 !read_ok", - .result = REJECT, -}, -{ - "read invalid register", - .insns = { - BPF_MOV64_REG(BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .errstr = "R15 is invalid", - .result = REJECT, -}, -{ - "program doesn't init R0 before exit", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .result = REJECT, -}, -{ - "program doesn't init R0 before exit in all branches", - .insns = { - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, -}, From 033914942da4696dcd9009ba88bc1bba06706549 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:18 +0200 Subject: [PATCH 142/260] selftests/bpf: verifier/value_adj_spill.c converted to inline assembly Test verifier/value_adj_spill.c automatically converted to use inline assembly. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-38-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_value_adj_spill.c | 78 +++++++++++++++++++ .../selftests/bpf/verifier/value_adj_spill.c | 43 ---------- 3 files changed, 80 insertions(+), 43 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c delete mode 100644 tools/testing/selftests/bpf/verifier/value_adj_spill.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c6e69b3827dc..825c8583fecf 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -34,6 +34,7 @@ #include "verifier_spill_fill.skel.h" #include "verifier_stack_ptr.skel.h" #include "verifier_uninit.skel.h" +#include "verifier_value_adj_spill.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -90,3 +91,4 @@ void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_uninit(void) { RUN(verifier_uninit); } +void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); } diff --git a/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c new file mode 100644 index 000000000000..d7a5ba9bbe6a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/value_adj_spill.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +SEC("socket") +__description("map element value is preserved across register spilling") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) +__naked void is_preserved_across_register_spilling(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 42; \ + *(u64*)(r0 + 0) = r1; \ + r1 = r10; \ + r1 += -184; \ + *(u64*)(r1 + 0) = r0; \ + r3 = *(u64*)(r1 + 0); \ + r1 = 42; \ + *(u64*)(r3 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("socket") +__description("map element value or null is marked on register spilling") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) +__naked void is_marked_on_register_spilling(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + r1 = r10; \ + r1 += -152; \ + *(u64*)(r1 + 0) = r0; \ + if r0 == 0 goto l0_%=; \ + r3 = *(u64*)(r1 + 0); \ + r1 = 42; \ + *(u64*)(r3 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/value_adj_spill.c
b/tools/testing/selftests/bpf/verifier/value_adj_spill.c deleted file mode 100644 index 7135e8021b81..000000000000 --- a/tools/testing/selftests/bpf/verifier/value_adj_spill.c +++ /dev/null @@ -1,43 +0,0 @@ -{ - "map element value is preserved across register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, -}, -{ - "map element value or null is marked on register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, -}, From 8f59e87a3bc6a5618f0ed459f1d36838c16bcad7 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:19 +0200 Subject: [PATCH 143/260] selftests/bpf: verifier/value.c converted to inline assembly Test verifier/value.c automatically converted to use inline assembly. 
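One mechanical detail of these conversions worth spelling out: the old entries referenced maps by patching a map fd into a BPF_LD_MAP_FD() instruction through .fixup_map_hash_48b offsets, while the converted tests declare an ordinary BTF-defined map and take its address with an __imm_addr() operand. A rough sketch of the pattern (illustrative only; an 8-byte value map is used for brevity):

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, long long);
	__type(value, long long);
} map_hash_8b SEC(".maps");

SEC("socket")
__description("example: lookup via BTF-defined map")
__success __retval(0)
__naked void example_btf_map_lookup(void)
{
	asm volatile ("				\
	r2 = r10;				\
	r2 += -8;				\
	r1 = 0;					\
	*(u64*)(r2 + 0) = r1;			\
	r1 = %[map_hash_8b] ll;			\
	call %[bpf_map_lookup_elem];		\
	r0 = 0;					\
	exit;					\
"	:
	: __imm(bpf_map_lookup_elem),
	  __imm_addr(map_hash_8b)
	: __clobber_all);
}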
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-39-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_value.c | 158 ++++++++++++++++++ tools/testing/selftests/bpf/verifier/value.c | 104 ------------ 3 files changed, 160 insertions(+), 104 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_value.c delete mode 100644 tools/testing/selftests/bpf/verifier/value.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 825c8583fecf..c77df746d650 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -35,6 +35,7 @@ #include "verifier_stack_ptr.skel.h" #include "verifier_uninit.skel.h" #include "verifier_value_adj_spill.skel.h" +#include "verifier_value.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -92,3 +93,4 @@ void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_uninit(void) { RUN(verifier_uninit); } void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); } +void test_verifier_value(void) { RUN(verifier_value); } diff --git a/tools/testing/selftests/bpf/progs/verifier_value.c b/tools/testing/selftests/bpf/progs/verifier_value.c new file mode 100644 index 000000000000..b5af6b6f5acd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_value.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/value.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +SEC("socket") +__description("map element value store of cleared call register") +__failure __msg("R1 !read_ok") +__failure_unpriv __msg_unpriv("R1 !read_ok") +__naked void store_of_cleared_call_register(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("socket") +__description("map element value with unaligned store") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void element_value_with_unaligned_store(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r0 += 3; \ + r1 = 42; \ + *(u64*)(r0 + 0) = r1; \ + r1 = 43; \ + *(u64*)(r0 + 2) = r1; \ + r1 = 44; \ + *(u64*)(r0 - 2) = r1; \ + r8 = r0; \ + r1 = 32; \ + *(u64*)(r8 + 0) = r1; \ + r1 = 33; \ + *(u64*)(r8 + 2) = r1; \ + r1 = 34; \ + *(u64*)(r8 - 2) = r1; \ + r8 += 5; \ + r1 = 22; \ + *(u64*)(r8 + 0) = r1; \ + r1 = 23; \ + *(u64*)(r8 + 4) = r1; \ + r1 = 24; \ + *(u64*)(r8 - 7) = r1; \ + r7 = r8; \ + r7 += 3; \ + r1 = 22; \ + *(u64*)(r7 + 0) = r1; \ + r1 = 23; \ + *(u64*)(r7 + 4) = r1; \ + r1 = 24; \ + *(u64*)(r7 - 4) = r1; \ +l0_%=: exit; \ +" : + :
__imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b) + : __clobber_all); +} + +SEC("socket") +__description("map element value with unaligned load") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void element_value_with_unaligned_load(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + if r1 >= %[max_entries] goto l0_%=; \ + r0 += 3; \ + r7 = *(u64*)(r0 + 0); \ + r7 = *(u64*)(r0 + 2); \ + r8 = r0; \ + r7 = *(u64*)(r8 + 0); \ + r7 = *(u64*)(r8 + 2); \ + r0 += 5; \ + r7 = *(u64*)(r0 + 0); \ + r7 = *(u64*)(r0 + 4); \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(max_entries, MAX_ENTRIES) + : __clobber_all); +} + +SEC("socket") +__description("map element value is preserved across register spilling") +__success __failure_unpriv __msg_unpriv("R0 leaks addr") +__retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void is_preserved_across_register_spilling(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r0 += %[test_val_foo]; \ + r1 = 42; \ + *(u64*)(r0 + 0) = r1; \ + r1 = r10; \ + r1 += -184; \ + *(u64*)(r1 + 0) = r0; \ + r3 = *(u64*)(r1 + 0); \ + r1 = 42; \ + *(u64*)(r3 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/value.c b/tools/testing/selftests/bpf/verifier/value.c deleted file mode 100644 index 0e42592b1218..000000000000 --- a/tools/testing/selftests/bpf/verifier/value.c +++ /dev/null @@ -1,104 +0,0 @@ -{ - "map element value store of cleared call register", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R1 !read_ok", - .errstr = "R1 !read_ok", - .result = REJECT, - .result_unpriv = REJECT, -}, -{ - "map element value with unaligned store", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), - BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), - BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), - BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), - BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), - BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = 
ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "map element value with unaligned load", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "map element value is preserved across register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(struct test_val, foo)), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, From d330528617b78ae893c405bf06c6c604bd2e6357 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:20 +0200 Subject: [PATCH 144/260] selftests/bpf: verifier/value_or_null.c converted to inline assembly Test verifier/value_or_null.c automatically converted to use inline assembly. 
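Also mechanical but easy to miss when reading these diffs: numeric jump offsets from the macro form become local labels in the assembly form, made unique per inline-asm expansion by the %= operand. A sketch (the branch itself is illustrative, not taken from this patch):

SEC("socket")
__description("example: numeric jump offset becomes a local label")
__success __retval(0)
__naked void example_local_label(void)
{
	asm volatile ("			\
	r0 = 1;				\
	if r0 != 1 goto l0_%=;		\
	r0 = 0;				\
l0_%=:	exit;				\
"	::: __clobber_all);
}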
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-40-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_value_or_null.c | 288 ++++++++++++++++++ .../selftests/bpf/verifier/value_or_null.c | 220 ------------- 3 files changed, 290 insertions(+), 220 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_value_or_null.c delete mode 100644 tools/testing/selftests/bpf/verifier/value_or_null.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c77df746d650..54eb21ef9fad 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -36,6 +36,7 @@ #include "verifier_uninit.skel.h" #include "verifier_value_adj_spill.skel.h" #include "verifier_value.skel.h" +#include "verifier_value_or_null.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -94,3 +95,4 @@ void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_uninit(void) { RUN(verifier_uninit); } void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); } void test_verifier_value(void) { RUN(verifier_value); } +void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); } diff --git a/tools/testing/selftests/bpf/progs/verifier_value_or_null.c b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c new file mode 100644 index 000000000000..8ff668a242eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/value_or_null.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define MAX_ENTRIES 11 + +struct test_val { + unsigned int index; + int foo[MAX_ENTRIES]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, struct test_val); +} map_hash_48b SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("tc") +__description("multiple registers share map_lookup_elem result") +__success __retval(0) +__naked void share_map_lookup_elem_result(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r4 = r0; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("alu ops on ptr_to_map_value_or_null, 1") +__failure __msg("R4 pointer arithmetic on map_value_or_null") +__naked void map_value_or_null_1(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r4 = r0; \ + r4 += -2; \ + r4 += 2; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("alu ops on ptr_to_map_value_or_null, 2") +__failure __msg("R4 pointer arithmetic on map_value_or_null") +__naked void map_value_or_null_2(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 =
%[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r4 = r0; \ + r4 &= -1; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("alu ops on ptr_to_map_value_or_null, 3") +__failure __msg("R4 pointer arithmetic on map_value_or_null") +__naked void map_value_or_null_3(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r4 = r0; \ + r4 <<= 1; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("invalid memory access with multiple map_lookup_elem calls") +__failure __msg("R4 !read_ok") +__naked void multiple_map_lookup_elem_calls(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + r8 = r1; \ + r7 = r2; \ + call %[bpf_map_lookup_elem]; \ + r4 = r0; \ + r1 = r8; \ + r2 = r7; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("tc") +__description("valid indirect map_lookup_elem access with 2nd lookup in branch") +__success __retval(0) +__naked void with_2nd_lookup_in_branch(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + r8 = r1; \ + r7 = r2; \ + call %[bpf_map_lookup_elem]; \ + r2 = 10; \ + if r2 != 0 goto l0_%=; \ + r1 = r8; \ + r2 = r7; \ + call %[bpf_map_lookup_elem]; \ +l0_%=: r4 = r0; \ + if r0 == 0 goto l1_%=; \ + r1 = 0; \ + *(u64*)(r4 + 0) = r1; \ +l1_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("invalid map access from else condition") +__failure __msg("R0 unbounded memory access") +__failure_unpriv __msg_unpriv("R0 leaks addr") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void map_access_from_else_condition(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = *(u32*)(r0 + 0); \ + if r1 >= %[__imm_0] goto l1_%=; \ + r1 += 1; \ +l1_%=: r1 <<= 2; \ + r0 += r1; \ + r1 = %[test_val_foo]; \ + *(u64*)(r0 + 0) = r1; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_48b), + __imm_const(__imm_0, MAX_ENTRIES-1), + __imm_const(test_val_foo, offsetof(struct test_val, foo)) + : __clobber_all); +} + +SEC("tc") +__description("map lookup and null branch prediction") +__success __retval(0) +__naked void lookup_and_null_branch_prediction(void) +{ + asm volatile (" \ + r1 = 10; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r6 = r0; \ + if r6 == 0 goto l0_%=; \ + if r6 != 0 goto l0_%=; \ + r10 += 10; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("MAP_VALUE_OR_NULL check_ids() in regsafe()") +__failure __msg("R8 invalid mem access 'map_value_or_null'") +__failure_unpriv __msg_unpriv("") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void null_check_ids_in_regsafe(void) +{ + asm volatile (" \ + r1 = 0; \ + 
*(u64*)(r10 - 8) = r1; \ + /* r9 = map_lookup_elem(...) */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r9 = r0; \ + /* r8 = map_lookup_elem(...) */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r8 = r0; \ + /* r7 = ktime_get_ns() */ \ + call %[bpf_ktime_get_ns]; \ + r7 = r0; \ + /* r6 = ktime_get_ns() */ \ + call %[bpf_ktime_get_ns]; \ + r6 = r0; \ + /* if r6 > r7 goto +1 ; no new information about the state is derived from\ + * ; this check, thus produced verifier states differ\ + * ; only in 'insn_idx' \ + * r9 = r8 ; optionally share ID between r9 and r8\ + */ \ + if r6 > r7 goto l0_%=; \ + r9 = r8; \ +l0_%=: /* if r9 == 0 goto */ \ + if r9 == 0 goto l1_%=; \ + /* read map value via r8, this is not always \ + * safe because r8 might be not equal to r9. \ + */ \ + r0 = *(u64*)(r8 + 0); \ +l1_%=: /* exit 0 */ \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c deleted file mode 100644 index 52a8bca14f03..000000000000 --- a/tools/testing/selftests/bpf/verifier/value_or_null.c +++ /dev/null @@ -1,220 +0,0 @@ -{ - "multiple registers share map_lookup_elem result", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "alu ops on ptr_to_map_value_or_null, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "alu ops on ptr_to_map_value_or_null, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "alu ops on ptr_to_map_value_or_null, 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - 
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "invalid memory access with multiple map_lookup_elem calls", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = REJECT, - .errstr = "R4 !read_ok", - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "valid indirect map_lookup_elem access with 2nd lookup in branch", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_2, 10), - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS -}, -{ - "invalid map access from else condition", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 unbounded memory access", - .result = REJECT, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "map lookup and null branch prediction", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .prog_type = 
BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, -}, -{ - "MAP_VALUE_OR_NULL check_ids() in regsafe()", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* r9 = map_lookup_elem(...) */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, - 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), - /* r8 = map_lookup_elem(...) */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, - 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - /* r7 = ktime_get_ns() */ - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - /* r6 = ktime_get_ns() */ - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* if r6 > r7 goto +1 ; no new information about the state is derived from - * ; this check, thus produced verifier states differ - * ; only in 'insn_idx' - * r9 = r8 ; optionally share ID between r9 and r8 - */ - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), - BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), - /* if r9 == 0 goto */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1), - /* read map value via r8, this is not always - * safe because r8 might be not equal to r9. - */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), - /* exit 0 */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .flags = BPF_F_TEST_STATE_FREQ, - .fixup_map_hash_8b = { 3, 9 }, - .result = REJECT, - .errstr = "R8 invalid mem access 'map_value_or_null'", - .result_unpriv = REJECT, - .errstr_unpriv = "", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, From d15f5b68b63ad7f47e05aba33c4794fb3bfaf1af Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:21 +0200 Subject: [PATCH 145/260] selftests/bpf: verifier/var_off.c converted to inline assembly Test verifier/var_off.c automatically converted to use inline assembly. 
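A final conversion detail relevant to this file: compile-time constants that the macro form embedded directly as immediates (offsetof() values and similar) are passed into the assembly text via __imm_const() operands, keeping the asm symbolic. Roughly (a sketch; the ctx field choice here is illustrative):

SEC("lwt_in")
__description("example: symbolic constant via __imm_const")
__success __retval(0)
__naked void example_imm_const(void)
{
	asm volatile ("					\
	r2 = *(u32*)(r1 + %[skb_len_off]);		\
	r0 = 0;						\
	exit;						\
"	:
	: __imm_const(skb_len_off, offsetof(struct __sk_buff, len))
	: __clobber_all);
}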
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-41-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_var_off.c | 349 ++++++++++++++++++ .../testing/selftests/bpf/verifier/var_off.c | 291 --------------- 3 files changed, 351 insertions(+), 291 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_var_off.c delete mode 100644 tools/testing/selftests/bpf/verifier/var_off.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 54eb21ef9fad..44350e328da2 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -37,6 +37,7 @@ #include "verifier_value_adj_spill.skel.h" #include "verifier_value.skel.h" #include "verifier_value_or_null.skel.h" +#include "verifier_var_off.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -96,3 +97,4 @@ void test_verifier_uninit(void) { RUN(verifier_uninit); } void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); } void test_verifier_value(void) { RUN(verifier_value); } void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); } +void test_verifier_var_off(void) { RUN(verifier_var_off); } diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c new file mode 100644 index 000000000000..83a90afba785 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/var_off.c */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("lwt_in") +__description("variable-offset ctx access") +__failure __msg("variable ctx access var_off=(0x0; 0x4)") +__naked void variable_offset_ctx_access(void) +{ + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned */ \ + r2 &= 4; \ + /* add it to skb. We now have either &skb->len or\ + * &skb->pkt_type, but we don't know which \ + */ \ + r1 += r2; \ + /* dereference it */ \ + r0 = *(u32*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("cgroup/skb") +__description("variable-offset stack read, priv vs unpriv") +__success __failure_unpriv +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_read_priv_vs_unpriv(void) +{ + asm volatile (" \ + /* Fill the top 8 bytes of the stack */ \ + r0 = 0; \ + *(u64*)(r10 - 8) = r0; \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned */ \ + r2 &= 4; \ + r2 -= 8; \ + /* add it to fp. We now have either fp-4 or fp-8, but\ + * we don't know which \ + */ \ + r2 += r10; \ + /* dereference it for a stack read */ \ + r0 = *(u32*)(r2 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("lwt_in") +__description("variable-offset stack read, uninitialized") +__failure __msg("invalid variable-offset read from stack R2") +__naked void variable_offset_stack_read_uninitialized(void) +{ + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned */ \ + r2 &= 4; \ + r2 -= 8; \ + /* add it to fp. 
We now have either fp-4 or fp-8, but\ + * we don't know which \ + */ \ + r2 += r10; \ + /* dereference it for a stack read */ \ + r0 = *(u32*)(r2 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("variable-offset stack write, priv vs unpriv") +__success __failure_unpriv +/* Variable stack access is rejected for unprivileged. + */ +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_write_priv_vs_unpriv(void) +{ + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 8-byte aligned */ \ + r2 &= 8; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-8 or fp-16, but\ + * we don't know which \ + */ \ + r2 += r10; \ + /* Dereference it for a stack write */ \ + r0 = 0; \ + *(u64*)(r2 + 0) = r0; \ + /* Now read from the address we just wrote. This shows\ + * that, after a variable-offset write, a priviledged\ + * program can read the slots that were in the range of\ + * that write (even if the verifier doesn't actually know\ + * if the slot being read was really written to or not.\ + */ \ + r3 = *(u64*)(r2 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("variable-offset stack write clobbers spilled regs") +__failure +/* In the priviledged case, dereferencing a spilled-and-then-filled + * register is rejected because the previous variable offset stack + * write might have overwritten the spilled pointer (i.e. we lose track + * of the spilled register when we analyze the write). + */ +__msg("R2 invalid mem access 'scalar'") +__failure_unpriv +/* The unprivileged case is not too interesting; variable + * stack access is rejected. + */ +__msg_unpriv("R2 variable stack access prohibited for !root") +__naked void stack_write_clobbers_spilled_regs(void) +{ + asm volatile (" \ + /* Dummy instruction; needed because we need to patch the next one\ + * and we can't patch the first instruction. \ + */ \ + r6 = 0; \ + /* Make R0 a map ptr */ \ + r0 = %[map_hash_8b] ll; \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 8-byte aligned */ \ + r2 &= 8; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-8 or fp-16, but\ + * we don't know which. \ + */ \ + r2 += r10; \ + /* Spill R0(map ptr) into stack */ \ + *(u64*)(r10 - 8) = r0; \ + /* Dereference the unknown value for a stack write */\ + r0 = 0; \ + *(u64*)(r2 + 0) = r0; \ + /* Fill the register back into R2 */ \ + r2 = *(u64*)(r10 - 8); \ + /* Try to dereference R2 for a memory load */ \ + r0 = *(u64*)(r2 + 8); \ + exit; \ +" : + : __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("sockops") +__description("indirect variable-offset stack access, unbounded") +__failure __msg("invalid unbounded variable-offset indirect access to stack R4") +__naked void variable_offset_stack_access_unbounded(void) +{ + asm volatile (" \ + r2 = 6; \ + r3 = 28; \ + /* Fill the top 16 bytes of the stack. */ \ + r4 = 0; \ + *(u64*)(r10 - 16) = r4; \ + r4 = 0; \ + *(u64*)(r10 - 8) = r4; \ + /* Get an unknown value. */ \ + r4 = *(u64*)(r1 + %[bpf_sock_ops_bytes_received]);\ + /* Check the lower bound but don't check the upper one. */\ + if r4 s< 0 goto l0_%=; \ + /* Point the lower bound to initialized stack. Offset is now in range\ + * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.\ + */ \ + r4 -= 16; \ + r4 += r10; \ + r5 = 8; \ + /* Dereference it indirectly. 
*/ \ + call %[bpf_getsockopt]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_getsockopt), + __imm_const(bpf_sock_ops_bytes_received, offsetof(struct bpf_sock_ops, bytes_received)) + : __clobber_all); +} + +SEC("lwt_in") +__description("indirect variable-offset stack access, max out of bound") +__failure __msg("invalid variable-offset indirect access to stack R2") +__naked void access_max_out_of_bound(void) +{ + asm volatile (" \ + /* Fill the top 8 bytes of the stack */ \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned */ \ + r2 &= 4; \ + r2 -= 8; \ + /* add it to fp. We now have either fp-4 or fp-8, but\ + * we don't know which \ + */ \ + r2 += r10; \ + /* dereference it indirectly */ \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("lwt_in") +__description("indirect variable-offset stack access, min out of bound") +__failure __msg("invalid variable-offset indirect access to stack R2") +__naked void access_min_out_of_bound(void) +{ + asm volatile (" \ + /* Fill the top 8 bytes of the stack */ \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned */ \ + r2 &= 4; \ + r2 -= 516; \ + /* add it to fp. We now have either fp-516 or fp-512, but\ + * we don't know which \ + */ \ + r2 += r10; \ + /* dereference it indirectly */ \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("lwt_in") +__description("indirect variable-offset stack access, min_off < min_initialized") +__failure __msg("invalid indirect read from stack R2 var_off") +__naked void access_min_off_min_initialized(void) +{ + asm volatile (" \ + /* Fill only the top 8 bytes of the stack. */ \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned. */ \ + r2 &= 4; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-12 or fp-16, but we don't know\ + * which. fp-16 size 8 is partially uninitialized stack.\ + */ \ + r2 += r10; \ + /* Dereference it indirectly. */ \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("cgroup/skb") +__description("indirect variable-offset stack access, priv vs unpriv") +__success __failure_unpriv +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_access_priv_vs_unpriv(void) +{ + asm volatile (" \ + /* Fill the top 16 bytes of the stack. */ \ + r2 = 0; \ + *(u64*)(r10 - 16) = r2; \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + /* Get an unknown value. */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned. */ \ + r2 &= 4; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-12 or fp-16, we don't know\ + * which, but either way it points to initialized stack.\ + */ \ + r2 += r10; \ + /* Dereference it indirectly. 
*/ \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("lwt_in") +__description("indirect variable-offset stack access, ok") +__success __retval(0) +__naked void variable_offset_stack_access_ok(void) +{ + asm volatile (" \ + /* Fill the top 16 bytes of the stack. */ \ + r2 = 0; \ + *(u64*)(r10 - 16) = r2; \ + r2 = 0; \ + *(u64*)(r10 - 8) = r2; \ + /* Get an unknown value. */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 4-byte aligned. */ \ + r2 &= 4; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-12 or fp-16, we don't know\ + * which, but either way it points to initialized stack.\ + */ \ + r2 += r10; \ + /* Dereference it indirectly. */ \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c deleted file mode 100644 index b183e26c03f1..000000000000 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ /dev/null @@ -1,291 +0,0 @@ -{ - "variable-offset ctx access", - .insns = { - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - /* add it to skb. We now have either &skb->len or - * &skb->pkt_type, but we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - /* dereference it */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "variable ctx access var_off=(0x0; 0x4)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, -{ - "variable-offset stack read, priv vs unpriv", - .insns = { - /* Fill the top 8 bytes of the stack */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), - /* add it to fp. We now have either fp-4 or fp-8, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it for a stack read */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R2 variable stack access prohibited for !root", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "variable-offset stack read, uninitialized", - .insns = { - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), - /* add it to fp. We now have either fp-4 or fp-8, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it for a stack read */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid variable-offset read from stack R2", - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, -{ - "variable-offset stack write, priv vs unpriv", - .insns = { - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 8-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. 
We now have either fp-8 or fp-16, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it for a stack write */ - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - /* Now read from the address we just wrote. This shows - * that, after a variable-offset write, a priviledged - * program can read the slots that were in the range of - * that write (even if the verifier doesn't actually know - * if the slot being read was really written to or not. - */ - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - /* Variable stack access is rejected for unprivileged. - */ - .errstr_unpriv = "R2 variable stack access prohibited for !root", - .result_unpriv = REJECT, - .result = ACCEPT, -}, -{ - "variable-offset stack write clobbers spilled regs", - .insns = { - /* Dummy instruction; needed because we need to patch the next one - * and we can't patch the first instruction. - */ - BPF_MOV64_IMM(BPF_REG_6, 0), - /* Make R0 a map ptr */ - BPF_LD_MAP_FD(BPF_REG_0, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 8-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-8 or fp-16, but - * we don't know which. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Spill R0(map ptr) into stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - /* Dereference the unknown value for a stack write */ - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - /* Fill the register back into R2 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), - /* Try to dereference R2 for a memory load */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - /* The unprivileged case is not too interesting; variable - * stack access is rejected. - */ - .errstr_unpriv = "R2 variable stack access prohibited for !root", - .result_unpriv = REJECT, - /* In the priviledged case, dereferencing a spilled-and-then-filled - * register is rejected because the previous variable offset stack - * write might have overwritten the spilled pointer (i.e. we lose track - * of the spilled register when we analyze the write). - */ - .errstr = "R2 invalid mem access 'scalar'", - .result = REJECT, -}, -{ - "indirect variable-offset stack access, unbounded", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_3, 28), - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, - bytes_received)), - /* Check the lower bound but don't check the upper one. */ - BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4), - /* Point the lower bound to initialized stack. Offset is now in range - * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded. - */ - BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_5, 8), - /* Dereference it indirectly. 
*/ - BPF_EMIT_CALL(BPF_FUNC_getsockopt), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid unbounded variable-offset indirect access to stack R4", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, -{ - "indirect variable-offset stack access, max out of bound", - .insns = { - /* Fill the top 8 bytes of the stack */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), - /* add it to fp. We now have either fp-4 or fp-8, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it indirectly */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid variable-offset indirect access to stack R2", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, -{ - "indirect variable-offset stack access, min out of bound", - .insns = { - /* Fill the top 8 bytes of the stack */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516), - /* add it to fp. We now have either fp-516 or fp-512, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it indirectly */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid variable-offset indirect access to stack R2", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, -{ - "indirect variable-offset stack access, min_off < min_initialized", - .insns = { - /* Fill only the top 8 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, but we don't know - * which. fp-16 size 8 is partially uninitialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, -{ - "indirect variable-offset stack access, priv vs unpriv", - .insns = { - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. 
*/ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .errstr_unpriv = "R2 variable stack access prohibited for !root", - .result_unpriv = REJECT, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, -}, -{ - "indirect variable-offset stack access, ok", - .insns = { - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, From a8036aea2d4f412c98a5fdbc0c987fa8a3c023ed Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:22 +0200 Subject: [PATCH 146/260] selftests/bpf: verifier/xadd.c converted to inline assembly Test verifier/xadd.c automatically converted to use inline assembly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-42-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_xadd.c | 124 ++++++++++++++++++ tools/testing/selftests/bpf/verifier/xadd.c | 97 -------------- 3 files changed, 126 insertions(+), 97 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_xadd.c delete mode 100644 tools/testing/selftests/bpf/verifier/xadd.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 44350e328da2..cd56fe520145 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -38,6 +38,7 @@ #include "verifier_value.skel.h" #include "verifier_value_or_null.skel.h" #include "verifier_var_off.skel.h" +#include "verifier_xadd.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -98,3 +99,4 @@ void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); } void test_verifier_value(void) { RUN(verifier_value); } void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); } void test_verifier_var_off(void) { RUN(verifier_var_off); } +void test_verifier_xadd(void) { RUN(verifier_xadd); } diff --git a/tools/testing/selftests/bpf/progs/verifier_xadd.c b/tools/testing/selftests/bpf/progs/verifier_xadd.c new file mode 100644 index 000000000000..05a0a55adb45 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_xadd.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/xadd.c */ + +#include +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("tc") +__description("xadd/w check unaligned stack") +__failure __msg("misaligned stack access off") +__naked void xadd_w_check_unaligned_stack(void) +{ + asm volatile (" \ + r0 = 1; \ 
+ *(u64*)(r10 - 8) = r0; \ + lock *(u32 *)(r10 - 7) += w0; \ + r0 = *(u64*)(r10 - 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("xadd/w check unaligned map") +__failure __msg("misaligned value access off") +__naked void xadd_w_check_unaligned_map(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: r1 = 1; \ + lock *(u32 *)(r0 + 3) += w1; \ + r0 = *(u32*)(r0 + 3); \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("xdp") +__description("xadd/w check unaligned pkt") +__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void xadd_w_check_unaligned_pkt(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + r0 = 99; \ + goto l1_%=; \ +l0_%=: r0 = 1; \ + r1 = 0; \ + *(u32*)(r2 + 0) = r1; \ + r1 = 0; \ + *(u32*)(r2 + 3) = r1; \ + lock *(u32 *)(r2 + 1) += w0; \ + lock *(u32 *)(r2 + 2) += w0; \ + r0 = *(u32*)(r2 + 1); \ +l1_%=: exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("tc") +__description("xadd/w check whether src/dst got mangled, 1") +__success __retval(3) +__naked void src_dst_got_mangled_1(void) +{ + asm volatile (" \ + r0 = 1; \ + r6 = r0; \ + r7 = r10; \ + *(u64*)(r10 - 8) = r0; \ + lock *(u64 *)(r10 - 8) += r0; \ + lock *(u64 *)(r10 - 8) += r0; \ + if r6 != r0 goto l0_%=; \ + if r7 != r10 goto l0_%=; \ + r0 = *(u64*)(r10 - 8); \ + exit; \ +l0_%=: r0 = 42; \ + exit; \ +" ::: __clobber_all); +} + +SEC("tc") +__description("xadd/w check whether src/dst got mangled, 2") +__success __retval(3) +__naked void src_dst_got_mangled_2(void) +{ + asm volatile (" \ + r0 = 1; \ + r6 = r0; \ + r7 = r10; \ + *(u32*)(r10 - 8) = r0; \ + lock *(u32 *)(r10 - 8) += w0; \ + lock *(u32 *)(r10 - 8) += w0; \ + if r6 != r0 goto l0_%=; \ + if r7 != r10 goto l0_%=; \ + r0 = *(u32*)(r10 - 8); \ + exit; \ +l0_%=: r0 = 42; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c deleted file mode 100644 index b96ef3526815..000000000000 --- a/tools/testing/selftests/bpf/verifier/xadd.c +++ /dev/null @@ -1,97 +0,0 @@ -{ - "xadd/w check unaligned stack", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -7), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "xadd/w check unaligned map", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 3), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = REJECT, - .errstr = "misaligned value access off", - .prog_type = 
BPF_PROG_TYPE_SCHED_CLS, -}, -{ - "xadd/w check unaligned pkt", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 99), - BPF_JMP_IMM(BPF_JA, 0, 0, 6), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 1), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "BPF_ATOMIC stores into R2 pkt is not allowed", - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "xadd/w check whether src/dst got mangled, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), - BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 3, -}, -{ - "xadd/w check whether src/dst got mangled, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), - BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 3, -}, From ffb515c933a9e8000e50b03f76569ffb6ef4d39d Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Mar 2023 04:55:23 +0200 Subject: [PATCH 147/260] selftests/bpf: verifier/xdp.c converted to inline assembly Test verifier/xdp.c automatically converted to use inline assembly. 
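To make the conversion pattern explicit, here is a schematic skeleton (not one of the actual tests; the attribute macros come from bpf_misc.h, as used throughout these patches). The old struct fields map onto source-level annotations: the "descr" string becomes __description(), .result/.errstr become __success or __failure __msg(), .retval becomes __retval(), .prog_type becomes the SEC() name, and the .insns array becomes a __naked function holding the same instructions as inline assembly:

SEC("tc")
__description("name of the original test")
__success __retval(0)
__naked void name_of_the_original_test(void)
{
	asm volatile ("					\
	r0 = 0;						\
	exit;						\
"	::: __clobber_all);
}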
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230325025524.144043-43-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 ++ .../selftests/bpf/progs/verifier_xdp.c | 24 +++++++++++++++++++ tools/testing/selftests/bpf/verifier/xdp.c | 14 ----------- 3 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/verifier_xdp.c delete mode 100644 tools/testing/selftests/bpf/verifier/xdp.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index cd56fe520145..a774d5b193f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -39,6 +39,7 @@ #include "verifier_value_or_null.skel.h" #include "verifier_var_off.skel.h" #include "verifier_xadd.skel.h" +#include "verifier_xdp.skel.h" __maybe_unused static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory) @@ -100,3 +101,4 @@ void test_verifier_value(void) { RUN(verifier_value); } void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); } void test_verifier_var_off(void) { RUN(verifier_var_off); } void test_verifier_xadd(void) { RUN(verifier_xadd); } +void test_verifier_xdp(void) { RUN(verifier_xdp); } diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c new file mode 100644 index 000000000000..50768ed179b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Converted from tools/testing/selftests/bpf/verifier/xdp.c */ + +#include +#include +#include "bpf_misc.h" + +SEC("xdp") +__description("XDP, using ifindex from netdev") +__success __retval(1) +__naked void xdp_using_ifindex_from_netdev(void) +{ + asm volatile (" \ + r0 = 0; \ + r2 = *(u32*)(r1 + %[xdp_md_ingress_ifindex]); \ + if r2 < 1 goto l0_%=; \ + r0 = 1; \ +l0_%=: exit; \ +" : + : __imm_const(xdp_md_ingress_ifindex, offsetof(struct xdp_md, ingress_ifindex)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/xdp.c b/tools/testing/selftests/bpf/verifier/xdp.c deleted file mode 100644 index 5ac390508139..000000000000 --- a/tools/testing/selftests/bpf/verifier/xdp.c +++ /dev/null @@ -1,14 +0,0 @@ -{ - "XDP, using ifindex from netdev", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, ingress_ifindex)), - BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .retval = 1, -}, From e65a5c6edbc6ca4853e6076bd81db1a410592a09 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2023 14:52:42 -0700 Subject: [PATCH 148/260] bpf: Add a few bpf mem allocator functions This patch adds a few bpf mem allocator functions which will be used in the bpf_local_storage in a later patch. bpf_mem_cache_alloc_flags(..., gfp_t flags) is added. When the flags == GFP_KERNEL, it will fall back to __alloc(..., GFP_KERNEL). bpf_local_storage knows its running context is sleepable (GFP_KERNEL) and provides a better guarantee on memory allocation. bpf_local_storage has some uncommon cases that its selem cannot be reused immediately. It handles its own rcu_head, goes through a rcu_trace gp and then frees it.
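To make the intended pairing of the new helpers concrete before the details below, here is a rough sketch of a hypothetical consumer (my_elem, my_ma and the callback names are invented for illustration and are not part of this patch; the real consumer is bpf_local_storage in the later patches, which also takes the rcu_trace_implies_rcu_gp() shortcut omitted here):

struct my_elem {
	struct rcu_head rcu;
	long payload;
};

static struct bpf_mem_alloc my_ma;	/* set up elsewhere with bpf_mem_alloc_init() */

static struct my_elem *my_elem_alloc(void)
{
	struct my_elem *e;

	/* Sleepable context: GFP_KERNEL lets the allocator fall back to a
	 * direct allocation when the per-cpu free_llist is empty.
	 */
	migrate_disable();
	e = bpf_mem_cache_alloc_flags(&my_ma, GFP_KERNEL);
	migrate_enable();
	return e;
}

static void my_elem_free_rcu(struct rcu_head *rcu)
{
	/* The rcu_tasks_trace gp has passed and the originating
	 * bpf_mem_alloc may already be gone, so free the object directly
	 * instead of returning it to a free_llist.
	 */
	bpf_mem_cache_raw_free(container_of(rcu, struct my_elem, rcu));
}

static void my_elem_delete(struct my_elem *e)
{
	call_rcu_tasks_trace(&e->rcu, my_elem_free_rcu);
}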
bpf_mem_cache_raw_free() is added for direct free purpose without leaking the LLIST_NODE_SZ internal knowledge. During free time, the 'struct bpf_mem_alloc *ma' is no longer available. However, the caller should know if it is percpu memory or not and it can call different raw_free functions. bpf_local_storage does not support percpu value, so only the non-percpu 'bpf_mem_cache_raw_free()' is added in this patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230322215246.1675516-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_mem_alloc.h | 2 ++ kernel/bpf/memalloc.c | 59 +++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index a7104af61ab4..3929be5743f4 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); /* kmem_cache_alloc/free equivalent: */ void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma); void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_cache_raw_free(void *ptr); +void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags); #endif /* _BPF_MEM_ALLOC_H */ diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 5fcdacbb8439..410637c225fb 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -121,15 +121,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head) return entry; } -static void *__alloc(struct bpf_mem_cache *c, int node) +static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags) { - /* Allocate, but don't deplete atomic reserves that typical - * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc - * will allocate from the current numa node which is what we - * want here. - */ - gfp_t flags = GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT; - if (c->percpu_size) { void **obj = kmalloc_node(c->percpu_size, flags, node); void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags); @@ -185,7 +178,12 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node) */ obj = __llist_del_first(&c->free_by_rcu); if (!obj) { - obj = __alloc(c, node); + /* Allocate, but don't deplete atomic reserves that typical + * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc + * will allocate from the current numa node which is what we + * want here. + */ + obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT); if (!obj) break; } @@ -676,3 +674,46 @@ void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr) unit_free(this_cpu_ptr(ma->cache), ptr); } + +/* Directly does a kfree() without putting 'ptr' back to the free_llist + * for reuse and without waiting for a rcu_tasks_trace gp. + * The caller must first go through the rcu_tasks_trace gp for 'ptr' + * before calling bpf_mem_cache_raw_free(). + * It could be used when the rcu_tasks_trace callback does not have + * a hold on the original bpf_mem_alloc object that allocated the + * 'ptr'. This should only be used in the uncommon code path. + * Otherwise, the bpf_mem_alloc's free_llist cannot be refilled + * and may affect performance. + */ +void bpf_mem_cache_raw_free(void *ptr) +{ + if (!ptr) + return; + + kfree(ptr - LLIST_NODE_SZ); +} + +/* When flags == GFP_KERNEL, it signals that the caller will not cause + * deadlock when using kmalloc. bpf_mem_cache_alloc_flags() will use + * kmalloc if the free_llist is empty. 
+ */ +void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) +{ + struct bpf_mem_cache *c; + void *ret; + + c = this_cpu_ptr(ma->cache); + + ret = unit_alloc(c); + if (!ret && flags == GFP_KERNEL) { + struct mem_cgroup *memcg, *old_memcg; + + memcg = get_memcg(c); + old_memcg = set_active_memcg(memcg); + ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); + set_active_memcg(old_memcg); + mem_cgroup_put(memcg); + } + + return !ret ? NULL : ret + LLIST_NODE_SZ; +} From 08a7ce384e33e53e0732c500a8af67a73f8fceca Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2023 14:52:43 -0700 Subject: [PATCH 149/260] bpf: Use bpf_mem_cache_alloc/free in bpf_local_storage_elem This patch uses bpf_mem_alloc for the task and cgroup local storage because the bpf prog can easily get a hold of the storage owner's PTR_TO_BTF_ID, e.g. bpf_get_current_task_btf() can be used in some of the kmalloc code paths, which would cause deadlock/recursion. bpf_mem_cache_alloc is deadlock free and will solve a legit use case in [1]. For sk storage, its batch creation benchmark shows a few percent regression when the sk create/destroy batch size is larger than 32. The sk creation/destruction happens much more often and depends on external traffic. Considering it is hypothetical to be able to cause deadlock with sk storage, it can cross the bridge to use bpf_mem_alloc till a legit (i.e. useful) use case comes up. For inode storage, bpf_local_storage_destroy() is called before waiting for a rcu gp and its memory cannot be reused immediately. inode stays with kmalloc/kfree after the rcu [or tasks_trace] gp. A 'bool bpf_ma' argument is added to bpf_local_storage_map_alloc(). Only task and cgroup storage have 'bpf_ma == true' which means to use bpf_mem_cache_alloc/free(). This patch only changes selem to use bpf_mem_alloc for task and cgroup. The next patch will change the local_storage to use bpf_mem_alloc also for task and cgroup. Here are some more details on the changes: * memory allocation: After bpf_mem_cache_alloc(), the SDATA(selem)->data is zeroed because bpf_mem_cache_alloc() could return a reused selem. It is to keep the existing bpf_map_kzalloc() behavior. Only SDATA(selem)->data is zeroed. SDATA(selem)->data is the visible part to the bpf prog. No need to use zero_map_value() to do the zeroing because bpf_selem_free(..., reuse_now = true) ensures no bpf prog is using the selem before returning the selem through bpf_mem_cache_free(). For the internal fields of selem, they will be initialized when linking to the new smap and the new local_storage. When 'bpf_ma == false', nothing changes in this patch. It will stay with the bpf_map_kzalloc(). * memory free: The bpf_selem_free() and bpf_selem_free_rcu() are modified to handle the bpf_ma == true case. For the common selem free path where its owner is also being destroyed, the mem is freed in bpf_local_storage_destroy(); the owner (task and cgroup) has gone through a rcu gp by then. The memory can be reused immediately, so bpf_local_storage_destroy() will call bpf_selem_free(..., reuse_now = true) which will do bpf_mem_cache_free() for immediate reuse consideration. An exception is the delete elem code path. The delete elem code path is called from the helper bpf_*_storage_delete() and the syscall bpf_map_delete_elem.
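For example, the delete path can be reached from a bpf prog with a sketch like this (hypothetical program; the map, hook and function names are illustrative only):

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, long);
} task_map SEC(".maps");

SEC("tp_btf/sched_process_exit")
int BPF_PROG(drop_task_storage, struct task_struct *p)
{
	/* Takes the bpf_selem_free(..., reuse_now = false) path described
	 * below: the selem must wait for a rcu_tasks_trace gp because
	 * another bpf prog may still be reading it.
	 */
	bpf_task_storage_delete(&task_map, p);
	return 0;
}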
This path is an unusual case for local storage because the common use case is to have the local storage stay with its owner's lifetime, so that the bpf prog and the user space do not have to monitor the owner's destruction. For the delete elem path, the selem cannot be reused immediately because there could be a bpf prog using it. It will call bpf_selem_free(..., reuse_now = false) and it will wait for a rcu tasks trace gp before freeing the elem. The rcu callback is changed to do bpf_mem_cache_raw_free() instead of kfree(). When 'bpf_ma == false', it should be the same as before. __bpf_selem_free() is added to do the kfree_rcu and call_tasks_trace_rcu(). A few words on the 'reuse_now == true'. When 'reuse_now == true', it is still racing with bpf_local_storage_map_free which is under rcu protection, so it still needs to wait for a rcu gp instead of kfree(). Otherwise, the selem may be reused by slab for a totally different struct while the bpf_local_storage_map_free() is still using it (as a rcu reader). For the inode case, there may be other rcu readers also. In short, when bpf_ma == false and reuse_now == true => vanilla rcu. [1]: https://lore.kernel.org/bpf/20221118190109.1512674-1-namhyung@kernel.org/ Cc: Namhyung Kim Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230322215246.1675516-3-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 6 +- kernel/bpf/bpf_cgrp_storage.c | 2 +- kernel/bpf/bpf_inode_storage.c | 2 +- kernel/bpf/bpf_local_storage.c | 95 ++++++++++++++++++++++++++++--- kernel/bpf/bpf_task_storage.c | 2 +- net/core/bpf_sk_storage.c | 2 +- 6 files changed, 95 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index a34f61467a2f..30efbcab2798 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 @@ -55,6 +56,8 @@ struct bpf_local_storage_map { u32 bucket_log; u16 elem_size; u16 cache_idx; + struct bpf_mem_alloc selem_ma; + bool bpf_ma; }; struct bpf_local_storage_data { @@ -122,7 +125,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, - struct bpf_local_storage_cache *cache); + struct bpf_local_storage_cache *cache, + bool bpf_ma); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index f5b016a5484d..d17d5b694668 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -149,7 +149,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &cgroup_cache); + return bpf_local_storage_map_alloc(attr, &cgroup_cache, true); } static void cgroup_storage_map_free(struct bpf_map *map) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 9a5f05151898..e17ad581b9be 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -199,7 +199,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &inode_cache); + return bpf_local_storage_map_alloc(attr, &inode_cache, false); } static void
inode_storage_map_free(struct bpf_map *map) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 351d991694cb..309ea727a5cb 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -80,8 +80,24 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; - selem = bpf_map_kzalloc(&smap->map, smap->elem_size, - gfp_flags | __GFP_NOWARN); + if (smap->bpf_ma) { + migrate_disable(); + selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); + migrate_enable(); + if (selem) + /* Keep the original bpf_map_kzalloc behavior + * from before we started using the bpf_mem_cache_alloc. + * + * No need to use zero_map_value. The bpf_selem_free() + * only does bpf_mem_cache_free when there is + * no other bpf prog using the selem. + */ + memset(SDATA(selem)->data, 0, smap->map.value_size); + } else { + selem = bpf_map_kzalloc(&smap->map, smap->elem_size, + gfp_flags | __GFP_NOWARN); + } + if (selem) { if (value) copy_map_value(&smap->map, SDATA(selem)->data, value); @@ -124,12 +140,34 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); } +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(selem); + else + kfree_rcu(selem, rcu); +} + +/* Handle bpf_ma == false */ +static void __bpf_selem_free(struct bpf_local_storage_elem *selem, + bool vanilla_rcu) +{ + if (vanilla_rcu) + kfree_rcu(selem, rcu); + else + call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); +} + static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - kfree(selem); + bpf_mem_cache_raw_free(selem); } static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) @@ -145,10 +183,23 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, bool reuse_now) { bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - if (!reuse_now) + + if (!smap->bpf_ma) { + __bpf_selem_free(selem, reuse_now); + return; + } + + if (!reuse_now) { call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - else - call_rcu(&selem->rcu, bpf_selem_free_rcu); + } else { + /* Instead of using the vanilla call_rcu(), + * bpf_mem_cache_free will be able to reuse selem + * immediately. + */ + migrate_disable(); + bpf_mem_cache_free(&smap->selem_ma, selem); + migrate_enable(); + } } /* local_storage->lock must be held and selem->local_storage == local_storage. @@ -654,13 +705,25 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) return usage; } +/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory. + * A deadlock free allocator is useful for storage where the bpf prog can easily + * get a hold of the owner PTR_TO_BTF_ID in any context, e.g. bpf_get_current_task_btf. + * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses + * memory immediately. To be reuse-immediate safe, the owner destruction + * code path needs to go through a rcu grace period before calling + * bpf_local_storage_destroy(). + * + * When bpf_ma == false, the kmalloc and kfree are used.
+ */ struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, - struct bpf_local_storage_cache *cache) + struct bpf_local_storage_cache *cache, + bool bpf_ma) { struct bpf_local_storage_map *smap; unsigned int i; u32 nbuckets; + int err; smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); if (!smap) @@ -675,8 +738,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), nbuckets, GFP_USER | __GFP_NOWARN); if (!smap->buckets) { - bpf_map_area_free(smap); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto free_smap; } for (i = 0; i < nbuckets; i++) { @@ -687,8 +750,20 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, smap->elem_size = offsetof(struct bpf_local_storage_elem, sdata.data[attr->value_size]); + smap->bpf_ma = bpf_ma; + if (bpf_ma) { + err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); + if (err) + goto free_smap; + } + smap->cache_idx = bpf_local_storage_cache_idx_get(cache); return &smap->map; + +free_smap: + kvfree(smap->buckets); + bpf_map_area_free(smap); + return ERR_PTR(err); } void bpf_local_storage_map_free(struct bpf_map *map, @@ -754,6 +829,8 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); + if (smap->bpf_ma) + bpf_mem_alloc_destroy(&smap->selem_ma); kvfree(smap->buckets); bpf_map_area_free(smap); } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index ab5bd1ef58c4..d1af0c8f9ce4 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -309,7 +309,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &task_cache); + return bpf_local_storage_map_alloc(attr, &task_cache, true); } static void task_storage_map_free(struct bpf_map *map) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index cb0f5a105b89..085025c7130a 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -68,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &sk_cache); + return bpf_local_storage_map_alloc(attr, &sk_cache, false); } static int notsupp_get_next_key(struct bpf_map *map, void *key, From 6ae9d5e99e1dd26babdd9502759fa25a3fd348ad Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2023 14:52:44 -0700 Subject: [PATCH 150/260] bpf: Use bpf_mem_cache_alloc/free for bpf_local_storage This patch uses bpf_mem_cache_alloc/free for allocating and freeing bpf_local_storage for task and cgroup storage. The changes are similar to the previous patch. A few things worth mentioning for bpf_local_storage: The local_storage is freed when the last selem is deleted. Before deleting a selem from local_storage, it needs to retrieve the local_storage->smap because the bpf_selem_unlink_storage_nolock() may have set it to NULL. Note that local_storage->smap may have already been NULL when the selem that created this local_storage has been removed. In this case, call_rcu will be used to free the local_storage. Also, the bpf_ma (true or false) value is needed before calling bpf_local_storage_free(). The bpf_ma can either be obtained from the local_storage->smap (if available) or any of its selem's smap. A new helper check_storage_bpf_ma() is added to obtain bpf_ma for a deleting bpf_local_storage.
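Schematically, the smap fallback works like this (a condensed sketch of the logic; the actual helper in the diff below also goes through the proper rcu_dereference_check() accessors):

static bool check_storage_bpf_ma_sketch(struct bpf_local_storage *local_storage,
					struct bpf_local_storage_map *storage_smap)
{
	struct bpf_local_storage_elem *selem;

	if (storage_smap)	/* smap still linked to the local_storage */
		return storage_smap->bpf_ma;

	/* local_storage->smap is already NULL: all selems of the same map
	 * type share the same bpf_ma, so consult any remaining selem.
	 */
	if (hlist_empty(&local_storage->list))
		return false;	/* the caller will not free it anyway */

	selem = hlist_entry(local_storage->list.first,
			    struct bpf_local_storage_elem, snode);
	return SDATA(selem)->smap->bpf_ma;
}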
When bpf_local_storage_alloc() gets reused memory, all fields either already hold the correct values or will be initialized. 'cache[]' must already be all NULLs. 'list' must be empty. Others will be initialized. Cc: Namhyung Kim Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230322215246.1675516-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 + kernel/bpf/bpf_local_storage.c | 132 ++++++++++++++++++++++++++---- 2 files changed, 117 insertions(+), 16 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 30efbcab2798..173ec7f43ed1 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -57,6 +57,7 @@ struct bpf_local_storage_map { u16 elem_size; u16 cache_idx; struct bpf_mem_alloc selem_ma; + struct bpf_mem_alloc storage_ma; bool bpf_ma; }; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 309ea727a5cb..dab2ff4c99d9 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -111,33 +111,74 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage *local_storage; + + /* If RCU Tasks Trace grace period implies RCU grace period, do + * kfree(), else do kfree_rcu(). + */ + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(local_storage); + else + kfree_rcu(local_storage, rcu); +} + static void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; local_storage = container_of(rcu, struct bpf_local_storage, rcu); - kfree(local_storage); + bpf_mem_cache_raw_free(local_storage); } static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { - /* If RCU Tasks Trace grace period implies RCU grace period, do - * kfree(), else do kfree_rcu(). - */ if (rcu_trace_implies_rcu_gp()) bpf_local_storage_free_rcu(rcu); else call_rcu(rcu, bpf_local_storage_free_rcu); } -static void bpf_local_storage_free(struct bpf_local_storage *local_storage, - bool reuse_now) +/* Handle bpf_ma == false */ +static void __bpf_local_storage_free(struct bpf_local_storage *local_storage, + bool vanilla_rcu) { - if (!reuse_now) + if (vanilla_rcu) + kfree_rcu(local_storage, rcu); + else + call_rcu_tasks_trace(&local_storage->rcu, + __bpf_local_storage_free_trace_rcu); +} + +static void bpf_local_storage_free(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool bpf_ma, bool reuse_now) +{ + if (!bpf_ma) { + __bpf_local_storage_free(local_storage, reuse_now); + return; + } + + if (!reuse_now) { call_rcu_tasks_trace(&local_storage->rcu, bpf_local_storage_free_trace_rcu); - else + return; + } + + if (smap) { + migrate_disable(); + bpf_mem_cache_free(&smap->storage_ma, local_storage); + migrate_enable(); + } else { + /* smap could be NULL if the selem that triggered + * this 'local_storage' creation had been long gone. + * In this case, directly do call_rcu().
+ */ call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); + } } /* rcu tasks trace callback for bpf_ma == false */ @@ -260,11 +301,47 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor return free_local_storage; } +static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *storage_smap, + struct bpf_local_storage_elem *selem) +{ + + struct bpf_local_storage_map *selem_smap; + + /* local_storage->smap may be NULL. If it is, get the bpf_ma + * from any selem in the local_storage->list. The bpf_ma of all + * local_storage and selem should have the same value + * for the same map type. + * + * If the local_storage->list is already empty, the caller will not + * care about the bpf_ma value also because the caller is not + * responsible to free the local_storage. + */ + + if (storage_smap) + return storage_smap->bpf_ma; + + if (!selem) { + struct hlist_node *n; + + n = rcu_dereference_check(hlist_first_rcu(&local_storage->list), + bpf_rcu_lock_held()); + if (!n) + return false; + + selem = hlist_entry(n, struct bpf_local_storage_elem, snode); + } + selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + + return selem_smap->bpf_ma; +} + static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, bool reuse_now) { + struct bpf_local_storage_map *storage_smap; struct bpf_local_storage *local_storage; - bool free_local_storage = false; + bool bpf_ma, free_local_storage = false; unsigned long flags; if (unlikely(!selem_linked_to_storage_lockless(selem))) @@ -273,6 +350,10 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held()); + storage_smap = rcu_dereference_check(local_storage->smap, + bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); + raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( @@ -280,7 +361,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) - bpf_local_storage_free(local_storage, reuse_now); + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -400,8 +481,15 @@ int bpf_local_storage_alloc(void *owner, if (err) return err; - storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), - gfp_flags | __GFP_NOWARN); + if (smap->bpf_ma) { + migrate_disable(); + storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); + migrate_enable(); + } else { + storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), + gfp_flags | __GFP_NOWARN); + } + if (!storage) { err = -ENOMEM; goto uncharge; @@ -447,7 +535,7 @@ int bpf_local_storage_alloc(void *owner, return 0; uncharge: - bpf_local_storage_free(storage, true); + bpf_local_storage_free(storage, smap, smap->bpf_ma, true); mem_uncharge(smap, owner, sizeof(*storage)); return err; } @@ -660,11 +748,15 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { + struct bpf_local_storage_map *storage_smap; struct bpf_local_storage_elem *selem; - bool free_storage = false; + bool bpf_ma, free_storage = false; struct hlist_node *n; unsigned long flags; + storage_smap = 
rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL); + /* Neither the bpf_prog nor the bpf_map's syscall * could be modifying the local_storage->list now. * Thus, no elem can be added to or deleted from the @@ -692,7 +784,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_storage) - bpf_local_storage_free(local_storage, true); + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) @@ -755,6 +847,12 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); if (err) goto free_smap; + + err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false); + if (err) { + bpf_mem_alloc_destroy(&smap->selem_ma); + goto free_smap; + } } smap->cache_idx = bpf_local_storage_cache_idx_get(cache); @@ -829,8 +927,10 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); - if (smap->bpf_ma) + if (smap->bpf_ma) { bpf_mem_alloc_destroy(&smap->selem_ma); + bpf_mem_alloc_destroy(&smap->storage_ma); + } kvfree(smap->buckets); bpf_map_area_free(smap); } From d8db84d71c0e539f7ce902e2fe297e535ba4d46c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2023 14:52:45 -0700 Subject: [PATCH 151/260] selftests/bpf: Test task storage when local_storage->smap is NULL The current sk storage test ensures the memory free works when the local_storage->smap is NULL. This patch adds a task storage test to ensure the memory free code path works when local_storage->smap is NULL. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230322215246.1675516-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/test_local_storage.c | 7 ++- .../selftests/bpf/progs/local_storage.c | 56 ++++++++++++++----- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 563a9c746b7b..bcf2e1905ed7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -23,7 +23,7 @@ struct storage { /* Fork and exec the provided rm binary and return the exit code of the * forked process and its pid. */ -static int run_self_unlink(int *monitored_pid, const char *rm_path) +static int run_self_unlink(struct local_storage *skel, const char *rm_path) { int child_pid, child_status, ret; int null_fd; @@ -35,7 +35,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path) dup2(null_fd, STDERR_FILENO); close(null_fd); - *monitored_pid = getpid(); + skel->bss->monitored_pid = getpid(); /* Use the copied /usr/bin/rm to delete itself * /tmp/copy_of_rm /tmp/copy_of_rm. */ @@ -44,6 +44,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path) exit(errno); } else if (child_pid > 0) { waitpid(child_pid, &child_status, 0); + ASSERT_EQ(skel->data->task_storage_result, 0, "task_storage_result"); return WEXITSTATUS(child_status); } @@ -133,7 +134,7 @@ void test_test_local_storage(void) * unlink its executable. This operation should be denied by the loaded * LSM program. 
*/ - err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); + err = run_self_unlink(skel, tmp_exec_path); if (!ASSERT_EQ(err, EPERM, "run_self_unlink")) goto close_prog_rmdir; diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index c8ba7207f5a5..bc8ea56671a1 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -16,6 +16,7 @@ char _license[] SEC("license") = "GPL"; int monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; +int task_storage_result = -1; struct local_storage { struct inode *exec_inode; @@ -50,26 +51,57 @@ struct { __type(value, struct local_storage); } task_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct local_storage); +} task_storage_map2 SEC(".maps"); + SEC("lsm/inode_unlink") int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct bpf_local_storage *local_storage; struct local_storage *storage; + struct task_struct *task; bool is_self_unlink; if (pid != monitored_pid) return 0; - storage = bpf_task_storage_get(&task_storage_map, - bpf_get_current_task_btf(), 0, 0); - if (storage) { - /* Don't let an executable delete itself */ - is_self_unlink = storage->exec_inode == victim->d_inode; - if (is_self_unlink) - return -EPERM; - } + task = bpf_get_current_task_btf(); + if (!task) + return 0; - return 0; + task_storage_result = -1; + + storage = bpf_task_storage_get(&task_storage_map, task, 0, 0); + if (!storage) + return 0; + + /* Don't let an executable delete itself */ + is_self_unlink = storage->exec_inode == victim->d_inode; + + storage = bpf_task_storage_get(&task_storage_map2, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage || storage->value) + return 0; + + if (bpf_task_storage_delete(&task_storage_map, task)) + return 0; + + /* Ensure that the task_storage_map is disconnected from the storage. + * The storage memory should not be freed back to the + * bpf_mem_alloc. + */ + local_storage = task->bpf_storage; + if (!local_storage || local_storage->smap) + return 0; + + task_storage_result = 0; + + return is_self_unlink ? -EPERM : 0; } SEC("lsm.s/inode_rename") @@ -139,11 +171,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (bpf_sk_storage_delete(&sk_storage_map, sock->sk)) return 0; - /* Ensure that the sk_storage_map is disconnected from the storage. - * The storage memory should not be freed back to the - * bpf_mem_alloc of the sk_bpf_storage_map because - * sk_bpf_storage_map may have been gone. - */ + /* Ensure that the sk_storage_map is disconnected from the storage. */ if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap) return 0; From cbe9d93d58b16b5912498ea42b5173022fff7c04 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2023 14:52:46 -0700 Subject: [PATCH 152/260] selftests/bpf: Add bench for task storage creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a task storage benchmark to the existing local-storage-create benchmark. 
For task storage, ./bench --storage-type task --batch-size 32: bpf_ma: Summary: creates 30.456 ± 0.507k/s ( 30.456k/prod), 6.08 kmallocs/create no bpf_ma: Summary: creates 31.962 ± 0.486k/s ( 31.962k/prod), 6.13 kmallocs/create ./bench --storage-type task --batch-size 64: bpf_ma: Summary: creates 30.197 ± 1.476k/s ( 30.197k/prod), 6.08 kmallocs/create no bpf_ma: Summary: creates 31.103 ± 0.297k/s ( 31.103k/prod), 6.13 kmallocs/create Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230322215246.1675516-6-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bench.c | 2 + .../bpf/benchs/bench_local_storage_create.c | 151 ++++++++++++++++-- .../bpf/progs/bench_local_storage_create.c | 25 +++ 3 files changed, 164 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index dc3827c1f139..d9c080ac1796 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -278,6 +278,7 @@ extern struct argp bench_local_storage_argp; extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; extern struct argp bench_hashmap_lookup_argp; +extern struct argp bench_local_storage_create_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -288,6 +289,7 @@ static const struct argp_child bench_parsers[] = { { &bench_local_storage_rcu_tasks_trace_argp, 0, "local_storage RCU Tasks Trace slowdown benchmark", 0 }, { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 }, + { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 }, {}, }; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c index f8b2a640ccbe..abb0321d4f34 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -3,19 +3,71 @@ #include #include +#include +#include #include "bench.h" #include "bench_local_storage_create.skel.h" -#define BATCH_SZ 32 - struct thread { - int fds[BATCH_SZ]; + int *fds; + pthread_t *pthds; + int *pthd_results; }; static struct bench_local_storage_create *skel; static struct thread *threads; -static long socket_errs; +static long create_owner_errs; +static int storage_type = BPF_MAP_TYPE_SK_STORAGE; +static int batch_sz = 32; + +enum { + ARG_BATCH_SZ = 9000, + ARG_STORAGE_TYPE = 9001, +}; + +static const struct argp_option opts[] = { + { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0, + "The number of storage creations in each batch" }, + { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0, + "The type of local storage to test (socket or task)" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + int ret; + + switch (key) { + case ARG_BATCH_SZ: + ret = atoi(arg); + if (ret < 1) { + fprintf(stderr, "invalid batch-size\n"); + argp_usage(state); + } + batch_sz = ret; + break; + case ARG_STORAGE_TYPE: + if (!strcmp(arg, "task")) { + storage_type = BPF_MAP_TYPE_TASK_STORAGE; + } else if (!strcmp(arg, "socket")) { + storage_type = BPF_MAP_TYPE_SK_STORAGE; + } else { + fprintf(stderr, "invalid storage-type (socket or task)\n"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_create_argp = { + .options = opts, + .parser = parse_arg, +}; static void 
validate(void) { @@ -28,6 +80,8 @@ static void validate(void) static void setup(void) { + int i; + skel = bench_local_storage_create__open_and_load(); if (!skel) { fprintf(stderr, "error loading skel\n"); @@ -35,10 +89,16 @@ static void setup(void) } skel->bss->bench_pid = getpid(); - - if (!bpf_program__attach(skel->progs.socket_post_create)) { - fprintf(stderr, "Error attaching bpf program\n"); - exit(1); + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + if (!bpf_program__attach(skel->progs.socket_post_create)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + } else { + if (!bpf_program__attach(skel->progs.fork)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } } if (!bpf_program__attach(skel->progs.kmalloc)) { @@ -52,6 +112,29 @@ static void setup(void) fprintf(stderr, "cannot alloc thread_res\n"); exit(1); } + + for (i = 0; i < env.producer_cnt; i++) { + struct thread *t = &threads[i]; + + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + t->fds = malloc(batch_sz * sizeof(*t->fds)); + if (!t->fds) { + fprintf(stderr, "cannot alloc t->fds\n"); + exit(1); + } + } else { + t->pthds = malloc(batch_sz * sizeof(*t->pthds)); + if (!t->pthds) { + fprintf(stderr, "cannot alloc t->pthds\n"); + exit(1); + } + t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results)); + if (!t->pthd_results) { + fprintf(stderr, "cannot alloc t->pthd_results\n"); + exit(1); + } + } + } } static void measure(struct bench_res *res) @@ -65,20 +148,20 @@ static void *consumer(void *input) return NULL; } -static void *producer(void *input) +static void *sk_producer(void *input) { struct thread *t = &threads[(long)(input)]; int *fds = t->fds; int i; while (true) { - for (i = 0; i < BATCH_SZ; i++) { + for (i = 0; i < batch_sz; i++) { fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); if (fds[i] == -1) - atomic_inc(&socket_errs); + atomic_inc(&create_owner_errs); } - for (i = 0; i < BATCH_SZ; i++) { + for (i = 0; i < batch_sz; i++) { if (fds[i] != -1) close(fds[i]); } @@ -87,6 +170,42 @@ static void *producer(void *input) return NULL; } +static void *thread_func(void *arg) +{ + return NULL; +} + +static void *task_producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + pthread_t *pthds = t->pthds; + int *pthd_results = t->pthd_results; + int i; + + while (true) { + for (i = 0; i < batch_sz; i++) { + pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL); + if (pthd_results[i]) + atomic_inc(&create_owner_errs); + } + + for (i = 0; i < batch_sz; i++) { + if (!pthd_results[i]) + pthread_join(pthds[i], NULL); + } + } + + return NULL; +} + +static void *producer(void *input) +{ + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) + return sk_producer(input); + else + return task_producer(input); +} + static void report_progress(int iter, struct bench_res *res, long delta_ns) { double creates_per_sec, kmallocs_per_create; @@ -123,14 +242,18 @@ static void report_final(struct bench_res res[], int res_cnt) printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ", creates_mean, creates_stddev, creates_mean / env.producer_cnt); printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates); - if (create_owner_errs || skel->bss->create_errs) - printf("socket() errors %ld create_errs %ld\n", socket_errs, + if (create_owner_errs || skel->bss->create_errs) + printf("%s() errors %ld create_errs %ld\n", + storage_type == BPF_MAP_TYPE_SK_STORAGE ? 
+ "socket" : "pthread_create", + create_owner_errs, skel->bss->create_errs); } /* Benchmark performance of creating bpf local storage */ const struct bench bench_local_storage_create = { .name = "local-storage-create", + .argp = &bench_local_storage_create_argp, .validate = validate, .setup = setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c index 2814bab54d28..7c851c9d5e47 100644 --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c @@ -22,6 +22,13 @@ struct { __type(value, struct storage); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct storage); +} task_storage_map SEC(".maps"); + SEC("raw_tp/kmalloc") int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr, return 0; } +SEC("tp_btf/sched_process_fork") +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child) +{ + struct storage *stg; + + if (parent->tgid != bench_pid) + return 0; + + stg = bpf_task_storage_get(&task_storage_map, child, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (stg) + __sync_fetch_and_add(&create_cnts, 1); + else + __sync_fetch_and_add(&create_errs, 1); + + return 0; +} + SEC("lsm.s/socket_post_create") int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) From 8cfee110711ed60bfdd39af0107ddef01d6b72c3 Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Sun, 26 Mar 2023 03:31:17 +0000 Subject: [PATCH 153/260] bpf, docs: Add extended call instructions Add extended call instructions. Uses the term "program-local" for call by offset. And there are instructions for calling helper functions by "address" (the old way of using integer values), and for calling helper functions by BTF ID (for kfuncs). V1 -> V2: addressed comments from David Vernet V2 -> V3: make descriptions in table consistent with updated names V3 -> V4: addressed comments from Alexei V4 -> V5: fixed alignment Signed-off-by: Dave Thaler Acked-by: David Vernet Link: https://lore.kernel.org/r/20230326033117.1075-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 59 +++++++++++++++++---------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index b44640589055..b77280eb926f 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -243,27 +243,29 @@ Jump instructions otherwise identical operations. 
The 'code' field encodes the operation as below:
 
-======== ===== ========================= ============
-code     value description               notes
-======== ===== ========================= ============
-BPF_JA   0x00  PC += off                 BPF_JMP only
-BPF_JEQ  0x10  PC += off if dst == src
-BPF_JGT  0x20  PC += off if dst > src    unsigned
-BPF_JGE  0x30  PC += off if dst >= src   unsigned
-BPF_JSET 0x40  PC += off if dst & src
-BPF_JNE  0x50  PC += off if dst != src
-BPF_JSGT 0x60  PC += off if dst > src    signed
-BPF_JSGE 0x70  PC += off if dst >= src   signed
-BPF_CALL 0x80  function call             see `Helper functions`_
-BPF_EXIT 0x90  function / program return BPF_JMP only
-BPF_JLT  0xa0  PC += off if dst < src    unsigned
-BPF_JLE  0xb0  PC += off if dst <= src   unsigned
-BPF_JSLT 0xc0  PC += off if dst < src    signed
-BPF_JSLE 0xd0  PC += off if dst <= src   signed
-======== ===== ========================= ============
+======== ===== === =========================================== =========================================
+code     value src description                                 notes
+======== ===== === =========================================== =========================================
+BPF_JA   0x0   0x0 PC += offset                                BPF_JMP only
+BPF_JEQ  0x1   any PC += offset if dst == src
+BPF_JGT  0x2   any PC += offset if dst > src                   unsigned
+BPF_JGE  0x3   any PC += offset if dst >= src                  unsigned
+BPF_JSET 0x4   any PC += offset if dst & src
+BPF_JNE  0x5   any PC += offset if dst != src
+BPF_JSGT 0x6   any PC += offset if dst > src                   signed
+BPF_JSGE 0x7   any PC += offset if dst >= src                  signed
+BPF_CALL 0x8   0x0 call helper function by address             see `Helper functions`_
+BPF_CALL 0x8   0x1 call PC += offset                           see `Program-local functions`_
+BPF_CALL 0x8   0x2 call helper function by BTF ID              see `Helper functions`_
+BPF_EXIT 0x9   0x0 return                                      BPF_JMP only
+BPF_JLT  0xa   any PC += offset if dst < src                   unsigned
+BPF_JLE  0xb   any PC += offset if dst <= src                  unsigned
+BPF_JSLT 0xc   any PC += offset if dst < src                   signed
+BPF_JSLE 0xd   any PC += offset if dst <= src                  signed
+======== ===== === =========================================== =========================================
 
 The eBPF program needs to store the return value into register R0 before doing a
-BPF_EXIT.
+``BPF_EXIT``.
 
 Example:
 
@@ -277,9 +279,22 @@ Helper functions
 ~~~~~~~~~~~~~~~~
 
 Helper functions are a concept whereby BPF programs can call into a
-set of function calls exposed by the runtime. Each helper
-function is identified by an integer used in a ``BPF_CALL`` instruction.
-The available helper functions may differ for each program type.
+set of functions exposed by the underlying platform.
+
+Historically, each helper function was identified by an address
+encoded in the imm field. The available helper functions may differ
+for each program type, but address values are unique across all program types.
+
+Platforms that support the BPF Type Format (BTF) support identifying
+a helper function by a BTF ID encoded in the imm field, where the BTF ID
+identifies the helper name and type.
+
+Program-local functions
+~~~~~~~~~~~~~~~~~~~~~~~
+Program-local functions are functions exposed by the same BPF program as the
+caller, and are referenced by offset from the call instruction, similar to
+``BPF_JA``. A ``BPF_EXIT`` within the program-local function will return to
+the caller.
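[As a concrete illustration of the program-local call encoding documented
above: a minimal sketch, not part of the patch, of how such a call is
typically produced from BPF C. Function names here are invented for the
example. A static, non-inlined function in the same program is emitted as a
BPF-to-BPF call, i.e. a BPF_CALL instruction with src set to 0x1 and the
callee referenced by instruction offset; the callee's BPF_EXIT returns to
the caller.]

	// SPDX-License-Identifier: GPL-2.0
	/* Illustrative sketch only; compile with clang -target bpf. */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	static __noinline int add_one(int x)	/* callee of the program-local call */
	{
		return x + 1;			/* BPF_EXIT: returns to prog() */
	}

	SEC("xdp")
	int prog(struct xdp_md *ctx)
	{
		/* Emitted as: BPF_JMP | BPF_CALL with src = 0x1, callee by offset.
		 * Returns 2, i.e. XDP_PASS.
		 */
		return add_one(1);
	}

	char _license[] SEC("license") = "GPL";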
Load and store instructions
===========================

From 5f5a7d8d8bd461f515543040ad7d107cc405d30c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20Gon=C3=A7alves?=
Date: Fri, 24 Mar 2023 10:02:22 +0000
Subject: [PATCH 154/260] xsk: allow remap of fill and/or completion rings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The remap of fill and completion rings was frowned upon as they
control the usage of UMEM which does not support concurrent use. At
the same time this would disallow the remap of these rings into
another process.

A possible use case is that the user wants to transfer the socket/UMEM
ownership to another process (via SYS_pidfd_getfd) and so would need
to also remap these rings. This will have no impact on current usages
and just relaxes the remap limitation.

Signed-off-by: Nuno Gonçalves
Reviewed-by: Maciej Fijalkowski
Acked-by: Magnus Karlsson
Link: https://lore.kernel.org/r/20230324100222.13434-1-nunog@fr24.com
Signed-off-by: Alexei Starovoitov
---
 net/xdp/xsk.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2ac58b282b5e..cc1e7f15fa73 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1301,9 +1301,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long size = vma->vm_end - vma->vm_start;
 	struct xdp_sock *xs = xdp_sk(sock->sk);
+	int state = READ_ONCE(xs->state);
 	struct xsk_queue *q = NULL;
 
-	if (READ_ONCE(xs->state) != XSK_READY)
+	if (state != XSK_READY && state != XSK_BOUND)
 		return -EBUSY;
 
 	if (offset == XDP_PGOFF_RX_RING) {
@@ -1314,9 +1315,11 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 		/* Matches the smp_wmb() in XDP_UMEM_REG */
 		smp_rmb();
 		if (offset == XDP_UMEM_PGOFF_FILL_RING)
-			q = READ_ONCE(xs->fq_tmp);
+			q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
+						 READ_ONCE(xs->pool->fq);
 		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
-			q = READ_ONCE(xs->cq_tmp);
+			q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
+						 READ_ONCE(xs->pool->cq);
 	}
 
 	if (!q)

From f1cb927cdb6280e0ce283174cc1bf395dffc43d4 Mon Sep 17 00:00:00 2001
From: JP Kobryn
Date: Fri, 24 Mar 2023 18:08:45 -0700
Subject: [PATCH 155/260] libbpf: Ensure print callback usage is thread-safe

This patch prevents races on the print function pointer, allowing the
libbpf_set_print() function to become thread-safe.

Signed-off-by: JP Kobryn
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20230325010845.46000-1-inwardvessel@gmail.com
---
 tools/lib/bpf/libbpf.c | 9 ++++++---
 tools/lib/bpf/libbpf.h | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f6a071db5c6e..15737d7b5a28 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -216,9 +216,10 @@ static libbpf_print_fn_t __libbpf_pr = __base_pr;
 
 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
 {
-	libbpf_print_fn_t old_print_fn = __libbpf_pr;
+	libbpf_print_fn_t old_print_fn;
+
+	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
 
-	__libbpf_pr = fn;
 	return old_print_fn;
 }
 
@@ -227,8 +228,10 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
 	va_list args;
 	int old_errno;
+	libbpf_print_fn_t print_fn;
 
-	if (!__libbpf_pr)
+	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
+	if (!print_fn)
 		return;
 
 	old_errno = errno;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1615e55e2e79..0b7362397ea3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -101,6 +101,8 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
  * be used for libbpf warnings and informational messages.
  * @param fn The log print function. If NULL, libbpf won't print anything.
  * @return Pointer to old print function.
+ *
+ * This function is thread-safe.
  */
 LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);

From 7283137a7622292076dd8b7f3b8b2bb203ce5a14 Mon Sep 17 00:00:00 2001
From: Hengqi Chen
Date: Sun, 26 Mar 2023 09:53:41 +0000
Subject: [PATCH 156/260] selftests/bpf: Don't assume page size is 4096

The verifier test creates BPF ringbuf maps using hard-coded 4096 as
max_entries. Some tests will fail if the page size of the running kernel
is not 4096. Use getpagesize() instead.

Signed-off-by: Hengqi Chen
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20230326095341.816023-1-hengqi.chen@gmail.com
---
 tools/testing/selftests/bpf/test_verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 5b90eef09ade..e4657c5bc3f1 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1079,7 +1079,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 	if (*fixup_map_ringbuf) {
 		map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
-					 0, 4096);
+					 0, getpagesize());
 		do {
 			prog[*fixup_map_ringbuf].imm = map_fds[20];
 			fixup_map_ringbuf++;

From d08ab82f59d55b0e5acfeb453081278dfc33f232 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Tue, 28 Mar 2023 03:47:38 +0300
Subject: [PATCH 157/260] libbpf: Fix double-free when linker processes empty
 sections

Double-free error in bpf_linker__free() was reported by James Hilliard.
The error is caused by misuse of realloc() in extend_sec(). The error
occurs when two files with empty sections of the same name are linked:
- when the first file is processed:
  - extend_sec() calls realloc(dst->raw_data, dst_align_sz) with
    dst->raw_data == NULL and dst_align_sz == 0;
  - dst->raw_data is set to a special pointer to a memory block of
    size zero;
- when the second file is processed:
  - extend_sec() calls realloc(dst->raw_data, dst_align_sz) with
    dst->raw_data == that special zero-size pointer and dst_align_sz == 0;
  - realloc() "frees" the special pointer held in dst->raw_data and
    returns NULL;
  - extend_sec() exits with -ENOMEM, and the old dst->raw_data value is
    preserved (it is now invalid);
  - eventually, bpf_linker__free() attempts to free dst->raw_data again.

This patch fixes the bug by avoiding the -ENOMEM exit for dst_align_sz == 0.
The fix was suggested by Andrii Nakryiko.
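[For reference, the realloc() corner case described above can be reproduced
with a small stand-alone user-space sketch. This is not part of the patch;
glibc behavior is assumed, since C leaves realloc(ptr, 0) implementation-
defined.]

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* First pass: like extend_sec() on the first empty section.
		 * realloc(NULL, 0) may return a free()-able zero-size block.
		 */
		void *p = realloc(NULL, 0);

		/* Second pass: realloc(p, 0) frees p and, on glibc, returns NULL. */
		void *q = realloc(p, 0);
		if (!q) {
			/* Treating this NULL as ENOMEM while keeping the old
			 * pointer is the bug: p was already freed by realloc(),
			 * so a later free(p) would be a double-free.
			 */
			printf("realloc(p, 0) returned NULL; p is now stale\n");
		} else {
			free(q);
		}
		return 0;
	}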
Reported-by: James Hilliard
Signed-off-by: Eduard Zingerman
Signed-off-by: Andrii Nakryiko
Tested-by: James Hilliard
Link: https://lore.kernel.org/bpf/CADvTj4o7ZWUikKwNTwFq0O_AaX+46t_+Ca9gvWMYdWdRtTGeHQ@mail.gmail.com/
Link: https://lore.kernel.org/bpf/20230328004738.381898-3-eddyz87@gmail.com
---
 tools/lib/bpf/linker.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index d7069780984a..5ced96d99f8c 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1115,7 +1115,19 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
 
 	if (src->shdr->sh_type != SHT_NOBITS) {
 		tmp = realloc(dst->raw_data, dst_final_sz);
-		if (!tmp)
+		/* If dst_align_sz == 0, realloc() behaves in a special way:
+		 * 1. When dst->raw_data is NULL it returns:
+		 *    "either NULL or a pointer suitable to be passed to free()" [1].
+		 * 2. When dst->raw_data is not-NULL it frees dst->raw_data and returns NULL,
+		 *    thus invalidating any "pointer suitable to be passed to free()" obtained
+		 *    at step (1).
+		 *
+		 * The dst_align_sz > 0 check avoids error exit after (2), otherwise
+		 * dst->raw_data would be freed again in bpf_linker__free().
+		 *
+		 * [1] man 3 realloc
+		 */
+		if (!tmp && dst_align_sz > 0)
 			return -ENOMEM;
 
 		dst->raw_data = tmp;

From 6e9e141a7a28520a1cd13c96ad9127860e32ffbb Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Tue, 28 Mar 2023 05:08:12 +0300
Subject: [PATCH 158/260] selftests/bpf: Verifier/xdp_direct_packet_access.c
 converted to inline assembly

Test verifier/xdp_direct_packet_access.c was automatically converted to
use inline assembly. The original test will be removed in the next patch.

Signed-off-by: Eduard Zingerman
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20230328020813.392560-2-eddyz87@gmail.com
---
 .../selftests/bpf/prog_tests/verifier.c      |    2 +
 .../progs/verifier_xdp_direct_packet_access.c | 1722 +++++++++++++++++
 2 files changed, 1724 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index a774d5b193f1..efc8cf2e18d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -40,6 +40,7 @@
 #include "verifier_var_off.skel.h"
 #include "verifier_xadd.skel.h"
 #include "verifier_xdp.skel.h"
+#include "verifier_xdp_direct_packet_access.skel.h"
 
 __maybe_unused
 static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
@@ -102,3 +103,4 @@ void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); }
 void test_verifier_var_off(void) { RUN(verifier_var_off); }
 void test_verifier_xadd(void) { RUN(verifier_xadd); }
 void test_verifier_xdp(void) { RUN(verifier_xdp); }
+void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
new file mode 100644
index 000000000000..df2dfd1b15d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -0,0 +1,1722 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 1")
+__failure __msg("R3 pointer arithmetic on pkt_end") +__naked void end_mangling_bad_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + r3 += 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end mangling, bad access 2") +__failure __msg("R3 pointer arithmetic on pkt_end") +__naked void end_mangling_bad_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + r3 -= 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' > pkt_end, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_corner_case_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' > pkt_end, bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_1_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 4); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' > pkt_end, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_2_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' > pkt_end, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 9; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 9); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_1(void) +{ + asm volatile (" \ + 
r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end > pkt_data', good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_pkt_data_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 6; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 6); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end > pkt_data', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end > pkt_data', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end > pkt_data', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' < pkt_end, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_pkt_end_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = 
r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 6; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 6); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' < pkt_end, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_2_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' < pkt_end, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_corner_case_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' < pkt_end, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end < pkt_data', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end < pkt_data', bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_1_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; 
\ + r0 = *(u64*)(r1 - 4); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end < pkt_data', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end < pkt_data', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 9; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 9); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' >= pkt_end, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_pkt_end_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u32*)(r1 - 5); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_5(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 6; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 6); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' >= pkt_end, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_2_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct 
xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' >= pkt_end, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_corner_case_good_access_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_5(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end >= pkt_data', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end >= pkt_data', bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_1_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 4); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end >= pkt_data', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end >= pkt_data', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_6(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 9; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 9); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + 
: __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_6(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' <= pkt_end, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_corner_case_good_access_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' <= pkt_end, bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_1_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 4); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' <= pkt_end, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_end_bad_access_2_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_7(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 9; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 9); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_7(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct 
xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end <= pkt_data', good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void end_pkt_data_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u32*)(r1 - 5); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_8(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 6; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 6); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end <= pkt_data', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 <= r1 goto l0_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end <= pkt_data', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 7; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_end <= pkt_data', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_8(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data]); \ + r3 = *(u32*)(r1 + %[xdp_md_data_end]); \ + r1 = r2; \ + r1 += 8; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' > pkt_data, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_5(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' > pkt_data, bad access 1") +__failure __msg("R1 
offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_1_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 4); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' > pkt_data, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_5(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 > r3 goto l0_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_9(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 9; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 9); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_9(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r1 > r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data > pkt_meta', good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_pkt_meta_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_10(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 6; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 6); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data > pkt_meta', bad access 2") +__failure __msg("R1 offset is outside of the packet") 
+__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_2_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data > pkt_meta', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_corner_case_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data > pkt_meta', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_10(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 > r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' < pkt_data, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_pkt_data_good_access_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_11(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 6; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 6); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' < pkt_data, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_6(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' < pkt_data, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void 
data_corner_case_good_access_6(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_11(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 < r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data < pkt_meta', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_corner_case_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data < pkt_meta', bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_1_1(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 4); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data < pkt_meta', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_2_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 < r1 goto l0_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data < pkt_meta', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_12(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 9; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 9); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_12(void) +{ + asm volatile (" 
\ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r3 < r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' >= pkt_data, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_pkt_data_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u32*)(r1 - 5); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_13(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 6; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 6); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_7(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ +l0_%=: r0 = *(u32*)(r1 - 5); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' >= pkt_data, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_7(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 7); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_13(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 >= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data >= pkt_meta', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_corner_case_good_access_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= 
r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data >= pkt_meta', bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_1_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 4); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data >= pkt_meta', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_2_3(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 >= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_14(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 9; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 9); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_14(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r3 >= r1 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' <= pkt_data, corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_corner_case_good_access_8(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 8); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_1_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 
= r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 4); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_data_bad_access_2_8(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r1 <= r3 goto l0_%=; \ + r0 = *(u64*)(r1 - 8); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_15(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 9; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 9); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_15(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 7; \ + if r1 <= r3 goto l0_%=; \ + goto l1_%=; \ +l0_%=: r0 = *(u64*)(r1 - 7); \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data <= pkt_meta', good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void data_pkt_meta_good_access_2(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 8; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u32*)(r1 - 5); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_bad_access_16(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + r1 += 6; \ + if r3 <= r1 goto l0_%=; \ + r0 = *(u64*)(r1 - 6); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data <= pkt_meta', bad access 2") +__failure __msg("R1 offset is outside of the packet") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void pkt_meta_bad_access_2_4(void) +{ + asm volatile (" \ + r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \ + r3 = *(u32*)(r1 + %[xdp_md_data]); \ + r1 = r2; \ + 
r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data <= pkt_meta', corner case, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void meta_corner_case_good_access_4(void) +{ + asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +SEC("xdp") +__description("XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access") +__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT) +__naked void corner_case_1_good_access_16(void) +{ + asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From c63a7d8bbb54a904f3ab8ff0aae39cd571b2c39c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 28 Mar 2023 05:08:13 +0300 Subject: [PATCH 159/260] selftests/bpf: Remove verifier/xdp_direct_packet_access.c, converted to progs/verifier_xdp_direct_packet_access.c Removing verifier/xdp_direct_packet_access.c as it was automatically converted to use inline assembly in the previous commit. It is available in progs/verifier_xdp_direct_packet_access.c. 
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230328020813.392560-3-eddyz87@gmail.com --- .../bpf/verifier/xdp_direct_packet_access.c | 1468 ----------------- 1 file changed, 1468 deletions(-) delete mode 100644 tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c deleted file mode 100644 index b4ec228eb95d..000000000000 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ /dev/null @@ -1,1468 +0,0 @@ -{ - "XDP pkt read, pkt_end mangling, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R3 pointer arithmetic on pkt_end", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "XDP pkt read, pkt_end mangling, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R3 pointer arithmetic on pkt_end", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, -}, -{ - "XDP pkt read, pkt_data' > pkt_end, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' > pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' > pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - 
.prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end > pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end > pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end > pkt_data', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - 
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' < pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' < pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' < pkt_end, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, 
BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end < pkt_data', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end < pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end < pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' >= pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct 
xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' >= pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end >= pkt_data', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end >= pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - 
offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end >= pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' <= pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = 
F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' <= pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end <= pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end <= pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - 
BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end <= pkt_data', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' > pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' > pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - 
BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data > pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data > pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data > pkt_meta', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' < pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' < pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data < pkt_meta', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - 
offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data < pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data < pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' >= pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, 
BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data >= pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - 
"XDP pkt read, pkt_data >= pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' <= pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - 
BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data <= pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data <= pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, 
BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, -{ - "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, -}, From d8d8b008629ffd69f1e204010cb3299bb633877e Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Wed, 29 Mar 2023 00:30:02 -0700 Subject: [PATCH 160/260] tools: bpftool: json: Fix backslash escape typo in jsonw_puts This is essentially a backport of iproute2's commit ed54f76484b5 ("json: fix backslash escape typo in jsonw_puts") Also added the stdio.h include in json_writer.h to be able to compile and run the json_writer test (as used below). Before this fix: $ gcc -D notused -D TEST -I../../include -o json_writer json_writer.c json_writer.h $ ./json_writer { "Vyatta": { "url": "http://vyatta.com", "downloads": 2000000, "stock": 8.16, "ARGV": [], "empty": [], "NIL": {}, "my_null": null, "special chars": [ "slash": "/", "newline": "\n", "tab": "\t", "ff": "\f", "quote": "\"", "tick": "'", "backslash": "\n" ] } } After: $ gcc -D notused -D TEST -I../../include -o json_writer json_writer.c json_writer.h $ ./json_writer { "Vyatta": { "url": "http://vyatta.com", "downloads": 2000000, "stock": 8.16, "ARGV": [], "empty": [], "NIL": {}, "my_null": null, "special chars": [ "slash": "/", "newline": "\n", "tab": "\t", "ff": "\f", "quote": "\"", "tick": "'", "backslash": "\\" ] } } Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20230329073002.2026563-1-chantr4@gmail.com --- tools/bpf/bpftool/json_writer.c | 2 +- tools/bpf/bpftool/json_writer.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index bca5dd0a59e3..be379613d118 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -75,7 +75,7 @@ static void jsonw_puts(json_writer_t *self, const char *str) fputs("\\b", self->out); break; case '\\': - fputs("\\n", self->out); + fputs("\\\\", self->out); break; case '"': fputs("\\\"", self->out); diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 8ace65cdb92f..5aaffd3b837b 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -14,6 +14,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdarg.h> +#include <stdio.h> #include <linux/compiler.h> /* Opaque class structure */ From 562dc56a88983421a6c5a46e0feb891873d118a1 Mon Sep 17 00:00:00 2001 From: Yixin Shen Date: Wed, 29 Mar 2023 07:35:57 +0000 Subject: [PATCH 161/260] bpf: allow a TCP CC to write app_limited A CC that implements tcp_congestion_ops.cong_control() should be able to write app_limited. A built-in CC or one from a kernel module is already able to write to this member of struct tcp_sock. For a BPF program, write access has not been allowed, yet. 
Signed-off-by: Yixin Shen Link: https://lore.kernel.org/r/20230329073558.8136-2-bobankhshen@gmail.com Signed-off-by: Martin KaFai Lau --- net/ipv4/bpf_tcp_ca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index e8b27826283e..ea21c96c03aa 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -113,6 +113,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, case offsetof(struct tcp_sock, ecn_flags): end = offsetofend(struct tcp_sock, ecn_flags); break; + case offsetof(struct tcp_sock, app_limited): + end = offsetofend(struct tcp_sock, app_limited); + break; default: bpf_log(log, "no write support to tcp_sock at off %d\n", off); return -EACCES; From 4239561b69feb94e52e43d93685cc46fb9dbcae5 Mon Sep 17 00:00:00 2001 From: Yixin Shen Date: Wed, 29 Mar 2023 07:35:58 +0000 Subject: [PATCH 162/260] selftests/bpf: test a BPF CC writing app_limited Test whether a TCP CC implemented in BPF is allowed to write app_limited in struct tcp_sock. This is already allowed for the built-in TCP CC. Signed-off-by: Yixin Shen Link: https://lore.kernel.org/r/20230329073558.8136-3-bobankhshen@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c index 43447704cf0e..0724a79cec78 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -16,6 +16,16 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) return (struct tcp_sock *)sk; } +static inline unsigned int tcp_left_out(const struct tcp_sock *tp) +{ + return tp->sacked_out + tp->lost_out; +} + +static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) +{ + return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; +} + SEC("struct_ops/write_sk_pacing_init") void BPF_PROG(write_sk_pacing_init, struct sock *sk) { @@ -31,11 +41,12 @@ SEC("struct_ops/write_sk_pacing_cong_control") void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, const struct rate_sample *rs) { - const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); unsigned long rate = ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) / (tp->srtt_us ?: 1U << 3); sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); + tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1; } SEC("struct_ops/write_sk_pacing_ssthresh") From d6e6286a12e7b8a4ddc66237c4ccf6f531ef1c82 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Mar 2023 11:52:00 -0700 Subject: [PATCH 163/260] libbpf: disassociate section handler on explicit bpf_program__set_type() call If the user explicitly overrides a program's type with the bpf_program__set_type() API call, we need to disassociate whatever SEC_DEF handler libbpf determined initially based on the program's SEC() definition, as it's not going to be valid anymore and could lead to crashes and/or confusing failures. Also, fix up the bpf_prog_test_load() helper in selftests/bpf, which is force-setting program type (even if that's completely unnecessary; this is quite a legacy piece of code), and thus should expect auto-attach to not work, yet one of the tests explicitly relies on auto-attach for testing. Instead, force-set program type only if it differs from the desired one.
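For illustration, a minimal usage sketch of the resulting behavior (hypothetical, not part of this patch: the object file "prog.bpf.o", the program name "handler", and the choice of BPF_PROG_TYPE_SCHED_CLS are made-up assumptions). After an explicit type override, the SEC() handler is disassociated, so generic auto-attach is expected to fail cleanly instead of attaching through stale SEC() information:

#include <errno.h>
#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int err = 1;

	obj = bpf_object__open_file("prog.bpf.o", NULL); /* hypothetical object */
	if (!obj)
		return 1;

	prog = bpf_object__find_program_by_name(obj, "handler"); /* hypothetical name */
	if (!prog)
		goto out;

	/* Mirror the bpf_prog_test_load() fix: override only when the
	 * desired type actually differs, so auto-attach keeps working
	 * in the common case.
	 */
	if (bpf_program__type(prog) != BPF_PROG_TYPE_SCHED_CLS)
		bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);

	if (bpf_object__load(obj))
		goto out;

	/* With prog->sec_def cleared by the override above, generic
	 * auto-attach has no handler to consult and reports an error
	 * rather than attaching based on the stale SEC() definition.
	 */
	link = bpf_program__attach(prog);
	if (!link)
		fprintf(stderr, "auto-attach not possible: %d\n", -errno);
	else
		bpf_link__destroy(link);
	err = 0;
out:
	bpf_object__close(obj);
	return err;
}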
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230327185202.1929145-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 1 + tools/testing/selftests/bpf/testing_helpers.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 15737d7b5a28..49cd304ae3bc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8468,6 +8468,7 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) return libbpf_err(-EBUSY); prog->type = type; + prog->sec_def = NULL; return 0; } diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 6c44153755e6..ecfea13f938b 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -195,7 +195,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, goto err_out; } - if (type != BPF_PROG_TYPE_UNSPEC) + if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
From b3c63d7ad81ad6f43921d59af18fc25c64327a74 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Mar 2023 11:52:01 -0700 Subject: [PATCH 164/260] veristat: add -d debug mode option to see debug libbpf log Add -d option to allow requesting libbpf debug logs from veristat. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230327185202.1929145-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 83231456d3c5..263df32fbda8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -135,6 +135,7 @@ static struct env { char **filenames; int filename_cnt; bool verbose; + bool debug; bool quiet; int log_level; enum resfmt out_fmt; @@ -169,7 +170,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va { if (!env.verbose) return 0; - if (level == LIBBPF_DEBUG /* && !env.verbose */) + if (level == LIBBPF_DEBUG && !env.debug) return 0; return vfprintf(stderr, format, args); } @@ -186,6 +187,7 @@ static const struct argp_option opts[] = { { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, { "verbose", 'v', NULL, 0, "Verbose mode" }, { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, + { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -212,6 +214,10 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'v': env.verbose = true; break; + case 'd': + env.debug = true; + env.verbose = true; + break; case 'q': env.quiet = true; break;
From fa7cc90620870e4444bb5184c08148495b1627c6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Mar 2023 11:52:02 -0700 Subject: [PATCH 165/260] veristat: guess and substitute underlying program type for freplace (EXT) progs SEC("freplace") (i.e., BPF_PROG_TYPE_EXT) programs are not loadable as is through veristat, as the kernel expects the actual program's FD during BPF_PROG_LOAD time, which veristat has no way of knowing.
Unfortunately, freplace programs are a pretty important class of programs, especially when dealing with XDP chaining solutions, which rely on EXT programs. So let's do our best and teach veristat to try to guess the original program type, based on the program's context argument type. And if the guessing process succeeds, we manually override freplace/EXT with the guessed program type using the bpf_program__set_type() setter to increase chances of proper BPF verification. We rely on BTF and maintain a simple lookup table. This process is obviously not 100% bulletproof, as a valid program might not use the context and thus wouldn't have to specify the correct type. Also, __sk_buff is very ambiguous and is the context type across many different program types. We pick BPF_PROG_TYPE_CGROUP_SKB for now, which seems to work fine in practice so far. Similarly, some program types require specifying an attach type, and so we pick one out of a few possible variants. Best effort at its best. But this makes veristat even more widely applicable. Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230327185202.1929145-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 121 ++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 263df32fbda8..055df1abd7ca 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -778,7 +779,62 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } -static void fixup_obj(struct bpf_object *obj) +static int guess_prog_type_by_ctx_name(const char *ctx_name, + enum bpf_prog_type *prog_type, + enum bpf_attach_type *attach_type) +{ + /* We need to guess program type based on its declared context type. + * This guess can't be perfect as many different program types might + * share the same context type. So we can only hope to reasonably + * well guess this and get lucky. + * + * Just in case, we support both UAPI-side type names and + * kernel-internal names.
+ */ + static struct { + const char *uapi_name; + const char *kern_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + } ctx_map[] = { + /* __sk_buff is most ambiguous, for now we assume cgroup_skb */ + { "__sk_buff", "sk_buff", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS }, + { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND }, + { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND }, + { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS }, + { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT }, + { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE }, + { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL }, + { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT }, + { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE }, + { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP }, + { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP }, + /* tracing types with no expected attach type */ + { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE }, + { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT }, + /* raw_tp programs use u64[] from kernel side, we don't want + * to match on that, probably; so NULL for kern-side type + */ + { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT }, + }; + int i; + + if (!ctx_name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ctx_map); i++) { + if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 || + (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) { + *prog_type = ctx_map[i].prog_type; + *attach_type = ctx_map[i].attach_type; + return 0; + } + } + + return -ESRCH; +} + +static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename) { struct bpf_map *map; @@ -798,18 +854,75 @@ static void fixup_obj(struct bpf_object *obj) bpf_map__set_max_entries(map, 1); } } + + /* SEC(freplace) programs can't be loaded with veristat as is, + * but we can try guessing their target program's expected type by + * looking at the type of program's first argument and substituting + * corresponding program type + */ + if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) { + const struct btf *btf = bpf_object__btf(obj); + const char *prog_name = bpf_program__name(prog); + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + const struct btf_type *t; + const char *ctx_name; + int id; + + if (!btf) + goto skip_freplace_fixup; + + id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC); + t = btf__type_by_id(btf, id); + t = btf__type_by_id(btf, t->type); + if (!btf_is_func_proto(t) || btf_vlen(t) != 1) + goto skip_freplace_fixup; + + /* context argument is a pointer to a struct/typedef */ + t = btf__type_by_id(btf, btf_params(t)[0].type); + while (t && btf_is_mod(t)) + t = btf__type_by_id(btf, t->type); + if (!t || !btf_is_ptr(t)) + goto skip_freplace_fixup; + t = btf__type_by_id(btf, t->type); + while (t && btf_is_mod(t)) + t = btf__type_by_id(btf, t->type); + if (!t) + goto skip_freplace_fixup; + + ctx_name = btf__name_by_offset(btf, t->name_off); + + if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) { + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + + if (!env.quiet) { + 
printf("Using guessed program type '%s' for %s/%s...\n", + libbpf_bpf_prog_type_str(prog_type), + filename, prog_name); + } + } else { + if (!env.quiet) { + printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", + ctx_name, filename, prog_name); + } + } + } +skip_freplace_fixup: + return; } static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *prog_name = bpf_program__name(prog); + const char *base_filename = basename(filename); size_t buf_sz = sizeof(verif_log_buf); char *buf = verif_log_buf; struct verif_stats *stats; int err = 0; void *tmp; - if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) { + if (!should_process_file_prog(base_filename, bpf_program__name(prog))) { env.progs_skipped++; return 0; } @@ -835,12 +948,12 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf verif_log_buf[0] = '\0'; /* increase chances of successful BPF object loading */ - fixup_obj(obj); + fixup_obj(obj, prog, base_filename); err = bpf_object__load(obj); env.progs_processed++; - stats->file_name = strdup(basename(filename)); + stats->file_name = strdup(base_filename); stats->prog_name = strdup(bpf_program__name(prog)); stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */ parse_verif_log(buf, buf_sz, stats); From 4ca13d1002f37c10038ff4ed3cfdc70dbe049d60 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 28 Mar 2023 21:10:48 -0400 Subject: [PATCH 166/260] selftests/bpf: Rewrite two infinite loops in bound check cases The two infinite loops in bound check cases added by commit 1a3148fc171f ("selftests/bpf: Check when bounds are not in the 32-bit range") increased the execution time of test_verifier from about 6 seconds to about 9 seconds. Rewrite these two infinite loops to finite loops to get rid of this extra time cost. 
Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20230329011048.1721937-1-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bounds.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 74b1917d4208..43942ce8cf15 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -784,22 +784,26 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 13), BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), BPF_LD_IMM64(BPF_REG_0, 0x7fffffffffffff10), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LD_IMM64(BPF_REG_2, 0x8000000000000fff), BPF_LD_IMM64(BPF_REG_0, 0x8000000000000000), BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_2, 3), /* r1 signed range is [S64_MIN, S64_MAX] */ - BPF_JMP_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -2), + BPF_JMP_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -3), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF program is too large", - .result = REJECT, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, { @@ -856,21 +860,25 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 10), BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), BPF_MOV32_IMM(BPF_REG_0, 0x7fffff10), BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_2, 0x80000fff), BPF_MOV32_IMM(BPF_REG_0, 0x80000000), BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP32_REG(BPF_JSGT, BPF_REG_0, BPF_REG_2, 3), /* r1 signed range is [S32_MIN, S32_MAX] */ - BPF_JMP32_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -2), + BPF_JMP32_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -3), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF program is too large", - .result = REJECT, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, From d816129530e77b905b492631651eb09a18789692 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Mar 2023 12:01:15 -0700 Subject: [PATCH 167/260] veristat: change guess for __sk_buff from CGROUP_SKB to SCHED_CLS SCHED_CLS seems to be a better option as a default guess for freplace programs that have __sk_buff as a context type. 
Reported-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230330190115.3942962-1-andrii@kernel.org Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/veristat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 055df1abd7ca..7888c03ba631 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -798,7 +798,7 @@ static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_attach_type attach_type; } ctx_map[] = { /* __sk_buff is most ambiguous, for now we assume cgroup_skb */ - { "__sk_buff", "sk_buff", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS }, + { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS }, { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND }, { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND }, { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
From e4c2acab95a5947fe7948140a83e4f4918c6c048 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 30 Mar 2023 09:52:02 -0500 Subject: [PATCH 168/260] bpf: Handle PTR_MAYBE_NULL case in PTR_TO_BTF_ID helper call arg When validating a helper function argument, we use check_reg_type() to ensure that the register containing the argument is of the correct type. When the register's base type is PTR_TO_BTF_ID, there is some supplemental logic where we do extra checks for various combinations of PTR_TO_BTF_ID type modifiers. For example, for PTR_TO_BTF_ID, PTR_TO_BTF_ID | PTR_TRUSTED, and PTR_TO_BTF_ID | MEM_RCU, we call map_kptr_match_type() for bpf_kptr_xchg() calls, and btf_struct_ids_match() for other helper calls. When an unhandled PTR_TO_BTF_ID type modifier combination is passed to check_reg_type(), the verifier fails with an internal verifier error message. This can currently be triggered by passing a PTR_MAYBE_NULL pointer to helper functions (currently just bpf_kptr_xchg()) with an ARG_PTR_TO_BTF_ID_OR_NULL arg type. For example, by calling bpf_kptr_xchg(&v->kptr, bpf_cpumask_create()). Whether or not passing a PTR_MAYBE_NULL arg to an ARG_PTR_TO_BTF_ID_OR_NULL argument is valid is an interesting question. In a vacuum, it seems fine. A helper function with an ARG_PTR_TO_BTF_ID_OR_NULL arg would seem to be implying that it can handle either a NULL or non-NULL arg, and has logic in place to detect and gracefully handle each. This is the case for bpf_kptr_xchg(), which of course simply does an xchg(). On the other hand, bpf_kptr_xchg() also specifies OBJ_RELEASE, and refcounting semantics for a PTR_MAYBE_NULL pointer are different from handling it for a NULL _OR_ non-NULL pointer. For example, with a non-NULL arg, we should always fail if there was not a nonzero refcount for the value in the register being passed to the helper. For PTR_MAYBE_NULL on the other hand, it's unclear. If the pointer is NULL it would be fine, but if it's not NULL, it would be incorrect to load the program. The current solution to this is to just fail if PTR_MAYBE_NULL is passed, and to instead require programs to have a NULL check to explicitly handle the NULL and non-NULL cases. This seems reasonable. Not only would it possibly be quite complicated to correctly handle PTR_MAYBE_NULL refcounting in the verifier, but it's also an arguably odd programming pattern in general to not explicitly handle the NULL case anyway.
For example, it seems odd to not care about whether a pointer you're passing to bpf_kptr_xchg() was successfully allocated in a program such as the following: private(MASK) static struct bpf_cpumask __kptr * global_mask; SEC("tp_btf/task_newtask") int BPF_PROG(example, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *prev; /* bpf_cpumask_create() returns PTR_MAYBE_NULL */ prev = bpf_kptr_xchg(&global_mask, bpf_cpumask_create()); if (prev) bpf_cpumask_release(prev); return 0; } This patch therefore updates the verifier to explicitly check for PTR_MAYBE_NULL in check_reg_type(), and fail gracefully if it's observed. This isn't really "fixing" anything unsafe or incorrect. We're just updating the verifier to fail gracefully, and explicitly handle this pattern rather than unintentionally falling back to an internal verifier error path. A subsequent patch will update selftests. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230330145203.80506-1-void@manifault.com --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20eb2015842f..52738f9dcb15 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7204,6 +7204,10 @@ found: } break; } + case PTR_TO_BTF_ID | PTR_MAYBE_NULL: + case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: + verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno); + return -EACCES; case PTR_TO_BTF_ID | MEM_ALLOC: if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && meta->func_id != BPF_FUNC_kptr_xchg) {
From 67efbd57bc6e57de276b964f023f8f947bc52460 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 30 Mar 2023 09:52:03 -0500 Subject: [PATCH 169/260] selftests/bpf: Add testcases for ptr_*_or_null_ in bpf_kptr_xchg The second argument of the bpf_kptr_xchg() helper function is ARG_PTR_TO_BTF_ID_OR_NULL. A recent patch fixed a bug whereby the verifier would fail with an internal error message if a program invoked the helper with a PTR_TO_BTF_ID | PTR_MAYBE_NULL register. This patch adds some testcases to ensure that it fails gracefully moving forward. Before the fix, these testcases would have failed with an error resembling the following: ; p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); 99: (7b) *(u64 *)(r10 -16) = r7 ; frame1: ... 100: (bf) r1 = r10 ; frame1: ... 101: (07) r1 += -16 ; frame1: ... ; p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); 102: (85) call bpf_kfunc_call_test_acquire#13908 ; frame1: R0_w=ptr_or_null_prog_test_ref_kfunc... ; p = bpf_kptr_xchg(&v->ref_ptr, p); 103: (bf) r1 = r6 ; frame1: ... 104: (bf) r2 = r0 ; frame1: R0_w=ptr_or_null_prog_test_ref_kfunc...
105: (85) call bpf_kptr_xchg#194 verifier internal error: invalid PTR_TO_BTF_ID register for type match Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230330145203.80506-2-void@manifault.com --- .../selftests/bpf/progs/cpumask_failure.c | 25 +++++++++++++++++++ .../selftests/bpf/progs/map_kptr_fail.c | 23 +++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index db4f94e72b61..a9bf6ea336cf 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -165,3 +165,28 @@ int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to helper arg2") +int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *prev, *curr; + + curr = bpf_cpumask_create(); + if (!curr) + return 0; + + prev = bpf_kptr_xchg(&global_mask, curr); + if (prev) + bpf_cpumask_release(prev); + + bpf_rcu_read_lock(); + curr = global_mask; + /* PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU passed to bpf_kptr_xchg() */ + prev = bpf_kptr_xchg(&global_mask, curr); + bpf_rcu_read_unlock(); + if (prev) + bpf_cpumask_release(prev); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 08f9ec18c345..15bf3127dba3 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -20,6 +20,7 @@ struct array_map { } array_map SEC(".maps"); extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; @@ -442,4 +443,26 @@ int kptr_get_ref_state(struct __sk_buff *ctx) return 0; } +SEC("?tc") +__failure __msg("Possibly NULL pointer passed to helper arg2") +int kptr_xchg_possibly_null(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + + /* PTR_TO_BTF_ID | PTR_MAYBE_NULL passed to bpf_kptr_xchg() */ + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) + bpf_kfunc_call_test_release(p); + + return 0; +} + char _license[] SEC("license") = "GPL"; From 328bafc9a373da5f268d82533dd8f2e66526d168 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 31 Mar 2023 11:31:55 +0200 Subject: [PATCH 170/260] selftests/bpf: Add err.h header Moving error macros from profiler.inc.h to new err.h header. It will be used in following changes. Also adding PTR_ERR macro that will be used in following changes. 
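The intended usage pattern is the familiar kernel one, roughly (a sketch with a hypothetical pointer source; only the macros added in this patch are real):

    #include "err.h"

    /* ptr holds a pointer-or-error value, e.g. read from a kernel field */
    if (IS_ERR_OR_NULL(ptr))
        return PTR_ERR(ptr); /* 0 for NULL, negative errno for error pointers */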
Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20230331093157.1749137-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/err.h | 18 ++++++++++++++++++ .../testing/selftests/bpf/progs/profiler.inc.h | 3 +-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/err.h diff --git a/tools/testing/selftests/bpf/progs/err.h b/tools/testing/selftests/bpf/progs/err.h new file mode 100644 index 000000000000..d66d283d9e59 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/err.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ERR_H__ +#define __ERR_H__ + +#define MAX_ERRNO 4095 +#define IS_ERR_VALUE(x) (unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO + +static inline int IS_ERR_OR_NULL(const void *ptr) +{ + return !ptr || IS_ERR_VALUE((unsigned long)ptr); +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +#endif /* __ERR_H__ */ diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 875513866032..f799d87e8700 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -6,6 +6,7 @@ #include #include "profiler.h" +#include "err.h" #ifndef NULL #define NULL 0 @@ -16,7 +17,6 @@ #define O_DIRECTORY 00200000 #define __O_TMPFILE 020000000 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) -#define MAX_ERRNO 4095 #define S_IFMT 00170000 #define S_IFSOCK 0140000 #define S_IFLNK 0120000 @@ -34,7 +34,6 @@ #define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) #define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) #define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) -#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO #define KILL_DATA_ARRAY_SIZE 8 From 88dc8b3605b38a440fba45edcc53a6c7a98eee3b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 31 Mar 2023 11:31:56 +0200 Subject: [PATCH 171/260] selftests/bpf: Add read_build_id function Adding read_build_id function that parses out build id from specified binary. It will replace extract_build_id and also be used in following changes. 
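The intended call pattern (mirrored by the test conversions later in this series; id_offs below is the bpf_stack_build_id array those tests use) is roughly:

    char build_id[BPF_BUILD_ID_SIZE];
    int sz;

    sz = read_build_id("urandom_read", build_id, sizeof(build_id));
    if (sz < 0)
        return sz; /* negative error, e.g. -EINVAL or -errno from open() */
    /* compare raw build id bytes instead of formatting hex strings */
    if (memcmp(build_id, id_offs[i].build_id, sz) == 0)
        build_id_matches = 1;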
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20230331093157.1749137-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/trace_helpers.c | 82 +++++++++++++++++++++ tools/testing/selftests/bpf/trace_helpers.h | 5 ++ 2 files changed, 87 insertions(+) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 934bf28fc888..9b070cdf44ac 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -11,6 +11,9 @@ #include #include #include "trace_helpers.h" +#include +#include +#include #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" @@ -234,3 +237,82 @@ ssize_t get_rel_offset(uintptr_t addr) fclose(f); return -EINVAL; } + +static int +parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id) +{ + Elf32_Word note_offs = 0; + + while (note_offs + sizeof(Elf32_Nhdr) < note_size) { + Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); + + if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") && + !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 && + nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { + memcpy(build_id, note_start + note_offs + + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); + memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz); + return (int) nhdr->n_descsz; + } + + note_offs = note_offs + sizeof(Elf32_Nhdr) + + ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); + } + + return -ENOENT; +} + +/* Reads binary from *path* file and returns it in the *build_id* buffer + * with *size* which is expected to be at least BPF_BUILD_ID_SIZE bytes. + * Returns size of build id on success. On error the error value is + * returned. 
+ */ +int read_build_id(const char *path, char *build_id, size_t size) +{ + int fd, err = -EINVAL; + Elf *elf = NULL; + GElf_Ehdr ehdr; + size_t max, i; + + if (size < BPF_BUILD_ID_SIZE) + return -EINVAL; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return -errno; + + (void)elf_version(EV_CURRENT); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + goto out; + if (elf_kind(elf) != ELF_K_ELF) + goto out; + if (!gelf_getehdr(elf, &ehdr)) + goto out; + + for (i = 0; i < ehdr.e_phnum; i++) { + GElf_Phdr mem, *phdr; + char *data; + + phdr = gelf_getphdr(elf, i, &mem); + if (!phdr) + goto out; + if (phdr->p_type != PT_NOTE) + continue; + data = elf_rawfile(elf, &max); + if (!data) + goto out; + if (phdr->p_offset + phdr->p_memsz > max) + goto out; + err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id); + if (err > 0) + break; + } + +out: + if (elf) + elf_end(elf); + close(fd); + return err; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 53efde0e2998..876f3e711df6 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -4,6 +4,9 @@ #include +#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) + struct ksym { long addr; char *name; @@ -23,4 +26,6 @@ void read_trace_pipe(void); ssize_t get_uprobe_offset(const void *addr); ssize_t get_rel_offset(uintptr_t addr); +int read_build_id(const char *path, char *build_id, size_t size); + #endif From dcc46f51d770bde625e4845cac42e808b3302b62 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 31 Mar 2023 11:31:57 +0200 Subject: [PATCH 172/260] selftests/bpf: Replace extract_build_id with read_build_id Replacing extract_build_id with read_build_id that parses out build id directly from elf without using readelf tool. Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20230331093157.1749137-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/stacktrace_build_id.c | 19 ++++++-------- .../bpf/prog_tests/stacktrace_build_id_nmi.c | 17 +++++-------- tools/testing/selftests/bpf/test_progs.c | 25 ------------------- tools/testing/selftests/bpf/test_progs.h | 1 - 4 files changed, 13 insertions(+), 49 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index 9ad09a6c538a..b7ba5cd47d96 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -7,13 +7,12 @@ void test_stacktrace_build_id(void) int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; struct test_stacktrace_build_id *skel; - int err, stack_trace_len; + int err, stack_trace_len, build_id_size; __u32 key, prev_key, val, duration = 0; - char buf[256]; - int i, j; + char buf[BPF_BUILD_ID_SIZE]; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; - int retry = 1; + int i, retry = 1; retry: skel = test_stacktrace_build_id__open_and_load(); @@ -52,9 +51,10 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = extract_build_id(buf, 256); + build_id_size = read_build_id("urandom_read", buf, sizeof(buf)); + err = build_id_size < 0 ? 
build_id_size : 0; - if (CHECK(err, "get build_id with readelf", + if (CHECK(err, "read_build_id", "err %d errno %d\n", err, errno)) goto cleanup; @@ -64,8 +64,6 @@ retry: goto cleanup; do { - char build_id[64]; - err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) @@ -73,10 +71,7 @@ retry: for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { - for (j = 0; j < 20; ++j) - sprintf(build_id + 2 * j, "%02x", - id_offs[i].build_id[j] & 0xff); - if (strstr(buf, build_id) != NULL) + if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0) build_id_matches = 1; } prev_key = key; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f4ea1a215ce4..47558b0d7f66 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -28,11 +28,10 @@ void test_stacktrace_build_id_nmi(void) .config = PERF_COUNT_HW_CPU_CYCLES, }; __u32 key, prev_key, val, duration = 0; - char buf[256]; - int i, j; + char buf[BPF_BUILD_ID_SIZE]; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; - int build_id_matches = 0; - int retry = 1; + int build_id_matches = 0, build_id_size; + int i, retry = 1; attr.sample_freq = read_perf_max_sample_freq(); @@ -94,7 +93,8 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = extract_build_id(buf, 256); + build_id_size = read_build_id("urandom_read", buf, sizeof(buf)); + err = build_id_size < 0 ? build_id_size : 0; if (CHECK(err, "get build_id with readelf", "err %d errno %d\n", err, errno)) @@ -106,8 +106,6 @@ retry: goto cleanup; do { - char build_id[64]; - err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key), id_offs, sizeof(id_offs), 0); if (CHECK(err, "lookup_elem from stackmap", @@ -116,10 +114,7 @@ retry: for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { - for (j = 0; j < 20; ++j) - sprintf(build_id + 2 * j, "%02x", - id_offs[i].build_id[j] & 0xff); - if (strstr(buf, build_id) != NULL) + if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0) build_id_matches = 1; } prev_key = key; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d903e6a72a96..ea82921110da 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -629,31 +629,6 @@ out: return err; } -int extract_build_id(char *build_id, size_t size) -{ - FILE *fp; - char *line = NULL; - size_t len = 0; - - fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r"); - if (fp == NULL) - return -1; - - if (getline(&line, &len, fp) == -1) - goto err; - pclose(fp); - - if (len > size) - len = size; - memcpy(build_id, line, len); - build_id[len] = '\0'; - free(line); - return 0; -err: - pclose(fp); - return -1; -} - static int finit_module(int fd, const char *param_values, int flags) { return syscall(__NR_finit_module, fd, param_values, flags); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 4b06b8347cd4..10ba43250668 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr) int bpf_find_map(const char *test, struct bpf_object *obj, const char 
*name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); -int extract_build_id(char *build_id, size_t size); int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz);
From 9af0f555ae4add25f0950753fb410c509aa71f50 Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Fri, 31 Mar 2023 01:58:42 -0600 Subject: [PATCH 173/260] selftests/bpf: Fix conflicts with built-in functions in bench_local_storage_create The fork function in gcc is considered a built-in function due to being used by libgcov when building with GNU extensions. Rename fork to sched_process_fork to prevent this conflict. See details: https://github.com/gcc-mirror/gcc/commit/d1c38823924506d389ca58d02926ace21bdf82fa https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82457 Fixes the following error: In file included from progs/bench_local_storage_create.c:6: progs/bench_local_storage_create.c:43:14: error: conflicting types for built-in function 'fork'; expected 'int(void)' [-Werror=builtin-declaration-mismatch] 43 | int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child) | ^~~~ Fixes: cbe9d93d58b1 ("selftests/bpf: Add bench for task storage creation") Signed-off-by: James Hilliard Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230331075848.1642814-1-james.hilliard1@gmail.com --- tools/testing/selftests/bpf/benchs/bench_local_storage_create.c | 2 +- tools/testing/selftests/bpf/progs/bench_local_storage_create.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c index abb0321d4f34..cff703f90e95 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -95,7 +95,7 @@ static void setup(void) exit(1); } } else { - if (!bpf_program__attach(skel->progs.fork)) { + if (!bpf_program__attach(skel->progs.sched_process_fork)) { fprintf(stderr, "Error attaching bpf program\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c index 7c851c9d5e47..e4bfbba6c193 100644 --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c @@ -40,7 +40,7 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr, } SEC("tp_btf/sched_process_fork") -int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child) +int BPF_PROG(sched_process_fork, struct task_struct *parent, struct task_struct *child) { struct storage *stg;
From 3ed85ae80283885ef8491d07cdcd7124328bed35 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Mar 2023 15:24:02 -0700 Subject: [PATCH 174/260] veristat: relicense veristat.c as dual GPL-2.0-only or BSD-2-Clause licensed Dual-license veristat.c to dual GPL-2.0-only or BSD-2-Clause license. This is needed to mirror it to Github to make it convenient for distro packagers to package veristat as a separate package. Veristat grew into a useful tool by itself, and there are already a bunch of users relying on veristat as a generic BPF loading and verification helper tool. So making it easy for packagers by providing a Github mirror, just like we do for bpftool and libbpf, is the next step to get veristat into the hands of users.
Apart from a few typo fixes, I'm the sole contributor to veristat.c so far, so no extra Acks should be needed for relicensing. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230331222405.3468634-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 7888c03ba631..612ca52c6fba 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #define _GNU_SOURCE #include
From 71c8c39f517787af19d9f35fe60463f7eec914e1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Mar 2023 15:24:03 -0700 Subject: [PATCH 175/260] veristat: improve version reporting For packaging, the version of the tool is important, so add a simple way to specify the veristat version for the upstream mirror at Github. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230331222405.3468634-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 612ca52c6fba..daac72b76508 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -140,6 +140,7 @@ static struct env { bool quiet; int log_level; enum resfmt out_fmt; + bool show_version; bool comparison_mode; bool replay_mode; @@ -176,16 +177,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } -const char *argp_program_version = "veristat"; +#ifndef VERISTAT_VERSION +#define VERISTAT_VERSION "" +#endif + +const char *argp_program_version = "veristat v" VERISTAT_VERSION; const char *argp_program_bug_address = ""; const char argp_program_doc[] = "veristat BPF verifier stats collection and comparison tool.\n" "\n" "USAGE: veristat [...]\n" -" OR: veristat -C \n"; +" OR: veristat -C \n" +" OR: veristat -R \n"; static const struct argp_option opts[] = { { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, + { "version", 'V', NULL, 0, "Print version" }, { "verbose", 'v', NULL, 0, "Verbose mode" }, { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -212,6 +219,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'h': argp_state_help(state, stderr, ARGP_HELP_STD_HELP); break; + case 'V': + env.show_version = true; + break; case 'v': env.verbose = true; break; @@ -1991,6 +2001,11 @@ int main(int argc, char **argv) if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) return 1; + if (env.show_version) { + printf("%s\n", argp_program_version); + return 0; + } + if (env.verbose && env.quiet) { fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
From ebf390c9d0136e01f327439c012ab5741971e72d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Mar 2023 15:24:04 -0700 Subject: [PATCH 176/260] veristat: avoid using kernel-internal headers Drop linux/compiler.h include, which seems to
be needed for the ARRAY_SIZE macro only. Redefine our own version of ARRAY_SIZE instead. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230331222405.3468634-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index daac72b76508..e592d05bccb2 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -4,7 +4,6 @@ #include #include #include -#include <linux/compiler.h> #include #include #include @@ -20,6 +19,10 @@ #include #include +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + enum stat_id { VERDICT, DURATION,
From ebf390c9d0136e01f327439c012ab5741971e72d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Mar 2023 15:24:05 -0700 Subject: [PATCH 177/260] veristat: small fixes found in -O2 mode Fix a few potentially uninitialized variable uses, found while building veristat.c in release (-O2) mode. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230331222405.3468634-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index e592d05bccb2..53d7ec168268 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -810,7 +810,7 @@ static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; } ctx_map[] = { - /* __sk_buff is most ambiguous, for now we assume cgroup_skb */ + /* __sk_buff is most ambiguous, we assume TC program */ { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS }, { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND }, { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND }, @@ -1045,6 +1045,7 @@ static int process_obj(const char *filename) goto cleanup; } + lprog = NULL; bpf_object__for_each_program(tprog, tobj) { const char *tprog_name = bpf_program__name(tprog); @@ -1855,6 +1856,7 @@ static int handle_comparison_mode(void) one_more_time: output_comp_headers(cur_fmt); + last_idx = -1; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i];
From d02c48fa113953aba0b330ec6c35f50c7d1d7986 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 31 Mar 2023 14:57:31 -0500 Subject: [PATCH 178/260] bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created).
This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get() and bpf_task_acquire_not_zero().
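In program code the resulting contract is the usual acquire-with-NULL-check pattern, e.g. (a minimal sketch mirroring the reworked selftests below):

    SEC("tp_btf/task_newtask")
    int BPF_PROG(acquire_release, struct task_struct *task, u64 clone_flags)
    {
        struct task_struct *acquired;

        /* KF_RET_NULL: acquisition fails if task->rcu_users already hit 0 */
        acquired = bpf_task_acquire(task);
        if (!acquired)
            return 0;
        bpf_task_release(acquired);
        return 0;
    }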
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 ++- kernel/bpf/verifier.c | 1 + .../selftests/bpf/prog_tests/task_kfunc.c | 1 + .../selftests/bpf/progs/task_kfunc_common.h | 5 ++ .../selftests/bpf/progs/task_kfunc_failure.c | 80 ++++++++++++++++--- .../selftests/bpf/progs/task_kfunc_success.c | 26 +++++- 6 files changed, 108 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 8980f6859443..e71a4a54ce99 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2013,7 +2014,9 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) */ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) { - return get_task_struct(p); + if (refcount_inc_not_zero(&p->rcu_users)) + return p; + return NULL; } /** @@ -2089,7 +2092,7 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) */ __bpf_kfunc void bpf_task_release(struct task_struct *p) { - put_task_struct(p); + put_task_struct_rcu_user(p); } #ifdef CONFIG_CGROUPS @@ -2199,7 +2202,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) rcu_read_lock(); p = find_task_by_pid_ns(pid, &init_pid_ns); if (p) - bpf_task_acquire(p); + p = bpf_task_acquire(p); rcu_read_unlock(); return p; @@ -2371,7 +2374,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front) BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52738f9dcb15..92ae4e8ab87b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4600,6 +4600,7 @@ BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) BTF_ID(struct, cgroup) BTF_ID(struct, bpf_cpumask) +BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index f79fa5bc9a8d..330133ece3f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -78,6 +78,7 @@ static const char * const success_tests[] = { "test_task_from_pid_arg", "test_task_from_pid_current", "test_task_from_pid_invalid", + "task_kfunc_acquire_trusted_walked", }; void test_task_kfunc(void) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index 4c2a4b0e3a25..bf0d1da9aff8 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -24,6 +24,8 @@ struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; void bpf_task_release(struct task_struct *p) __ksym; struct task_struct *bpf_task_from_pid(s32 pid) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void 
bpf_rcu_read_unlock(void) __ksym; static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) { @@ -60,6 +62,9 @@ static inline int tasks_kfunc_map_insert(struct task_struct *p) } acquired = bpf_task_acquire(p); + if (!acquired) + return -ENOENT; + old = bpf_kptr_xchg(&v->task, acquired); if (old) { bpf_task_release(old); diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 2c374a7ffece..63aef547da87 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -40,6 +40,9 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f /* Can't invoke bpf_task_acquire() on an untrusted pointer. */ acquired = bpf_task_acquire(v->task); + if (!acquired) + return 0; + bpf_task_release(acquired); return 0; @@ -53,38 +56,49 @@ int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) /* Can't invoke bpf_task_acquire() on a random frame pointer. */ acquired = bpf_task_acquire((struct task_struct *)&stack_task); + if (!acquired) + return 0; + bpf_task_release(acquired); return 0; } SEC("kretprobe/free_task") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("calling kernel function bpf_task_acquire is not allowed") int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; + /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */ acquired = bpf_task_acquire(task); - /* Can't release a bpf_task_acquire()'d task without a NULL check. */ + if (!acquired) + return 0; bpf_task_release(acquired); return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("R1 must be referenced or trusted") -int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +SEC("kretprobe/free_task") +__failure __msg("calling kernel function bpf_task_acquire is not allowed") +int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; - /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ - acquired = bpf_task_acquire(task->group_leader); - bpf_task_release(acquired); + bpf_rcu_read_lock(); + if (!task) { + bpf_rcu_read_unlock(); + return 0; + } + /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */ + acquired = bpf_task_acquire(task); + if (acquired) + bpf_task_release(acquired); + bpf_rcu_read_unlock(); return 0; } - SEC("tp_btf/task_newtask") __failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) @@ -137,6 +151,8 @@ int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clo struct task_struct *kptr, *acquired; acquired = bpf_task_acquire(task); + if (!acquired) + return 0; /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. 
*/ kptr = bpf_task_kptr_get(&acquired); @@ -185,6 +201,19 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla return 0; } +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + /* Can't invoke bpf_task_release() on an acquired task without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + SEC("tp_btf/task_newtask") __failure __msg("Unreleased reference") int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) @@ -256,12 +285,13 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) return -ENOENT; acquired = bpf_task_acquire(task); + if (!acquired) + return -EEXIST; old = bpf_kptr_xchg(&v->task, acquired); /* old cannot be passed to bpf_task_release() without a NULL check. */ bpf_task_release(old); - bpf_task_release(old); return 0; } @@ -298,6 +328,9 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) /* the argument of lsm task_free hook is untrusted. */ acquired = bpf_task_acquire(task); + if (!acquired) + return 0; + bpf_task_release(acquired); return 0; } @@ -337,3 +370,30 @@ int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool bpf_strncmp(task->comm, 16, "foo"); return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("R1 must be referenced or trusted") +int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *local; + struct __tasks_kfunc_map_value *v; + + if (tasks_kfunc_map_insert(task)) + return 0; + + v = tasks_kfunc_map_value_lookup(task); + if (!v) + return 0; + + bpf_rcu_read_lock(); + local = v->task; + if (!local) { + bpf_rcu_read_unlock(); + return 0; + } + /* Can't release a kptr that's still stored in a map. */ + bpf_task_release(local); + bpf_rcu_read_unlock(); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index cfa7f12b84e8..a75304a5e860 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -47,7 +47,10 @@ static int test_acquire_release(struct task_struct *task) } acquired = bpf_task_acquire(task); - bpf_task_release(acquired); + if (acquired) + bpf_task_release(acquired); + else + err = 6; return 0; } @@ -166,7 +169,10 @@ int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 cl current = bpf_get_current_task_btf(); acquired = bpf_task_acquire(current); - bpf_task_release(acquired); + if (acquired) + bpf_task_release(acquired); + else + err = 1; return 0; } @@ -241,3 +247,19 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla return 0; } + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* task->group_leader is listed as a trusted, non-NULL field of task struct. 
*/ + acquired = bpf_task_acquire(task->group_leader); + if (acquired) + bpf_task_release(acquired); + else + err = 1; + + + return 0; +}
From f85671c6ef46d490a90dac719e0c0e0adbacfd9b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 31 Mar 2023 14:57:32 -0500 Subject: [PATCH 179/260] bpf: Remove now-defunct task kfuncs In commit 22df776a9a86 ("tasks: Extract rcu_users out of union"), the 'refcount_t rcu_users' field was extracted out of a union with the 'struct rcu_head rcu' field. This allows us to safely perform a refcount_inc_not_zero() on task->rcu_users when acquiring a reference on a task struct. A prior patch leveraged this by making struct task_struct an RCU-protected object in the verifier, and by updating bpf_task_acquire() to use the task->rcu_users field for synchronization. Now that we can use RCU to protect tasks, we no longer need bpf_task_kptr_get() or bpf_task_acquire_not_zero(). bpf_task_kptr_get() is completely unnecessary, as we can just use RCU to get the object. bpf_task_acquire_not_zero() is now equivalent to bpf_task_acquire(). In addition to these changes, this patch also updates the associated selftests to no longer use these kfuncs. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230331195733.699708-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 69 ------------------ .../selftests/bpf/prog_tests/task_kfunc.c | 2 +- .../selftests/bpf/progs/rcu_read_lock.c | 9 +-- .../selftests/bpf/progs/task_kfunc_common.h | 1 - .../selftests/bpf/progs/task_kfunc_failure.c | 73 ------------------- .../selftests/bpf/progs/task_kfunc_success.c | 22 +++--- 6 files changed, 14 insertions(+), 162 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e71a4a54ce99..6be16db9f188 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2019,73 +2019,6 @@ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) return NULL; } -/** - * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task - * acquired by this kfunc which is not stored in a map as a kptr, must be - * released by calling bpf_task_release(). - * @p: The task on which a reference is being acquired. - */ -__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) -{ - /* For the time being this function returns NULL, as it's not currently - * possible to safely acquire a reference to a task with RCU protection - * using get_task_struct() and put_task_struct(). This is due to the - * slightly odd mechanics of p->rcu_users, and how task RCU protection - * works. - * - * A struct task_struct is refcounted by two different refcount_t - * fields: - * - * 1. p->usage: The "true" refcount field which tracks a task's - * lifetime. The task is freed as soon as this - * refcount drops to 0. - * - * 2. p->rcu_users: An "RCU users" refcount field which is statically - * initialized to 2, and is co-located in a union with - * a struct rcu_head field (p->rcu). p->rcu_users - * essentially encapsulates a single p->usage - * refcount, and when p->rcu_users goes to 0, an RCU - * callback is scheduled on the struct rcu_head which - * decrements the p->usage refcount. - * - * There are two important implications to this task refcounting logic - * described above.
The first is that - * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as - * after the refcount goes to 0, the RCU callback being scheduled will - * cause the memory backing the refcount to again be nonzero due to the - * fields sharing a union. The other is that we can't rely on RCU to - * guarantee that a task is valid in a BPF program. This is because a - * task could have already transitioned to being in the TASK_DEAD - * state, had its rcu_users refcount go to 0, and its rcu callback - * invoked in which it drops its single p->usage reference. At this - * point the task will be freed as soon as the last p->usage reference - * goes to 0, without waiting for another RCU gp to elapse. The only - * way that a BPF program can guarantee that a task is valid is in this - * scenario is to hold a p->usage refcount itself. - * - * Until we're able to resolve this issue, either by pulling - * p->rcu_users and p->rcu out of the union, or by getting rid of - * p->usage and just using p->rcu_users for refcounting, we'll just - * return NULL here. - */ - return NULL; -} - -/** - * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task - * kptr acquired by this kfunc which is not subsequently stored in a map, must - * be released by calling bpf_task_release(). - * @pp: A pointer to a task kptr on which a reference is being acquired. - */ -__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) -{ - /* We must return NULL here until we have clarity on how to properly - * leverage RCU for ensuring a task's lifetime. See the comment above - * in bpf_task_acquire_not_zero() for more details. - */ - return NULL; -} - /** * bpf_task_release - Release the reference acquired on a task. * @p: The task on which a reference is being released. 
@@ -2375,8 +2308,6 @@ BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_rbtree_add) diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 330133ece3f6..740d5f644b40 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -73,7 +73,7 @@ static const char * const success_tests[] = { "test_task_acquire_release_current", "test_task_acquire_leave_in_map", "test_task_xchg_release", - "test_task_get_release", + "test_task_map_acquire_release", "test_task_current_acquire_release", "test_task_from_pid_arg", "test_task_from_pid_current", diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 6a8c88e58df2..14fb01437fb8 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -23,7 +23,7 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; -struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym; +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; void bpf_task_release(struct task_struct *p) __ksym; SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") @@ -159,13 +159,8 @@ int task_acquire(void *ctx) goto out; /* acquire a reference which can be used outside rcu read lock region */ - gparent = bpf_task_acquire_not_zero(gparent); + gparent = bpf_task_acquire(gparent); if (!gparent) - /* Until we resolve the issues with using task->rcu_users, we - * expect bpf_task_acquire_not_zero() to return a NULL task. - * See the comment at the definition of - * bpf_task_acquire_not_zero() for more details. 
- */ goto out; (void)bpf_task_storage_get(&map_a, gparent, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index bf0d1da9aff8..41f2d44f49cb 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -21,7 +21,6 @@ struct hash_map { } __tasks_kfunc_map SEC(".maps"); struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; -struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; void bpf_task_release(struct task_struct *p) __ksym; struct task_struct *bpf_task_from_pid(s32 pid) __ksym; void bpf_rcu_read_lock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 63aef547da87..dcdea3127086 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -128,59 +128,6 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags) -{ - struct task_struct *kptr; - - /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */ - kptr = bpf_task_kptr_get(&task); - if (!kptr) - return 0; - - bpf_task_release(kptr); - - return 0; -} - -SEC("tp_btf/task_newtask") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags) -{ - struct task_struct *kptr, *acquired; - - acquired = bpf_task_acquire(task); - if (!acquired) - return 0; - - /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */ - kptr = bpf_task_kptr_get(&acquired); - bpf_task_release(acquired); - if (!kptr) - return 0; - - bpf_task_release(kptr); - - return 0; -} - -SEC("tp_btf/task_newtask") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags) -{ - struct task_struct *kptr; - - /* Cannot use bpf_task_kptr_get() on a NULL pointer. */ - kptr = bpf_task_kptr_get(NULL); - if (!kptr) - return 0; - - bpf_task_release(kptr); - - return 0; -} - SEC("tp_btf/task_newtask") __failure __msg("Unreleased reference") int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags) @@ -214,26 +161,6 @@ int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("Unreleased reference") -int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) -{ - struct task_struct *kptr; - struct __tasks_kfunc_map_value *v; - - v = insert_lookup_task(task); - if (!v) - return 0; - - kptr = bpf_task_kptr_get(&v->task); - if (!kptr) - return 0; - - /* Kptr acquired above is never released. 
*/ - - return 0; -} - SEC("tp_btf/task_newtask") __failure __msg("Possibly NULL pointer passed to trusted arg0") int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index a75304a5e860..b09371bba204 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -122,7 +122,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") -int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) +int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags) { struct task_struct *kptr; struct __tasks_kfunc_map_value *v; @@ -143,18 +143,18 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) return 0; } - kptr = bpf_task_kptr_get(&v->task); - if (kptr) { - /* Until we resolve the issues with using task->rcu_users, we - * expect bpf_task_kptr_get() to return a NULL task. See the - * comment at the definition of bpf_task_acquire_not_zero() for - * more details. - */ - bpf_task_release(kptr); + bpf_rcu_read_lock(); + kptr = v->task; + if (!kptr) { err = 3; - return 0; + } else { + kptr = bpf_task_acquire(kptr); + if (!kptr) + err = 4; + else + bpf_task_release(kptr); } - + bpf_rcu_read_unlock(); return 0; } From db9d479ab59b21d719486e6bf673f83f129dae32 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 31 Mar 2023 14:57:33 -0500 Subject: [PATCH 180/260] bpf,docs: Update documentation to reflect new task kfuncs Now that struct task_struct objects are RCU safe, and bpf_task_acquire() can return NULL, we should update the BPF task kfunc documentation to reflect the current state of the API. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230331195733.699708-4-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 49 +++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index bf1b85941452..d8a16c4bef7f 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -471,13 +471,50 @@ struct_ops callback arg. For example: struct task_struct *acquired; acquired = bpf_task_acquire(task); + if (acquired) + /* + * In a typical program you'd do something like store + * the task in a map, and the map will automatically + * release it later. Here, we release it manually. + */ + bpf_task_release(acquired); + return 0; + } + + +References acquired on ``struct task_struct *`` objects are RCU protected. +Therefore, when in an RCU read region, you can obtain a pointer to a task +embedded in a map value without having to acquire a reference: + +.. code-block:: c + + #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + private(TASK) static struct task_struct *global; + + /** + * A trivial example showing how to access a task stored + * in a map using RCU. + */ + SEC("tp_btf/task_newtask") + int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags) + { + struct task_struct *local_copy; + + bpf_rcu_read_lock(); + local_copy = global; + if (local_copy) + /* + * We could also pass local_copy to kfuncs or helper functions here, + * as we're guaranteed that local_copy will be valid until we exit + * the RCU read region below. 
+ */ + bpf_printk("Global task %s is valid", local_copy->comm); + else + bpf_printk("No global task found"); + bpf_rcu_read_unlock(); + + /* At this point we can no longer reference local_copy. */ - /* - * In a typical program you'd do something like store - * the task in a map, and the map will automatically - * release it later. Here, we release it manually. - */ - bpf_task_release(acquired); return 0; }

From 5b85575ad4280171c382e888b006cb8d12c35bde Mon Sep 17 00:00:00 2001
From: Anton Protopopov
Date: Sat, 1 Apr 2023 20:06:02 +0000
Subject: [PATCH 181/260] bpf: optimize hashmap lookups when key_size is divisible by 4

The BPF hashmap uses the jhash() hash function. There is an optimized version
of this hash function which may be used if the hash size is a multiple of 4.
Apply this optimization to the hashmap in a similar way as it is done in the
bloom filter map.

In practice the optimization is only noticeable for smaller key sizes, which,
however, is sufficient for many applications. An example is listed in the
following table of measurements (a hashmap of 65536 elements was used):

--------------------------------------------------------------------
| key_size | fullness | lookups /sec | lookups (opt) /sec |   gain |
--------------------------------------------------------------------
|        4 |      25% |      42.990M |            46.000M |   7.0% |
|        4 |      50% |      37.910M |            39.094M |   3.1% |
|        4 |      75% |      34.486M |            36.124M |   4.7% |
|        4 |     100% |      31.760M |            32.719M |   3.0% |
--------------------------------------------------------------------
|        8 |      25% |      43.855M |            49.626M |  13.2% |
|        8 |      50% |      38.328M |            42.152M |  10.0% |
|        8 |      75% |      34.483M |            38.088M |  10.5% |
|        8 |     100% |      31.306M |            34.686M |  10.8% |
--------------------------------------------------------------------
|       12 |      25% |      38.398M |            43.770M |  14.0% |
|       12 |      50% |      33.336M |            37.712M |  13.1% |
|       12 |      75% |      29.917M |            34.440M |  15.1% |
|       12 |     100% |      27.322M |            30.480M |  11.6% |
--------------------------------------------------------------------
|       16 |      25% |      41.491M |            41.921M |   1.0% |
|       16 |      50% |      36.206M |            36.474M |   0.7% |
|       16 |      75% |      32.529M |            33.027M |   1.5% |
|       16 |     100% |      29.581M |            30.325M |   2.5% |
--------------------------------------------------------------------
|       20 |      25% |      34.240M |            36.787M |   7.4% |
|       20 |      50% |      30.328M |            32.663M |   7.7% |
|       20 |      75% |      27.536M |            29.354M |   6.6% |
|       20 |     100% |      24.847M |            26.505M |   6.7% |
--------------------------------------------------------------------
|       24 |      25% |      36.329M |            40.608M |  11.8% |
|       24 |      50% |      31.444M |            35.059M |  11.5% |
|       24 |      75% |      28.426M |            31.452M |  10.6% |
|       24 |     100% |      26.278M |            28.741M |   9.4% |
--------------------------------------------------------------------
|       28 |      25% |      31.540M |            31.944M |   1.3% |
|       28 |      50% |      27.739M |            28.063M |   1.2% |
|       28 |      75% |      24.993M |            25.814M |   3.3% |
|       28 |     100% |      23.513M |            23.500M |  -0.1% |
--------------------------------------------------------------------
|       32 |      25% |      32.116M |            33.953M |   5.7% |
|       32 |      50% |      28.879M |            29.859M |   3.4% |
|       32 |      75% |      26.227M |            26.948M |   2.7% |
|       32 |     100% |      23.829M |            24.613M |   3.3% |
--------------------------------------------------------------------
|       64 |      25% |      22.535M |            22.554M |   0.1% |
|       64 |      50% |      20.471M |            20.675M |   1.0% |
|       64 |      75% |      19.077M |            19.146M |   0.4% |
|       64 |     100% |      17.710M |            18.131M |   2.4% |
--------------------------------------------------------------------

The following script was used to gather the results (SMT & frequency off):

  cd tools/testing/selftests/bpf
  for key_size in 4 8 12 16 20 24 28 32 64; do
          for nr_entries in `seq 16384 16384 65536`; do
                  fullness=$(printf '%3s' $((nr_entries*100/65536)))
                  echo -n "key_size=$key_size: $fullness% full: "
                  sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=$key_size --nr_entries=$nr_entries --max_entries=65536 --nr_loops=2000000 --map_flags=0x40 | grep cpu
          done
          echo
  done

Signed-off-by: Anton Protopopov
Link: https://lore.kernel.org/r/20230401200602.3275-1-aspsk@isovalent.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/hashtab.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 96b645bba3a4..00c253b84bf5 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -607,6 +607,8 @@ free_htab:
 
 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
 {
+	if (likely(key_len % 4 == 0))
+		return jhash2(key, key_len / 4, hashrnd);
 	return jhash(key, key_len, hashrnd);
 }
 

From 92b2e810f0d3a2c05d8cf12a800592b238d458df Mon Sep 17 00:00:00 2001
From: Anton Protopopov
Date: Sun, 2 Apr 2023 11:43:40 +0000
Subject: [PATCH 182/260] bpf: compute hashes in bloom filter similar to hashmap

If the value size in a bloom filter is a multiple of 4, then the jhash2()
function is used to compute hashes. The length parameter of this function
equals the number of 32-bit words in the input. Compute it in the hot path
instead of pre-computing it, as this translates to one extra shift to divide
the length by four vs. one extra memory load of a pre-computed length.

Signed-off-by: Anton Protopopov
Link: https://lore.kernel.org/r/20230402114340.3441-1-aspsk@isovalent.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/bloom_filter.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index db19784601a7..540331b610a9 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -16,13 +16,6 @@ struct bpf_bloom_filter {
 	struct bpf_map map;
 	u32 bitset_mask;
 	u32 hash_seed;
-	/* If the size of the values in the bloom filter is u32 aligned,
-	 * then it is more performant to use jhash2 as the underlying hash
-	 * function, else we use jhash. This tracks the number of u32s
-	 * in an u32-aligned value size. If the value size is not u32 aligned,
-	 * this will be 0.
-	 */
-	u32 aligned_u32_count;
 	u32 nr_hash_funcs;
 	unsigned long bitset[];
 };
@@ -32,9 +25,8 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value,
 {
 	u32 h;
 
-	if (bloom->aligned_u32_count)
-		h = jhash2(value, bloom->aligned_u32_count,
-			   bloom->hash_seed + index);
+	if (likely(value_size % 4 == 0))
+		h = jhash2(value, value_size / 4, bloom->hash_seed + index);
 	else
 		h = jhash(value, value_size, bloom->hash_seed + index);
 
@@ -152,11 +144,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
 	bloom->nr_hash_funcs = nr_hash_funcs;
 	bloom->bitset_mask = bitset_mask;
 
-	/* Check whether the value size is u32-aligned */
-	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
-		bloom->aligned_u32_count =
-			attr->value_size / sizeof(u32);
-
 	if (!(attr->map_flags & BPF_F_ZERO_SEED))
 		bloom->hash_seed = get_random_u32();
 

From 16b7c970cc8192e929dbd5192ccc1867e19d7bda Mon Sep 17 00:00:00 2001
From: Dave Thaler
Date: Sun, 26 Mar 2023 05:49:46 +0000
Subject: [PATCH 183/260] bpf, docs: Add docs on extended 64-bit immediate instructions

Add docs on extended 64-bit immediate instructions, including six
instructions previously undocumented. Include a brief description of maps
and variables, as used by those instructions.
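
As a hedged illustration of the wide encoding (byte values derived from the
encoding rules documented here, not taken from the patch): loading the map
with file descriptor 5 into r1 uses opcode 0x18 with 0x1 in the 'src' field
and spans two 64-bit instruction slots:

  /* r1 = map_by_fd(5), shown as raw instruction bytes (little-endian) */
  const unsigned char ld_imm64_map_by_fd[16] = {
          0x18, 0x11, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, /* opcode=0x18, dst=r1, src=0x1, imm=5 */
          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* second slot carries next_imm=0 */
  };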
V1 -> V2: rebased on top of latest master V2 -> V3: addressed comments from Alexei V3 -> V4: addressed comments from David Vernet Signed-off-by: Dave Thaler Link: https://lore.kernel.org/r/20230326054946.2331-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 50 ++++++++++++++++++++++++--- Documentation/bpf/linux-notes.rst | 22 ++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index b77280eb926f..492980ece1ab 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -416,14 +416,56 @@ and loaded back to ``R0``. ----------------------------- Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction -encoding for an extra imm64 value. +encoding defined in `Instruction encoding`_, and use the 'src' field of the +basic instruction to hold an opcode subtype. -There is currently only one such instruction. +The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions +with opcode subtypes in the 'src' field, using new terms such as "map" +defined further below: -``BPF_LD | BPF_DW | BPF_IMM`` means:: +========================= ====== === ========================================= =========== ============== +opcode construction opcode src pseudocode imm type dst type +========================= ====== === ========================================= =========== ============== +BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer +========================= ====== === ========================================= =========== ============== - dst = imm64 +where +* map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map (see `Maps`_) +* map_by_idx(imm) means to convert a 32-bit index into an address of a map +* map_val(map) gets the address of the first value in a given map +* var_addr(imm) gets the address of a platform variable (see `Platform Variables`_) with a given id +* code_addr(imm) gets the address of the instruction at a specified relative offset in number of (64-bit) instructions +* the 'imm type' can be used by disassemblers for display +* the 'dst type' can be used for verification and JIT compilation purposes + +Maps +~~~~ + +Maps are shared memory regions accessible by eBPF programs on some platforms. +A map can have various semantics as defined in a separate document, and may or +may not have a single contiguous memory region, but the 'map_val(map)' is +currently only defined for maps that do have a single contiguous memory region. + +Each map can have a file descriptor (fd) if supported by the platform, where +'map_by_fd(imm)' means to get the map with the specified file descriptor. Each +BPF program can also be defined to use a set of maps associated with the +program at load time, and 'map_by_idx(imm)' means to get the map with the given +index in the set associated with the BPF program containing the instruction. 
+ +Platform Variables +~~~~~~~~~~~~~~~~~~ + +Platform variables are memory regions, identified by integer ids, exposed by +the runtime and accessible by BPF programs on some platforms. The +'var_addr(imm)' operation means to get the address of the memory region +identified by the given id. Legacy BPF Packet access instructions ------------------------------------- diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst index f43b9c797bcb..508d009d3bed 100644 --- a/Documentation/bpf/linux-notes.rst +++ b/Documentation/bpf/linux-notes.rst @@ -20,6 +20,28 @@ integer would be read from a specified register, is not currently supported by the verifier. Any programs with this instruction will fail to load until such support is added. +Maps +==== + +Linux only supports the 'map_val(map)' operation on array maps with a single element. + +Linux uses an fd_array to store maps associated with a BPF program. Thus, +map_by_idx(imm) uses the fd at that index in the array. + +Variables +========= + +The following 64-bit immediate instruction specifies that a variable address, +which corresponds to some integer stored in the 'imm' field, should be loaded: + +========================= ====== === ========================================= =========== ============== +opcode construction opcode src pseudocode imm type dst type +========================= ====== === ========================================= =========== ============== +BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer +========================= ====== === ========================================= =========== ============== + +On Linux, this integer is a BTF ID. + Legacy BPF Packet access instructions ===================================== From f6a6a5a976288e4d0d94eb1c6c9e983e8e5cdb31 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 3 Apr 2023 13:00:27 -0700 Subject: [PATCH 184/260] bpf: Fix struct_meta lookup for bpf_obj_free_fields kfunc call bpf_obj_drop_impl has a void return type. In check_kfunc_call, the "else if" which sets insn_aux->kptr_struct_meta for bpf_obj_drop_impl is surrounded by a larger if statement which checks btf_type_is_ptr. As a result: * The bpf_obj_drop_impl-specific code will never execute * The btf_struct_meta input to bpf_obj_drop is always NULL * __bpf_obj_drop_impl will always see a NULL btf_record when called from BPF program, and won't call bpf_obj_free_fields * program-allocated kptrs which have fields that should be cleaned up by bpf_obj_free_fields may instead leak resources This patch adds a btf_type_is_void branch to the larger if and moves special handling for bpf_obj_drop_impl there, fixing the issue. 
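
For illustration, a hedged sketch of the leak this fixes (the type, field and
flow are hypothetical, not taken from the patch): with a NULL btf_record,
bpf_obj_drop() skipped bpf_obj_free_fields(), so a kptr field of a
program-allocated object was never released:

  struct node {
          struct task_struct __kptr *t;   /* field that needs cleanup on free */
  };

  /* inside some BPF program: */
  struct node *n = bpf_obj_new(typeof(*n));
  if (!n)
          return 0;
  /* ... a task reference may be parked in n->t via bpf_kptr_xchg() ... */
  bpf_obj_drop(n);        /* before this fix: the reference held in n->t leaked */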
Fixes: ac9f06050a35 ("bpf: Introduce bpf_obj_drop") Cc: Kumar Kartikeya Dwivedi Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230403200027.2271029-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 92ae4e8ab87b..eaf9c5291cf0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10743,10 +10743,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_aux->obj_new_size = ret_t->size; insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); - } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { - insn_aux->kptr_struct_meta = - btf_find_struct_meta(meta.arg_obj_drop.btf, - meta.arg_obj_drop.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; @@ -10875,7 +10871,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) regs[BPF_REG_0].id = ++env->id_gen; - } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ + } else if (btf_type_is_void(t)) { + if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + insn_aux->kptr_struct_meta = + btf_find_struct_meta(meta.arg_obj_drop.btf, + meta.arg_obj_drop.btf_id); + } + } + } nargs = btf_type_vlen(meta.func_proto); args = (const struct btf_param *)(meta.func_proto + 1); From 8fc59c26d212c23d6fd5ad47a10651cf72d83b4a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 3 Apr 2023 19:29:35 +0200 Subject: [PATCH 185/260] selftests/bpf: Add RESOLVE_BTFIDS dependency to bpf_testmod.ko bpf_testmod.ko sometimes fails to build from a clean checkout: BTF [M] linux/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko /bin/sh: 1: linux-build//tools/build/resolve_btfids/resolve_btfids: not found The reason is that RESOLVE_BTFIDS may not yet be built. Fix by adding a dependency. Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20230403172935.1553022-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4a8ef118fd9d..febd1dae6c88 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -201,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $< -o $@ \ $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) -$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) +$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod From 7d64c513284408fee5178a0953a686e9410f2399 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:22 -0700 Subject: [PATCH 186/260] bpf: Invoke btf_struct_access() callback only for writes. 
Remove duplicated if (atype == BPF_READ) btf_struct_access() from btf_struct_access() callback and invoke it only for writes. This is possible to do because currently btf_struct_access() custom callback always delegates to generic btf_struct_access() helper for BPF_READ accesses. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-2-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 2 +- net/bpf/bpf_dummy_struct_ops.c | 2 +- net/core/filter.c | 6 ------ net/ipv4/bpf_tcp_ca.c | 3 --- 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eaf9c5291cf0..83984568ccb4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5504,7 +5504,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } - if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) { + if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) { if (!btf_is_kernel(reg->btf)) { verbose(env, "verifier internal error: reg->btf must be kernel btf\n"); return -EFAULT; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ff4f89a2b02a..9535c8506cda 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -198,7 +198,7 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, if (err < 0) return err; - return atype == BPF_READ ? err : NOT_INIT; + return NOT_INIT; } static const struct bpf_verifier_ops bpf_dummy_verifier_ops = { diff --git a/net/core/filter.c b/net/core/filter.c index 3370efad1dda..8b9f409a2ec3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8753,9 +8753,6 @@ static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, { int ret = -EACCES; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); @@ -8830,9 +8827,6 @@ static int xdp_btf_struct_access(struct bpf_verifier_log *log, { int ret = -EACCES; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index ea21c96c03aa..d6465876bbf6 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -78,9 +78,6 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t; size_t end; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - t = btf_type_by_id(reg->btf, reg->btf_id); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); From b7e852a9ec96635168c04204fb7cf1f7390b9a8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:23 -0700 Subject: [PATCH 187/260] bpf: Remove unused arguments from btf_struct_access(). Remove unused arguments from btf_struct_access() callback. 
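
With the arguments gone (matching the updated prototype in the diff below), a
hedged skeleton of what an implementation reduces to ('foo' is a placeholder;
each subsystem supplies its own bounds checks, as in the hunks below):

  static int foo_btf_struct_access(struct bpf_verifier_log *log,
                                   const struct bpf_reg_state *reg,
                                   int off, int size)
  {
          /* only writes reach this callback now; vet off/size or reject */
          return -EACCES;
  }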
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-3-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 3 +-- include/linux/filter.h | 3 +-- kernel/bpf/verifier.c | 4 ++-- net/bpf/bpf_dummy_struct_ops.c | 12 +++++------- net/core/filter.c | 13 +++++-------- net/ipv4/bpf_tcp_ca.c | 3 +-- net/netfilter/nf_conntrack_bpf.c | 3 +-- 7 files changed, 16 insertions(+), 25 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2d8f3f639e68..4f689dda748f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -893,8 +893,7 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); }; struct bpf_prog_offload_ops { diff --git a/include/linux/filter.h b/include/linux/filter.h index 23c08c31bea9..5364b0c52c1d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -571,8 +571,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83984568ccb4..5ca520e5eddf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5459,7 +5459,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); enum bpf_type_flag flag = 0; - u32 btf_id; + u32 btf_id = 0; int ret; if (!env->allow_ptr_leaks) { @@ -5509,7 +5509,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, verbose(env, "verifier internal error: reg->btf must be kernel btf\n"); return -EFAULT; } - ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); + ret = env->ops->btf_struct_access(&env->log, reg, off, size); } else { /* Writes are permitted with default btf_struct_access for * program allocated objects (which always have ref_obj_id > 0), diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 9535c8506cda..5918d1b32e19 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -173,14 +173,11 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t, static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *state; const struct btf_type *t; s32 type_id; - int err; type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state", BTF_KIND_STRUCT); @@ -194,9 +191,10 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - if (err < 0) - return err; + if (off + size > sizeof(struct bpf_dummy_ops_state)) { + bpf_log(log, "write access at off %d with size %d\n", off, size); + return -EACCES; + } return NOT_INIT; } diff --git a/net/core/filter.c 
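
A hedged sketch of why the offset search breaks down (layout invented for
exposition): once the walked pointer lives in a nested struct, no top-level
member of the outer type sits at the access offset, so the old per-member
offset scan finds nothing:

  struct inner {
          struct task_struct *task;       /* the pointer being walked */
  };

  struct outer {
          long pad;
          struct inner in;        /* 'task' is at offset 8 of 'outer', yet is not one of its members */
  };

Since btf_struct_walk() already resolves the access down to the exact member,
handing its {field name, btf id} result to btf_nested_type_is_trusted()
avoids redoing the lookup and works for any nesting depth.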
b/net/core/filter.c index 8b9f409a2ec3..1f2abf0f60e6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8742,20 +8742,18 @@ EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); EXPORT_SYMBOL_GPL(nfct_btf_struct_access); static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { int ret = -EACCES; mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; @@ -8822,14 +8820,13 @@ EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); static int xdp_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { int ret = -EACCES; mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index d6465876bbf6..4406d796cc2f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -72,8 +72,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *t; size_t end; diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 002e9d24a1e9..3f821b7ba646 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -192,8 +192,7 @@ BTF_ID(struct, nf_conn___init) /* Check writes into `struct nf_conn` */ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *ncit, *nct, *t; size_t end; From 63260df1396578226ac3134cf7f764690002e70e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:24 -0700 Subject: [PATCH 188/260] bpf: Refactor btf_nested_type_is_trusted(). btf_nested_type_is_trusted() tries to find a struct member at corresponding offset. It works for flat structures and falls apart in more complex structs with nested structs. The offset->member search is already performed by btf_struct_walk() including nested structs. Reuse this work and pass {field name, field btf id} into btf_nested_type_is_trusted() instead of offset to make BTF_TYPE_SAFE*() logic more robust. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-4-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 7 ++++--- kernel/bpf/btf.c | 44 +++++++++++++++++-------------------------- kernel/bpf/verifier.c | 23 +++++++++++----------- 3 files changed, 33 insertions(+), 41 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f689dda748f..002a811b6b90 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2263,7 +2263,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id, @@ -2302,7 +2302,7 @@ struct bpf_core_ctx { bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, const char *suffix); + const char *field_name, u32 btf_id, const char *suffix); bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, const struct btf *reg_btf, u32 reg_id, @@ -2517,7 +2517,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) static inline int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { return -EACCES; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index b7e5a5510b91..593c45a294d0 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6166,7 +6166,8 @@ enum bpf_struct_walk_result { static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, - u32 *next_btf_id, enum bpf_type_flag *flag) + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; @@ -6395,6 +6396,8 @@ error: if (btf_type_is_struct(stype)) { *next_btf_id = id; *flag |= tmp_flag; + if (field_name) + *field_name = mname; return WALK_PTR; } } @@ -6421,7 +6424,8 @@ error: int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype __maybe_unused, - u32 *next_btf_id, enum bpf_type_flag *flag) + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { const struct btf *btf = reg->btf; enum bpf_type_flag tmp_flag = 0; @@ -6453,7 +6457,7 @@ int btf_struct_access(struct bpf_verifier_log *log, t = btf_type_by_id(btf, id); do { - err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag); + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag, field_name); switch (err) { case WALK_PTR: @@ -6528,7 +6532,7 @@ again: type = btf_type_by_id(btf, id); if (!type) return false; - err = btf_struct_walk(log, btf, type, off, 1, &id, &flag); + err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL); if (err != WALK_STRUCT) return false; @@ -8488,16 +8492,15 @@ out: bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, const char *suffix) + const char *field_name, u32 btf_id, const char *suffix) { struct btf *btf = reg->btf; const struct btf_type 
*walk_type, *safe_type; const char *tname; char safe_tname[64]; long ret, safe_id; - const struct btf_member *member, *m_walk = NULL; + const struct btf_member *member; u32 i; - const char *walk_name; walk_type = btf_type_by_id(btf, reg->btf_id); if (!walk_type) @@ -8517,30 +8520,17 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, if (!safe_type) return false; - for_each_member(i, walk_type, member) { - u32 moff; - - /* We're looking for the PTR_TO_BTF_ID member in the struct - * type we're walking which matches the specified offset. - * Below, we'll iterate over the fields in the safe variant of - * the struct and see if any of them has a matching type / - * name. - */ - moff = __btf_member_bit_offset(walk_type, member) / 8; - if (off == moff) { - m_walk = member; - break; - } - } - if (m_walk == NULL) - return false; - - walk_name = __btf_name_by_offset(btf, m_walk->name_off); for_each_member(i, safe_type, member) { const char *m_name = __btf_name_by_offset(btf, member->name_off); + const struct btf_type *mtype = btf_type_by_id(btf, member->type); + u32 id; + if (!btf_type_is_ptr(mtype)) + continue; + + btf_type_skip_modifiers(btf, mtype->type, &id); /* If we match on both type and name, the field is considered trusted. */ - if (m_walk->type == member->type && !strcmp(walk_name, m_name)) + if (btf_id == id && !strcmp(field_name, m_name)) return true; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5ca520e5eddf..2cd2e0b725cd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5400,12 +5400,12 @@ BTF_TYPE_SAFE_RCU(struct css_set) { /* full trusted: these fields are trusted even outside of RCU CS and never NULL */ BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) { - __bpf_md_ptr(struct seq_file *, seq); + struct seq_file *seq; }; BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) { - __bpf_md_ptr(struct bpf_iter_meta *, meta); - __bpf_md_ptr(struct task_struct *, task); + struct bpf_iter_meta *meta; + struct task_struct *task; }; BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) { @@ -5427,17 +5427,17 @@ BTF_TYPE_SAFE_TRUSTED(struct socket) { static bool type_is_rcu(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int off) + const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set)); - return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu"); + return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu"); } static bool type_is_trusted(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int off) + const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); @@ -5446,7 +5446,7 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket)); - return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted"); + return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } static int check_ptr_to_btf_access(struct bpf_verifier_env *env, @@ -5458,6 +5458,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg = regs + regno; const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); + const char *field_name = NULL; enum bpf_type_flag flag = 0; u32 btf_id = 0; int ret; @@ -5526,7 +5527,7 @@ 
static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EFAULT; } - ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); + ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name); } if (ret < 0) @@ -5554,10 +5555,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, * A regular RCU-protected pointer with __rcu tag can also be deemed * trusted if we are in an RCU CS. Such pointer can be NULL. */ - if (type_is_trusted(env, reg, off)) { + if (type_is_trusted(env, reg, field_name, btf_id)) { flag |= PTR_TRUSTED; } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) { - if (type_is_rcu(env, reg, off)) { + if (type_is_rcu(env, reg, field_name, btf_id)) { /* ignore __rcu tag and mark it MEM_RCU */ flag |= MEM_RCU; } else if (flag & MEM_RCU) { @@ -5640,7 +5641,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, /* Simulate access to a PTR_TO_BTF_ID */ memset(&map_reg, 0, sizeof(map_reg)); mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0); - ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag); + ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL); if (ret < 0) return ret; From 91571a515d1bcdc280bb46423bb697ea7eb42ff3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:25 -0700 Subject: [PATCH 189/260] bpf: Teach verifier that certain helpers accept NULL pointer. bpf_[sk|inode|task|cgrp]_storage_[get|delete]() and bpf_get_socket_cookie() helpers perform run-time check that sk|inode|task|cgrp pointer != NULL. Teach verifier about this fact and allow bpf programs to pass PTR_TO_BTF_ID | PTR_MAYBE_NULL into such helpers. It will be used in the subsequent patch that will do bpf_sk_storage_get(.., skb->sk, ...); Even when 'skb' pointer is trusted the 'sk' pointer may be NULL. 
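
A minimal sketch of the resulting pattern, mirroring the selftest added later
in this series (the sk_stg_map definition is assumed):

  SEC("tp_btf/tcp_bad_csum")
  int BPF_PROG(tcp_bad_csum, struct sk_buff *skb)
  {
          /* skb->sk may be NULL; the helper now performs the run-time check */
          bpf_sk_storage_get(&sk_stg_map, skb->sk, 0, 0);
          return 0;
  }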
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-5-alexei.starovoitov@gmail.com --- kernel/bpf/bpf_cgrp_storage.c | 4 ++-- kernel/bpf/bpf_inode_storage.c | 4 ++-- kernel/bpf/bpf_task_storage.c | 8 ++++---- net/core/bpf_sk_storage.c | 4 ++-- net/core/filter.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index d17d5b694668..d44fe8dd9732 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -224,7 +224,7 @@ const struct bpf_func_proto bpf_cgrp_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_cgroup_btf_id[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -235,6 +235,6 @@ const struct bpf_func_proto bpf_cgrp_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_cgroup_btf_id[0], }; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index e17ad581b9be..a4d93df78c75 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -229,7 +229,7 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -240,6 +240,6 @@ const struct bpf_func_proto bpf_inode_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], }; diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index d1af0c8f9ce4..adf6dfe0ba68 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -338,7 +338,7 @@ const struct bpf_func_proto bpf_task_storage_get_recur_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -349,7 +349,7 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -360,7 +360,7 @@ const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; @@ -369,6 +369,6 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, 
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 085025c7130a..d4172534dfa8 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -412,7 +412,7 @@ const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -424,7 +424,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .allowed = bpf_sk_storage_tracing_allowed, }; diff --git a/net/core/filter.c b/net/core/filter.c index 1f2abf0f60e6..727c5269867d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4998,7 +4998,7 @@ const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = { .func = bpf_get_socket_ptr_cookie, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | PTR_MAYBE_NULL, }; BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) From add68b843f33d4e5dcbdc7ba6dffe7750a964159 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:26 -0700 Subject: [PATCH 190/260] bpf: Refactor NULL-ness check in check_reg_type(). check_reg_type() unconditionally disallows PTR_TO_BTF_ID | PTR_MAYBE_NULL. It's problematic for helpers that allow ARG_PTR_TO_BTF_ID_OR_NULL like bpf_sk_storage_get(). Allow passing PTR_TO_BTF_ID | PTR_MAYBE_NULL into such helpers. That technically includes bpf_kptr_xchg() helper, but in practice: bpf_kptr_xchg(..., bpf_cpumask_create()); is still disallowed because bpf_cpumask_create() returns ref counted pointer with ref_obj_id > 0. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-6-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cd2e0b725cd..4e7d671497f4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7168,6 +7168,8 @@ found: case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_TRUSTED: case PTR_TO_BTF_ID | MEM_RCU: + case PTR_TO_BTF_ID | PTR_MAYBE_NULL: + case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: { /* For bpf_sk_release, it needs to match against first member * 'struct sock_common', hence make an exception for it. 
This @@ -7176,6 +7178,12 @@ found: bool strict_type_match = arg_type_is_release(arg_type) && meta->func_id != BPF_FUNC_sk_release; + if (type_may_be_null(reg->type) && + (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) { + verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno); + return -EACCES; + } + if (!arg_btf_id) { if (!compatible->btf_id) { verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); @@ -7206,10 +7214,6 @@ found: } break; } - case PTR_TO_BTF_ID | PTR_MAYBE_NULL: - case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: - verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno); - return -EACCES; case PTR_TO_BTF_ID | MEM_ALLOC: if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && meta->func_id != BPF_FUNC_kptr_xchg) { From 30ee9821f9430c34a6254134a5f0e8db227510be Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:27 -0700 Subject: [PATCH 191/260] bpf: Allowlist few fields similar to __rcu tag. Allow bpf program access cgrp->kn, mm->exe_file, skb->sk, req->sk. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-7-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4e7d671497f4..fd90ba498ccc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5378,6 +5378,7 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) } #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) +#define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null) #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) /* @@ -5394,10 +5395,31 @@ BTF_TYPE_SAFE_RCU(struct task_struct) { struct task_struct *group_leader; }; +BTF_TYPE_SAFE_RCU(struct cgroup) { + /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */ + struct kernfs_node *kn; +}; + BTF_TYPE_SAFE_RCU(struct css_set) { struct cgroup *dfl_cgrp; }; +/* RCU trusted: these fields are trusted in RCU CS and can be NULL */ +BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) { + struct file __rcu *exe_file; +}; + +/* skb->sk, req->sk are not RCU protected, but we mark them as such + * because bpf prog accessible sockets are SOCK_RCU_FREE. 
+ */ +BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) { + struct sock *sk; +}; + +BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) { + struct sock *sk; +}; + /* full trusted: these fields are trusted even outside of RCU CS and never NULL */ BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) { struct seq_file *seq; @@ -5430,11 +5452,23 @@ static bool type_is_rcu(struct bpf_verifier_env *env, const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu"); } +static bool type_is_rcu_or_null(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const char *field_name, u32 btf_id) +{ + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock)); + + return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null"); +} + static bool type_is_trusted(struct bpf_verifier_env *env, struct bpf_reg_state *reg, const char *field_name, u32 btf_id) @@ -5561,9 +5595,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (type_is_rcu(env, reg, field_name, btf_id)) { /* ignore __rcu tag and mark it MEM_RCU */ flag |= MEM_RCU; - } else if (flag & MEM_RCU) { + } else if (flag & MEM_RCU || + type_is_rcu_or_null(env, reg, field_name, btf_id)) { /* __rcu tagged pointers can be NULL */ - flag |= PTR_MAYBE_NULL; + flag |= MEM_RCU | PTR_MAYBE_NULL; } else if (flag & (MEM_PERCPU | MEM_USER)) { /* keep as-is */ } else { From afeebf9f57a4965e0be90e4dc87f8f3a1417376d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:28 -0700 Subject: [PATCH 192/260] bpf: Undo strict enforcement for walking untagged fields. The commit 6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.") broke several tracing bpf programs. Even in clang compiled kernels there are many fields that are not marked with __rcu that are safe to read and pass into helpers, but the verifier doesn't know that they're safe. Aggressively marking them as PTR_UNTRUSTED was premature. 
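
A hedged sketch of the regressed pattern (field chosen for illustration
only): walking an untagged pointer member in a tracing program yielded
PTR_UNTRUSTED, so the result could no longer be passed to helpers expecting a
plain PTR_TO_BTF_ID, even though the read itself is safe:

  struct task_struct *task = bpf_get_current_task_btf();
  struct mm_struct *mm = task->mm;        /* untagged field: was downgraded to PTR_UNTRUSTED */

  /* passing 'mm' onward could then fail verification; after this patch the
   * walk yields the old-style PTR_TO_BTF_ID instead
   */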
Fixes: 6fcd486b3a0a ("bpf: Refactor RCU enforcement in the verifier.") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-8-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fd90ba498ccc..56f569811f70 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4974,6 +4974,11 @@ static bool is_rcu_reg(const struct bpf_reg_state *reg) return reg->type & MEM_RCU; } +static void clear_trusted_flags(enum bpf_type_flag *flag) +{ + *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU); +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -5602,8 +5607,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } else if (flag & (MEM_PERCPU | MEM_USER)) { /* keep as-is */ } else { - /* walking unknown pointers yields untrusted pointer */ - flag = PTR_UNTRUSTED; + /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */ + clear_trusted_flags(&flag); } } else { /* @@ -5617,7 +5622,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } } else { /* Old compat. Deprecated */ - flag &= ~PTR_TRUSTED; + clear_trusted_flags(&flag); } if (atype == BPF_READ && value_regno >= 0)
From 69f41a787761633b752d71166786eb642bad4913 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 3 Apr 2023 21:50:29 -0700 Subject: [PATCH 193/260] selftests/bpf: Add tracing tests for walking skb and req. Add tracing tests for walking skb->sk and req->sk. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230404045029.82870-9-alexei.starovoitov@gmail.com --- .../bpf/progs/test_sk_storage_tracing.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c index 6dc1f28fc4b6..02e718f06e0f 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c @@ -92,4 +92,20 @@ int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern, return 0; } +SEC("tp_btf/tcp_retransmit_synack") +int BPF_PROG(tcp_retransmit_synack, struct sock* sk, struct request_sock* req) +{ + /* load only test */ + bpf_sk_storage_get(&sk_stg_map, sk, 0, 0); + bpf_sk_storage_get(&sk_stg_map, req->sk, 0, 0); + return 0; +} + +SEC("tp_btf/tcp_bad_csum") +int BPF_PROG(tcp_bad_csum, struct sk_buff* skb) +{ + bpf_sk_storage_get(&sk_stg_map, skb->sk, 0, 0); + return 0; +} + char _license[] SEC("license") = "GPL";
From d099f594ad5650e8d8232b5f31f5f90104e65def Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Apr 2023 00:02:54 +0200 Subject: [PATCH 194/260] kallsyms: Disable preemption for find_kallsyms_symbol_value Artem reported suspicious RCU usage [1]. The reason is that the verifier calls find_kallsyms_symbol_value with preemption enabled, which triggers a suspicious RCU usage warning in the rcu_dereference_sched call. Fix this by disabling preemption in find_kallsyms_symbol_value and adding a __find_kallsyms_symbol_value variant for callers that already run with preemption disabled.
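The rule being violated, roughly (a simplified sketch, not the exact call chain; the helper below is hypothetical):

  /* rcu_dereference_sched() must run inside an RCU-sched read-side
   * critical section, which disabling preemption provides.  Calling
   * it with preemption enabled, as the verifier path did, trips
   * CONFIG_PROVE_RCU's "suspicious RCU usage" check.
   */
  static unsigned long nr_symbols(struct module *mod)
  {
          struct mod_kallsyms *kallsyms;
          unsigned long n;

          preempt_disable();
          kallsyms = rcu_dereference_sched(mod->kallsyms);
          n = kallsyms->num_symtab;
          preempt_enable();
          return n;
  }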
Fixes: 31bf1dbccfb0 ("bpf: Fix attaching fentry/fexit/fmod_ret/lsm to modules") Reported-by: Artem Savkov Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Artem Savkov Reviewed-by: Zhen Lei Link: https://lore.kernel.org/bpf/20230403220254.2191240-1-jolsa@kernel.org [1] https://lore.kernel.org/bpf/ZBrPMkv8YVRiWwCR@samus.usersys.redhat.com/ --- kernel/module/kallsyms.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index ab2376a1be88..bdc911dbcde5 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -442,7 +442,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, } /* Given a module and name of symbol, find and return the symbol's value */ -unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) +static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned int i; struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); @@ -466,7 +466,7 @@ static unsigned long __module_kallsyms_lookup_name(const char *name) if (colon) { mod = find_module_all(name, colon - name, false); if (mod) - return find_kallsyms_symbol_value(mod, colon + 1); + return __find_kallsyms_symbol_value(mod, colon + 1); return 0; } @@ -475,7 +475,7 @@ static unsigned long __module_kallsyms_lookup_name(const char *name) if (mod->state == MODULE_STATE_UNFORMED) continue; - ret = find_kallsyms_symbol_value(mod, name); + ret = __find_kallsyms_symbol_value(mod, name); if (ret) return ret; } @@ -494,6 +494,16 @@ unsigned long module_kallsyms_lookup_name(const char *name) return ret; } +unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) +{ + unsigned long ret; + + preempt_disable(); + ret = __find_kallsyms_symbol_value(mod, name); + preempt_enable(); + return ret; +} + int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, struct module *, unsigned long), From 9af87166944b3ff33d1399f7a1924ef0175e96b2 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Mon, 3 Apr 2023 15:01:51 +0200 Subject: [PATCH 195/260] selftests: xsk: Add xskxceiver.h dependency to Makefile xskxceiver depends on xskxceiver.h so tell make about it. 
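Worth noting: the compile command uses $(filter %.a %.o %.c,$^), so the header added to the prerequisite list only drives rebuilds and is never passed to the compiler. A minimal sketch of the pattern (paths and variables simplified, not the real rule):

  # xskxceiver.h triggers a rebuild when it changes, while the
  # $(filter ...) keeps non-source prerequisites off the compile line.
  xskxceiver: xskxceiver.c xskxceiver.h
  	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@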
Signed-off-by: Kal Conley Link: https://lore.kernel.org/r/20230403130151.31195-1-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index febd1dae6c88..b5ffdd89b86f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -612,7 +612,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ -$(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) +$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ From 7a2050df244e2c9a4e90882052b7907450ad10ed Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Mon, 3 Apr 2023 16:50:46 +0200 Subject: [PATCH 196/260] selftests: xsk: Use correct UMEM size in testapp_invalid_desc Avoid UMEM_SIZE macro in testapp_invalid_desc which is incorrect when the frame size is not XSK_UMEM__DEFAULT_FRAME_SIZE. Also remove the macro since it's no longer being used. Fixes: 909f0e28207c ("selftests: xsk: Add tests for 2K frame size") Signed-off-by: Kal Conley Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20230403145047.33065-2-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/xskxceiver.c | 9 +++++---- tools/testing/selftests/bpf/xskxceiver.h | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index b65e0645b0cd..3956f5db84f3 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1652,6 +1652,7 @@ static void testapp_single_pkt(struct test_spec *test) static void testapp_invalid_desc(struct test_spec *test) { + u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; struct pkt pkts[] = { /* Zero packet address allowed */ {0, PKT_SIZE, 0, true}, @@ -1662,9 +1663,9 @@ static void testapp_invalid_desc(struct test_spec *test) /* Packet too large */ {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, /* After umem ends */ - {UMEM_SIZE, PKT_SIZE, 0, false}, + {umem_size, PKT_SIZE, 0, false}, /* Straddle the end of umem */ - {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false}, + {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, /* Straddle a page boundrary */ {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, /* Straddle a 2K boundrary */ @@ -1682,8 +1683,8 @@ static void testapp_invalid_desc(struct test_spec *test) } if (test->ifobj_tx->shared_umem) { - pkts[4].addr += UMEM_SIZE; - pkts[5].addr += UMEM_SIZE; + pkts[4].addr += umem_size; + pkts[5].addr += umem_size; } pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index bdb4efedf3a9..cc24ab72f3ff 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -53,7 +53,6 @@ #define THREAD_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) -#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) #define RX_FULL_RXQSIZE 32 #define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE 
(XSK_UMEM__DEFAULT_FRAME_SIZE + 1) From ccd1b2933f8cbc09a8667992425996f19bf62c15 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Mon, 3 Apr 2023 16:50:47 +0200 Subject: [PATCH 197/260] selftests: xsk: Add test case for packets at end of UMEM Add test case to testapp_invalid_desc for valid packets at the end of the UMEM. Signed-off-by: Kal Conley Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20230403145047.33065-3-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/xskxceiver.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 3956f5db84f3..34a1f32fe752 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1662,6 +1662,8 @@ static void testapp_invalid_desc(struct test_spec *test) {-2, PKT_SIZE, 0, false}, /* Packet too large */ {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + /* Up to end of umem allowed */ + {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, /* After umem ends */ {umem_size, PKT_SIZE, 0, false}, /* Straddle the end of umem */ @@ -1675,16 +1677,17 @@ static void testapp_invalid_desc(struct test_spec *test) if (test->ifobj_tx->umem->unaligned_mode) { /* Crossing a page boundrary allowed */ - pkts[6].valid = true; + pkts[7].valid = true; } if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { /* Crossing a 2K frame size boundrary not allowed */ - pkts[7].valid = false; + pkts[8].valid = false; } if (test->ifobj_tx->shared_umem) { pkts[4].addr += umem_size; pkts[5].addr += umem_size; + pkts[6].addr += umem_size; } pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); From f2b50f17268390567bc0e95642170d88f336c8f4 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Wed, 5 Apr 2023 10:29:04 +0200 Subject: [PATCH 198/260] selftests: xsk: Disable IPv6 on VETH1 This change fixes flakiness in the BIDIRECTIONAL test: # [is_pkt_valid] expected length [60], got length [90] not ok 1 FAIL: SKB BUSY-POLL BIDIRECTIONAL When IPv6 is enabled, the interface will periodically send MLDv1 and MLDv2 packets. These packets can cause the BIDIRECTIONAL test to fail since it uses VETH0 for RX. For other tests, this was not a problem since they only receive on VETH1 and IPv6 was already disabled on VETH0. Fixes: a89052572ebb ("selftests/bpf: Xsk selftests framework") Signed-off-by: Kal Conley Link: https://lore.kernel.org/r/20230405082905.6303-1-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_xsk.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index b077cf58f825..377fb157a57c 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -116,6 +116,7 @@ setup_vethPairs() { ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4 if [ -f /proc/net/if_inet6 ]; then echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 + echo 1 > /proc/sys/net/ipv6/conf/${VETH1}/disable_ipv6 fi if [[ $verbose -eq 1 ]]; then echo "setting up ${VETH1}" From 68e7322142f5e731af222892d384d311835db0f1 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Mon, 3 Apr 2023 14:03:59 +0200 Subject: [PATCH 199/260] selftests: xsk: Deflakify STATS_RX_DROPPED test Fix flaky STATS_RX_DROPPED test. 
The receiver calls getsockopt after receiving the last (valid) packet which is not the final packet sent in the test (valid and invalid packets are sent in alternating fashion with the final packet being invalid). Since the last packet may or may not have been dropped already, both outcomes must be allowed. This issue could also be fixed by making sure the last packet sent is valid. This alternative is left as an exercise to the reader (or the benevolent maintainers of this file). This problem was quite visible on certain setups. On one machine this failure was observed 50% of the time. Also, remove a redundant assignment of pkt_stream->nb_pkts. This field is already initialized by __pkt_stream_alloc. Fixes: 27e934bec35b ("selftests: xsk: make stat tests not spin on getsockopt") Signed-off-by: Kal Conley Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20230403120400.31018-1-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/xskxceiver.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 34a1f32fe752..1a4bdd5aa78c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -633,7 +633,6 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb if (!pkt_stream) exit_with_error(ENOMEM); - pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, pkt_len); @@ -1141,7 +1140,14 @@ static int validate_rx_dropped(struct ifobject *ifobject) if (err) return TEST_FAILURE; - if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2) + /* The receiver calls getsockopt after receiving the last (valid) + * packet which is not the final packet sent in this test (valid and + * invalid packets are sent in alternating fashion with the final + * packet being invalid). Since the last packet may or may not have + * been dropped already, both outcomes must be allowed. + */ + if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1) return TEST_PASS; return TEST_FAILURE; From 5af607a861d43ffff830fc1890033e579ec44799 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Wed, 5 Apr 2023 19:33:54 +0000 Subject: [PATCH 200/260] selftests/bpf: Wait for receive in cg_storage_multi test In some cases the loopback latency might be large enough, causing the assertion on invocations to be run before ingress prog getting executed. The assertion would fail and the test would flake. This can be reliably reproduced by arbitrarily increasing the loopback latency (thanks to [1]): tc qdisc add dev lo root handle 1: htb default 12 tc class add dev lo parent 1:1 classid 1:12 htb rate 20kbps ceil 20kbps tc qdisc add dev lo parent 1:12 netem delay 100ms Fix this by waiting on the receive end, instead of instantly returning to the assert. The call to read() will wait for the default SO_RCVTIMEO timeout of 3 seconds provided by start_server(). 
[1] https://gist.github.com/kstevens715/4598301 Reported-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/9c5c8b7e-1d89-a3af-5400-14fde81f4429@linux.dev/ Fixes: 3573f384014f ("selftests/bpf: Test CGROUP_STORAGE behavior on shared egress + ingress") Acked-by: Stanislav Fomichev Signed-off-by: YiFei Zhu Link: https://lore.kernel.org/r/20230405193354.1956209-1-zhuyifei@google.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 621c57222191..63ee892bc757 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -56,8 +56,9 @@ static bool assert_storage_noexist(struct bpf_map *map, const void *key) static bool connect_send(const char *cgroup_path) { - bool res = true; int server_fd = -1, client_fd = -1; + char message[] = "message"; + bool res = true; if (join_cgroup(cgroup_path)) goto out_clean; @@ -70,7 +71,10 @@ static bool connect_send(const char *cgroup_path) if (client_fd < 0) goto out_clean; - if (send(client_fd, "message", strlen("message"), 0) < 0) + if (send(client_fd, &message, sizeof(message), 0) < 0) + goto out_clean; + + if (read(server_fd, &message, sizeof(message)) < 0) goto out_clean; res = false; From e27f0f1620b693ecf312c21ff8a52affb8a5be08 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:14 +0100 Subject: [PATCH 201/260] bpftool: Fix documentation about line info display for prog dumps The documentation states that when line_info is available when dumping a program, the source line will be displayed "by default". There is no notion of "default" here: the line is always displayed if available, there is no way currently to turn it off. In the next sentence, the documentation states that if "linum" is used on the command line, the relevant filename, line, and column will be displayed "on top of the source line". This is incorrect, as they are currently displayed on the right side of the source line (or on top of the eBPF instruction, not the source). This commit fixes the documentation to address these points. Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-2-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 14de72544995..06d1e4314406 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -106,9 +106,8 @@ DESCRIPTION CFG in DOT format, on standard output. If the programs have line_info available, the source line will - be displayed by default. If **linum** is specified, - the filename, line number and line column will also be - displayed on top of the source line. + be displayed. If **linum** is specified, the filename, line + number and line column will also be displayed. **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. @@ -120,9 +119,8 @@ DESCRIPTION **opcodes** controls if raw opcodes will be printed. If the prog has line_info available, the source line will - be displayed by default. 
If **linum** is specified, - the filename, line number and line column will also be - displayed on top of the source line. + be displayed. If **linum** is specified, the filename, line + number and line column will also be displayed. **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*.
From 67cf52cdb6c8fa6365d29106555dacf95c9fd374 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:15 +0100 Subject: [PATCH 202/260] bpftool: Fix bug for long instructions in program CFG dumps When dumping the control flow graphs for programs using the 16-byte long load instruction, we need to skip the second part of this instruction when looking for the next instruction to process. Otherwise, we end up printing "BUG_ld_00" from the kernel disassembler in the CFG. Fixes: efcef17a6d65 ("tools: bpftool: generate .dot graph from CFG information") Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-3-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/xlated_dumper.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 6fe3134ae45d..3daa05d9bbb7 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -372,8 +372,15 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, struct bpf_insn *insn_start = buf_start; struct bpf_insn *insn_end = buf_end; struct bpf_insn *cur = insn_start; + bool double_insn = false; for (; cur <= insn_end; cur++) { + if (double_insn) { + double_insn = false; + continue; + } + double_insn = cur->code == (BPF_LD | BPF_IMM | BPF_DW); + printf("% 4d: ", (int)(cur - insn_start + start_idx)); print_bpf_insn(&cbs, cur, true); if (cur != insn_end)
From 9fd496848b1c4cd04fee5f152bb7bcec11e1b901 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:16 +0100 Subject: [PATCH 203/260] bpftool: Support inline annotations when dumping the CFG of a program We support dumping the control flow graph of loaded programs to the DOT format with bpftool, but so far this feature wouldn't display the source code lines available through BTF along with the eBPF bytecode. Let's add support for these annotations, to make it easier to read the graph. In prog.c, we move the call to dump_xlated_cfg() in order to pass and use the full struct dump_data, instead of creating a minimal one in draw_bb_node(). We pass the pointer to this struct down to dump_xlated_for_graph() in xlated_dumper.c, where most of the logic is added. We deal with BTF mostly like we do for plain or JSON output, except that we cannot use a "nr_skip" value to skip a given number of linfo records (we don't process the BPF instructions linearly, and apart from the root of the graph we don't know how many records we should skip, so we just store the last linfo and make sure the new one we find is different before printing it). When printing the source instructions to the label of a DOT graph node, there are a few subtleties to address. We want some special newline markers, and there are some characters that we must escape. To deal with them, we introduce a new dedicated function btf_dump_linfo_dotlabel() in btf_dumper.c. We'll reuse this function in a later commit to format the filepath, line, and column references as well.
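As an illustration (made-up source line), a C source line such as `return skb->len;` comes out in the node label as:

  ; return\ skb-\>len;\l\

spaces and '>' are escaped because the node uses DOT's record shape, `\l` left-justifies the line, and the trailing backslash continues the label onto the next output line.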
Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-4-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf_dumper.c | 34 +++++++++++++++++++++++++++++++ tools/bpf/bpftool/cfg.c | 23 +++++++++------------ tools/bpf/bpftool/cfg.h | 4 +++- tools/bpf/bpftool/main.h | 2 ++ tools/bpf/bpftool/prog.c | 17 +++++++--------- tools/bpf/bpftool/xlated_dumper.c | 34 +++++++++++++++++++++++++++++-- 6 files changed, 88 insertions(+), 26 deletions(-) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index e7f6ec3a8f35..583aa843df92 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -821,3 +821,37 @@ void btf_dump_linfo_json(const struct btf *btf, BPF_LINE_INFO_LINE_COL(linfo->line_col)); } } + +static void dotlabel_puts(const char *s) +{ + for (; *s; ++s) { + switch (*s) { + case '\\': + case '"': + case '{': + case '}': + case '<': + case '>': + case '|': + case ' ': + putchar('\\'); + __fallthrough; + default: + putchar(*s); + } + } +} + +void btf_dump_linfo_dotlabel(const struct btf *btf, + const struct bpf_line_info *linfo) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (!line || !strlen(line)) + return; + line = ltrim(line); + + printf("; "); + dotlabel_puts(line); + printf("\\l\\\n"); +} diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index 1951219a9af7..9fdc1f0cdd6e 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -380,7 +380,8 @@ static void cfg_destroy(struct cfg *cfg) } } -static void draw_bb_node(struct func_node *func, struct bb_node *bb) +static void +draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd) { const char *shape; @@ -398,13 +399,9 @@ static void draw_bb_node(struct func_node *func, struct bb_node *bb) printf("EXIT"); } else { unsigned int start_idx; - struct dump_data dd = {}; - - printf("{"); - kernel_syms_load(&dd); + printf("{\\\n"); start_idx = bb->head - func->start; - dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx); - kernel_syms_destroy(&dd); + dump_xlated_for_graph(dd, bb->head, bb->tail, start_idx); printf("}"); } @@ -430,12 +427,12 @@ static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb) } } -static void func_output_bb_def(struct func_node *func) +static void func_output_bb_def(struct func_node *func, struct dump_data *dd) { struct bb_node *bb; list_for_each_entry(bb, &func->bbs, l) { - draw_bb_node(func, bb); + draw_bb_node(func, bb, dd); } } @@ -455,7 +452,7 @@ static void func_output_edges(struct func_node *func) func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX); } -static void cfg_dump(struct cfg *cfg) +static void cfg_dump(struct cfg *cfg, struct dump_data *dd) { struct func_node *func; @@ -463,14 +460,14 @@ static void cfg_dump(struct cfg *cfg) list_for_each_entry(func, &cfg->funcs, l) { printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n", func->idx, func->idx); - func_output_bb_def(func); + func_output_bb_def(func, dd); func_output_edges(func); printf("}\n"); } printf("}\n"); } -void dump_xlated_cfg(void *buf, unsigned int len) +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len) { struct bpf_insn *insn = buf; struct cfg cfg; @@ -479,7 +476,7 @@ void dump_xlated_cfg(void *buf, unsigned int len) if (cfg_build(&cfg, insn, len)) return; - cfg_dump(&cfg); + cfg_dump(&cfg, dd); cfg_destroy(&cfg); } diff --git a/tools/bpf/bpftool/cfg.h 
b/tools/bpf/bpftool/cfg.h index e144257ea6d2..909d17e6d4c2 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -4,6 +4,8 @@ #ifndef __BPF_TOOL_CFG_H #define __BPF_TOOL_CFG_H -void dump_xlated_cfg(void *buf, unsigned int len); +#include "xlated_dumper.h" + +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len); #endif /* __BPF_TOOL_CFG_H */ diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0ef373cef4c7..e9ee514b22d4 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -229,6 +229,8 @@ void btf_dump_linfo_plain(const struct btf *btf, const char *prefix, bool linum); void btf_dump_linfo_json(const struct btf *btf, const struct bpf_line_info *linfo, bool linum); +void btf_dump_linfo_dotlabel(const struct btf *btf, + const struct bpf_line_info *linfo); struct nlattr; struct ifinfomsg; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index afbe3ec342c8..d855118f0d96 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -840,11 +840,6 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, false)) goto exit_free; } - } else if (visual) { - if (json_output) - jsonw_null(json_wtr); - else - dump_xlated_cfg(buf, member_len); } else { kernel_syms_load(&dd); dd.nr_jited_ksyms = info->nr_jited_ksyms; @@ -854,12 +849,14 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, dd.finfo_rec_size = info->func_info_rec_size; dd.prog_linfo = prog_linfo; - if (json_output) - dump_xlated_json(&dd, buf, member_len, opcodes, - linum); + if (json_output && visual) + jsonw_null(json_wtr); + else if (json_output) + dump_xlated_json(&dd, buf, member_len, opcodes, linum); + else if (visual) + dump_xlated_cfg(&dd, buf, member_len); else - dump_xlated_plain(&dd, buf, member_len, opcodes, - linum); + dump_xlated_plain(&dd, buf, member_len, opcodes, linum); kernel_syms_destroy(&dd); } diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 3daa05d9bbb7..2ee83561b945 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -369,21 +369,51 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, .cb_imm = print_imm, .private_data = dd, }; + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; + const struct bpf_line_info *last_linfo = NULL; + struct bpf_func_info *record = dd->func_info; struct bpf_insn *insn_start = buf_start; struct bpf_insn *insn_end = buf_end; struct bpf_insn *cur = insn_start; + struct btf *btf = dd->btf; bool double_insn = false; + char func_sig[1024]; for (; cur <= insn_end; cur++) { + unsigned int insn_off; + if (double_insn) { double_insn = false; continue; } double_insn = cur->code == (BPF_LD | BPF_IMM | BPF_DW); - printf("% 4d: ", (int)(cur - insn_start + start_idx)); + insn_off = (unsigned int)(cur - insn_start + start_idx); + if (btf && record) { + if (record->insn_off == insn_off) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') + printf("; %s:\\l\\\n", func_sig); + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0); + if (linfo && linfo != last_linfo) { + btf_dump_linfo_dotlabel(btf, linfo); + last_linfo = linfo; + } + } + + printf("%d: ", insn_off); print_bpf_insn(&cbs, cur, true); + if (cur != insn_end) - printf(" | "); + printf("| "); } } From 05a06be722896e51f65dbbb6a3610f85a8353d6b Mon Sep 17 
00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:17 +0100 Subject: [PATCH 204/260] bpftool: Return an error on prog dumps if both CFG and JSON are required We do not support JSON output for control flow graphs of programs with bpftool. So far, requiring both the CFG and JSON output would result in producing a null JSON object. It makes more sense to raise an error directly when parsing command line arguments and options, so that users know they won't get any output they might expect. If JSON is required for the graph, we leave it to Graphviz instead: # bpftool prog dump xlated visual | dot -Tjson Suggested-by: Eduard Zingerman Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-5-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/bash-completion/bpftool | 9 ++++++--- tools/bpf/bpftool/prog.c | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 35f26f7c1124..a3cb07172789 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -255,16 +255,19 @@ _bpftool_map_update_get_name() _bpftool() { - local cur prev words objword + local cur prev words objword json=0 _init_completion || return # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf' + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi + if _bpftool_search_list -j --json -p --pretty; then + json=1 + fi # Deal with simplest keywords case $prev in @@ -367,7 +370,7 @@ _bpftool() ;; *) _bpftool_once_attr 'file' - if _bpftool_search_list 'xlated'; then + if _bpftool_search_list 'xlated' && [[ "$json" == 0 ]]; then COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ "$cur" ) ) else diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d855118f0d96..736defc6e5d0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -849,9 +849,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, dd.finfo_rec_size = info->func_info_rec_size; dd.prog_linfo = prog_linfo; - if (json_output && visual) - jsonw_null(json_wtr); - else if (json_output) + if (json_output) dump_xlated_json(&dd, buf, member_len, opcodes, linum); else if (visual) dump_xlated_cfg(&dd, buf, member_len); @@ -940,6 +938,10 @@ static int do_dump(int argc, char **argv) usage(); goto exit_close; } + if (json_output && visual) { + p_err("'visual' is not compatible with JSON output"); + goto exit_close; + } if (json_output && nb_fds > 1) jsonw_start_array(json_wtr); /* root array */ From 9b79f02722bbf24f060b2ab79513ad6e22c8e2f0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:18 +0100 Subject: [PATCH 205/260] bpftool: Support "opcodes", "linum", "visual" simultaneously When dumping a program, the keywords "opcodes" (for printing the raw opcodes), "linum" (for displaying the filename, line number, column number along with the source code), and "visual" (for generating the control flow graph for translated programs) are mutually exclusive. But there's no reason why they should be. Let's make it possible to pass several of them at once. The "file FILE" option, which makes bpftool output a binary image to a file, remains incompatible with the others. 
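For example, all of them can now be combined in a single invocation (program ID made up):

  # bpftool prog dump xlated id 40 opcodes linum visual | dot -Tsvg -o cfg.svg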
Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-6-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- .../bpftool/Documentation/bpftool-prog.rst | 8 +-- tools/bpf/bpftool/bash-completion/bpftool | 17 +++--- tools/bpf/bpftool/prog.c | 61 ++++++++++--------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 06d1e4314406..9443c524bb76 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -28,8 +28,8 @@ PROG COMMANDS ============= | **bpftool** **prog** { **show** | **list** } [*PROG*] -| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] -| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] +| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] +| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] | **bpftool** **prog pin** *PROG* *FILE* | **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] @@ -88,7 +88,7 @@ DESCRIPTION programs. On such kernels bpftool will automatically emit this information as well. - **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** @@ -109,7 +109,7 @@ DESCRIPTION be displayed. If **linum** is specified, the filename, line number and line column will also be displayed. - **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index a3cb07172789..69c64dc18b1d 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -271,7 +271,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|hex|opcodes|visual|linum) + help|hex) return 0 ;; tag) @@ -369,13 +369,16 @@ _bpftool() return 0 ;; *) - _bpftool_once_attr 'file' + # "file" is not compatible with other keywords here + if _bpftool_search_list 'file'; then + return 0 + fi + if ! 
_bpftool_search_list 'linum opcodes visual'; then + _bpftool_once_attr 'file' + fi + _bpftool_once_attr 'linum opcodes' if _bpftool_search_list 'xlated' && [[ "$json" == 0 ]]; then - COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ - "$cur" ) ) + _bpftool_once_attr 'visual' - else - COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ - "$cur" ) ) fi return 0 ;; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 736defc6e5d0..092525a6933a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -905,37 +905,42 @@ static int do_dump(int argc, char **argv) if (nb_fds < 1) goto exit_free; - if (is_prefix(*argv, "file")) { - NEXT_ARG(); - if (!argc) { - p_err("expected file path"); - goto exit_close; - } - if (nb_fds > 1) { - p_err("several programs matched"); - goto exit_close; - } + while (argc) { + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + p_err("expected file path"); + goto exit_close; + } + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } - filepath = *argv; - NEXT_ARG(); - } else if (is_prefix(*argv, "opcodes")) { - opcodes = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "visual")) { - if (nb_fds > 1) { - p_err("several programs matched"); + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "visual")) { + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + visual = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); + } else { + usage(); goto exit_close; } - - visual = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "linum")) { - linum = true; - NEXT_ARG(); + } - if (argc) { - usage(); + if (filepath && (opcodes || visual || linum)) { + p_err("'file' is not compatible with 'opcodes', 'visual', or 'linum'"); goto exit_close; } if (json_output && visual) { @@ -2419,8 +2424,8 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [PROG]\n" - " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n" - " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n" + " %1$s %2$s dump xlated PROG [{ file FILE | [opcodes] [linum] [visual] }]\n" + " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" " %1$s %2$s pin PROG FILE\n" " %1$s %2$s { load | loadall } OBJ PATH \\\n" " [type TYPE] [dev NAME] \\\n"
From 7483a7a70a12d7c00c9f80574d533b01689d39a7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:19 +0100 Subject: [PATCH 206/260] bpftool: Support printing opcodes and source file references in CFG Add support for displaying opcodes and/or file references (filepath, line and column numbers) when dumping the control flow graphs of loaded BPF programs with bpftool. The filepaths in the records are absolute. To keep the blocks on the graph from getting too wide, we truncate the paths when they get too long (but we always keep the entire file name). In the unlikely case where the resulting file name is ambiguous, it remains possible to get the full path with a regular dump (no CFG).
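The resulting file reference annotations look like this in the node labels (path and location made up):

  ; [.../selftests/bpf/progs/test_xdp.c line:42 col:5]\l\

where the leading "..." marks a long absolute path truncated down to its trailing components.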
Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-7-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf_dumper.c | 51 ++++++++++++++++++++++++++++++- tools/bpf/bpftool/cfg.c | 22 ++++++++----- tools/bpf/bpftool/cfg.h | 3 +- tools/bpf/bpftool/main.h | 2 +- tools/bpf/bpftool/prog.c | 2 +- tools/bpf/bpftool/xlated_dumper.c | 15 +++++++-- tools/bpf/bpftool/xlated_dumper.h | 3 +- 7 files changed, 83 insertions(+), 15 deletions(-) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 583aa843df92..6c5e0e82da22 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -842,8 +842,37 @@ static void dotlabel_puts(const char *s) } } +static const char *shorten_path(const char *path) +{ + const unsigned int MAX_PATH_LEN = 32; + size_t len = strlen(path); + const char *shortpath; + + if (len <= MAX_PATH_LEN) + return path; + + /* Search for last '/' under the MAX_PATH_LEN limit */ + shortpath = strchr(path + len - MAX_PATH_LEN, '/'); + if (shortpath) { + if (shortpath < path + strlen("...")) + /* We removed a very short prefix, e.g. "/w", and we'll + * make the path longer by prefixing with the ellipsis. + * Not worth it, keep initial path. + */ + return path; + return shortpath; + } + + /* File base name length is > MAX_PATH_LEN, search for last '/' */ + shortpath = strrchr(path, '/'); + if (shortpath) + return shortpath; + + return path; +} + void btf_dump_linfo_dotlabel(const struct btf *btf, - const struct bpf_line_info *linfo) + const struct bpf_line_info *linfo, bool linum) { const char *line = btf__name_by_offset(btf, linfo->line_off); @@ -851,6 +880,26 @@ void btf_dump_linfo_dotlabel(const struct btf *btf, return; line = ltrim(line); + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + const char *shortfile; + + /* More forgiving on file because linum option is + * expected to provide more info than the already + * available src line. + */ + if (!file) + shortfile = ""; + else + shortfile = shorten_path(file); + + printf("; [%s", shortfile > file ? "..." 
: ""); + dotlabel_puts(shortfile); + printf(" line:%u col:%u]\\l\\\n", + BPF_LINE_INFO_LINE_NUM(linfo->line_col), + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } + printf("; "); dotlabel_puts(line); printf("\\l\\\n"); diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index 9fdc1f0cdd6e..eec437cca2ea 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -381,7 +381,8 @@ static void cfg_destroy(struct cfg *cfg) } static void -draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd) +draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd, + bool opcodes, bool linum) { const char *shape; @@ -401,7 +402,8 @@ draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd) unsigned int start_idx; printf("{\\\n"); start_idx = bb->head - func->start; - dump_xlated_for_graph(dd, bb->head, bb->tail, start_idx); + dump_xlated_for_graph(dd, bb->head, bb->tail, start_idx, + opcodes, linum); printf("}"); } @@ -427,12 +429,14 @@ static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb) } } -static void func_output_bb_def(struct func_node *func, struct dump_data *dd) +static void +func_output_bb_def(struct func_node *func, struct dump_data *dd, + bool opcodes, bool linum) { struct bb_node *bb; list_for_each_entry(bb, &func->bbs, l) { - draw_bb_node(func, bb, dd); + draw_bb_node(func, bb, dd, opcodes, linum); } } @@ -452,7 +456,8 @@ static void func_output_edges(struct func_node *func) func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX); } -static void cfg_dump(struct cfg *cfg, struct dump_data *dd) +static void +cfg_dump(struct cfg *cfg, struct dump_data *dd, bool opcodes, bool linum) { struct func_node *func; @@ -460,14 +465,15 @@ static void cfg_dump(struct cfg *cfg, struct dump_data *dd) list_for_each_entry(func, &cfg->funcs, l) { printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n", func->idx, func->idx); - func_output_bb_def(func, dd); + func_output_bb_def(func, dd, opcodes, linum); func_output_edges(func); printf("}\n"); } printf("}\n"); } -void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len) +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len, + bool opcodes, bool linum) { struct bpf_insn *insn = buf; struct cfg cfg; @@ -476,7 +482,7 @@ void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len) if (cfg_build(&cfg, insn, len)) return; - cfg_dump(&cfg, dd); + cfg_dump(&cfg, dd, opcodes, linum); cfg_destroy(&cfg); } diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h index 909d17e6d4c2..b3793f4e1783 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -6,6 +6,7 @@ #include "xlated_dumper.h" -void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len); +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len, + bool opcodes, bool linum); #endif /* __BPF_TOOL_CFG_H */ diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index e9ee514b22d4..00d11ca6d3f2 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -230,7 +230,7 @@ void btf_dump_linfo_plain(const struct btf *btf, void btf_dump_linfo_json(const struct btf *btf, const struct bpf_line_info *linfo, bool linum); void btf_dump_linfo_dotlabel(const struct btf *btf, - const struct bpf_line_info *linfo); + const struct bpf_line_info *linfo, bool linum); struct nlattr; struct ifinfomsg; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 
092525a6933a..430f72306409 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -852,7 +852,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (json_output) dump_xlated_json(&dd, buf, member_len, opcodes, linum); else if (visual) - dump_xlated_cfg(&dd, buf, member_len); + dump_xlated_cfg(&dd, buf, member_len, opcodes, linum); else dump_xlated_plain(&dd, buf, member_len, opcodes, linum); kernel_syms_destroy(&dd); diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 2ee83561b945..da608e10c843 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -361,7 +361,8 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, } void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, - unsigned int start_idx) + unsigned int start_idx, + bool opcodes, bool linum) { const struct bpf_insn_cbs cbs = { .cb_print = print_insn_for_graph, @@ -405,7 +406,7 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0); if (linfo && linfo != last_linfo) { - btf_dump_linfo_dotlabel(btf, linfo); + btf_dump_linfo_dotlabel(btf, linfo, linum); last_linfo = linfo; } } @@ -413,6 +414,16 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, printf("%d: ", insn_off); print_bpf_insn(&cbs, cur, true); + if (opcodes) { + printf("\\ \\ \\ \\ "); + fprint_hex(stdout, cur, 8, " "); + if (double_insn && cur <= insn_end - 1) { + printf(" "); + fprint_hex(stdout, cur + 1, 8, " "); + } + printf("\\l\\\n"); + } + if (cur != insn_end) printf("| "); } diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index 54847e174273..9a946377b0e6 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -34,6 +34,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, bool opcodes, bool linum); void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end, - unsigned int start_index); + unsigned int start_index, + bool opcodes, bool linum); #endif From 7319296855f1bac0e7fb003388f44ecbd4515102 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Apr 2023 14:21:20 +0100 Subject: [PATCH 207/260] bpftool: Clean up _bpftool_once_attr() calls in bash completion In bpftool's bash completion file, function _bpftool_once_attr() is able to process multiple arguments. There are a few locations where this function is called multiple times in a row, each time for a single argument; let's pass all arguments instead to minimize the number of function calls required for the completion. 
Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20230405132120.59886-8-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/bash-completion/bpftool | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 69c64dc18b1d..e7234d1a5306 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -508,10 +508,7 @@ _bpftool() ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) - _bpftool_once_attr 'type' - _bpftool_once_attr 'dev' - _bpftool_once_attr 'pinmaps' - _bpftool_once_attr 'autoattach' + _bpftool_once_attr 'type dev pinmaps autoattach' return 0 ;; esac @@ -736,16 +733,10 @@ _bpftool() esac ;; *) - _bpftool_once_attr 'type' - _bpftool_once_attr 'key' - _bpftool_once_attr 'value' - _bpftool_once_attr 'entries' - _bpftool_once_attr 'name' - _bpftool_once_attr 'flags' + _bpftool_once_attr 'type key value entries name flags dev' if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then _bpftool_once_attr 'inner_map' fi - _bpftool_once_attr 'dev' return 0 ;; esac @@ -886,8 +877,7 @@ _bpftool() return 0 ;; *) - _bpftool_once_attr 'cpu' - _bpftool_once_attr 'index' + _bpftool_once_attr 'cpu index' return 0 ;; esac From 34bf93472f8fb60b4189aa2872471017e739cf0a Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Thu, 30 Mar 2023 12:20:01 +0200 Subject: [PATCH 208/260] kallsyms: move module-related functions under correct configs Functions for searching module kallsyms should have non-empty definitions only if CONFIG_MODULES=y and CONFIG_KALLSYMS=y. Until now, only CONFIG_MODULES check was used for many of these, which may have caused complilation errors on some configs. This patch moves all relevant functions under the correct configs. Fixes: bd5314f8dd2d ("kallsyms, bpf: Move find_kallsyms_symbol_value out of internal header") Signed-off-by: Viktor Malik Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303181535.RFDCnz3E-lkp@intel.com/ Link: https://lore.kernel.org/r/20230330102001.2183693-1-vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/module.h | 135 ++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 41cfd3be57e5..886d24877c7c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -608,16 +608,6 @@ static inline bool within_module(unsigned long addr, const struct module *mod) /* Search for module by name: must be in a RCU-sched critical section. */ struct module *find_module(const char *name); -/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if - symnum out of range. */ -int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, - char *name, char *module_name, int *exported); - -/* Look for this name: can be of form module:name. 
*/ -unsigned long module_kallsyms_lookup_name(const char *name); - -unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); - extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); #define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) @@ -664,17 +654,6 @@ static inline void __module_get(struct module *module) /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); -/* For kallsyms to ask for address resolution. namebuf should be at - * least KSYM_NAME_LEN long: a pointer to namebuf is returned if - * found, otherwise NULL. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, const unsigned char **modbuildid, - char *namebuf); -int lookup_module_symbol_name(unsigned long addr, char *symname); -int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); - int register_module_notifier(struct notifier_block *nb); int unregister_module_notifier(struct notifier_block *nb); @@ -765,45 +744,6 @@ static inline void module_put(struct module *module) #define module_name(mod) "kernel" -/* For kallsyms to ask for address resolution. NULL means not found. */ -static inline const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, - const unsigned char **modbuildid, - char *namebuf) -{ - return NULL; -} - -static inline int lookup_module_symbol_name(unsigned long addr, char *symname) -{ - return -ERANGE; -} - -static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) -{ - return -ERANGE; -} - -static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, - char *type, char *name, - char *module_name, int *exported) -{ - return -ERANGE; -} - -static inline unsigned long module_kallsyms_lookup_name(const char *name) -{ - return 0; -} - -static inline unsigned long find_kallsyms_symbol_value(struct module *mod, - const char *name) -{ - return 0; -} - static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ @@ -899,7 +839,36 @@ int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#else + +/* For kallsyms to ask for address resolution. namebuf should be at + * least KSYM_NAME_LEN long: a pointer to namebuf is returned if + * found, otherwise NULL. + */ +const char *module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, const unsigned char **modbuildid, + char *namebuf); +int lookup_module_symbol_name(unsigned long addr, char *symname); +int lookup_module_symbol_attrs(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char *modname, + char *name); + +/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if + * symnum out of range. + */ +int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *name, char *module_name, int *exported); + +/* Look for this name: can be of form module:name. 
*/ +unsigned long module_kallsyms_lookup_name(const char *name); + +unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); + +#else /* CONFIG_MODULES && CONFIG_KALLSYMS */ + static inline int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, struct module *, unsigned long), @@ -907,6 +876,50 @@ static inline int module_kallsyms_on_each_symbol(const char *modname, { return -EOPNOTSUPP; } + +/* For kallsyms to ask for address resolution. NULL means not found. */ +static inline const char *module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, + const unsigned char **modbuildid, + char *namebuf) +{ + return NULL; +} + +static inline int lookup_module_symbol_name(unsigned long addr, char *symname) +{ + return -ERANGE; +} + +static inline int lookup_module_symbol_attrs(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char *modname, + char *name) +{ + return -ERANGE; +} + +static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + return -ERANGE; +} + +static inline unsigned long module_kallsyms_lookup_name(const char *name) +{ + return 0; +} + +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} + #endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ From d769ccaf957fe7391f357c0a923de71f594b8a2b Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Thu, 6 Apr 2023 01:59:18 +0200 Subject: [PATCH 209/260] xsk: Fix unaligned descriptor validation Make sure unaligned descriptors that straddle the end of the UMEM are considered invalid. Currently, descriptor validation is broken for zero-copy mode which only checks descriptors at page granularity. For example, descriptors in zero-copy mode that overrun the end of the UMEM but not a page boundary are (incorrectly) considered valid. The UMEM boundary check needs to happen before the page boundary and contiguity checks in xp_desc_crosses_non_contig_pg(). Do this check in xp_unaligned_validate_desc() instead like xp_check_unaligned() already does. 
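Concretely (numbers invented for illustration): with 64 unaligned frames of 4001 bytes, the UMEM spans [0, 256064), and 256064 mod 4096 = 2112, so the UMEM ends 2112 bytes into its last page. A descriptor with addr 256000 and len 128 ends at 256128: it overruns the UMEM by 64 bytes yet stays within that final page (2048 + 128 <= 4096), so the old page-granularity check wrongly accepted it.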
Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Kal Conley Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20230405235920.7305-2-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- include/net/xsk_buff_pool.h | 9 ++------- net/xdp/xsk_queue.h | 1 + 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 3e952e569418..d318c769b445 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -180,13 +180,8 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, if (likely(!cross_pg)) return false; - if (pool->dma_pages_cnt) { - return !(pool->dma_pages[addr >> PAGE_SHIFT] & - XSK_NEXT_PG_CONTIG_MASK); - } - - /* skb path */ - return addr + len > pool->addrs_cnt; + return pool->dma_pages_cnt && + !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bfb2a7e50c26..66c6f57c9c44 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -162,6 +162,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, return false; if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt || + addr + desc->len > pool->addrs_cnt || xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) return false;
From c0801598e5430d9da2d406ed32fcedbef23977fc Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Thu, 6 Apr 2023 01:59:19 +0200 Subject: [PATCH 210/260] selftests: xsk: Add test UNALIGNED_INV_DESC_4K1_FRAME_SIZE Add unaligned descriptor test for frame size of 4001. Using an odd frame size ensures that the end of the UMEM is not near a page boundary. This allows testing descriptors that straddle the end of the UMEM but not a page. This test used to fail without the previous commit ("xsk: Fix unaligned descriptor validation"). Signed-off-by: Kal Conley Link: https://lore.kernel.org/r/20230405235920.7305-3-kal.conley@dectris.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/xskxceiver.c | 24 ++++++++++++++++++++++++ tools/testing/selftests/bpf/xskxceiver.h | 1 + 2 files changed, 25 insertions(+) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 1a4bdd5aa78c..5a9691e942de 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -69,6 +69,7 @@ */ #define _GNU_SOURCE +#include <assert.h> #include #include #include @@ -1876,6 +1877,29 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ test->ifobj_rx->umem->unaligned_mode = true; testapp_invalid_desc(test); break; + case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { + u64 page_size, umem_size; + + if (!hugepages_present(test->ifobj_tx)) { + ksft_test_result_skip("No 2M huge pages present.\n"); + return; + } + test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); + /* Odd frame size so the UMEM doesn't end near a page boundary. */ + test->ifobj_tx->umem->frame_size = 4001; + test->ifobj_rx->umem->frame_size = 4001; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* This test exists to test descriptors that straddle the end of + * the UMEM but not a page.
+ */ + page_size = sysconf(_SC_PAGESIZE); + umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + assert(umem_size % page_size > PKT_SIZE); + assert(umem_size % page_size < page_size - PKT_SIZE); + testapp_invalid_desc(test); + break; + } case TEST_TYPE_UNALIGNED: if (!testapp_unaligned(test)) return; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index cc24ab72f3ff..919327807a4e 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -78,6 +78,7 @@ enum test_type { TEST_TYPE_ALIGNED_INV_DESC, TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, TEST_TYPE_UNALIGNED_INV_DESC, + TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME, TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, From 13fbcee55706db45ce047a7cea14811d68f94ee3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 6 Apr 2023 09:44:55 -0700 Subject: [PATCH 211/260] bpf: Improve verifier JEQ/JNE insn branch taken checking Currently, for BPF_JEQ/BPF_JNE insn, verifier determines whether the branch is taken or not only if both operands are constants. Therefore, for the following code snippet, 0: (85) call bpf_ktime_get_ns#5 ; R0_w=scalar() 1: (a5) if r0 < 0x3 goto pc+2 ; R0_w=scalar(umin=3) 2: (b7) r2 = 2 ; R2_w=2 3: (1d) if r0 == r2 goto pc+2 6 At insn 3, since r0 is not a constant, verifier assumes both branches can be taken, which may lead to an improper verification failure. Add a comparison between the umin/umax values and the constant: if the umin value is greater than the constant, or the umax value is smaller than the constant, then for JEQ the branch must not be taken, and for JNE the branch must be taken. The jmp32 mode JEQ/JNE branch taken checking is also handled similarly. The following lists the veristat result w.r.t. changed number of processed insns during verification: File Program Insns (A) Insns (B) Insns (DIFF) ----------------------------------------------------- ---------------------------------------------------- --------- --------- --------------- test_cls_redirect.bpf.linked3.o cls_redirect 64980 73472 +8492 (+13.07%) test_seg6_loop.bpf.linked3.o __add_egr_x 12425 12423 -2 (-0.02%) test_tcp_hdr_options.bpf.linked3.o estab 2634 2558 -76 (-2.89%) test_parse_tcp_hdr_opt.bpf.linked3.o xdp_ingress_v6 1421 1420 -1 (-0.07%) test_parse_tcp_hdr_opt_dynptr.bpf.linked3.o xdp_ingress_v6 1238 1237 -1 (-0.08%) test_tc_dtime.bpf.linked3.o egress_fwdns_prio100 414 411 -3 (-0.72%) Mostly a small improvement, but test_cls_redirect.bpf.linked3.o has a 13% regression. I checked the verifier log and found that this is due to pruning. For some JEQ/JNE branches impacted by this patch, one branch is explored and the other has state equivalence and is pruned.
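The idea can be summarized with a small standalone sketch (illustrative, with simplified names; the kernel folds this into is_branch32_taken()/is_branch64_taken()):

  #include <stdint.h>

  /* Return 1 if JEQ is always taken, 0 if never taken, -1 if unknown,
   * given that the register value is known to lie in [umin, umax].
   */
  static int jeq_branch_taken(uint64_t umin, uint64_t umax, uint64_t val)
  {
  	if (umin == umax)		/* register is a known constant */
  		return umin == val;
  	if (val < umin || val > umax)	/* constant outside the value range */
  		return 0;		/* equality can never hold */
  	return -1;			/* both outcomes remain possible */
  }

For JNE the results are simply inverted: a 0 ("never equal") means the not-equal branch is always taken.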
Signed-off-by: Yonghong Song Acked-by: Dave Marchevsky Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230406164455.1045294-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 56f569811f70..5c6b90e384a5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12651,10 +12651,14 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) case BPF_JEQ: if (tnum_is_const(subreg)) return !!tnum_equals_const(subreg, val); + else if (val < reg->u32_min_value || val > reg->u32_max_value) + return 0; break; case BPF_JNE: if (tnum_is_const(subreg)) return !tnum_equals_const(subreg, val); + else if (val < reg->u32_min_value || val > reg->u32_max_value) + return 1; break; case BPF_JSET: if ((~subreg.mask & subreg.value) & val) @@ -12724,10 +12728,14 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) case BPF_JEQ: if (tnum_is_const(reg->var_off)) return !!tnum_equals_const(reg->var_off, val); + else if (val < reg->umin_value || val > reg->umax_value) + return 0; break; case BPF_JNE: if (tnum_is_const(reg->var_off)) return !tnum_equals_const(reg->var_off, val); + else if (val < reg->umin_value || val > reg->umax_value) + return 1; break; case BPF_JSET: if ((~reg->var_off.mask & reg->var_off.value) & val) From aec08d677b4d0adeb7412fa98547cf07bfce6fea Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 6 Apr 2023 09:45:00 -0700 Subject: [PATCH 212/260] selftests/bpf: Add tests for non-constant cond_op NE/EQ bound deduction Add various tests for code pattern '<non_const> NE/EQ <const>' implemented in the previous verifier patch. Without the verifier patch, these new tests will fail.
Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230406164500.1045715-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../verifier_bounds_deduction_non_const.c | 179 ++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index efc8cf2e18d0..73dff693d411 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -7,6 +7,7 @@ #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bounds_deduction.skel.h" +#include "verifier_bounds_deduction_non_const.skel.h" #include "verifier_bounds_mix_sign_unsign.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" @@ -70,6 +71,7 @@ void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_array_access(void) { RUN(verifier_array_access); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); } +void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c new file mode 100644 index 000000000000..fe570d866139 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_1(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 3 goto l0_%=; \ + r2 = 2; \ + if r0 == r2 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_2(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 > 3 goto l0_%=; \ + r2 = 4; \ + if r0 == r2 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_3(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 3 goto l0_%=; \ + r2 = 2; \ + if r0 != r2 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_4(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 > 3 goto l0_%=; \ + r2 = 4; \ + if r0 != r2 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + 
exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_5(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 4 goto l0_%=; \ + w2 = 3; \ + if w0 == w2 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_6(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 > 4 goto l0_%=; \ + w2 = 5; \ + if w0 == w2 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_7(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 3 goto l0_%=; \ + w2 = 2; \ + if w0 != w2 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_8(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 > 3 goto l0_%=; \ + w2 = 4; \ + if w0 != w2 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From 953d9f5beaf75e88c69a13d70ce424cd606a29f5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 6 Apr 2023 09:45:05 -0700 Subject: [PATCH 213/260] bpf: Improve handling of pattern '<const> <cond_op> <non_const>' in verifier Currently, the verifier does not handle '<const> <cond_op> <non_const>' well. For example, ... 10: (79) r1 = *(u64 *)(r10 -16) ; R1_w=scalar() R10=fp0 11: (b7) r2 = 0 ; R2_w=0 12: (2d) if r2 > r1 goto pc+2 13: (b7) r0 = 0 14: (95) exit 15: (65) if r1 s> 0x1 goto pc+3 16: (0f) r0 += r1 ... At insn 12, verifier decides both true and false branches are possible, but actually only the false branch is possible. Currently, the verifier already supports patterns '<non_const> <cond_op> <const>'. Add support for patterns '<const> <cond_op> <non_const>' in a similar way. Also fix selftest 'verifier_bounds_mix_sign_unsign/bounds checks mixing signed and unsigned, variant 10' due to this change.
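Conceptually, the new handling just flips the comparison so that the existing '<non_const> <cond_op> <const>' logic can be reused. A hedged sketch of the idea (names here are illustrative; the kernel uses flip_opcode() inside check_cond_jmp_op()):

  /* "const OP reg" is equivalent to "reg flipped(OP) const". */
  enum cmp_op { CMP_LT, CMP_LE, CMP_GT, CMP_GE };

  static enum cmp_op flip_cmp_op(enum cmp_op op)
  {
  	switch (op) {
  	case CMP_LT: return CMP_GT;	/* c <  r  <=>  r >  c */
  	case CMP_LE: return CMP_GE;	/* c <= r  <=>  r >= c */
  	case CMP_GT: return CMP_LT;	/* c >  r  <=>  r <  c */
  	case CMP_GE: return CMP_LE;	/* c >= r  <=>  r <= c */
  	}
  	return op;			/* == and != are symmetric */
  }

With the operands swapped this way, the constant ends up on the right-hand side and the existing branch-taken deduction applies unchanged.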
Signed-off-by: Yonghong Song Acked-by: Dave Marchevsky Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230406164505.1046801-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 ++++++++++++ .../bpf/progs/verifier_bounds_mix_sign_unsign.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c6b90e384a5..3660b573048a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13356,6 +13356,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, src_reg->var_off.value, opcode, is_jmp32); + } else if (dst_reg->type == SCALAR_VALUE && + is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) { + pred = is_branch_taken(src_reg, + tnum_subreg(dst_reg->var_off).value, + flip_opcode(opcode), + is_jmp32); + } else if (dst_reg->type == SCALAR_VALUE && + !is_jmp32 && tnum_is_const(dst_reg->var_off)) { + pred = is_branch_taken(src_reg, + dst_reg->var_off.value, + flip_opcode(opcode), + is_jmp32); } else if (reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg) && !is_jmp32) { diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c index 91a66357896a..4f40144748a5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c @@ -354,7 +354,7 @@ __naked void signed_and_unsigned_variant_10(void) call %[bpf_map_lookup_elem]; \ if r0 == 0 goto l0_%=; \ r1 = *(u64*)(r10 - 16); \ - r2 = 0; \ + r2 = -1; \ if r2 > r1 goto l1_%=; \ r0 = 0; \ exit; \ From 23a88fae9f20d47bb3aed99b1e08d0d6cf65cf0c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 6 Apr 2023 09:45:10 -0700 Subject: [PATCH 214/260] selftests/bpf: Add verifier tests for code pattern '<const> <cond_op> <non_const>' Add various tests for code pattern '<const> <cond_op> <non_const>' to exercise the previous verifier patch. The following is the veristat diff in number of processed insns, comparing the previous patch vs. this patch: File Program Insns (A) Insns (B) Insns (DIFF) ----------------------------------------------------- ---------------------------------------------------- --------- --------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12423 12314 -109 (-0.88%) Only one program is affected, with a minor change.
Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230406164510.1047757-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- .../verifier_bounds_deduction_non_const.c | 460 ++++++++++++++++++ 1 file changed, 460 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c index fe570d866139..823f727cf210 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c @@ -176,4 +176,464 @@ l1_%=: \ : __clobber_all); } +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_9(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + r2 = 0; \ + if r2 > r0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_10(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 4 goto l0_%=; \ + r2 = 4; \ + if r2 > r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> >= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_11(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 4 goto l0_%=; \ + r2 = 3; \ + if r2 >= r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> < <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_12(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 > 4 goto l0_%=; \ + r2 = 4; \ + if r2 < r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> <= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_13(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 >= 4 goto l0_%=; \ + r2 = 4; \ + if r2 <= r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> == <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_14(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 3 goto l0_%=; \ + r2 = 2; \ + if r2 == r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> s> <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_15(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 s< 4 goto l0_%=; \ + r2 = 4; \ + if r2 s> r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> s>= <non_const>") +__success __retval(0) 
+__naked void deducing_bounds_from_non_const_16(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 s< 4 goto l0_%=; \ + r2 = 3; \ + if r2 s>= r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> s< <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_17(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 s> 4 goto l0_%=; \ + r2 = 4; \ + if r2 s< r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> s<= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_18(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 s> 4 goto l0_%=; \ + r2 = 5; \ + if r2 s<= r0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp64, <const> != <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_19(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 < 3 goto l0_%=; \ + r2 = 2; \ + if r2 != r0 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 1") +__success __retval(0) +__naked void deducing_bounds_from_non_const_20(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + w2 = 0; \ + if w2 > w0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 2") +__success __retval(0) +__naked void deducing_bounds_from_non_const_21(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 4 goto l0_%=; \ + w2 = 4; \ + if w2 > w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> >= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_22(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 4 goto l0_%=; \ + w2 = 3; \ + if w2 >= w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> < <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_23(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 > 4 goto l0_%=; \ + w2 = 4; \ + if w2 < w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> <= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_24(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 >= 4 goto l0_%=; \ + w2 = 4; \ + if w2 <= w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds 
from non-const, jmp32, <const> == <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_25(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 4 goto l0_%=; \ + w2 = 3; \ + if w2 == w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> s> <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_26(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 s< 4 goto l0_%=; \ + w2 = 4; \ + if w2 s> w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> s>= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_27(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 s< 4 goto l0_%=; \ + w2 = 3; \ + if w2 s>= w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> s< <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_28(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 s> 4 goto l0_%=; \ + w2 = 5; \ + if w2 s< w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> s<= <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_29(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 s>= 4 goto l0_%=; \ + w2 = 4; \ + if w2 s<= w0 goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("socket") +__description("check deducing bounds from non-const, jmp32, <const> != <non_const>") +__success __retval(0) +__naked void deducing_bounds_from_non_const_30(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if w0 < 3 goto l0_%=; \ + w2 = 2; \ + if w2 != w0 goto l0_%=; \ + goto l1_%=; \ +l0_%=: \ + r0 = 0; \ + exit; \ +l1_%=: \ + r0 -= r1; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; From f3f21349779776135349a8e6f114a1485b2476b7 Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 6 Apr 2023 20:18:08 -0400 Subject: [PATCH 215/260] bpf: ensure all memory is initialized in bpf_get_current_comm BPF helpers that take an ARG_PTR_TO_UNINIT_MEM must ensure that all of the memory is set, including beyond the end of the string.
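For illustration, a minimal userspace model of the padded copy (a hypothetical helper, not the kernel's strscpy_pad() implementation): a plain strscpy() stops writing after the terminating NUL, so with a 16-byte buffer and a short comm the remaining bytes would hold stale data; the padded variant zero-fills the tail.

  #include <string.h>

  /* Copy up to size-1 bytes of src, NUL-terminate, and zero the tail.
   * Assumes size >= 1 (the verifier guarantees size > 0 here).
   */
  static void copy_padded(char *dst, const char *src, size_t size)
  {
  	size_t n = strnlen(src, size - 1);	/* truncate if needed */

  	memcpy(dst, src, n);
  	memset(dst + n, 0, size - n);		/* includes terminating NUL */
  }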
Signed-off-by: Barret Rhoden Link: https://lore.kernel.org/r/20230407001808.1622968-1-brho@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6be16db9f188..b6a5cda5bb59 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -258,7 +258,7 @@ BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) goto err_clear; /* Verifier guarantees that size > 0 */ - strscpy(buf, task->comm, size); + strscpy_pad(buf, task->comm, size); return 0; err_clear: memset(buf, 0, size); From 3ebf5212bf042954666b19fe4ff5a98911b08128 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 7 Apr 2023 12:01:30 -0700 Subject: [PATCH 216/260] selftests/bpf: Use PERF_COUNT_HW_CPU_CYCLES event for get_branch_snapshot perf_event with type=PERF_TYPE_RAW and config=0x1b00 turned out to be unreliable in ensuring LBR is active. Thus, test_progs:get_branch_snapshot is unreliable on some systems. Replace it with the PERF_COUNT_HW_CPU_CYCLES event, which gives more consistent results. Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20230407190130.2093736-1-song@kernel.org --- tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index 3948da12a528..0394a1156d99 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -37,8 +37,8 @@ static int create_perf_events(void) /* create perf event */ attr.size = sizeof(attr); - attr.type = PERF_TYPE_RAW; - attr.config = 0x1b00; + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; attr.sample_type = PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; From b24f0b049e706c6fc6e822dfc519ee33c3865092 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 7 Apr 2023 08:14:26 +0000 Subject: [PATCH 217/260] bpftool: Set program type only if it differs from the desired one After commit d6e6286a12e7 ("libbpf: disassociate section handler on explicit bpf_program__set_type() call"), bpf_program__set_type() will forcibly clean up the program's SEC() definition; that commit fixed the test helper but missed bpftool, which left bpftool prog autoattach broken, as follows: $ bpftool prog load spi-xfer-r1v1.o /sys/fs/bpf/test autoattach Program spi_xfer_r1v1 does not support autoattach, falling back to pinning This patch fixes bpftool to set the program type only if it differs.
Fixes: d6e6286a12e7 ("libbpf: disassociate section handler on explicit bpf_program__set_type() call") Signed-off-by: Wei Yongjun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230407081427.2621590-1-weiyongjun@huaweicloud.com --- tools/bpf/bpftool/prog.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 430f72306409..e5b613a7974c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1685,7 +1685,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } bpf_program__set_ifindex(pos, ifindex); - bpf_program__set_type(pos, prog_type); + if (bpf_program__type(pos) != prog_type) + bpf_program__set_type(pos, prog_type); bpf_program__set_expected_attach_type(pos, expected_attach_type); } From 5855b0999de4213bf51d856a345c4b53f2304e33 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 7 Apr 2023 18:41:25 +0300 Subject: [PATCH 218/260] selftests/bpf: Prevent infinite loop in veristat when base file is too short The following example forces veristat to loop indefinitely: $ cat two-ok file_name,prog_name,verdict,total_states file-a,a,success,12 file-b,b,success,67 $ cat add-failure file_name,prog_name,verdict,total_states file-a,a,success,12 file-b,b,success,67 file-b,c,failure,32 $ veristat -C two-ok add-failure The loop is caused by handle_comparison_mode() not checking if the `base` variable points to `fallback_stats` prior to advancing joined results using `base`. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230407154125.896927-1-eddyz87@gmail.com --- tools/testing/selftests/bpf/veristat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 53d7ec168268..e05954e20bba 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1824,18 +1824,22 @@ static int handle_comparison_mode(void) join->stats_b = comp; i++; j++; - } else if (comp == &fallback_stats || r < 0) { + } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) { join->file_name = base->file_name; join->prog_name = base->prog_name; join->stats_a = base; join->stats_b = NULL; i++; - } else { + } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) { join->file_name = comp->file_name; join->prog_name = comp->prog_name; join->stats_a = NULL; join->stats_b = comp; j++; + } else { + fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i", + __FILE__, __LINE__, i, j); + return -EINVAL; } env.join_stat_cnt += 1; } From c4d3b488a90be95f4f9413dc7eae5fc113d15fe9 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 7 Apr 2023 19:29:19 -0700 Subject: [PATCH 219/260] selftests/bpf: Reset err when symbol name already exists in kprobe_multi_test When trying to add a name to the hashmap, an error code of EEXIST is returned and we continue, as names may be duplicated in the sys file. If the last name in the file is a duplicate, we will continue to the next iteration of the while loop, and exit the loop with a value of err set to EEXIST and enter the error label with err set, which causes the test to fail when it should not. This change resets err to 0 before continuing to the next iteration; this way, if there is no more data to read from the file we iterate through, err will be set to 0.
Behaviour prior to this change: ``` test_kprobe_multi_bench_attach:FAIL:get_syms unexpected error: -17 (errno 2) All error logs: test_kprobe_multi_bench_attach:FAIL:get_syms unexpected error: -17 (errno 2) Summary: 0/1 PASSED, 0 SKIPPED, 1 FAILED ``` After this change: ``` Summary: 1/2 PASSED, 0 SKIPPED, 0 FAILED ``` Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230408022919.54601-1-chantr4@gmail.com --- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 22be0a9a5a0a..2173c4bb555e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -381,8 +381,10 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel) continue; err = hashmap__add(map, name, 0); - if (err == -EEXIST) + if (err == -EEXIST) { + err = 0; continue; + } if (err) goto error; From eafa92152e2ec6318e32b6ddda9c1d95d161000a Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Tue, 11 Apr 2023 15:47:47 +0100 Subject: [PATCH 220/260] bpf: Remove extra whitespace in SPDX tag for syscall/helpers man pages There is an extra space in the SPDX tag, before the license name, in the script for generating man pages for the bpf() syscall and the helpers. It has caused problems in Debian packaging, in the tool that autodetects licenses. Let's clean it up. Fixes: 5cb62b7598f2 ("bpf, docs: Use SPDX license identifier in bpf_doc.py") Signed-off-by: Alejandro Colomar Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230411144747.66734-1-quentin@isovalent.com --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 38d51e05c7a2..eaae2ce78381 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -383,7 +383,7 @@ class PrinterRST(Printer): .. Copyright (C) All BPF authors and contributors from 2014 to present. .. See git log include/uapi/linux/bpf.h in kernel tree for details. .. -.. SPDX-License-Identifier:  Linux-man-pages-copyleft +.. SPDX-License-Identifier: Linux-man-pages-copyleft .. .. Please do not edit this file. It was generated from the documentation .. located in file include/uapi/linux/bpf.h of the Linux kernel sources From 4294a0a7ab6282c3d92f03de84e762dda993c93d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:47 -0700 Subject: [PATCH 221/260] bpf: Split off basic BPF verifier log into separate file kernel/bpf/verifier.c file is large and growing larger all the time. So it's good to start splitting off more or less self-contained parts into separate files to keep source code size (somewhat) under control. This patch is one step in this direction, moving some of the BPF verifier log routines into a separate kernel/bpf/log.c. Right now it's mostly low-level and isolated routines to append data to the log, reset the log to a previous position, etc. Eventually we could probably move verifier state printing logic here as well, but this patch doesn't attempt to do that yet. Subsequent patches will add more logic to verifier log management, so having the basics in a separate file will make sure verifier.c doesn't grow more with new changes.
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-2-andrii@kernel.org --- include/linux/bpf_verifier.h | 19 +++----- kernel/bpf/Makefile | 3 +- kernel/bpf/log.c | 85 ++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 69 ----------------------------- 4 files changed, 94 insertions(+), 82 deletions(-) create mode 100644 kernel/bpf/log.c diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 81d525d057c7..83dff25545ee 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -498,11 +498,6 @@ struct bpf_verifier_log { u32 len_total; }; -static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) -{ - return log->len_used >= log->len_total - 1; -} - #define BPF_LOG_LEVEL1 1 #define BPF_LOG_LEVEL2 2 #define BPF_LOG_STATS 4 @@ -512,6 +507,11 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) #define BPF_LOG_MIN_ALIGNMENT 8U #define BPF_LOG_ALIGNMENT 40U +static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) +{ + return log->len_used >= log->len_total - 1; +} + static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { return log && @@ -519,13 +519,6 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) log->level == BPF_LOG_KERNEL); } -static inline bool -bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) -{ - return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && - log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); -} - #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { @@ -608,12 +601,14 @@ struct bpf_verifier_env { char type_str_buf[TYPE_STR_BUF_LEN]; }; +bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log); __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...); +void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 02242614dcc7..1d3892168d32 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,8 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c new file mode 100644 index 000000000000..920061e38d2e --- /dev/null +++ b/kernel/bpf/log.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook + * Copyright (c) 2018 Covalent IO, Inc. 
http://covalent.io + */ +#include +#include +#include +#include +#include + +bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +{ + return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && + log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); +} + +void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, + va_list args) +{ + unsigned int n; + + n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); + + WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, + "verifier log line truncated - local buffer too short\n"); + + if (log->level == BPF_LOG_KERNEL) { + bool newline = n > 0 && log->kbuf[n - 1] == '\n'; + + pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n"); + return; + } + + n = min(log->len_total - log->len_used - 1, n); + log->kbuf[n] = '\0'; + if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) + log->len_used += n; + else + log->ubuf = NULL; +} + +void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) +{ + char zero = 0; + + if (!bpf_verifier_log_needed(log)) + return; + + log->len_used = new_pos; + if (put_user(zero, log->ubuf + new_pos)) + log->ubuf = NULL; +} + +/* log_level controls verbosity level of eBPF verifier. + * bpf_verifier_log_write() is used to dump the verification trace to the log, + * so the user can figure out what's wrong with the program + */ +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...) +{ + va_list args; + + if (!bpf_verifier_log_needed(&env->log)) + return; + + va_start(args, fmt); + bpf_verifier_vlog(&env->log, fmt, args); + va_end(args); +} +EXPORT_SYMBOL_GPL(bpf_verifier_log_write); + +__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, + const char *fmt, ...) +{ + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); +} +EXPORT_SYMBOL_GPL(bpf_log); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3660b573048a..745ae0cd01d4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -335,61 +335,6 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off) return &linfo[i - 1]; } -void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, - va_list args) -{ - unsigned int n; - - n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); - - WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, - "verifier log line truncated - local buffer too short\n"); - - if (log->level == BPF_LOG_KERNEL) { - bool newline = n > 0 && log->kbuf[n - 1] == '\n'; - - pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n"); - return; - } - - n = min(log->len_total - log->len_used - 1, n); - log->kbuf[n] = '\0'; - if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) - log->len_used += n; - else - log->ubuf = NULL; -} - -static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) -{ - char zero = 0; - - if (!bpf_verifier_log_needed(log)) - return; - - log->len_used = new_pos; - if (put_user(zero, log->ubuf + new_pos)) - log->ubuf = NULL; -} - -/* log_level controls verbosity level of eBPF verifier. - * bpf_verifier_log_write() is used to dump the verification trace to the log, - * so the user can figure out what's wrong with the program - */ -__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, - const char *fmt, ...) 
-{ - va_list args; - - if (!bpf_verifier_log_needed(&env->log)) - return; - - va_start(args, fmt); - bpf_verifier_vlog(&env->log, fmt, args); - va_end(args); -} -EXPORT_SYMBOL_GPL(bpf_verifier_log_write); - __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) { struct bpf_verifier_env *env = private_data; @@ -403,20 +348,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) va_end(args); } -__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, - const char *fmt, ...) -{ - va_list args; - - if (!bpf_verifier_log_needed(log)) - return; - - va_start(args, fmt); - bpf_verifier_vlog(log, fmt, args); - va_end(args); -} -EXPORT_SYMBOL_GPL(bpf_log); - static const char *ltrim(const char *s) { while (isspace(*s)) From 03cc3aa6a53394481f01c16231f99298332066f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:48 -0700 Subject: [PATCH 222/260] bpf: Remove minimum size restrictions on verifier log buffer It's not clear why we have 128 as the minimum size, but it makes testing harder and seems unnecessary, as we carefully handle truncation scenarios and use proper snprintf variants. So remove this limitation and just enforce a positive length for the log buffer. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-3-andrii@kernel.org --- kernel/bpf/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 920061e38d2e..1974891fc324 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -11,7 +11,7 @@ bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { - return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && + return log->len_total > 0 && log->len_total <= UINT_MAX >> 2 && log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); } From 1216640938035e63bdbd32438e91c9bcc1fd8ee1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:49 -0700 Subject: [PATCH 223/260] bpf: Switch BPF verifier log to be a rotating log by default Currently, if user-supplied log buffer to collect BPF verifier log turns out to be too small to contain full log, bpf() syscall returns -ENOSPC, fails BPF program verification/load, and preserves first N-1 bytes of the verifier log (where N is the size of user-supplied buffer). This is problematic in a bunch of common scenarios, especially when working with real-world BPF programs that tend to be pretty complex as far as verification goes and require big log buffers. Typically, it's when debugging tricky cases at log level 2 (verbose). Also, when BPF program is successfully validated, log level 2 is the only way to actually see verifier state progression and all the important details. Even with log level 1, it's possible to get -ENOSPC even if the final verifier log fits in log buffer, if there is a code path that's deep enough to fill up entire log, even if normally it would be reset later on (there is logic to chop off successfully validated portions of BPF verifier log). In short, it's not always possible to pre-size log buffer. Also, what's worse, in practice, the end of the log most often is way more important than the beginning, but verifier stops emitting log as soon as initial log buffer is filled up. This patch switches BPF verifier log behavior to effectively behave as a rotating log.
That is, if user-supplied log buffer turns out to be too short, verifier will keep overwriting previously written log, effectively treating user's log buffer as a ring buffer. -ENOSPC is still going to be returned at the end, to notify user that log contents were truncated, but the important last N bytes of the log would be returned, which might be all that the user really needs. This consistent -ENOSPC behavior, regardless of rotating or fixed log behavior, allows us to prevent backwards compatibility breakage. The only user-visible change is which portion of verifier log the user ends up seeing *if buffer is too small*. Given contents of verifier log itself is not an ABI, there is no breakage due to this behavior change. Specialized tools that rely on specific contents of verifier log in -ENOSPC scenario are expected to be easily adapted to accommodate old and new behaviors. Importantly, though, to preserve good user experience and not require every user-space application to adapt to this new behavior, before exiting to user-space verifier will rotate log (in place) to make it start at the very beginning of user buffer as a continuous zero-terminated string. The contents will be the last N-1 bytes of the full verifier log, of course. Given the beginning of the log is sometimes important as well, we add a BPF_LOG_FIXED (which equals 8) flag to force the old behavior, which allows tools like veristat to request the first part of the verifier log, if necessary. The BPF_LOG_FIXED flag is also a simple and straightforward way to check if the BPF verifier supports the rotating behavior. On the implementation side, conceptually, it's all simple. We maintain 64-bit logical start and end positions. If we need to truncate the log, the start position will be adjusted accordingly to lag the end position by N bytes. We then use those logical positions to calculate their matching actual positions in user buffer and handle wrap-around at the end of the buffer properly. Finally, right before returning from bpf_check(), we rotate user log buffer contents in-place as necessary, to make log contents contiguous. See comments in relevant functions for details. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-4-andrii@kernel.org --- include/linux/bpf_verifier.h | 33 ++- kernel/bpf/btf.c | 3 +- kernel/bpf/log.c | 198 +++++++++++++++++- kernel/bpf/verifier.c | 19 +- .../selftests/bpf/prog_tests/log_fixup.c | 1 + 5 files changed, 228 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 83dff25545ee..4c926227f612 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -491,25 +491,42 @@ struct bpf_insn_aux_data { #define BPF_VERIFIER_TMP_LOG_SIZE 1024 struct bpf_verifier_log { - u32 level; - char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; + /* Logical start and end positions of a "log window" of the verifier log. + * start_pos == 0 means we haven't truncated anything. + * Once truncation starts to happen, start_pos + len_total == end_pos, + * except during log reset situations, in which (end_pos - start_pos) + * might get smaller than len_total (see bpf_vlog_reset()). + * Generally, (end_pos - start_pos) gives number of useful data in + * user log buffer. 
+ */ + u64 start_pos; + u64 end_pos; char __user *ubuf; - u32 len_used; + u32 level; u32 len_total; + char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; }; #define BPF_LOG_LEVEL1 1 #define BPF_LOG_LEVEL2 2 #define BPF_LOG_STATS 4 +#define BPF_LOG_FIXED 8 #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) -#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) +#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS | BPF_LOG_FIXED) #define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ #define BPF_LOG_MIN_ALIGNMENT 8U #define BPF_LOG_ALIGNMENT 40U +static inline u32 bpf_log_used(const struct bpf_verifier_log *log) +{ + return log->end_pos - log->start_pos; +} + static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) { - return log->len_used >= log->len_total - 1; + if (log->level & BPF_LOG_FIXED) + return bpf_log_used(log) >= log->len_total - 1; + return false; } static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) @@ -596,7 +613,7 @@ struct bpf_verifier_env { u32 scratched_regs; /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; - u32 prev_log_len, prev_insn_print_len; + u64 prev_log_pos, prev_insn_print_pos; /* buffer used in reg_type_str() to generate reg_type string */ char type_str_buf[TYPE_STR_BUF_LEN]; }; @@ -608,7 +625,9 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...); -void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos); +void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); +void bpf_vlog_finalize(struct bpf_verifier_log *log); +bool bpf_vlog_truncated(const struct bpf_verifier_log *log); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 593c45a294d0..20a05b8932db 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5593,7 +5593,8 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, } } - if (log->level && bpf_verifier_log_full(log)) { + bpf_vlog_finalize(log); + if (log->level && bpf_vlog_truncated(log)) { err = -ENOSPC; goto errout_meta; } diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 1974891fc324..92b1c8ad6601 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -8,6 +8,7 @@ #include #include #include +#include bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { @@ -32,23 +33,202 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, return; } - n = min(log->len_total - log->len_used - 1, n); - log->kbuf[n] = '\0'; - if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) - log->len_used += n; - else - log->ubuf = NULL; + if (log->level & BPF_LOG_FIXED) { + n = min(log->len_total - bpf_log_used(log) - 1, n); + log->kbuf[n] = '\0'; + n += 1; + + if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n)) + goto fail; + + log->end_pos += n - 1; /* don't count terminating '\0' */ + } else { + u64 new_end, new_start, cur_pos; + u32 buf_start, buf_end, new_n; + + n += 1; + + new_end = log->end_pos + n; + if (new_end - log->start_pos >= log->len_total) + new_start = new_end - log->len_total; + else + new_start = log->start_pos; + new_n = min(n, log->len_total); + cur_pos = new_end - new_n; + + div_u64_rem(cur_pos, log->len_total, &buf_start); + div_u64_rem(new_end, log->len_total, &buf_end); + /* new_end and buf_end are exclusive indices, so if buf_end is + * exactly zero, then it actually points right to 
the end of + * ubuf and there is no wrap around + */ + if (buf_end == 0) + buf_end = log->len_total; + + /* if buf_start > buf_end, we wrapped around; + * if buf_start == buf_end, then we fill ubuf completely; we + * can't have buf_start == buf_end to mean that there is + * nothing to write, because we always write at least + * something, even if terminal '\0' + */ + if (buf_start < buf_end) { + /* message fits within contiguous chunk of ubuf */ + if (copy_to_user(log->ubuf + buf_start, + log->kbuf + n - new_n, + buf_end - buf_start)) + goto fail; + } else { + /* message wraps around the end of ubuf, copy in two chunks */ + if (copy_to_user(log->ubuf + buf_start, + log->kbuf + n - new_n, + log->len_total - buf_start)) + goto fail; + if (copy_to_user(log->ubuf, + log->kbuf + n - buf_end, + buf_end)) + goto fail; + } + + log->start_pos = new_start; + log->end_pos = new_end - 1; /* don't count terminating '\0' */ + } + + return; +fail: + log->ubuf = NULL; } -void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) +void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos) { char zero = 0; + u32 pos; + + if (WARN_ON_ONCE(new_pos > log->end_pos)) + return; if (!bpf_verifier_log_needed(log)) return; - log->len_used = new_pos; - if (put_user(zero, log->ubuf + new_pos)) + /* if position to which we reset is beyond current log window, + * then we didn't preserve any useful content and should adjust + * start_pos to end up with an empty log (start_pos == end_pos) + */ + log->end_pos = new_pos; + if (log->end_pos < log->start_pos) + log->start_pos = log->end_pos; + div_u64_rem(new_pos, log->len_total, &pos); + if (put_user(zero, log->ubuf + pos)) + log->ubuf = NULL; +} + +static void bpf_vlog_reverse_kbuf(char *buf, int len) +{ + int i, j; + + for (i = 0, j = len - 1; i < j; i++, j--) + swap(buf[i], buf[j]); +} + +static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int end) +{ + /* we split log->kbuf into two equal parts for both ends of array */ + int n = sizeof(log->kbuf) / 2, nn; + char *lbuf = log->kbuf, *rbuf = log->kbuf + n; + + /* Read ubuf's section [start, end) two chunks at a time, from left + * and right side; within each chunk, swap all the bytes; after that + * reverse the order of lbuf and rbuf and write result back to ubuf. + * This way we'll end up with swapped contents of specified + * [start, end) ubuf segment. + */ + while (end - start > 1) { + nn = min(n, (end - start ) / 2); + + if (copy_from_user(lbuf, log->ubuf + start, nn)) + return -EFAULT; + if (copy_from_user(rbuf, log->ubuf + end - nn, nn)) + return -EFAULT; + + bpf_vlog_reverse_kbuf(lbuf, nn); + bpf_vlog_reverse_kbuf(rbuf, nn); + + /* we write lbuf to the right end of ubuf, while rbuf to the + * left one to end up with properly reversed overall ubuf + */ + if (copy_to_user(log->ubuf + start, rbuf, nn)) + return -EFAULT; + if (copy_to_user(log->ubuf + end - nn, lbuf, nn)) + return -EFAULT; + + start += nn; + end -= nn; + } + + return 0; +} + +bool bpf_vlog_truncated(const struct bpf_verifier_log *log) +{ + if (log->level & BPF_LOG_FIXED) + return bpf_log_used(log) >= log->len_total - 1; + else + return log->start_pos > 0; +} + +void bpf_vlog_finalize(struct bpf_verifier_log *log) +{ + u32 sublen; + int err; + + if (!log || !log->level || !log->ubuf) + return; + if ((log->level & BPF_LOG_FIXED) || log->level == BPF_LOG_KERNEL) + return; + + /* If we never truncated log, there is nothing to move around. 
*/ + if (log->start_pos == 0) + return; + + /* Otherwise we need to rotate log contents to make it start from the + * buffer beginning and be a continuous zero-terminated string. Note + * that if log->start_pos != 0 then we definitely filled up entire log + * buffer with no gaps, and we just need to shift buffer contents to + * the left by (log->start_pos % log->len_total) bytes. + * + * Unfortunately, user buffer could be huge and we don't want to + * allocate temporary kernel memory of the same size just to shift + * contents in a straightforward fashion. Instead, we'll be clever and + * do in-place array rotation. This is a leetcode-style problem, which + * could be solved by three rotations. + * + * Let's say we have log buffer that has to be shifted left by 7 bytes + * (spaces and vertical bar is just for demonstrative purposes): + * E F G H I J K | A B C D + * + * First, we reverse entire array: + * D C B A | K J I H G F E + * + * Then we rotate first 4 bytes (DCBA) and separately last 7 bytes + * (KJIHGFE), resulting in a properly rotated array: + * A B C D | E F G H I J K + * + * We'll utilize log->kbuf to read user memory chunk by chunk, swap + * bytes, and write them back. Doing it byte-by-byte would be + * unnecessarily inefficient. Altogether we are going to read and + * write each byte twice, for total 4 memory copies between kernel and + * user space. + */ + + /* length of the chopped off part that will be the beginning; + * len(ABCD) in the example above + */ + div_u64_rem(log->start_pos, log->len_total, &sublen); + sublen = log->len_total - sublen; + + err = bpf_vlog_reverse_ubuf(log, 0, log->len_total); + err = err ?: bpf_vlog_reverse_ubuf(log, 0, sublen); + err = err ?: bpf_vlog_reverse_ubuf(log, sublen, log->len_total); + if (err) log->ubuf = NULL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 745ae0cd01d4..a476bb319685 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1439,10 +1439,10 @@ static inline u32 vlog_alignment(u32 pos) static void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) { - if (env->prev_log_len && env->prev_log_len == env->log.len_used) { + if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { /* remove new line character */ - bpf_vlog_reset(&env->log, env->prev_log_len - 1); - verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' '); + bpf_vlog_reset(&env->log, env->prev_log_pos - 1); + verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' '); } else { verbose(env, "%d:", env->insn_idx); } @@ -1750,7 +1750,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; - elem->log_pos = env->log.len_used; + elem->log_pos = env->log.end_pos; env->head = elem; env->stack_size++; err = copy_verifier_state(&elem->st, cur); @@ -2286,7 +2286,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; - elem->log_pos = env->log.len_used; + elem->log_pos = env->log.end_pos; env->head = elem; env->stack_size++; if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { @@ -15638,11 +15638,11 @@ static int do_check(struct bpf_verifier_env *env) print_insn_state(env, state->frame[state->curframe]); verbose_linfo(env, env->insn_idx, "; "); - env->prev_log_len = env->log.len_used; + env->prev_log_pos = env->log.end_pos; verbose(env, "%d: ", 
env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); - env->prev_insn_print_len = env->log.len_used - env->prev_log_len; - env->prev_log_len = env->log.len_used; + env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos; + env->prev_log_pos = env->log.end_pos; } if (bpf_prog_is_offloaded(env->prog->aux)) { @@ -18860,7 +18860,8 @@ skip_full_check: print_verification_stats(env); env->prog->aux->verified_insns = env->insn_processed; - if (log->level && bpf_verifier_log_full(log)) + bpf_vlog_finalize(log); + if (log->level && bpf_vlog_truncated(log)) ret = -ENOSPC; if (log->level && !log->ubuf) { ret = -EFAULT; diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index 239e1c5753b0..bc27170bdeb0 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -24,6 +24,7 @@ static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type) bpf_program__set_autoload(skel->progs.bad_relo, true); memset(log_buf, 0, sizeof(log_buf)); bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf)); + bpf_program__set_log_level(skel->progs.bad_relo, 1 | 8); /* BPF_LOG_FIXED to force truncation */ err = test_log_fixup__load(skel); if (!ASSERT_ERR(err, "load_fail")) From e0aee1facccf9f12136600031be4ce21eb810a78 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:50 -0700 Subject: [PATCH 224/260] libbpf: Don't enforce unnecessary verifier log restrictions on libbpf side This basically prevents any forward compatibility. And we either way just return -EINVAL, which would otherwise be returned from bpf() syscall anyways. Similarly, drop enforcement of non-NULL log_buf when log_level > 0. This won't be true anymore soon. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-5-andrii@kernel.org --- tools/lib/bpf/bpf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 767035900354..f1d04ee14d83 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -290,10 +290,6 @@ int bpf_prog_load(enum bpf_prog_type prog_type, if (!!log_buf != !!log_size) return libbpf_err(-EINVAL); - if (log_level > (4 | 2 | 1)) - return libbpf_err(-EINVAL); - if (log_level && !log_buf) - return libbpf_err(-EINVAL); func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); func_info = OPTS_GET(opts, func_info, NULL); From d0d75c67c45abd3930967dcafc82fd4505400665 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:51 -0700 Subject: [PATCH 225/260] veristat: Add more veristat control over verifier log options Add --log-size to be able to customize log buffer sent to bpf() syscall for BPF program verification logging. Add --log-fixed to enforce BPF_LOG_FIXED behavior for BPF verifier log. This is useful in unlikely event that beginning of truncated verifier log is more important than the end of it (which with rotating verifier log behavior is the default now). 
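As a usage illustration, a hypothetical caller could now pass log options that older libbpf would have rejected up front; a hedged sketch (buffer, instructions, and program name are placeholders, assuming <bpf/bpf.h>):

  char log_buf[4096];
  LIBBPF_OPTS(bpf_prog_load_opts, opts,
  	.log_buf = log_buf,
  	.log_size = sizeof(log_buf),
  	.log_level = 1 | 8,	/* 8 == BPF_LOG_FIXED; the kernel now decides validity */
  );
  int fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "sketch", "GPL",
  		       insns, insn_cnt, &opts);	/* insns/insn_cnt assumed */

If the kernel rejects the log_level bits, the same -EINVAL surfaces from the bpf() syscall itself, so dropping the libbpf-side check loses nothing while keeping libbpf forward compatible.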
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230406234205.323208-6-andrii@kernel.org --- tools/testing/selftests/bpf/veristat.c | 44 ++++++++++++++++++++------ 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index e05954e20bba..1db7185181da 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -141,12 +141,15 @@ static struct env { bool verbose; bool debug; bool quiet; - int log_level; enum resfmt out_fmt; bool show_version; bool comparison_mode; bool replay_mode; + int log_level; + int log_size; + bool log_fixed; + struct verif_stats *prog_stats; int prog_stat_cnt; @@ -193,12 +196,19 @@ const char argp_program_doc[] = " OR: veristat -C \n" " OR: veristat -R \n"; +enum { + OPT_LOG_FIXED = 1000, + OPT_LOG_SIZE = 1001, +}; + static const struct argp_option opts[] = { { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, { "version", 'V', NULL, 0, "Print version" }, { "verbose", 'v', NULL, 0, "Verbose mode" }, - { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" }, + { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, + { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, + { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -263,6 +273,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) argp_usage(state); } break; + case OPT_LOG_FIXED: + env.log_fixed = true; + break; + case OPT_LOG_SIZE: + errno = 0; + env.log_size = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid log size: %s\n", arg); + argp_usage(state); + } + break; case 'C': env.comparison_mode = true; break; @@ -929,8 +950,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf { const char *prog_name = bpf_program__name(prog); const char *base_filename = basename(filename); - size_t buf_sz = sizeof(verif_log_buf); - char *buf = verif_log_buf; + char *buf; + int buf_sz, log_level; struct verif_stats *stats; int err = 0; void *tmp; @@ -948,18 +969,23 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf memset(stats, 0, sizeof(*stats)); if (env.verbose) { - buf_sz = 16 * 1024 * 1024; + buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; buf = malloc(buf_sz); if (!buf) return -ENOMEM; - bpf_program__set_log_buf(prog, buf, buf_sz); - bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */ + /* ensure we always request stats */ + log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); } else { - bpf_program__set_log_buf(prog, buf, buf_sz); - bpf_program__set_log_level(prog, 4); /* only verifier stats */ + buf = verif_log_buf; + buf_sz = sizeof(verif_log_buf); + /* request only verifier stats */ + log_level = 4 | (env.log_fixed ? 
8 : 0); } verif_log_buf[0] = '\0'; + bpf_program__set_log_buf(prog, buf, buf_sz); + bpf_program__set_log_level(prog, log_level); + /* increase chances of successful BPF object loading */ fixup_obj(obj, prog, base_filename); From b1a7a480a1120d4f70305f5e8859f527e0efe4a5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:52 -0700 Subject: [PATCH 226/260] selftests/bpf: Add fixed vs rotating verifier log tests Add selftests validating BPF_LOG_FIXED behavior, which used to be the only behavior, and now default rotating BPF verifier log, which returns just up to last N bytes of full verifier log, instead of returning -ENOSPC. To stress test correctness of in-kernel verifier log logic, we force it to truncate program's verifier log to all lengths from 1 all the way to its full size (about 450 bytes today). This was a useful stress test while developing the feature. For both fixed and rotating log modes we expect -ENOSPC if log contents doesn't fit in user-supplied log buffer. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-7-andrii@kernel.org --- .../selftests/bpf/prog_tests/verifier_log.c | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/verifier_log.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c new file mode 100644 index 000000000000..3284108a6ce8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include + +#include "test_log_buf.skel.h" + + +static bool check_prog_load(int prog_fd, bool expect_err, const char *tag) +{ + if (expect_err) { + if (!ASSERT_LT(prog_fd, 0, tag)) { + close(prog_fd); + return false; + } + } else /* !expect_err */ { + if (!ASSERT_GT(prog_fd, 0, tag)) + return false; + } + return true; +} + +static void verif_log_subtest(const char *name, bool expect_load_error, int log_level) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts); + struct { + /* strategically placed before others to avoid accidental modification by kernel */ + char filler[1024]; + char buf[1024]; + /* strategically placed after buf[] to catch more accidental corruptions */ + char reference[1024]; + } logs; + char *exp_log, prog_name[16], op_name[32]; + struct test_log_buf *skel; + struct bpf_program *prog; + const struct bpf_insn *insns; + size_t insn_cnt, fixed_log_sz; + int i, mode, err, prog_fd; + + skel = test_log_buf__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + if (strcmp(bpf_program__name(prog), name) == 0) + bpf_program__set_autoload(prog, true); + else + bpf_program__set_autoload(prog, false); + } + + err = test_log_buf__load(skel); + if (!expect_load_error && !ASSERT_OK(err, "unexpected_load_failure")) + goto cleanup; + if (expect_load_error && !ASSERT_ERR(err, "unexpected_load_success")) + goto cleanup; + + insns = bpf_program__insns(skel->progs.good_prog); + insn_cnt = bpf_program__insn_cnt(skel->progs.good_prog); + + opts.log_buf = logs.reference; + opts.log_size = sizeof(logs.reference); + opts.log_level = log_level | 8 /* BPF_LOG_FIXED */; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed", + "GPL", insns, insn_cnt, &opts); + if (!check_prog_load(prog_fd, expect_load_error, "fixed_buf_prog_load")) + goto 
cleanup; + close(prog_fd); + + fixed_log_sz = strlen(logs.reference) + 1; + if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz")) + goto cleanup; + memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz); + + /* validate BPF_LOG_FIXED works as verifier log used to work, that is: + * we get -ENOSPC and beginning of the full verifier log. This only + * works for log_level 2 and log_level 1 + failed program. For log + * level 2 we don't reset log at all. For log_level 1 + failed program + * we don't get to verification stats output. With log level 1 + * for successful program final result will be just verifier stats. + * But if provided too short log buf, kernel will NULL-out log->ubuf + * and will stop emitting further log. This means we'll never see + * predictable verifier stats. + * Long story short, we do the following -ENOSPC test only for + * predictable combinations. + */ + if (log_level >= 2 || expect_load_error) { + opts.log_buf = logs.buf; + opts.log_level = log_level | 8; /* fixed-length log */ + opts.log_size = 25; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed50", + "GPL", insns, insn_cnt, &opts); + if (!ASSERT_EQ(prog_fd, -ENOSPC, "unexpected_log_fixed_prog_load_result")) { + if (prog_fd >= 0) + close(prog_fd); + goto cleanup; + } + if (!ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25")) + goto cleanup; + if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name)) + goto cleanup; + } + + /* validate rolling verifier log logic: try all variations of log buf + * length to force various truncation scenarios + */ + opts.log_buf = logs.buf; + + /* rotating mode, then fixed mode */ + for (mode = 1; mode >= 0; mode--) { + /* prefill logs.buf with 'A's to detect any write beyond allowed length */ + memset(logs.filler, 'A', sizeof(logs.filler)); + logs.filler[sizeof(logs.filler) - 1] = '\0'; + memset(logs.buf, 'A', sizeof(logs.buf)); + logs.buf[sizeof(logs.buf) - 1] = '\0'; + + for (i = 1; i < fixed_log_sz; i++) { + opts.log_size = i; + opts.log_level = log_level | (mode ? 0 : 8 /* BPF_LOG_FIXED */); + + snprintf(prog_name, sizeof(prog_name), + "log_%s_%d", mode ? "roll" : "fixed", i); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, prog_name, + "GPL", insns, insn_cnt, &opts); + + snprintf(op_name, sizeof(op_name), + "log_%s_prog_load_%d", mode ? "roll" : "fixed", i); + if (!ASSERT_EQ(prog_fd, -ENOSPC, op_name)) { + if (prog_fd >= 0) + close(prog_fd); + goto cleanup; + } + + snprintf(op_name, sizeof(op_name), + "log_%s_strlen_%d", mode ? "roll" : "fixed", i); + ASSERT_EQ(strlen(logs.buf), i - 1, op_name); + + if (mode) + exp_log = logs.reference + fixed_log_sz - i; + else + exp_log = logs.reference; + + snprintf(op_name, sizeof(op_name), + "log_%s_contents_%d", mode ? "roll" : "fixed", i); + if (!ASSERT_STRNEQ(logs.buf, exp_log, i - 1, op_name)) { + printf("CMP:%d\nS1:'%s'\nS2:'%s'\n", + strncmp(logs.buf, exp_log, i - 1), + logs.buf, exp_log); + goto cleanup; + } + + /* check that unused portions of logs.buf is not overwritten */ + snprintf(op_name, sizeof(op_name), + "log_%s_unused_%d", mode ? 
"roll" : "fixed", i); + if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) { + printf("CMP:%d\nS1:'%s'\nS2:'%s'\n", + strcmp(logs.buf + i, logs.filler + i), + logs.buf + i, logs.filler + i); + goto cleanup; + } + } + } + +cleanup: + test_log_buf__destroy(skel); +} + +void test_verifier_log(void) +{ + if (test__start_subtest("good_prog-level1")) + verif_log_subtest("good_prog", false, 1); + if (test__start_subtest("good_prog-level2")) + verif_log_subtest("good_prog", false, 2); + if (test__start_subtest("bad_prog-level1")) + verif_log_subtest("bad_prog", true, 1); + if (test__start_subtest("bad_prog-level2")) + verif_log_subtest("bad_prog", true, 2); +} From 24bc80887adb4d6fc0057d4f14fabeaa4502b2a0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:53 -0700 Subject: [PATCH 227/260] bpf: Ignore verifier log reset in BPF_LOG_KERNEL mode Verifier log position reset is meaningless in BPF_LOG_KERNEL mode, so just exit early in bpf_vlog_reset() if log->level is BPF_LOG_KERNEL. This avoid meaningless put_user() into NULL log->ubuf. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-8-andrii@kernel.org --- kernel/bpf/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 92b1c8ad6601..d99a50f07187 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -106,7 +106,7 @@ void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos) if (WARN_ON_ONCE(new_pos > log->end_pos)) return; - if (!bpf_verifier_log_needed(log)) + if (!bpf_verifier_log_needed(log) || log->level == BPF_LOG_KERNEL) return; /* if position to which we reset is beyond current log window, From 971fb5057d787d0a7e7c8cb910207c82e2db920e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:54 -0700 Subject: [PATCH 228/260] bpf: Fix missing -EFAULT return on user log buf error in btf_parse() btf_parse() is missing -EFAULT error return if log->ubuf was NULL-ed out due to error while copying data into user-provided buffer. Add it, but handle a special case of BPF_LOG_KERNEL in which log->ubuf is always NULL. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-9-andrii@kernel.org --- kernel/bpf/btf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 20a05b8932db..6372c144a294 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5598,6 +5598,10 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, err = -ENOSPC; goto errout_meta; } + if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) { + err = -EFAULT; + goto errout_meta; + } btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); From cbedb42a0da3bb48819b2200af4b4cb5d922c518 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:55 -0700 Subject: [PATCH 229/260] bpf: Avoid incorrect -EFAULT error in BPF_LOG_KERNEL mode If verifier log is in BPF_LOG_KERNEL mode, no log->ubuf is expected and it stays NULL throughout entire verification process. Don't erroneously return -EFAULT in such case. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-10-andrii@kernel.org --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a476bb319685..0323149803f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18863,7 +18863,7 @@ skip_full_check: bpf_vlog_finalize(log); if (log->level && bpf_vlog_truncated(log)) ret = -ENOSPC; - if (log->level && !log->ubuf) { + if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) { ret = -EFAULT; goto err_release_maps; } From 8a6ca6bc553e3c878fa53c506bc6ec281efdc039 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:56 -0700 Subject: [PATCH 230/260] bpf: Simplify logging-related error conditions handling Move the log->level == 0 check into bpf_vlog_truncated() instead of doing it explicitly at the call sites. Also remove an unnecessary goto in kernel/bpf/verifier.c. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-11-andrii@kernel.org --- kernel/bpf/btf.c | 2 +- kernel/bpf/log.c | 4 +++- kernel/bpf/verifier.c | 6 ++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6372c144a294..5aa540ee611f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5594,7 +5594,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, } bpf_vlog_finalize(log); - if (log->level && bpf_vlog_truncated(log)) { + if (bpf_vlog_truncated(log)) { err = -ENOSPC; goto errout_meta; } diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index d99a50f07187..c778f3b290cb 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -169,7 +169,9 @@ static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en bool bpf_vlog_truncated(const struct bpf_verifier_log *log) { - if (log->level & BPF_LOG_FIXED) + if (!log->level) + return false; + else if (log->level & BPF_LOG_FIXED) return bpf_log_used(log) >= log->len_total - 1; else return log->start_pos > 0; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0323149803f5..a98cbc046d1e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18861,12 +18861,10 @@ skip_full_check: env->prog->aux->verified_insns = env->insn_processed; bpf_vlog_finalize(log); - if (log->level && bpf_vlog_truncated(log)) + if (bpf_vlog_truncated(log)) ret = -ENOSPC; - if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) { + if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) ret = -EFAULT; - goto err_release_maps; - } if (ret) goto err_release_maps; From fa1c7d5cc404ac3b6e6b4ab6d00b07c76bd819be Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:57 -0700 Subject: [PATCH 231/260] bpf: Keep track of total log content size in both fixed and rolling modes Change how we do accounting in BPF_LOG_FIXED mode and adopt log->end_pos as a *logical* log position. This means that we can now go beyond the physical log buffer size and tell what log buffer size would be needed to fit the entire log contents without -ENOSPC. To do this for BPF_LOG_FIXED mode, we need to remove the short-circuiting logic that stops vsnprintf()'ing further log content once the user-provided buffer is filled up, which is done by the bpf_verifier_log_needed() checks. 
We modify these checks to always keep going if log->level is non-zero (i.e., a log is requested), even if log->ubuf was NULL'ed out due to a failed copy to user space, or if the entire log buffer is physically full. We adapt the bpf_verifier_vlog() routine to work correctly with the log->ubuf == NULL condition, performing log formatting into a temporary kernel buffer and doing all the necessary accounting, but just avoiding copying data out if the buffer is full or NULL'ed out. With these changes, it's now possible to do this sort of determination of the log contents size in both BPF_LOG_FIXED and the default rolling log mode. We need to keep in mind bpf_vlog_reset(), though, which shrinks log contents after successful verification of a particular code path. This log reset means that log->end_pos isn't always increasing, so to report back to users what the log buffer size should be to fit all log content without causing -ENOSPC even in the presence of log resetting, we need to keep the maximum over the "lifetime" of logging. We do this accounting in the bpf_vlog_update_len_max() helper. A related and subtle aspect is that with this logical log->end_pos, even in BPF_LOG_FIXED mode we could temporarily "overflow" the buffer, but then reset it back with bpf_vlog_reset() to a position inside the user-supplied log_buf. In such a situation we still want to properly maintain the terminating zero. We will still eventually return -ENOSPC if the log buffer turned out to be too small at any point (we detect this through the log->len_max check). This behavior is simpler to reason about and is consistent with the current behavior of the verifier log. Handling of this required a small addition to the bpf_vlog_reset() logic to avoid doing put_user() beyond physical log buffer dimensions. Another issue to keep in mind is that we limit the log buffer size to a 32-bit value and keep the log length as a u32, but theoretically the verifier could produce a huge log stretching beyond 4GB. Instead of keeping (and later returning) a 64-bit log length, we cap it at UINT_MAX. The current UAPI makes it impossible to specify a log buffer size bigger than 4GB anyway, so we don't really lose anything here and keep everything consistently 32-bit in the UAPI. This property will be utilized in the next patch. Doing the same determination of the maximum log buffer size for rolling mode is trivial, as log->end_pos and log->start_pos are already logical positions, so there is nothing new there. These changes incidentally fix one small issue with the previous logging logic. Previously, if the user provided a log buffer of size N, and the actual log output was exactly N-1 bytes plus the terminating \0, kernel logic couldn't distinguish this condition from the log truncation scenario, which would also end up with truncated log contents of N-1 bytes plus the terminating \0. But now, with log->end_pos being a logical position that can go beyond the actual log buffer size, we can distinguish these two conditions, which we do in this patch. This plays nicely with returning log_size_actual (implemented in the UAPI in the next patch), as we can now guarantee that if the user takes such log_size_actual and provides a log buffer of that exact size, they will not get -ENOSPC in return. All in all, these changes conceptually unify fixed and rolling log modes much better, and enable a nice feature requested by users: knowing what the size of the buffer should be to avoid -ENOSPC. We'll plumb this through the UAPI and the code in the next patch. 
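To make the accounting concrete, here is a sketch of the resulting truncation check with worked numbers (illustrative and simplified from the actual kernel helpers):

  /* len_total: physical size of the user-supplied buffer
   * len_max:   maximum logical log size ever reached, maintained by
   *            bpf_vlog_update_len_max() across bpf_vlog_reset() calls
   */
  static bool log_truncated(u32 len_max, u32 len_total)
  {
      /* e.g. a 16-byte buffer that saw 40 bytes of output over the
       * log's lifetime: len_max = 40 > len_total = 16, so the log was
       * truncated, and 40 is exactly the size to report back to users
       */
      return len_max > len_total;
  }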
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-12-andrii@kernel.org --- include/linux/bpf_verifier.h | 12 ++----- kernel/bpf/log.c | 69 ++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4c926227f612..98d2eb382dbb 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -504,6 +504,7 @@ struct bpf_verifier_log { char __user *ubuf; u32 level; u32 len_total; + u32 len_max; char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; }; @@ -517,23 +518,16 @@ struct bpf_verifier_log { #define BPF_LOG_MIN_ALIGNMENT 8U #define BPF_LOG_ALIGNMENT 40U -static inline u32 bpf_log_used(const struct bpf_verifier_log *log) -{ - return log->end_pos - log->start_pos; -} - static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) { if (log->level & BPF_LOG_FIXED) - return bpf_log_used(log) >= log->len_total - 1; + return log->end_pos >= log->len_total; return false; } static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { - return log && - ((log->level && log->ubuf && !bpf_verifier_log_full(log)) || - log->level == BPF_LOG_KERNEL); + return log && log->level; } #define BPF_MAX_SUBPROGS 256 diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index c778f3b290cb..47bea2fad6fe 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -16,10 +16,26 @@ bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); } +static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len) +{ + /* add_len includes terminal \0, so no need for +1. */ + u64 len = log->end_pos + add_len; + + /* log->len_max could be larger than our current len due to + * bpf_vlog_reset() calls, so we maintain the max of any length at any + * previous point + */ + if (len > UINT_MAX) + log->len_max = UINT_MAX; + else if (len > log->len_max) + log->len_max = len; +} + void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args) { - unsigned int n; + u64 cur_pos; + u32 new_n, n; n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); @@ -33,20 +49,26 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, return; } + n += 1; /* include terminating zero */ + bpf_vlog_update_len_max(log, n); + if (log->level & BPF_LOG_FIXED) { - n = min(log->len_total - bpf_log_used(log) - 1, n); - log->kbuf[n] = '\0'; - n += 1; - - if (copy_to_user(log->ubuf + log->end_pos, log->kbuf, n)) - goto fail; + /* check if we have at least something to put into user buf */ + new_n = 0; + if (log->end_pos < log->len_total) { + new_n = min_t(u32, log->len_total - log->end_pos, n); + log->kbuf[new_n - 1] = '\0'; + } + cur_pos = log->end_pos; log->end_pos += n - 1; /* don't count terminating '\0' */ - } else { - u64 new_end, new_start, cur_pos; - u32 buf_start, buf_end, new_n; - n += 1; + if (log->ubuf && new_n && + copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n)) + goto fail; + } else { + u64 new_end, new_start; + u32 buf_start, buf_end, new_n; new_end = log->end_pos + n; if (new_end - log->start_pos >= log->len_total) @@ -65,6 +87,12 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, if (buf_end == 0) buf_end = log->len_total; + log->start_pos = new_start; + log->end_pos = new_end - 1; /* don't count terminating '\0' */ + + if (!log->ubuf) + return; + /* if buf_start > 
buf_end, we wrapped around; * if buf_start == buf_end, then we fill ubuf completely; we * can't have buf_start == buf_end to mean that there is * nothing in ubuf, because we always write at least * something, even if terminal '\0' */ @@ -88,9 +116,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, buf_end)) goto fail; } - - log->start_pos = new_start; - log->end_pos = new_end - 1; /* don't count terminating '\0' */ } return; @@ -116,8 +141,13 @@ void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos) log->end_pos = new_pos; if (log->end_pos < log->start_pos) log->start_pos = log->end_pos; - div_u64_rem(new_pos, log->len_total, &pos); - if (put_user(zero, log->ubuf + pos)) + + if (log->level & BPF_LOG_FIXED) + pos = log->end_pos + 1; + else + div_u64_rem(new_pos, log->len_total, &pos); + + if (log->ubuf && pos < log->len_total && put_user(zero, log->ubuf + pos)) log->ubuf = NULL; } @@ -169,12 +199,7 @@ static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en bool bpf_vlog_truncated(const struct bpf_verifier_log *log) { - if (!log->level) - return false; - else if (log->level & BPF_LOG_FIXED) - return bpf_log_used(log) >= log->len_total - 1; - else - return log->start_pos > 0; + return log->len_max > log->len_total; } void bpf_vlog_finalize(struct bpf_verifier_log *log) From 47a71c1f9af0a334c9dfa97633c41de4feda4287 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:58 -0700 Subject: [PATCH 232/260] bpf: Add log_true_size output field to return necessary log buffer size Add output-only log_true_size and btf_log_true_size fields to the BPF_PROG_LOAD and BPF_BTF_LOAD commands, respectively. They return the size of the log buffer necessary to fit all the log contents at the specified log_level. This is very useful for BPF loader libraries like libbpf to size the log buffer correctly, but it could also be used by users directly, if necessary. This patch plumbs all of this through the code, taking into account the actual bpf_attr size provided by the user to determine whether these new fields are expected by users, and, if they are, setting them from the kernel on return. We refactor the btf_parse() function to accommodate this, moving attr and uattr handling inside it. The rest is very straightforward code, which is split from the logging accounting changes in the previous patch to make it simpler to review the logic vs the UAPI changes. 
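From user space, the new fields are read back out of bpf_attr after the syscall returns. A minimal sketch of direct (non-libbpf) usage, assuming insns/insn_cnt and log_buf are prepared, <linux/bpf.h>, <unistd.h> and <sys/syscall.h> are included, and error handling is elided:

  union bpf_attr attr = {};

  attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  attr.insns = (__u64)(unsigned long)insns;
  attr.insn_cnt = insn_cnt;
  attr.license = (__u64)(unsigned long)"GPL";
  attr.log_level = 1;
  attr.log_buf = (__u64)(unsigned long)log_buf;
  attr.log_size = log_buf_sz;

  int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  /* attr.log_true_size now holds the full log contents size, including
   * on an -ENOSPC failure, because the kernel fills it in before
   * checking for truncation
   */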
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-13-andrii@kernel.org --- include/linux/bpf.h | 2 +- include/linux/btf.h | 2 +- include/uapi/linux/bpf.h | 10 ++++++++++ kernel/bpf/btf.c | 32 ++++++++++++++++++-------------- kernel/bpf/syscall.c | 16 ++++++++-------- kernel/bpf/verifier.c | 8 +++++++- tools/include/uapi/linux/bpf.h | 12 +++++++++++- 7 files changed, 56 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 002a811b6b90..2c6095bd7d69 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2175,7 +2175,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); diff --git a/include/linux/btf.h b/include/linux/btf.h index d53b10cc55f2..495250162422 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -125,7 +125,7 @@ extern const struct file_operations btf_fops; void btf_get(struct btf *btf); void btf_put(struct btf *btf); -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr); +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e3d3b5160d26..3823100b7934 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1407,6 +1407,11 @@ union bpf_attr { __aligned_u64 fd_array; /* array of FDs */ __aligned_u64 core_relos; __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1492,6 +1497,11 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). 
+ */ + __u32 btf_log_true_size; }; struct { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5aa540ee611f..0748cf4b8ab6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5504,9 +5504,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env, return 0; } -static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, - u32 log_level, char __user *log_ubuf, u32 log_size) +static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { + bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel); + char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; @@ -5514,7 +5515,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, u8 *data; int err; - if (btf_data_size > BTF_MAX_SIZE) + if (attr->btf_size > BTF_MAX_SIZE) return ERR_PTR(-E2BIG); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); @@ -5522,13 +5523,13 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, return ERR_PTR(-ENOMEM); log = &env->log; - if (log_level || log_ubuf || log_size) { + if (attr->btf_log_level || log_ubuf || attr->btf_log_size) { /* user requested verbose verifier output * and supplied buffer to store the verification trace */ - log->level = log_level; + log->level = attr->btf_log_level; log->ubuf = log_ubuf; - log->len_total = log_size; + log->len_total = attr->btf_log_size; /* log attributes have to be sane */ if (!bpf_verifier_log_attr_valid(log)) { @@ -5544,16 +5545,16 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, } env->btf = btf; - data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); + data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN); if (!data) { err = -ENOMEM; goto errout; } btf->data = data; - btf->data_size = btf_data_size; + btf->data_size = attr->btf_size; - if (copy_from_bpfptr(data, btf_data, btf_data_size)) { + if (copy_from_bpfptr(data, btf_data, attr->btf_size)) { err = -EFAULT; goto errout; } @@ -5594,6 +5595,12 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, } bpf_vlog_finalize(log); + if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), + &log->len_max, sizeof(log->len_max))) { + err = -EFAULT; + goto errout_meta; + } if (bpf_vlog_truncated(log)) { err = -ENOSPC; goto errout_meta; @@ -7218,15 +7225,12 @@ static int __btf_new_fd(struct btf *btf) return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr) +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { struct btf *btf; int ret; - btf = btf_parse(make_bpfptr(attr->btf, uattr.is_kernel), - attr->btf_size, attr->btf_log_level, - u64_to_user_ptr(attr->btf_log_buf), - attr->btf_log_size); + btf = btf_parse(attr, uattr, uattr_size); if (IS_ERR(btf)) return PTR_ERR(btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e18ac7fdc210..6d575505f89c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2501,9 +2501,9 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size +#define BPF_PROG_LOAD_LAST_FIELD log_true_size -static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) +static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { 
enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; @@ -2653,7 +2653,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) goto free_prog_sec; /* run eBPF verifier */ - err = bpf_check(&prog, attr, uattr); + err = bpf_check(&prog, attr, uattr, uattr_size); if (err < 0) goto free_used_maps; @@ -4371,9 +4371,9 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_log_level +#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size -static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr) +static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; @@ -4381,7 +4381,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr) if (!bpf_capable()) return -EPERM; - return btf_new_fd(attr, uattr); + return btf_new_fd(attr, uattr, uattr_size); } #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id @@ -5059,7 +5059,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) err = map_freeze(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr, uattr); + err = bpf_prog_load(&attr, uattr, size); break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); @@ -5104,7 +5104,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) err = bpf_raw_tracepoint_open(&attr); break; case BPF_BTF_LOAD: - err = bpf_btf_load(&attr, uattr); + err = bpf_btf_load(&attr, uattr, size); break; case BPF_BTF_GET_FD_BY_ID: err = bpf_btf_get_fd_by_id(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a98cbc046d1e..308e7abeb979 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18694,7 +18694,7 @@ struct btf *bpf_get_btf_vmlinux(void) return btf_vmlinux; } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; @@ -18861,6 +18861,12 @@ skip_full_check: env->prog->aux->verified_insns = env->insn_processed; bpf_vlog_finalize(log); + if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), + &log->len_max, sizeof(log->len_max))) { + ret = -EFAULT; + goto err_release_maps; + } if (bpf_vlog_truncated(log)) ret = -ENOSPC; if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d6c5a022ae28..3823100b7934 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1407,6 +1407,11 @@ union bpf_attr { __aligned_u64 fd_array; /* array of FDs */ __aligned_u64 core_relos; __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1492,6 +1497,11 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). 
+ */ + __u32 btf_log_true_size; }; struct { @@ -1513,7 +1523,7 @@ union bpf_attr { struct { /* struct used by BPF_LINK_CREATE command */ union { __u32 prog_fd; /* eBPF program to attach */ - __u32 map_fd; /* eBPF struct_ops to attach */ + __u32 map_fd; /* struct_ops to attach */ }; union { __u32 target_fd; /* object to attach to */ From bdcab4144f5da97cc0fa7e1dd63b8475e10c8f0a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:59 -0700 Subject: [PATCH 233/260] bpf: Simplify internal verifier log interface Simplify internal verifier log API down to bpf_vlog_init() and bpf_vlog_finalize(). The former handles input arguments validation in one place and makes it easier to change it. The latter subsumes -ENOSPC (truncation) and -EFAULT handling and simplifies both caller's code (bpf_check() and btf_parse()). For btf_parse(), this patch also makes sure that verifier log finalization happens even if there is some error condition during BTF verification process prior to normal finalization step. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-14-andrii@kernel.org --- include/linux/bpf_verifier.h | 13 ++------ kernel/bpf/btf.c | 65 ++++++++++++++++++------------------ kernel/bpf/log.c | 48 +++++++++++++++++++++----- kernel/bpf/verifier.c | 39 +++++++++------------- 4 files changed, 90 insertions(+), 75 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 98d2eb382dbb..f03852b89d28 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -518,13 +518,6 @@ struct bpf_verifier_log { #define BPF_LOG_MIN_ALIGNMENT 8U #define BPF_LOG_ALIGNMENT 40U -static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) -{ - if (log->level & BPF_LOG_FIXED) - return log->end_pos >= log->len_total; - return false; -} - static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { return log && log->level; @@ -612,16 +605,16 @@ struct bpf_verifier_env { char type_str_buf[TYPE_STR_BUF_LEN]; }; -bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log); __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...); +int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, + char __user *log_buf, u32 log_size); void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); -void bpf_vlog_finalize(struct bpf_verifier_log *log); -bool bpf_vlog_truncated(const struct bpf_verifier_log *log); +int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0748cf4b8ab6..ffc31a1c84af 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5504,16 +5504,30 @@ static int btf_check_type_tags(struct btf_verifier_env *env, return 0; } +static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) +{ + u32 log_true_size; + int err; + + err = bpf_vlog_finalize(log, &log_true_size); + + if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), + &log_true_size, sizeof(log_true_size))) + err = -EFAULT; + + return err; +} + static struct btf 
*btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel); char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; - struct bpf_verifier_log *log; struct btf *btf = NULL; u8 *data; - int err; + int err, ret; if (attr->btf_size > BTF_MAX_SIZE) return ERR_PTR(-E2BIG); @@ -5522,21 +5536,13 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat if (!env) return ERR_PTR(-ENOMEM); - log = &env->log; - if (attr->btf_log_level || log_ubuf || attr->btf_log_size) { - /* user requested verbose verifier output - * and supplied buffer to store the verification trace - */ - log->level = attr->btf_log_level; - log->ubuf = log_ubuf; - log->len_total = attr->btf_log_size; - - /* log attributes have to be sane */ - if (!bpf_verifier_log_attr_valid(log)) { - err = -EINVAL; - goto errout; - } - } + /* user could have requested verbose verifier output + * and supplied buffer to store the verification trace + */ + err = bpf_vlog_init(&env->log, attr->btf_log_level, + log_ubuf, attr->btf_log_size); + if (err) + goto errout_free; btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { @@ -5577,7 +5583,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat if (err) goto errout; - struct_meta_tab = btf_parse_struct_metas(log, btf); + struct_meta_tab = btf_parse_struct_metas(&env->log, btf); if (IS_ERR(struct_meta_tab)) { err = PTR_ERR(struct_meta_tab); goto errout; @@ -5594,21 +5600,9 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat } } - bpf_vlog_finalize(log); - if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), - &log->len_max, sizeof(log->len_max))) { - err = -EFAULT; - goto errout_meta; - } - if (bpf_vlog_truncated(log)) { - err = -ENOSPC; - goto errout_meta; - } - if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) { - err = -EFAULT; - goto errout_meta; - } + err = finalize_log(&env->log, uattr, uattr_size); + if (err) + goto errout_free; btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); @@ -5617,6 +5611,11 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat errout_meta: btf_free_struct_meta_tab(btf); errout: + /* overwrite err with -ENOSPC or -EFAULT */ + ret = finalize_log(&env->log, uattr, uattr_size); + if (ret) + err = ret; +errout_free: btf_verifier_env_free(env); if (btf) btf_free(btf); diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 47bea2fad6fe..1fae2c5d7ae4 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -10,12 +10,26 @@ #include #include -bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { return log->len_total > 0 && log->len_total <= UINT_MAX >> 2 && log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); } +int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, + char __user *log_buf, u32 log_size) +{ + log->level = log_level; + log->ubuf = log_buf; + log->len_total = log_size; + + /* log attributes have to be sane */ + if (!bpf_verifier_log_attr_valid(log)) + return -EINVAL; + + return 0; +} + static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len) { /* add_len includes terminal \0, so no need for +1. 
*/ @@ -197,24 +211,25 @@ static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en return 0; } -bool bpf_vlog_truncated(const struct bpf_verifier_log *log) +static bool bpf_vlog_truncated(const struct bpf_verifier_log *log) { return log->len_max > log->len_total; } -void bpf_vlog_finalize(struct bpf_verifier_log *log) +int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual) { u32 sublen; int err; - if (!log || !log->level || !log->ubuf) - return; - if ((log->level & BPF_LOG_FIXED) || log->level == BPF_LOG_KERNEL) - return; + *log_size_actual = 0; + if (!log || log->level == 0 || log->level == BPF_LOG_KERNEL) + return 0; + if (!log->ubuf) + goto skip_log_rotate; /* If we never truncated log, there is nothing to move around. */ - if (log->start_pos == 0) - return; + if ((log->level & BPF_LOG_FIXED) || log->start_pos == 0) + goto skip_log_rotate; /* Otherwise we need to rotate log contents to make it start from the * buffer beginning and be a continuous zero-terminated string. Note @@ -257,6 +272,21 @@ void bpf_vlog_finalize(struct bpf_verifier_log *log) err = err ?: bpf_vlog_reverse_ubuf(log, sublen, log->len_total); if (err) log->ubuf = NULL; + +skip_log_rotate: + *log_size_actual = log->len_max; + + /* properly initialized log has either both ubuf!=NULL and len_total>0 + * or ubuf==NULL and len_total==0, so if this condition doesn't hold, + * we got a fault somewhere along the way, so report it back + */ + if (!!log->ubuf != !!log->len_total) + return -EFAULT; + + if (bpf_vlog_truncated(log)) + return -ENOSPC; + + return 0; } /* log_level controls verbosity level of eBPF verifier. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 308e7abeb979..d6db6de3e9ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18698,8 +18698,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; - struct bpf_verifier_log *log; - int i, len, ret = -EINVAL; + int i, len, ret = -EINVAL, err; + u32 log_true_size; bool is_priv; /* no program is valid */ @@ -18712,7 +18712,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; - log = &env->log; len = (*prog)->len; env->insn_aux_data = @@ -18733,20 +18732,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (!is_priv) mutex_lock(&bpf_verifier_lock); - if (attr->log_level || attr->log_buf || attr->log_size) { - /* user requested verbose verifier output - * and supplied buffer to store the verification trace - */ - log->level = attr->log_level; - log->ubuf = (char __user *) (unsigned long) attr->log_buf; - log->len_total = attr->log_size; - - /* log attributes have to be sane */ - if (!bpf_verifier_log_attr_valid(log)) { - ret = -EINVAL; - goto err_unlock; - } - } + /* user could have requested verbose verifier output + * and supplied buffer to store the verification trace + */ + ret = bpf_vlog_init(&env->log, attr->log_level, + (char __user *) (unsigned long) attr->log_buf, + attr->log_size); + if (ret) + goto err_unlock; mark_verifier_state_clean(env); @@ -18860,17 +18853,17 @@ skip_full_check: print_verification_stats(env); env->prog->aux->verified_insns = env->insn_processed; - bpf_vlog_finalize(log); + /* preserve original error even if log finalization is successful */ + err = bpf_vlog_finalize(&env->log, &log_true_size); + if (err) + ret = err; 
+ if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), - &log->len_max, sizeof(log->len_max))) { + &log_true_size, sizeof(log_true_size))) { ret = -EFAULT; goto err_release_maps; } - if (bpf_vlog_truncated(log)) - ret = -ENOSPC; - if (log->level && log->level != BPF_LOG_KERNEL && !log->ubuf) - ret = -EFAULT; if (ret) goto err_release_maps; From fac08d45e2531f91d8fb3d11fc6576f588049476 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:00 -0700 Subject: [PATCH 234/260] bpf: Relax log_buf NULL conditions when log_level>0 is requested Drop the log_size>0 and log_buf!=NULL condition when log_level>0. This allows users to request log_true_size of a full log without providing actual (even if small) log buffer. Verifier log handling code was mostly ready to handle NULL log->ubuf, so only few small changes were necessary to prevent NULL log->ubuf from causing problems. Note, that if user provided NULL log_buf with log_level>0 we don't consider this a log truncation, and thus won't return -ENOSPC. We also enforce that either (log_buf==NULL && log_size==0) or (log_buf!=NULL && log_size>0). Suggested-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-15-andrii@kernel.org --- kernel/bpf/log.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 1fae2c5d7ae4..046ddff37a76 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -12,8 +12,17 @@ static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { - return log->len_total > 0 && log->len_total <= UINT_MAX >> 2 && - log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); + /* ubuf and len_total should both be specified (or not) together */ + if (!!log->ubuf != !!log->len_total) + return false; + /* log buf without log_level is meaningless */ + if (log->ubuf && log->level == 0) + return false; + if (log->level & ~BPF_LOG_MASK) + return false; + if (log->len_total > UINT_MAX >> 2) + return false; + return true; } int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, @@ -89,9 +98,15 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, new_start = new_end - log->len_total; else new_start = log->start_pos; + + log->start_pos = new_start; + log->end_pos = new_end - 1; /* don't count terminating '\0' */ + + if (!log->ubuf) + return; + new_n = min(n, log->len_total); cur_pos = new_end - new_n; - div_u64_rem(cur_pos, log->len_total, &buf_start); div_u64_rem(new_end, log->len_total, &buf_end); /* new_end and buf_end are exclusive indices, so if buf_end is @@ -101,12 +116,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, if (buf_end == 0) buf_end = log->len_total; - log->start_pos = new_start; - log->end_pos = new_end - 1; /* don't count terminating '\0' */ - - if (!log->ubuf) - return; - /* if buf_start > buf_end, we wrapped around; * if buf_start == buf_end, then we fill ubuf completely; we * can't have buf_start == buf_end to mean that there is @@ -156,12 +165,15 @@ void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos) if (log->end_pos < log->start_pos) log->start_pos = log->end_pos; + if (!log->ubuf) + return; + if (log->level & BPF_LOG_FIXED) pos = log->end_pos + 1; else div_u64_rem(new_pos, log->len_total, &pos); - if (log->ubuf && pos < log->len_total && 
put_user(zero, log->ubuf + pos)) + if (pos < log->len_total && put_user(zero, log->ubuf + pos)) log->ubuf = NULL; } @@ -211,11 +223,6 @@ static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int en return 0; } -static bool bpf_vlog_truncated(const struct bpf_verifier_log *log) -{ - return log->len_max > log->len_total; -} - int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual) { u32 sublen; @@ -228,7 +235,7 @@ int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual) if (!log->ubuf) goto skip_log_rotate; /* If we never truncated log, there is nothing to move around. */ - if ((log->level & BPF_LOG_FIXED) || log->start_pos == 0) + if (log->start_pos == 0) goto skip_log_rotate; /* Otherwise we need to rotate log contents to make it start from the @@ -283,7 +290,8 @@ skip_log_rotate: if (!!log->ubuf != !!log->len_total) return -EFAULT; - if (bpf_vlog_truncated(log)) + /* did truncation actually happen? */ + if (log->ubuf && log->len_max > log->len_total) return -ENOSPC; return 0; From 94e55c0fdaf4268bdda2d3b375bc61daba056e85 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:01 -0700 Subject: [PATCH 235/260] libbpf: Wire through log_true_size returned from kernel for BPF_PROG_LOAD Add an output-only log_true_size field to bpf_prog_load_opts to return the bpf_attr->log_true_size value back from the bpf() syscall. Note that we have to drop the const modifier from opts in bpf_prog_load(). This could potentially cause a compilation error for some users. But the usual practice is to define bpf_prog_load_opts as a local variable next to the bpf_prog_load() call and pass a pointer to it, so const vs non-const makes no difference and won't even come up in most (if not all) cases. There are no runtime or ABI backwards/forward compatibility issues at all. If the user provides an old struct bpf_prog_load_opts, libbpf won't set the new field. If an old libbpf is provided a new bpf_prog_load_opts, nothing will happen either, as the old libbpf doesn't yet know about this new field. Adding a new variant of bpf_prog_load() just for this seems like a big and unnecessary overkill. Corroborating evidence is the fact that the entire selftests/bpf code base required no adjustment whatsoever. 
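A sketch of the intended usage from the caller's side (illustrative; assumes insns/insn_cnt are prepared and <bpf/bpf.h> is included):

  LIBBPF_OPTS(bpf_prog_load_opts, opts,
      .log_buf = log_buf,
      .log_size = sizeof(log_buf),
      .log_level = 1,
  );
  int fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "prog", "GPL",
                         insns, insn_cnt, &opts);
  if (fd < 0 && errno == ENOSPC)
      /* opts.log_true_size says how big log_buf should have been */
      fprintf(stderr, "log buffer needs %u bytes\n", opts.log_true_size);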
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230406234205.323208-16-andrii@kernel.org --- tools/lib/bpf/bpf.c | 7 +++++-- tools/lib/bpf/bpf.h | 11 +++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index f1d04ee14d83..a031de0a707a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -230,9 +230,9 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) + struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, fd_array); + const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -312,6 +312,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, } fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); if (fd >= 0) return fd; @@ -352,6 +353,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, } fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); if (fd >= 0) goto done; } @@ -366,6 +368,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.log_level = 1; fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); } done: /* free() doesn't affect errno, so we don't need to restore it */ diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b073e73439ef..cfa0cdfa50f8 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -96,13 +96,20 @@ struct bpf_prog_load_opts { __u32 log_level; __u32 log_size; char *log_buf; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + * If kernel doesn't support this feature, log_size is left unchanged. + */ + __u32 log_true_size; + size_t :0; }; -#define bpf_prog_load_opts__last_field log_buf +#define bpf_prog_load_opts__last_field log_true_size LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); + struct bpf_prog_load_opts *opts); /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 From 097d8002b754a865beef880e5c1cdc3ef7c2163d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:02 -0700 Subject: [PATCH 236/260] libbpf: Wire through log_true_size for bpf_btf_load() API Similar to what we did for bpf_prog_load() in previous patch, wire returning of log_true_size value from kernel back to the user through OPTS out field. 
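The equivalent sketch for BTF loading (illustrative usage, not code from the patch):

  LIBBPF_OPTS(bpf_btf_load_opts, opts,
      .log_buf = log_buf,
      .log_size = sizeof(log_buf),
      .log_level = 1,
  );
  int btf_fd = bpf_btf_load(btf_data, btf_data_sz, &opts);
  /* whether or not btf_fd is valid, opts.log_true_size reflects the
   * buffer size needed to hold the full verification log
   */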
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230406234205.323208-17-andrii@kernel.org --- tools/lib/bpf/bpf.c | 6 ++++-- tools/lib/bpf/bpf.h | 11 +++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a031de0a707a..128ac723c4ea 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1083,9 +1083,9 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return libbpf_err_errno(fd); } -int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) +int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1128,6 +1128,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa attr.btf_log_level = 1; fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); } + + OPTS_SET(opts, log_true_size, attr.btf_log_true_size); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index cfa0cdfa50f8..a2c091389b18 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -124,11 +124,18 @@ struct bpf_btf_load_opts { char *log_buf; __u32 log_level; __u32 log_size; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + * If kernel doesn't support this feature, log_size is left unchanged. + */ + __u32 log_true_size; + size_t :0; }; -#define bpf_btf_load_opts__last_field log_size +#define bpf_btf_load_opts__last_field log_true_size LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, - const struct bpf_btf_load_opts *opts); + struct bpf_btf_load_opts *opts); LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); From 5787540827a9e2cdecf38166e648b2924a57443f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:03 -0700 Subject: [PATCH 237/260] selftests/bpf: Add tests to validate log_true_size feature Add additional test cases validating that log_true_size is consistent between fixed and rotating log modes, and that log_true_size can be used *exactly* without causing -ENOSPC, while using just 1 byte shorter log buffer would cause -ENOSPC. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-18-andrii@kernel.org --- .../selftests/bpf/prog_tests/verifier_log.c | 92 +++++++++++++++---- 1 file changed, 76 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 3284108a6ce8..2ec82fc60c03 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -18,25 +18,41 @@ static bool check_prog_load(int prog_fd, bool expect_err, const char *tag) if (!ASSERT_GT(prog_fd, 0, tag)) return false; } + if (prog_fd >= 0) + close(prog_fd); return true; } +static struct { + /* strategically placed before others to avoid accidental modification by kernel */ + char filler[1024]; + char buf[1024]; + /* strategically placed after buf[] to catch more accidental corruptions */ + char reference[1024]; +} logs; +static const struct bpf_insn *insns; +static size_t insn_cnt; + +static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error) +{ + int prog_fd; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_prog", + "GPL", insns, insn_cnt, opts); + check_prog_load(prog_fd, expect_load_error, "prog_load"); + + return prog_fd; +} + static void verif_log_subtest(const char *name, bool expect_load_error, int log_level) { LIBBPF_OPTS(bpf_prog_load_opts, opts); - struct { - /* strategically placed before others to avoid accidental modification by kernel */ - char filler[1024]; - char buf[1024]; - /* strategically placed after buf[] to catch more accidental corruptions */ - char reference[1024]; - } logs; char *exp_log, prog_name[16], op_name[32]; struct test_log_buf *skel; struct bpf_program *prog; - const struct bpf_insn *insns; - size_t insn_cnt, fixed_log_sz; - int i, mode, err, prog_fd; + size_t fixed_log_sz; + __u32 log_true_sz_fixed, log_true_sz_rolling; + int i, mode, err, prog_fd, res; skel = test_log_buf__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -61,11 +77,7 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ opts.log_buf = logs.reference; opts.log_size = sizeof(logs.reference); opts.log_level = log_level | 8 /* BPF_LOG_FIXED */; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed", - "GPL", insns, insn_cnt, &opts); - if (!check_prog_load(prog_fd, expect_load_error, "fixed_buf_prog_load")) - goto cleanup; - close(prog_fd); + load_prog(&opts, expect_load_error); fixed_log_sz = strlen(logs.reference) + 1; if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz")) @@ -89,7 +101,7 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ opts.log_level = log_level | 8; /* fixed-length log */ opts.log_size = 25; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed50", + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed25", "GPL", insns, insn_cnt, &opts); if (!ASSERT_EQ(prog_fd, -ENOSPC, "unexpected_log_fixed_prog_load_result")) { if (prog_fd >= 0) @@ -162,6 +174,54 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ } } + /* (FIXED) get actual log size */ + opts.log_buf = logs.buf; + opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ + opts.log_size = sizeof(logs.buf); + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed"); + + log_true_sz_fixed = opts.log_true_size; + ASSERT_GT(log_true_sz_fixed, 0, 
"log_true_sz_fixed"); + + /* (ROLLING) get actual log size */ + opts.log_buf = logs.buf; + opts.log_level = log_level; + opts.log_size = sizeof(logs.buf); + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling"); + + log_true_sz_rolling = opts.log_true_size; + ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq"); + + /* (FIXED) expect -ENOSPC for one byte short log */ + opts.log_buf = logs.buf; + opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ + opts.log_size = log_true_sz_fixed - 1; + res = load_prog(&opts, true /* should fail */); + ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_fixed"); + + /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */ + opts.log_buf = logs.buf; + opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ + opts.log_size = log_true_sz_fixed; + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_fixed"); + + /* (ROLLING) expect -ENOSPC for one byte short log */ + opts.log_buf = logs.buf; + opts.log_level = log_level; + opts.log_size = log_true_sz_rolling - 1; + res = load_prog(&opts, true /* should fail */); + ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_rolling"); + + /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */ + opts.log_buf = logs.buf; + opts.log_level = log_level; + opts.log_size = log_true_sz_rolling; + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_rolling"); + cleanup: test_log_buf__destroy(skel); } From be983f44274f575e42025130e3c62b8718b0a29a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:04 -0700 Subject: [PATCH 238/260] selftests/bpf: Add testing of log_buf==NULL condition for BPF_PROG_LOAD Add few extra test conditions to validate that it's ok to pass log_buf==NULL and log_size==0 to BPF_PROG_LOAD command with the intent to get log_true_size without providing a buffer. Test that log_buf==NULL condition *does not* return -ENOSPC. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-19-andrii@kernel.org --- .../selftests/bpf/prog_tests/verifier_log.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 2ec82fc60c03..9ae0ac6e3b25 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -178,26 +178,47 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ opts.log_buf = logs.buf; opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ opts.log_size = sizeof(logs.buf); + opts.log_true_size = 0; res = load_prog(&opts, expect_load_error); ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed"); log_true_sz_fixed = opts.log_true_size; ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed"); + /* (FIXED, NULL) get actual log size */ + opts.log_buf = NULL; + opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ + opts.log_size = 0; + opts.log_true_size = 0; + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed_null"); + ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq"); + /* (ROLLING) get actual log size */ opts.log_buf = logs.buf; opts.log_level = log_level; opts.log_size = sizeof(logs.buf); + opts.log_true_size = 0; res = load_prog(&opts, expect_load_error); ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling"); log_true_sz_rolling = opts.log_true_size; ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq"); + /* (ROLLING, NULL) get actual log size */ + opts.log_buf = NULL; + opts.log_level = log_level; + opts.log_size = 0; + opts.log_true_size = 0; + res = load_prog(&opts, expect_load_error); + ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling_null"); + ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq"); + /* (FIXED) expect -ENOSPC for one byte short log */ opts.log_buf = logs.buf; opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ opts.log_size = log_true_sz_fixed - 1; + opts.log_true_size = 0; res = load_prog(&opts, true /* should fail */); ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_fixed"); @@ -205,6 +226,7 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ opts.log_buf = logs.buf; opts.log_level = log_level | 8; /* BPF_LOG_FIXED */ opts.log_size = log_true_sz_fixed; + opts.log_true_size = 0; res = load_prog(&opts, expect_load_error); ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_fixed"); @@ -219,6 +241,7 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ opts.log_buf = logs.buf; opts.log_level = log_level; opts.log_size = log_true_sz_rolling; + opts.log_true_size = 0; res = load_prog(&opts, expect_load_error); ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_rolling"); From 054b6c7866c7a2537fffd4aa12d88aac47db60f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:42:05 -0700 Subject: [PATCH 239/260] selftests/bpf: Add verifier log tests for BPF_BTF_LOAD command Add verifier log tests for the BPF_BTF_LOAD command, which are conceptually very similar to the BPF_PROG_LOAD tests. These are two separate commands dealing with the verbose verifier log, so both should be tested separately. Test that the log_buf==NULL condition *does not* return -ENOSPC.
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-20-andrii@kernel.org --- .../selftests/bpf/prog_tests/verifier_log.c | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 9ae0ac6e3b25..475092a78deb 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -249,6 +249,190 @@ cleanup: test_log_buf__destroy(skel); } +static const void *btf_data; +static u32 btf_data_sz; + +static int load_btf(struct bpf_btf_load_opts *opts, bool expect_err) +{ + int fd; + + fd = bpf_btf_load(btf_data, btf_data_sz, opts); + if (fd >= 0) + close(fd); + if (expect_err) + ASSERT_LT(fd, 0, "btf_load_failure"); + else /* !expect_err */ + ASSERT_GT(fd, 0, "btf_load_success"); + return fd; +} + +static void verif_btf_log_subtest(bool bad_btf) +{ + LIBBPF_OPTS(bpf_btf_load_opts, opts); + struct btf *btf; + struct btf_type *t; + char *exp_log, op_name[32]; + size_t fixed_log_sz; + __u32 log_true_sz_fixed, log_true_sz_rolling; + int i, res; + + /* prepare simple BTF contents */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "btf_new_empty")) + return; + res = btf__add_int(btf, "whatever", 4, 0); + if (!ASSERT_GT(res, 0, "btf_add_int_id")) + goto cleanup; + if (bad_btf) { + /* btf__add_int() doesn't allow bad value of size, so we'll just + * force-cast btf_type pointer and manually override size to invalid + * 3 if we need to simulate failure + */ + t = (void *)btf__type_by_id(btf, res); + if (!ASSERT_OK_PTR(t, "int_btf_type")) + goto cleanup; + t->size = 3; + } + + btf_data = btf__raw_data(btf, &btf_data_sz); + if (!ASSERT_OK_PTR(btf_data, "btf_data")) + goto cleanup; + + load_btf(&opts, bad_btf); + + opts.log_buf = logs.reference; + opts.log_size = sizeof(logs.reference); + opts.log_level = 1 | 8 /* BPF_LOG_FIXED */; + load_btf(&opts, bad_btf); + + fixed_log_sz = strlen(logs.reference) + 1; + if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz")) + goto cleanup; + memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz); + + /* validate BPF_LOG_FIXED truncation works as verifier log used to work */ + opts.log_buf = logs.buf; + opts.log_level = 1 | 8; /* fixed-length log */ + opts.log_size = 25; + res = load_btf(&opts, true); + ASSERT_EQ(res, -ENOSPC, "half_log_fd"); + ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25"); + ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name); + + /* validate rolling verifier log logic: try all variations of log buf + * length to force various truncation scenarios + */ + opts.log_buf = logs.buf; + opts.log_level = 1; /* rolling log */ + + /* prefill logs.buf with 'A's to detect any write beyond allowed length */ + memset(logs.filler, 'A', sizeof(logs.filler)); + logs.filler[sizeof(logs.filler) - 1] = '\0'; + memset(logs.buf, 'A', sizeof(logs.buf)); + logs.buf[sizeof(logs.buf) - 1] = '\0'; + + for (i = 1; i < fixed_log_sz; i++) { + opts.log_size = i; + + snprintf(op_name, sizeof(op_name), "log_roll_btf_load_%d", i); + res = load_btf(&opts, true); + if (!ASSERT_EQ(res, -ENOSPC, op_name)) + goto cleanup; + + exp_log = logs.reference + fixed_log_sz - i; + snprintf(op_name, sizeof(op_name), "log_roll_contents_%d", i); + if (!ASSERT_STREQ(logs.buf, exp_log, op_name)) { + printf("CMP:%d\nS1:'%s'\nS2:'%s'\n", + strcmp(logs.buf, exp_log), + logs.buf, exp_log); + goto cleanup; + } + + 
/* check that unused portions of logs.buf are not overwritten */ + snprintf(op_name, sizeof(op_name), "log_roll_unused_tail_%d", i); + if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) { + printf("CMP:%d\nS1:'%s'\nS2:'%s'\n", + strcmp(logs.buf + i, logs.filler + i), + logs.buf + i, logs.filler + i); + goto cleanup; + } + } + + /* (FIXED) get actual log size */ + opts.log_buf = logs.buf; + opts.log_level = 1 | 8; /* BPF_LOG_FIXED */ + opts.log_size = sizeof(logs.buf); + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed"); + + log_true_sz_fixed = opts.log_true_size; + ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed"); + + /* (FIXED, NULL) get actual log size */ + opts.log_buf = NULL; + opts.log_level = 1 | 8; /* BPF_LOG_FIXED */ + opts.log_size = 0; + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed_null"); + ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq"); + + /* (ROLLING) get actual log size */ + opts.log_buf = logs.buf; + opts.log_level = 1; + opts.log_size = sizeof(logs.buf); + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling"); + + log_true_sz_rolling = opts.log_true_size; + ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq"); + + /* (ROLLING, NULL) get actual log size */ + opts.log_buf = NULL; + opts.log_level = 1; + opts.log_size = 0; + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling_null"); + ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq"); + + /* (FIXED) expect -ENOSPC for one byte short log */ + opts.log_buf = logs.buf; + opts.log_level = 1 | 8; /* BPF_LOG_FIXED */ + opts.log_size = log_true_sz_fixed - 1; + opts.log_true_size = 0; + res = load_btf(&opts, true); + ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_fixed"); + + /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */ + opts.log_buf = logs.buf; + opts.log_level = 1 | 8; /* BPF_LOG_FIXED */ + opts.log_size = log_true_sz_fixed; + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_fixed"); + + /* (ROLLING) expect -ENOSPC for one byte short log */ + opts.log_buf = logs.buf; + opts.log_level = 1; + opts.log_size = log_true_sz_rolling - 1; + res = load_btf(&opts, true); + ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_rolling"); + + /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */ + opts.log_buf = logs.buf; + opts.log_level = 1; + opts.log_size = log_true_sz_rolling; + opts.log_true_size = 0; + res = load_btf(&opts, bad_btf); + ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_rolling"); + +cleanup: + btf__free(btf); +} + void test_verifier_log(void) { if (test__start_subtest("good_prog-level1")) @@ -259,4 +443,8 @@ void test_verifier_log(void) verif_log_subtest("bad_prog", true, 1); if (test__start_subtest("bad_prog-level2")) verif_log_subtest("bad_prog", true, 2); + if (test__start_subtest("bad_btf")) + verif_btf_log_subtest(true /* bad btf */); + if (test__start_subtest("good_btf")) + verif_btf_log_subtest(false /* !bad btf */); } From 91f2dc6838c19342f7f2993627c622835cc24890 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Mon, 10 Apr 2023 16:59:07 +0800 Subject: [PATCH 240/260] bpf/btf: Fix is_int_ptr() When tracing a kernel function whose argument type is u32*, btf_ctx_access() would report an error: arg2 type INT is not a struct.
The commit bb6728d75611 ("bpf: Allow access to int pointer arguments in tracing programs") added support for int pointer arguments, but did not skip modifiers before checking the pointed-to type. This patch fixes it. Fixes: bb6728d75611 ("bpf: Allow access to int pointer arguments in tracing programs") Co-developed-by: Chengming Zhou Signed-off-by: Chengming Zhou Signed-off-by: Feng Zhou Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20230410085908.98493-2-zhoufeng.zf@bytedance.com --- kernel/bpf/btf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ffc31a1c84af..913b9d717a4a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5921,12 +5921,8 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) static bool is_int_ptr(struct btf *btf, const struct btf_type *t) { - /* t comes in already as a pointer */ - t = btf_type_by_id(btf, t->type); - - /* allow const */ - if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST) - t = btf_type_by_id(btf, t->type); + /* skip modifiers */ + t = btf_type_skip_modifiers(btf, t->type, NULL); return btf_type_is_int(t); } From 75dcef8d3609d0b1d3497d6ed4809096513e0b83 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Mon, 10 Apr 2023 16:59:08 +0800 Subject: [PATCH 241/260] selftests/bpf: Add test to access u32 ptr argument in tracing program Adding a verifier test for accessing a u32 pointer argument in tracing programs. The test program loads the first argument of the bpf_fentry_test9 function, which is a u32 pointer, and checks that the verifier allows that. Co-developed-by: Chengming Zhou Signed-off-by: Chengming Zhou Signed-off-by: Feng Zhou Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20230410085908.98493-3-zhoufeng.zf@bytedance.com --- net/bpf/test_run.c | 8 +++++++- .../testing/selftests/bpf/verifier/btf_ctx_access.c | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index f1652f5fbd2e..68bdfc041a7b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -541,6 +541,11 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) return (long)arg->a; } +__bpf_kfunc u32 bpf_fentry_test9(u32 *a) +{ + return *a; +} + __bpf_kfunc int bpf_modify_return_test(int a, int *b) { *b += 1; @@ -855,7 +860,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || - bpf_fentry_test8(&arg) != 0) + bpf_fentry_test8(&arg) != 0 || + bpf_fentry_test9(&retval) != 0) goto out; break; case BPF_MODIFY_RETURN: diff --git a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c index 6340db6b46dc..0484d3de040d 100644 --- a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c +++ b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c @@ -10,3 +10,16 @@ .expected_attach_type = BPF_TRACE_FENTRY, .kfunc = "bpf_modify_return_test", }, + +{ + "btf_ctx_access u32 pointer accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), /* load 1st argument value (u32 pointer) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_fentry_test9", +}, From 10fd5f70c397782a97f411f25bfb312ea92b55bc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed,
12 Apr 2023 10:12:52 -0700 Subject: [PATCH 242/260] bpf: Handle NULL in bpf_local_storage_free. During OOM bpf_local_storage_alloc() may fail to allocate 'storage' and call to bpf_local_storage_free() with NULL pointer will cause a crash like: [ 271718.917646] BUG: kernel NULL pointer dereference, address: 00000000000000a0 [ 271719.019620] RIP: 0010:call_rcu+0x2d/0x240 [ 271719.216274] bpf_local_storage_alloc+0x19e/0x1e0 [ 271719.250121] bpf_local_storage_update+0x33b/0x740 Fixes: 7e30a8477b0b ("bpf: Add bpf_local_storage_free()") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230412171252.15635-1-alexei.starovoitov@gmail.com --- kernel/bpf/bpf_local_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index dab2ff4c99d9..47d9948d768f 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -157,6 +157,9 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool bpf_ma, bool reuse_now) { + if (!local_storage) + return; + if (!bpf_ma) { __bpf_local_storage_free(local_storage, reuse_now); return; From 1d71283987c729dceccce834a864c27301ba155e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 10 Apr 2023 23:16:31 -0500 Subject: [PATCH 243/260] bpf: Make bpf_cgroup_acquire() KF_RCU | KF_RET_NULL struct cgroup is already an RCU-safe type in the verifier. We can therefore update bpf_cgroup_acquire() to be KF_RCU | KF_RET_NULL, and subsequently remove bpf_cgroup_kptr_get(). This patch does the first of these by updating bpf_cgroup_acquire() to be KF_RCU | KF_RET_NULL, and also updates selftests accordingly. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230411041633.179404-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 5 ++- .../selftests/bpf/progs/cgrp_kfunc_common.h | 5 +++ .../selftests/bpf/progs/cgrp_kfunc_failure.c | 35 +++++++++++++++---- .../selftests/bpf/progs/cgrp_kfunc_success.c | 5 ++- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b6a5cda5bb59..71f0604bdc97 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2037,8 +2037,7 @@ __bpf_kfunc void bpf_task_release(struct task_struct *p) */ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) { - cgroup_get(cgrp); - return cgrp; + return cgroup_tryget(cgrp) ? 
cgrp : NULL; } /** @@ -2314,7 +2313,7 @@ BTF_ID_FLAGS(func, bpf_rbtree_add) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS -BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index d0b7cd0d09d7..b0e279f4652b 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -61,6 +61,11 @@ static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp) } acquired = bpf_cgroup_acquire(cgrp); + if (!acquired) { + bpf_map_delete_elem(&__cgrps_kfunc_map, &id); + return -ENOENT; + } + old = bpf_kptr_xchg(&v->cgrp, acquired); if (old) { bpf_cgroup_release(old); diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 48b2034cadb3..49347f12de39 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -41,6 +41,23 @@ int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */ acquired = bpf_cgroup_acquire(v->cgrp); + if (acquired) + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + /* + * Can't invoke bpf_cgroup_release() without checking the return value + * of bpf_cgroup_acquire(). + */ bpf_cgroup_release(acquired); return 0; @@ -54,7 +71,8 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */ acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp); - bpf_cgroup_release(acquired); + if (acquired) + bpf_cgroup_release(acquired); return 0; } @@ -67,7 +85,8 @@ int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) /* Can't acquire an untrusted struct cgroup * pointer. */ acquired = bpf_cgroup_acquire(cgrp); - bpf_cgroup_release(acquired); + if (acquired) + bpf_cgroup_release(acquired); return 0; } @@ -80,7 +99,8 @@ int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */ acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp); - bpf_cgroup_release(acquired); + if (acquired) + bpf_cgroup_release(acquired); return 0; } @@ -93,9 +113,8 @@ int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */ acquired = bpf_cgroup_acquire(NULL); - if (!acquired) - return 0; - bpf_cgroup_release(acquired); + if (acquired) + bpf_cgroup_release(acquired); return 0; } @@ -137,6 +156,8 @@ int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char * struct cgroup *kptr, *acquired; acquired = bpf_cgroup_acquire(cgrp); + if (!acquired) + return 0; /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. 
*/ kptr = bpf_cgroup_kptr_get(&acquired); @@ -256,6 +277,8 @@ int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) return -ENOENT; acquired = bpf_cgroup_acquire(cgrp); + if (!acquired) + return -ENOENT; old = bpf_kptr_xchg(&v->cgrp, acquired); diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 030aff700084..e9dbd1af05a7 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -38,7 +38,10 @@ int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char return 0; acquired = bpf_cgroup_acquire(cgrp); - bpf_cgroup_release(acquired); + if (!acquired) + err = 1; + else + bpf_cgroup_release(acquired); return 0; } From 6499fe6edc4fd5b91aed4d5cd84bd113e1c58d5f Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 10 Apr 2023 23:16:32 -0500 Subject: [PATCH 244/260] bpf: Remove bpf_cgroup_kptr_get() kfunc Now that bpf_cgroup_acquire() is KF_RCU | KF_RET_NULL, bpf_cgroup_kptr_get() is redundant. Let's remove it, and update selftests to instead use bpf_cgroup_acquire() where appropriate. The next patch will update the BPF documentation to not mention bpf_cgroup_kptr_get(). Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230411041633.179404-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 32 --------- .../selftests/bpf/progs/cgrp_kfunc_common.h | 3 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 68 +++---------------- .../selftests/bpf/progs/cgrp_kfunc_success.c | 10 ++- 4 files changed, 14 insertions(+), 99 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 71f0604bdc97..f04e60a4847f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2040,37 +2040,6 @@ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) return cgroup_tryget(cgrp) ? cgrp : NULL; } -/** - * bpf_cgroup_kptr_get - Acquire a reference on a struct cgroup kptr. A cgroup - * kptr acquired by this kfunc which is not subsequently stored in a map, must - * be released by calling bpf_cgroup_release(). - * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired. - */ -__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) -{ - struct cgroup *cgrp; - - rcu_read_lock(); - /* Another context could remove the cgroup from the map and release it - * at any time, including after we've done the lookup above. This is - * safe because we're in an RCU read region, so the cgroup is - * guaranteed to remain valid until at least the rcu_read_unlock() - * below. - */ - cgrp = READ_ONCE(*cgrpp); - - if (cgrp && !cgroup_tryget(cgrp)) - /* If the cgroup had been removed from the map and freed as - * described above, cgroup_tryget() will return false. The - * cgroup will be freed at some point after the current RCU gp - * has ended, so just return NULL to the user. - */ - cgrp = NULL; - rcu_read_unlock(); - - return cgrp; -} - /** * bpf_cgroup_release - Release the reference acquired on a cgroup. 
* If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to @@ -2314,7 +2283,6 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index b0e279f4652b..22914a70db54 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -21,10 +21,11 @@ struct hash_map { } __cgrps_kfunc_map SEC(".maps"); struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; -struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; void bpf_cgroup_release(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 49347f12de39..0fa564a5cc5b 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -133,59 +133,6 @@ int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *pat return 0; } -SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path) -{ - struct cgroup *kptr; - - /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */ - kptr = bpf_cgroup_kptr_get(&cgrp); - if (!kptr) - return 0; - - bpf_cgroup_release(kptr); - - return 0; -} - -SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path) -{ - struct cgroup *kptr, *acquired; - - acquired = bpf_cgroup_acquire(cgrp); - if (!acquired) - return 0; - - /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */ - kptr = bpf_cgroup_kptr_get(&acquired); - bpf_cgroup_release(acquired); - if (!kptr) - return 0; - - bpf_cgroup_release(kptr); - - return 0; -} - -SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 expected pointer to map value") -int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path) -{ - struct cgroup *kptr; - - /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. 
*/ - kptr = bpf_cgroup_kptr_get(NULL); - if (!kptr) - return 0; - - bpf_cgroup_release(kptr); - - return 0; -} - SEC("tp_btf/cgroup_mkdir") __failure __msg("Unreleased reference") int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) @@ -207,8 +154,8 @@ int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Unreleased reference") -int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) +__failure __msg("must be referenced or trusted") +int BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path) { struct cgroup *kptr; struct __cgrps_kfunc_map_value *v; @@ -217,11 +164,12 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) if (!v) return 0; - kptr = bpf_cgroup_kptr_get(&v->cgrp); - if (!kptr) - return 0; - - /* Kptr acquired above is never released. */ + bpf_rcu_read_lock(); + kptr = v->cgrp; + if (kptr) + /* Can't release a cgroup kptr stored in a map. */ + bpf_cgroup_release(kptr); + bpf_rcu_read_unlock(); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index e9dbd1af05a7..5354455a01be 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -126,13 +126,11 @@ int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) return 0; } - kptr = bpf_cgroup_kptr_get(&v->cgrp); - if (!kptr) { + bpf_rcu_read_lock(); + kptr = v->cgrp; + if (!kptr) err = 3; - return 0; - } - - bpf_cgroup_release(kptr); + bpf_rcu_read_unlock(); return 0; } From ec48599abee3a16fdc93c1c3c3e153a4f4d29420 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 10 Apr 2023 23:16:33 -0500 Subject: [PATCH 245/260] bpf,docs: Remove references to bpf_cgroup_kptr_get() The bpf_cgroup_kptr_get() kfunc has been removed, and bpf_cgroup_acquire() / bpf_cgroup_release() now have the same semantics as bpf_task_acquire() / bpf_task_release(). This patch updates the BPF documentation to reflect this. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230411041633.179404-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 68 ------------------------------------ 1 file changed, 68 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index d8a16c4bef7f..3b42cfe12437 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -572,74 +572,6 @@ bpf_task_release() respectively, so we won't provide examples for them. ---- -You may also acquire a reference to a ``struct cgroup`` kptr that's already -stored in a map using bpf_cgroup_kptr_get(): - -.. kernel-doc:: kernel/bpf/helpers.c - :identifiers: bpf_cgroup_kptr_get - -Here's an example of how it can be used: - -.. code-block:: c - - /* struct containing the struct task_struct kptr which is actually stored in the map. */ - struct __cgroups_kfunc_map_value { - struct cgroup __kptr * cgroup; - }; - - /* The map containing struct __cgroups_kfunc_map_value entries. */ - struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, int); - __type(value, struct __cgroups_kfunc_map_value); - __uint(max_entries, 1); - } __cgroups_kfunc_map SEC(".maps"); - - /* ... */ - - /** - * A simple example tracepoint program showing how a - * struct cgroup kptr that is stored in a map can - * be acquired using the bpf_cgroup_kptr_get() kfunc. 
- */ - SEC("tp_btf/cgroup_mkdir") - int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path) - { - struct cgroup *kptr; - struct __cgroups_kfunc_map_value *v; - s32 id = cgrp->self.id; - - /* Assume a cgroup kptr was previously stored in the map. */ - v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id); - if (!v) - return -ENOENT; - - /* Acquire a reference to the cgroup kptr that's already stored in the map. */ - kptr = bpf_cgroup_kptr_get(&v->cgroup); - if (!kptr) - /* If no cgroup was present in the map, it's because - * we're racing with another CPU that removed it with - * bpf_kptr_xchg() between the bpf_map_lookup_elem() - * above, and our call to bpf_cgroup_kptr_get(). - * bpf_cgroup_kptr_get() internally safely handles this - * race, and will return NULL if the task is no longer - * present in the map by the time we invoke the kfunc. - */ - return -EBUSY; - - /* Free the reference we just took above. Note that the - * original struct cgroup kptr is still in the map. It will - * be freed either at a later time if another context deletes - * it from the map, or automatically by the BPF subsystem if - * it's still present when the map is destroyed. - */ - bpf_cgroup_release(kptr); - - return 0; - } - ----- - Other kfuncs available for interacting with ``struct cgroup *`` objects are bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access the ancestor of a cgroup and find a cgroup by its ID, respectively. Both From ed17aa92dc56b6d8883e4b7a8f1c6fbf5ed6cd29 Mon Sep 17 00:00:00 2001 From: Xin Liu Date: Thu, 6 Apr 2023 20:26:22 +0800 Subject: [PATCH 246/260] bpf, sockmap: fix deadlocks in the sockhash and sockmap A BPF program attached to the sched_switch tracepoint that does only one thing, deleting a fixed element from a sockhash, was reported to deadlock ([0]). Elements in a sockhash are rarely deleted explicitly, whether by users or by eBPF programs, so this deletion path has received little attention. Unlike regular hash maps, sockhash and sockmap only provide spin_lock_bh protection, which makes them prone to self-deadlock when elements are deleted from interrupt context.
[0]:https://lore.kernel.org/all/CABcoxUayum5oOqFMMqAeWuS8+EzojquSOSyDA3J_2omY=2EeAg@mail.gmail.com/ Reported-by: Hsin-Wei Hung Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Xin Liu Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230406122622.109978-1-liuxin350@huawei.com Signed-off-by: Alexei Starovoitov --- net/core/sock_map.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 7c189c2e2fbf..66305b7bf8b7 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -414,8 +414,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, { struct sock *sk; int err = 0; + unsigned long flags; - raw_spin_lock_bh(&stab->lock); + raw_spin_lock_irqsave(&stab->lock, flags); sk = *psk; if (!sk_test || sk_test == sk) sk = xchg(psk, NULL); @@ -425,7 +426,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, else err = -EINVAL; - raw_spin_unlock_bh(&stab->lock); + raw_spin_unlock_irqrestore(&stab->lock, flags); return err; } @@ -932,11 +933,12 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_bucket *bucket; struct bpf_shtab_elem *elem; int ret = -ENOENT; + unsigned long flags; hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); - raw_spin_lock_bh(&bucket->lock); + raw_spin_lock_irqsave(&bucket->lock, flags); elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size); if (elem) { hlist_del_rcu(&elem->node); @@ -944,7 +946,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) sock_hash_free_elem(htab, elem); ret = 0; } - raw_spin_unlock_bh(&bucket->lock); + raw_spin_unlock_irqrestore(&bucket->lock, flags); return ret; } From ac931d4cdec3df8b6eac3bc40a6871123021f078 Mon Sep 17 00:00:00 2001 From: Christian Ehrig Date: Fri, 7 Apr 2023 15:38:53 +0200 Subject: [PATCH 247/260] ipip,ip_tunnel,sit: Add FOU support for externally controlled ipip devices Today ipip devices in collect-metadata mode don't allow for sending FOU or GUE encapsulated packets. This patch lifts the restriction by adding a struct ip_tunnel_encap to the tunnel metadata. On the egress path, the members of this struct can be set by the bpf_skb_set_fou_encap kfunc via a BPF tc-hook. Instead of dropping packets wishing to use additional UDP encapsulation, ip_md_tunnel_xmit now evaluates the contents of this struct and adds the corresponding FOU or GUE header. Furthermore, it is making sure that additional header bytes are taken into account for PMTU discovery. On the ingress path, an ipip device in collect-metadata mode will fill this struct and a BPF tc-hook can obtain the information via a call to the bpf_skb_get_fou_encap kfunc. The minor change to ip_tunnel_encap, which now takes a pointer to struct ip_tunnel_encap instead of struct ip_tunnel, allows us to control FOU encap type and parameters on a per packet-level. 
Signed-off-by: Christian Ehrig Link: https://lore.kernel.org/r/cfea47de655d0f870248abf725932f851b53960a.1680874078.git.cehrig@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/net/ip_tunnels.h | 28 +++++++++++++++------------- net/ipv4/ip_tunnel.c | 22 ++++++++++++++++++++-- net/ipv4/ipip.c | 1 + net/ipv6/sit.c | 2 +- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index fca357679816..7912f53caae0 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -57,6 +57,13 @@ struct ip_tunnel_key { __u8 flow_flags; }; +struct ip_tunnel_encap { + u16 type; + u16 flags; + __be16 sport; + __be16 dport; +}; + /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ @@ -66,9 +73,9 @@ struct ip_tunnel_key { #define IP_TUNNEL_OPTS_MAX \ GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) - struct ip_tunnel_info { struct ip_tunnel_key key; + struct ip_tunnel_encap encap; #ifdef CONFIG_DST_CACHE struct dst_cache dst_cache; #endif @@ -86,13 +93,6 @@ struct ip_tunnel_6rd_parm { }; #endif -struct ip_tunnel_encap { - u16 type; - u16 flags; - __be16 sport; - __be16 dport; -}; - struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry __rcu *next; __be32 addr; @@ -293,6 +293,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 remote, __be32 local, __be32 key); +void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); @@ -371,22 +372,23 @@ static inline int ip_encap_hlen(struct ip_tunnel_encap *e) return hlen; } -static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, +static inline int ip_tunnel_encap(struct sk_buff *skb, + struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { const struct ip_tunnel_encap_ops *ops; int ret = -EINVAL; - if (t->encap.type == TUNNEL_ENCAP_NONE) + if (e->type == TUNNEL_ENCAP_NONE) return 0; - if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[t->encap.type]); + ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->build_header)) - ret = ops->build_header(skb, &t->encap, protocol, fl4); + ret = ops->build_header(skb, e, protocol, fl4); rcu_read_unlock(); return ret; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index de90b09dfe78..add437f710fc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -359,6 +359,20 @@ err_dev_set_mtu: return ERR_PTR(err); } +void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph; + + if (iph->protocol != IPPROTO_UDP) + return; + + udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2)); + info->encap.sport = udph->source; + info->encap.dport = udph->dest; +} +EXPORT_SYMBOL(ip_tunnel_md_udp_encap); + int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error) @@ -572,7 +586,11 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), dev_net(dev), 0, skb->mark, skb_get_hash(skb), key->flow_flags); - if (tunnel->encap.type != 
TUNNEL_ENCAP_NONE) + + if (!tunnel_hlen) + tunnel_hlen = ip_encap_hlen(&tun_info->encap); + + if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0) goto tx_error; use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); @@ -732,7 +750,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev_net(dev), tunnel->parms.link, tunnel->fwmark, skb_get_hash(skb), 0); - if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) + if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) goto tx_error; if (connected && md) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index abea77759b7e..27b8f83c6ea2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -241,6 +241,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) return 0; + ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info); } skb_reset_mac_header(skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 70d81bba5093..063560e2cb1a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1024,7 +1024,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) { + if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) { ip_rt_put(rt); goto tx_error; } From c50e96099edb134bf107fafc02715fbc4aa2277f Mon Sep 17 00:00:00 2001 From: Christian Ehrig Date: Fri, 7 Apr 2023 15:38:54 +0200 Subject: [PATCH 248/260] bpf,fou: Add bpf_skb_{set,get}_fou_encap kfuncs Add two new kfuncs that allow a BPF tc-hook, installed on an ipip device in collect-metadata mode, to control FOU encap parameters on a per-packet level. The set of kfuncs is registered with the fou module. The bpf_skb_set_fou_encap kfunc is supposed to be used in tandem and after a successful call to the bpf_skb_set_tunnel_key bpf-helper. UDP source and destination ports can be controlled by passing a struct bpf_fou_encap. A source port of zero will auto-assign a source port. enum bpf_fou_encap_type is used to specify if the egress path should FOU or GUE encap the packet. On the ingress path bpf_skb_get_fou_encap can be used to read UDP source and destination ports from the receiver's point of view and allows for packet multiplexing across different destination ports within a single BPF program and ipip device. 
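To make the intended usage concrete, an egress tc-hook on an external ipip device could combine the existing tunnel-key helper with the new kfunc roughly as follows (a minimal sketch, not taken from this patch; the address and port are illustrative placeholders):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct bpf_fou_encap {
	__be16 sport;
	__be16 dport;
};

enum bpf_fou_encap_type {
	FOU_BPF_ENCAP_FOU,
	FOU_BPF_ENCAP_GUE,
};

int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
			  struct bpf_fou_encap *encap, int type) __ksym;

SEC("tc")
int egress_gue(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {
		.remote_ipv4 = 0xac100164,	/* 172.16.1.100, placeholder */
		.tunnel_ttl = 64,
	};
	struct bpf_fou_encap encap = {
		.sport = 0,			/* 0: kernel auto-assigns the source port */
		.dport = bpf_htons(5555),
	};

	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0) < 0)
		return TC_ACT_SHOT;

	/* must come after bpf_skb_set_tunnel_key() */
	if (bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_GUE) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";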
Signed-off-by: Christian Ehrig Link: https://lore.kernel.org/r/e17c94a646b63e78ce0dbf3f04b2c33dc948a32d.1680874078.git.cehrig@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/net/fou.h | 2 + net/ipv4/Makefile | 2 +- net/ipv4/fou_bpf.c | 119 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/fou_core.c | 5 ++ 4 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 net/ipv4/fou_bpf.c diff --git a/include/net/fou.h b/include/net/fou.h index 80f56e275b08..824eb4b231fd 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -17,4 +17,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type); +int register_fou_bpf(void); + #endif diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 880277c9fd07..b18ba8ef93ad 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o -fou-y := fou_core.o fou_nl.o +fou-y := fou_core.o fou_nl.o fou_bpf.o obj-$(CONFIG_NET_FOU) += fou.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c new file mode 100644 index 000000000000..3760a14b6b57 --- /dev/null +++ b/net/ipv4/fou_bpf.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Fou Helpers for TC-BPF hook + * + * These are called from SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> + +#include <net/dst_metadata.h> +#include <net/fou.h> + +struct bpf_fou_encap { + __be16 sport; + __be16 dport; +}; + +enum bpf_fou_encap_type { + FOU_BPF_ENCAP_FOU, + FOU_BPF_ENCAP_GUE, +}; + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in BTF"); + +/* bpf_skb_set_fou_encap - Set FOU encap parameters + * + * This function allows for using GUE or FOU encapsulation together with an + * ipip device in collect-metadata mode. + * + * It is meant to be used in BPF tc-hooks and after a call to the + * bpf_skb_set_tunnel_key helper, responsible for setting IP addresses. + * + * Parameters: + * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL + * @encap Pointer to a `struct bpf_fou_encap` storing UDP src and + * dst ports. If sport is set to 0 the kernel will auto-assign a + * port. This is similar to using `encap-sport auto`. + * Cannot be NULL + * @type Encapsulation type for the packet.
Their definitions are + * specified in `enum bpf_fou_encap_type` + */ +__bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap, int type) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct ip_tunnel_info *info = skb_tunnel_info(skb); + + if (unlikely(!encap)) + return -EINVAL; + + if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) + return -EINVAL; + + switch (type) { + case FOU_BPF_ENCAP_FOU: + info->encap.type = TUNNEL_ENCAP_FOU; + break; + case FOU_BPF_ENCAP_GUE: + info->encap.type = TUNNEL_ENCAP_GUE; + break; + default: + info->encap.type = TUNNEL_ENCAP_NONE; + } + + if (info->key.tun_flags & TUNNEL_CSUM) + info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM; + + info->encap.sport = encap->sport; + info->encap.dport = encap->dport; + + return 0; +} + +/* bpf_skb_get_fou_encap - Get FOU encap parameters + * + * This function allows for reading encap metadata from a packet received + * on an ipip device in collect-metadata mode. + * + * Parameters: + * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL + * @encap Pointer to a struct bpf_fou_encap storing UDP source and + * destination port. Cannot be NULL + */ +__bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct ip_tunnel_info *info = skb_tunnel_info(skb); + + if (unlikely(!info)) + return -EINVAL; + + encap->sport = info->encap.sport; + encap->dport = info->encap.dport; + + return 0; +} + +__diag_pop() + +BTF_SET8_START(fou_kfunc_set) +BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) +BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) +BTF_SET8_END(fou_kfunc_set) + +static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { + .owner = THIS_MODULE, + .set = &fou_kfunc_set, +}; + +int register_fou_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &fou_bpf_kfunc_set); +} diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index cafec9b4eee0..0c41076e31ed 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -1236,10 +1236,15 @@ static int __init fou_init(void) if (ret < 0) goto unregister; + ret = register_fou_bpf(); + if (ret < 0) + goto kfunc_failed; + ret = ip_tunnel_encap_add_fou_ops(); if (ret == 0) return 0; +kfunc_failed: genl_unregister_family(&fou_nl_family); unregister: unregister_pernet_device(&fou_net_ops); From d9688f898c08c8f96fb0e7879262877ffd319bfd Mon Sep 17 00:00:00 2001 From: Christian Ehrig Date: Fri, 7 Apr 2023 15:38:55 +0200 Subject: [PATCH 249/260] selftests/bpf: Test FOU kfuncs for externally controlled ipip devices Add tests for FOU and GUE encapsulation via the bpf_skb_{set,get}_fou_encap kfuncs, using ipip devices in collect-metadata mode. These tests make sure that we can successfully set and obtain FOU and GUE encap parameters using ingress / egress BPF tc-hooks. 
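On the receive side, the same kfunc pair lets a single program demultiplex on the outer UDP destination port; a hedged sketch (not the selftest itself; it reuses the struct bpf_fou_encap declaration from the egress example above, and the port is a placeholder):

int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
			  struct bpf_fou_encap *encap) __ksym;

SEC("tc")
int ingress_demux(struct __sk_buff *skb)
{
	struct bpf_fou_encap encap = {};

	if (bpf_skb_get_fou_encap(skb, &encap) < 0)
		return TC_ACT_SHOT;

	/* accept only packets that arrived on the expected encap port */
	if (bpf_ntohs(encap.dport) != 5555)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}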
Signed-off-by: Christian Ehrig Link: https://lore.kernel.org/r/040193566ddbdb0b53eb359f7ac7bbd316f338b5.1680874078.git.cehrig@cloudflare.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/test_tunnel.c | 153 +++++++++++++++++- .../selftests/bpf/progs/test_tunnel_kern.c | 117 ++++++++++++++ 2 files changed, 268 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 47f1d482fe39..d149ab98798d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -89,6 +89,9 @@ #define IP6VXLAN_TUNL_DEV0 "ip6vxlan00" #define IP6VXLAN_TUNL_DEV1 "ip6vxlan11" +#define IPIP_TUNL_DEV0 "ipip00" +#define IPIP_TUNL_DEV1 "ipip11" + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -188,6 +191,79 @@ static void delete_ip6vxlan_tunnel(void) SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1); } +enum ipip_encap { + NONE = 0, + FOU = 1, + GUE = 2, +}; + +static int set_ipip_encap(const char *ipproto, const char *type) +{ + SYS(fail, "ip -n at_ns0 fou add port 5555 %s", ipproto); + SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap %s", + IPIP_TUNL_DEV0, type); + SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap-dport 5555", + IPIP_TUNL_DEV0); + + return 0; +fail: + return -1; +} + +static int add_ipip_tunnel(enum ipip_encap encap) +{ + int err; + const char *ipproto, *type; + + switch (encap) { + case FOU: + ipproto = "ipproto 4"; + type = "fou"; + break; + case GUE: + ipproto = "gue"; + type = ipproto; + break; + default: + ipproto = NULL; + type = ipproto; + } + + /* at_ns0 namespace */ + SYS(fail, "ip -n at_ns0 link add dev %s type ipip local %s remote %s", + IPIP_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + if (type && ipproto) { + err = set_ipip_encap(ipproto, type); + if (!ASSERT_OK(err, "set_ipip_encap")) + goto fail; + } + + SYS(fail, "ip -n at_ns0 link set dev %s up", IPIP_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", + IPIP_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); + + /* root namespace */ + if (type && ipproto) + SYS(fail, "ip fou add port 5555 %s", ipproto); + SYS(fail, "ip link add dev %s type ipip external", IPIP_TUNL_DEV1); + SYS(fail, "ip link set dev %s up", IPIP_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/24", IPIP_TUNL_DEV1, + IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_ipip_tunnel(void) +{ + SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0); + SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null"); + SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1); + SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -386,10 +462,80 @@ done: test_tunnel_kern__destroy(skel); } -#define RUN_TEST(name) \ +static void test_ipip_tunnel(enum ipip_encap encap) +{ + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int set_src_prog_fd, get_src_prog_fd; + int ifindex = -1; + int err; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* add ipip tunnel */ + err = add_ipip_tunnel(encap); + if (!ASSERT_OK(err, "add_ipip_tunnel")) + goto done; + + /* load and attach bpf prog to tunnel dev tc hook point */ + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + ifindex = 
if_nametoindex(IPIP_TUNL_DEV1); + if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + + switch (encap) { + case FOU: + get_src_prog_fd = bpf_program__fd( + skel->progs.ipip_encap_get_tunnel); + set_src_prog_fd = bpf_program__fd( + skel->progs.ipip_fou_set_tunnel); + break; + case GUE: + get_src_prog_fd = bpf_program__fd( + skel->progs.ipip_encap_get_tunnel); + set_src_prog_fd = bpf_program__fd( + skel->progs.ipip_gue_set_tunnel); + break; + default: + get_src_prog_fd = bpf_program__fd( + skel->progs.ipip_get_tunnel); + set_src_prog_fd = bpf_program__fd( + skel->progs.ipip_set_tunnel); + } + + if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + goto done; + + /* ping from root namespace test */ + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + if (!ASSERT_OK(err, "test_ping")) + goto done; + close_netns(nstoken); + +done: + /* delete ipip tunnel */ + delete_ipip_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + +#define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ - test_ ## name(); \ + test_ ## name(__VA_ARGS__); \ } \ }) @@ -400,6 +546,9 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); + RUN_TEST(ipip_tunnel, NONE); + RUN_TEST(ipip_tunnel, FOU); + RUN_TEST(ipip_tunnel, GUE); cleanup(); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 9ab2d55ab7c0..f66af753bbbb 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -52,6 +52,21 @@ struct vxlan_metadata { __u32 gbp; }; +struct bpf_fou_encap { + __be16 sport; + __be16 dport; +}; + +enum bpf_fou_encap_type { + FOU_BPF_ENCAP_FOU, + FOU_BPF_ENCAP_GUE, +}; + +int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap, int type) __ksym; +int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap) __ksym; + struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); @@ -749,6 +764,108 @@ int ipip_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("tc") +int ipip_gue_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + void *data_end = (void *)(long)skb->data_end; + int ret; + + if (data + sizeof(*iph) > data_end) { + log_err(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + encap.sport = 0; + encap.dport = bpf_htons(5555); + + ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_GUE); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ipip_fou_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + void *data_end = (void *)(long)skb->data_end; + int ret; + + if (data + sizeof(*iph) > data_end) { 
+ log_err(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + encap.sport = 0; + encap.dport = bpf_htons(5555); + + ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_FOU); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ipip_encap_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_fou_encap(skb, &encap); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (bpf_ntohs(encap.dport) != 5555) + return TC_ACT_SHOT; + + bpf_printk("%d remote ip 0x%x, sport %d, dport %d\n", ret, + key.remote_ipv4, bpf_ntohs(encap.sport), + bpf_ntohs(encap.dport)); + return TC_ACT_OK; +} + SEC("tc") int ipip6_set_tunnel(struct __sk_buff *skb) { From d319f344561de23e810515d109c7278919bff7b0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 10 Apr 2023 19:43:44 +0200 Subject: [PATCH 250/260] mm: Fix copy_from_user_nofault(). There are several issues with copy_from_user_nofault(): - access_ok() is designed for user context only and for that reason it has WARN_ON_IN_IRQ(), which triggers when bpf, kprobe, eprobe and perf on ppc call it from irq context. - it's missing nmi_uaccess_okay(), which is a nop on all architectures except x86, where it's required. The comment in arch/x86/mm/tlb.c explains the details of why it's necessary. Calling copy_from_user_nofault() from bpf and [ke]probes without this check is not safe. - __copy_from_user_inatomic() under CONFIG_HARDENED_USERCOPY is calling check_object_size()->__check_object_size()->check_heap_object()->find_vmap_area()->spin_lock(), which is not safe to do from bpf, [ke]probe and perf due to potential deadlock. Fix all three issues. With these fixes, copy_from_user_nofault() becomes equivalent to copy_from_user_nmi() from a safety point of view, differing only in its return value.
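
To make the calling context concrete: a BPF program attached to a kprobe runs with hard irqs disabled, and an NMI-driven perf event can run the same helpers from NMI context; any user-memory read such a program does goes through copy_from_user_nofault(). A minimal sketch of such a program (the probe target and buffer size are illustrative, not part of this patch):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("kprobe/do_sys_openat2")
int BPF_KPROBE(trace_open, int dfd, const char *filename)
{
	char buf[64] = {};

	/* bpf_probe_read_user() lands in copy_from_user_nofault(); a
	 * kprobe handler runs with hard irqs disabled, and with a perf
	 * trigger this can even be NMI context.
	 */
	if (bpf_probe_read_user(buf, sizeof(buf) - 1, filename))
		return 0;

	bpf_printk("openat path prefix: %s", buf);
	return 0;
}
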
Reported-by: Hsin-Wei Hung Signed-off-by: Alexei Starovoitov Signed-off-by: Florian Lehner Tested-by: Hsin-Wei Hung Tested-by: Florian Lehner Link: https://lore.kernel.org/r/20230410174345.4376-2-dev@der-flo.net Signed-off-by: Alexei Starovoitov --- mm/maccess.c | 16 +++++++++++----- mm/usercopy.c | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/maccess.c b/mm/maccess.c index 074f6b086671..518a25667323 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -5,6 +5,7 @@ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> +#include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) @@ -113,11 +114,16 @@ Efault: long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; - if (access_ok(src, size)) { - pagefault_disable(); - ret = __copy_from_user_inatomic(dst, src, size); - pagefault_enable(); - } + + if (!__access_ok(src, size)) + return ret; + + if (!nmi_uaccess_okay()) + return ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, size); + pagefault_enable(); if (ret) return -EFAULT; diff --git a/mm/usercopy.c b/mm/usercopy.c index 4c3164beacec..83c164aba6e0 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -173,7 +173,7 @@ static inline void check_heap_object(const void *ptr, unsigned long n, return; } - if (is_vmalloc_addr(ptr)) { + if (is_vmalloc_addr(ptr) && !pagefault_disabled()) { struct vmap_area *area = find_vmap_area(addr); if (!area) From 5a674611d116a5fc28c5429beea2b78c6e2933ef Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 13 Apr 2023 10:47:40 +0100 Subject: [PATCH 251/260] selftests/bpf: Fix use of uninitialized op_name in log tests One of the test assertions uses an uninitialized op_name, which leads to some head-scratching if it fails. Use a string constant instead. Fixes: b1a7a480a112 ("selftests/bpf: Add fixed vs rotating verifier log tests") Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230413094740.18041-1-lmb@isovalent.com --- tools/testing/selftests/bpf/prog_tests/verifier_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 475092a78deb..8337c6bc5b95 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -110,7 +110,7 @@ static void verif_log_subtest(const char *name, bool expect_load_error, int log_ } if (!ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25")) goto cleanup; - if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name)) + if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, "log_fixed_contents_25")) goto cleanup; } From de6d014a09bf12a9a8959d60c0a1d4a41d394a89 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Apr 2023 14:04:21 -0700 Subject: [PATCH 252/260] selftests/bpf: Use read_perf_max_sample_freq() in perf_event_stackmap Currently, the perf_event sample period in perf_event_stackmap is set so low that the test fails randomly. Fix this by using the max sample frequency from read_perf_max_sample_freq(). Move read_perf_max_sample_freq() to testing_helpers.c. Replace the CHECK() with an if/printf pair, as CHECK() is not available in testing_helpers.c.
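
For background on the two sampling modes: with a fixed .sample_period = N the PMU samples once every N events, and a period that is too aggressive gets the event throttled against /proc/sys/kernel/perf_event_max_sample_rate, while with .freq = 1 the kernel auto-tunes the period toward .sample_freq samples per second. A sketch of opening a counter in frequency mode via perf_event_open(2) (the event choice and helper name are illustrative):

#include <linux/perf_event.h>
#include <linux/types.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustrative only: open a CPU-cycles counter in frequency mode so the
 * kernel adjusts the sampling period itself instead of sampling every
 * fixed N events.
 */
static int open_cycles_freq(__u64 freq)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
		.size = sizeof(attr),
		.freq = 1,           /* interpret sample_freq, not sample_period */
		.sample_freq = freq, /* e.g. read_perf_max_sample_freq() */
	};

	/* pid == 0: this task; cpu == -1: any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
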
Fixes: 1da4864c2b20 ("selftests/bpf: Add callchain_stackid") Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230412210423.900851-2-song@kernel.org --- .../bpf/prog_tests/perf_event_stackmap.c | 3 ++- .../bpf/prog_tests/stacktrace_build_id_nmi.c | 15 -------------- tools/testing/selftests/bpf/testing_helpers.c | 20 +++++++++++++++++++ tools/testing/selftests/bpf/testing_helpers.h | 2 ++ 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c index 33144c9432ae..f4aad35afae1 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c @@ -63,7 +63,8 @@ void test_perf_event_stackmap(void) PERF_SAMPLE_BRANCH_NO_FLAGS | PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_CALL_STACK, - .sample_period = 5000, + .freq = 1, + .sample_freq = read_perf_max_sample_freq(), .size = sizeof(struct perf_event_attr), }; struct perf_event_stackmap *skel; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 47558b0d7f66..5db9eec24b5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -2,21 +2,6 @@ #include #include "test_stacktrace_build_id.skel.h" -static __u64 read_perf_max_sample_freq(void) -{ - __u64 sample_freq = 5000; /* fallback to 5000 on error */ - FILE *f; - __u32 duration = 0; - - f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); - if (f == NULL) - return sample_freq; - CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", - "return default value: 5000,err %d\n", -errno); - fclose(f); - return sample_freq; -} - void test_stacktrace_build_id_nmi(void) { int control_map_fd, stackid_hmap_fd, stackmap_fd; diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index ecfea13f938b..0b5e0829e5be 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -229,3 +229,23 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts); } + +__u64 read_perf_max_sample_freq(void) +{ + __u64 sample_freq = 5000; /* fallback to 5000 on error */ + FILE *f; + + f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); + if (f == NULL) { + printf("Failed to open /proc/sys/kernel/perf_event_max_sample_rate: err %d\n" + "return default value: 5000\n", -errno); + return sample_freq; + } + if (fscanf(f, "%llu", &sample_freq) != 1) { + printf("Failed to parse /proc/sys/kernel/perf_event_max_sample_rate: err %d\n" + "return default value: 5000\n", -errno); + } + + fclose(f); + return sample_freq; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 6ec00bf79cb5..eb8790f928e4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -20,3 +20,5 @@ struct test_filter_set; int parse_test_list(const char *s, struct test_filter_set *test_set, bool is_glob_pattern); + +__u64 read_perf_max_sample_freq(void); From c1e07a80cf23d3a6e96172bc9a73bfa912a9fcbc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Apr 2023 14:04:22 -0700 Subject: [PATCH 
253/260] selftests/bpf: Fix leaked bpf_link in get_stackid_cannot_attach skel->links.oncpu is leaked in one case. This causes the perf_branches test to fail when it runs after get_stackid_cannot_attach: ./test_progs -t get_stackid_cannot_attach,perf_branches 84 get_stackid_cannot_attach:OK test_perf_branches_common:PASS:test_perf_branches_load 0 nsec test_perf_branches_common:PASS:attach_perf_event 0 nsec test_perf_branches_common:PASS:set_affinity 0 nsec check_good_sample:FAIL:output not valid no valid sample from prog 146/1 perf_branches/perf_branches_hw:FAIL 146/2 perf_branches/perf_branches_no_hw:OK 146 perf_branches:FAIL All error logs: test_perf_branches_common:PASS:test_perf_branches_load 0 nsec test_perf_branches_common:PASS:attach_perf_event 0 nsec test_perf_branches_common:PASS:set_affinity 0 nsec check_good_sample:FAIL:output not valid no valid sample from prog 146/1 perf_branches/perf_branches_hw:FAIL 146 perf_branches:FAIL Summary: 1/1 PASSED, 0 SKIPPED, 1 FAILED Fix this by adding the missing bpf_link__destroy(). Fixes: 346938e9380c ("selftests/bpf: Add get_stackid_cannot_attach") Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230412210423.900851-3-song@kernel.org --- .../testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c index 5308de1ed478..2715c68301f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -65,6 +65,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain"); + bpf_link__destroy(skel->links.oncpu); close(pmu_fd); /* add exclude_callchain_kernel, attach should fail */ From 2995f9a8d427b9ff6f3cf4e85c0f9d4456ef324d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Apr 2023 14:04:23 -0700 Subject: [PATCH 254/260] selftests/bpf: Keep the loop in bpf_testmod_loop_test Some compilers (for example clang-15) optimize bpf_testmod_loop_test and remove the loop: gcc version (gdb) disassemble bpf_testmod_loop_test Dump of assembler code for function bpf_testmod_loop_test: 0x0000000000000570 <+0>: callq 0x575 0x0000000000000575 <+5>: xor %eax,%eax 0x0000000000000577 <+7>: test %edi,%edi 0x0000000000000579 <+9>: jle 0x587 0x000000000000057b <+11>: xor %edx,%edx 0x000000000000057d <+13>: add %edx,%eax 0x000000000000057f <+15>: add $0x1,%edx 0x0000000000000582 <+18>: cmp %edx,%edi 0x0000000000000584 <+20>: jne 0x57d 0x0000000000000586 <+22>: retq 0x0000000000000587 <+23>: retq clang-15 version (gdb) disassemble bpf_testmod_loop_test Dump of assembler code for function bpf_testmod_loop_test: 0x0000000000000450 <+0>: nopl 0x0(%rax,%rax,1) 0x0000000000000455 <+5>: test %edi,%edi 0x0000000000000457 <+7>: jle 0x46b 0x0000000000000459 <+9>: lea -0x1(%rdi),%eax 0x000000000000045c <+12>: lea -0x2(%rdi),%ecx 0x000000000000045f <+15>: imul %rax,%rcx 0x0000000000000463 <+19>: shr %rcx 0x0000000000000466 <+22>: lea -0x1(%rdi,%rcx,1),%eax 0x000000000000046a <+26>: retq 0x000000000000046b <+27>: xor %eax,%eax 0x000000000000046d <+29>: retq Note: the jne instruction (the loop back-edge) is removed in the clang-15 version. Force the compiler to keep the loop by making sum volatile.
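
The clang-15 code above is the classic closed-form rewrite of the sum 0 + 1 + ... + (n - 1) into n * (n - 1) / 2; making the accumulator volatile forces a load and store per iteration, so the backward branch has to stay. A stand-alone illustration of the same effect (not the bpf_testmod code itself; build with clang -O2 and compare the disassembly of the two variants):

int sum_folded(int n)
{
	int i, sum = 0;

	for (i = 0; i < n; i++)	/* clang folds this to n * (n - 1) / 2 */
		sum += i;
	return sum;
}

int sum_looped(int n)
{
	volatile int sum = 0;	/* every += forces a memory access */
	int i;

	for (i = 0; i < n; i++)	/* the backward branch survives -O2 */
		sum += i;
	return sum;
}
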
Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230412210423.900851-4-song@kernel.org --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 7999476b9446..c5ad39bbe9af 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -130,7 +130,11 @@ bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) { noinline int bpf_testmod_loop_test(int n) { - int i, sum = 0; + /* Make sum volatile, so smart compilers, such as clang, will not + * optimize the code by removing the loop. + */ + volatile int sum = 0; + int i; /* the primary goal of this test is to test LBR. Create a lot of * branches in the function, so we can catch it easily. From ee5059a64dbad4806a3c11babd0dbed5a5d04ead Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 12 Apr 2023 10:06:55 -0700 Subject: [PATCH 255/260] selftests/bpf: Remove stand-alone test_verifier_log test binary test_progs' prog_tests/verifier_log.c supersedes the stand-alone test_verifier_log test. It covers the same checks, adds more, and is also integrated into the test_progs test runner. Just remove test_verifier_log.c. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230412170655.1866831-1-andrii@kernel.org --- tools/testing/selftests/bpf/Makefile | 2 +- .../testing/selftests/bpf/test_verifier_log.c | 175 ------------------ 2 files changed, 1 insertion(+), 176 deletions(-) delete mode 100644 tools/testing/selftests/bpf/test_verifier_log.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b5ffdd89b86f..c49e5403ad0e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -36,7 +36,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_verifier_log test_dev_cgroup \ + test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ test_tcpnotify_user test_sysctl \ diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c deleted file mode 100644 index 70feda97cee5..000000000000 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ /dev/null @@ -1,175 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#define LOG_SIZE (1 << 20) - -#define err(str...)
printf("ERROR: " str) - -static const struct bpf_insn code_sample[] = { - /* We need a few instructions to pass the min log length */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), -}; - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -static int load(char *log, size_t log_len, int log_level) -{ - union bpf_attr attr; - - bzero(&attr, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn)); - attr.insns = ptr_to_u64(code_sample); - attr.license = ptr_to_u64("GPL"); - attr.log_buf = ptr_to_u64(log); - attr.log_size = log_len; - attr.log_level = log_level; - - return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); -} - -static void check_ret(int ret, int exp_errno) -{ - if (ret > 0) { - close(ret); - err("broken sample loaded successfully!?\n"); - exit(1); - } - - if (!ret || errno != exp_errno) { - err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n", - ret, errno, -1, exp_errno); - exit(1); - } -} - -static void check_ones(const char *buf, size_t len, const char *msg) -{ - while (len--) - if (buf[len] != 1) { - err("%s", msg); - exit(1); - } -} - -static void test_log_good(char *log, size_t buf_len, size_t log_len, - size_t exp_len, int exp_errno, const char *full_log) -{ - size_t len; - int ret; - - memset(log, 1, buf_len); - - ret = load(log, log_len, 1); - check_ret(ret, exp_errno); - - len = strnlen(log, buf_len); - if (len == buf_len) { - err("verifier did not NULL terminate the log\n"); - exit(1); - } - if (exp_len && len != exp_len) { - err("incorrect log length expected:%zd have:%zd\n", - exp_len, len); - exit(1); - } - - if (strchr(log, 1)) { - err("verifier leaked a byte through\n"); - exit(1); - } - - check_ones(log + len + 1, buf_len - len - 1, - "verifier wrote bytes past NULL termination\n"); - - if (memcmp(full_log, log, LOG_SIZE)) { - err("log did not match expected output\n"); - exit(1); - } -} - -static void test_log_bad(char *log, size_t log_len, int log_level) -{ - int ret; - - ret = load(log, log_len, log_level); - check_ret(ret, EINVAL); - if (log) - check_ones(log, LOG_SIZE, - "verifier touched log with bad parameters\n"); -} - -int main(int argc, char **argv) -{ - char full_log[LOG_SIZE]; - char log[LOG_SIZE]; - size_t want_len; - int i; - - memset(log, 1, LOG_SIZE); - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - /* Test incorrect attr */ - printf("Test log_level 0...\n"); - test_log_bad(log, LOG_SIZE, 0); - - printf("Test log_size < 128...\n"); - test_log_bad(log, 15, 1); - - printf("Test log_buff = NULL...\n"); - test_log_bad(NULL, LOG_SIZE, 1); - - /* Test with log big enough */ - printf("Test oversized buffer...\n"); - test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log); - - want_len = strlen(full_log); - - printf("Test exact buffer...\n"); - test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log); - - printf("Test 
undersized buffers...\n"); - for (i = 0; i < 64; i++) { - full_log[want_len - i + 1] = 1; - full_log[want_len - i] = 0; - - test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i, - ENOSPC, full_log); - } - - printf("test_verifier_log: OK\n"); - return 0; -} From 0c5f48599bed55c13d6f979ea824554bdebdf2ad Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Mon, 10 Apr 2023 14:18:41 +0200 Subject: [PATCH 256/260] xsk: Simplify xp_aligned_validate_desc implementation Perform the chunk boundary check like the page boundary check in xp_desc_crosses_non_contig_pg(). This simplifies the implementation and reduces the number of branches. Signed-off-by: Kal Conley Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20230410121841.643254-1-kal.conley@dectris.com --- net/xdp/xsk_queue.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 66c6f57c9c44..76b574bffd4a 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -133,16 +133,12 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 chunk, chunk_end; + u64 offset = desc->addr & (pool->chunk_size - 1); - chunk = xp_aligned_extract_addr(pool, desc->addr); - if (likely(desc->len)) { - chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1); - if (chunk != chunk_end) - return false; - } + if (offset + desc->len > pool->chunk_size) + return false; - if (chunk >= pool->addrs_cnt) + if (desc->addr >= pool->addrs_cnt) return false; if (desc->options) From 4099be372faf7b3616634dfe6994b81b1edf1906 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 11 Apr 2023 20:46:47 -0700 Subject: [PATCH 257/260] selftests/bpf: Fix compiler warnings in bpf_testmod for kfuncs Add -Wmissing-prototypes ignore in bpf_testmod.c, similarly to what we do in kernel code proper. Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/oe-kbuild-all/202304080951.l14IDv3n-lkp@intel.com/ Link: https://lore.kernel.org/bpf/20230412034647.3968143-1-andrii@kernel.org --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index c5ad39bbe9af..fe847ebfb731 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -28,6 +28,10 @@ struct bpf_testmod_struct_arg_2 { long b; }; +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in bpf_testmod.ko BTF"); + noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { bpf_testmod_test_struct_arg_result = a.a + a.b + b + c; @@ -175,6 +179,8 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) return a + b + c; } +__diag_pop(); + int bpf_testmod_fentry_ok; noinline ssize_t From 1ba83f505c53d35eda892ac7f108ef1189da6fa8 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Tue, 11 Apr 2023 15:00:25 +0200 Subject: [PATCH 258/260] xsk: Elide base_addr comparison in xp_unaligned_validate_desc Remove redundant (base_addr >= pool->addrs_cnt) comparison from the conditional. In particular, addr is computed as: addr = base_addr + offset ... 
where base_addr and offset are stored as 48-bit and 16-bit unsigned integers, respectively. The above sum cannot overflow u64 since base_addr has a maximum value of 0x0000ffffffffffff and offset has a maximum value of 0xffff (implying a maximum sum of 0x000100000000fffe). Since overflow is impossible, it follows that addr >= base_addr. Now if (base_addr >= pool->addrs_cnt), then clearly: addr >= base_addr >= pool->addrs_cnt Thus, (base_addr >= pool->addrs_cnt) implies (addr >= pool->addrs_cnt). Subsequently, the former comparison is unnecessary in the conditional since for any boolean expressions A and B, (A || B) && (A -> B) is equivalent to B. Signed-off-by: Kal Conley Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20230411130025.19704-1-kal.conley@dectris.com --- net/xdp/xsk_queue.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 76b574bffd4a..6d40a77fccbe 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -149,16 +149,12 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr, base_addr; - - base_addr = xp_unaligned_extract_addr(desc->addr); - addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); if (desc->len > pool->chunk_size) return false; - if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt || - addr + desc->len > pool->addrs_cnt || + if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) return false; From 23acb14af1914010dd0aae1bbb7fab28bf518b8e Mon Sep 17 00:00:00 2001 From: Hao Zeng Date: Tue, 11 Apr 2023 16:43:49 +0800 Subject: [PATCH 259/260] samples/bpf: Fix fout leak in hbm's run_bpf_prog Fix fout being fopen'ed but then not subsequently fclose'd. In the affected branch, fout is otherwise going out of scope. Signed-off-by: Hao Zeng Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230411084349.1999628-1-zenghao@kylinos.cn --- samples/bpf/hbm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index ff58ec43f56a..6448b7826107 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -315,6 +315,7 @@ static int run_bpf_prog(char *prog, int cg_id) fout = fopen(fname, "w"); fprintf(fout, "id:%d\n", cg_id); fprintf(fout, "ERROR: Could not lookup queue_stats\n"); + fclose(fout); } else if (stats_flag && qstats.lastPacketTime > qstats.firstPacketTime) { long long delta_us = (qstats.lastPacketTime - From 8c5c2a4898e3d6bad86e29d471e023c8a19ba799 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Apr 2023 20:28:42 +0200 Subject: [PATCH 260/260] bpf, sockmap: Revert buggy deadlock fix in the sockhash and sockmap syzbot reported a splat and bisected it to recent commit ed17aa92dc56 ("bpf, sockmap: fix deadlocks in the sockhash and sockmap"): [...] WARNING: CPU: 1 PID: 9280 at kernel/softirq.c:376 __local_bh_enable_ip+0xbe/0x130 kernel/softirq.c:376 Modules linked in: CPU: 1 PID: 9280 Comm: syz-executor.1 Not tainted 6.2.0-syzkaller-13249-gd319f344561d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023 RIP: 0010:__local_bh_enable_ip+0xbe/0x130 kernel/softirq.c:376 [...] 
Call Trace: spin_unlock_bh include/linux/spinlock.h:395 [inline] sock_map_del_link+0x2ea/0x510 net/core/sock_map.c:165 sock_map_unref+0xb0/0x1d0 net/core/sock_map.c:184 sock_hash_delete_elem+0x1ec/0x2a0 net/core/sock_map.c:945 map_delete_elem kernel/bpf/syscall.c:1536 [inline] __sys_bpf+0x2edc/0x53e0 kernel/bpf/syscall.c:5053 __do_sys_bpf kernel/bpf/syscall.c:5166 [inline] __se_sys_bpf kernel/bpf/syscall.c:5164 [inline] __x64_sys_bpf+0x79/0xc0 kernel/bpf/syscall.c:5164 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fe8f7c8c169 [...] Revert for now until we have a proper solution. Fixes: ed17aa92dc56 ("bpf, sockmap: fix deadlocks in the sockhash and sockmap") Reported-by: syzbot+49f6cef45247ff249498@syzkaller.appspotmail.com Cc: Hsin-Wei Hung Cc: Xin Liu Cc: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/000000000000f1db9605f939720e@google.com/ --- net/core/sock_map.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 66305b7bf8b7..7c189c2e2fbf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -414,9 +414,8 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, { struct sock *sk; int err = 0; - unsigned long flags; - raw_spin_lock_irqsave(&stab->lock, flags); + raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) sk = xchg(psk, NULL); @@ -426,7 +425,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, else err = -EINVAL; - raw_spin_unlock_irqrestore(&stab->lock, flags); + raw_spin_unlock_bh(&stab->lock); return err; } @@ -933,12 +932,11 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_bucket *bucket; struct bpf_shtab_elem *elem; int ret = -ENOENT; - unsigned long flags; hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); - raw_spin_lock_irqsave(&bucket->lock, flags); + raw_spin_lock_bh(&bucket->lock); elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size); if (elem) { hlist_del_rcu(&elem->node); @@ -946,7 +944,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) sock_hash_free_elem(htab, elem); ret = 0; } - raw_spin_unlock_irqrestore(&bucket->lock, flags); + raw_spin_unlock_bh(&bucket->lock); return ret; }
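
For readers unfamiliar with the distinction the revert restores: the _bh spinlock variants only disable bottom halves and must not be used on paths that can run with hard irqs disabled, since re-enabling BHs there trips the WARN_ON in __local_bh_enable_ip() shown in the splat; the irqsave variants save and restore the irq state and are safe from any context. A schematic sketch of the two forms (a demo lock only, not the sock_map code):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

/* Safe from process, softirq and hardirq context: saves the current
 * irq state and restores it on unlock. This is the form the revert
 * brings back.
 */
static void any_context_section(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	/* ... critical section ... */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}

/* Only disables bottom halves. Running this on a path where hard irqs
 * may already be disabled triggers the __local_bh_enable_ip() warning
 * from the syzbot report above.
 */
static void process_context_section(void)
{
	raw_spin_lock_bh(&demo_lock);
	/* ... critical section ... */
	raw_spin_unlock_bh(&demo_lock);
}
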