bpf: Add alloc/xchg/direct_access support for local percpu kptr

Add two new kfunc's, bpf_percpu_obj_new_impl() and
bpf_percpu_obj_drop_impl(), to allocate a percpu obj.
Two functions are very similar to bpf_obj_new_impl()
and bpf_obj_drop_impl(). The major difference is related
to percpu handling.

    bpf_rcu_read_lock()
    struct val_t __percpu_kptr *v = map_val->percpu_data;
    ...
    bpf_rcu_read_unlock()

For a percpu data map_val like above 'v', the reg->type
is set as
	PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU
if inside rcu critical section.

MEM_RCU marking here is similar to NON_OWN_REF as 'v'
is not a owning reference. But NON_OWN_REF is
trusted and typically inside the spinlock while
MEM_RCU is under rcu read lock. RCU is preferred here
since percpu data structures mean potential concurrent
access into its contents.

Also, bpf_percpu_obj_new_impl() is restricted such that
no pointers or special fields are allowed. Therefore,
the bpf_list_head and bpf_rb_root will not be supported
in this patch set to avoid potential memory leak issue
due to racing between bpf_obj_free_fields() and another
bpf_kptr_xchg() moving an allocated object to
bpf_list_head and bpf_rb_root.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20230827152744.1996739-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Yonghong Song 2023-08-27 08:27:44 -07:00 committed by Alexei Starovoitov
parent 55db92f42f
commit 36d8bdf75a
2 changed files with 106 additions and 22 deletions

View File

@ -1902,6 +1902,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}
__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
u64 size = local_type_id__k;
/* The verifier has ensured that meta__ign must be NULL */
return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}
/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
@ -1930,6 +1938,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}
__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
{
/* The verifier has ensured that meta__ign must be NULL */
bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
}
__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
@ -2442,7 +2456,9 @@ BTF_SET8_START(generic_btf_ids)
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)

View File

@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta {
/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
* generally to pass info about user-defined local kptr types to later
* verification logic
* bpf_obj_drop
* bpf_obj_drop/bpf_percpu_obj_drop
* Record the local kptr type to be drop'd
* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
* Record the local kptr type to be refcount_incr'd and use
@ -5001,6 +5001,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
perm_flags |= PTR_UNTRUSTED;
} else {
perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
if (kptr_field->type == BPF_KPTR_PERCPU)
perm_flags |= MEM_PERCPU;
}
if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@ -5044,7 +5046,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
*/
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
kptr_field->kptr.btf, kptr_field->kptr.btf_id,
kptr_field->type == BPF_KPTR_REF))
kptr_field->type != BPF_KPTR_UNREF))
goto bad_type;
return 0;
bad_type:
@ -5088,7 +5090,18 @@ static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
return field->type == BPF_KPTR_PERCPU ||
(field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
}
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
{
if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
if (kptr_field->type != BPF_KPTR_PERCPU)
return PTR_MAYBE_NULL | MEM_RCU;
return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
}
return PTR_MAYBE_NULL | PTR_UNTRUSTED;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@ -5114,7 +5127,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We only allow loading referenced kptr, since it will be marked as
* untrusted, similar to unreferenced kptr.
*/
if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
if (class != BPF_LDX &&
(kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
verbose(env, "store to referenced kptr disallowed\n");
return -EACCES;
}
@ -5125,10 +5139,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
kptr_field->kptr.btf_id,
rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
PTR_MAYBE_NULL | MEM_RCU :
PTR_MAYBE_NULL | PTR_UNTRUSTED);
kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
@ -5182,6 +5193,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
switch (field->type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
if (src != ACCESS_DIRECT) {
verbose(env, "kptr cannot be accessed indirectly by helper\n");
return -EACCES;
@ -7320,7 +7332,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
}
if (kptr_field->type != BPF_KPTR_REF) {
if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
return -EACCES;
}
@ -7831,8 +7843,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
type &= ~MEM_ALLOC;
type &= ~MEM_PERCPU;
}
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
@ -7915,6 +7929,7 @@ found:
break;
}
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
meta->func_id != BPF_FUNC_kptr_xchg) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
@ -9882,8 +9897,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (func_id == BPF_FUNC_kptr_xchg) {
ret_btf = meta.kptr_field->kptr.btf;
ret_btf_id = meta.kptr_field->kptr.btf_id;
if (!btf_is_kernel(ret_btf))
if (!btf_is_kernel(ret_btf)) {
regs[BPF_REG_0].type |= MEM_ALLOC;
if (meta.kptr_field->type == BPF_KPTR_PERCPU)
regs[BPF_REG_0].type |= MEM_PERCPU;
}
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
verbose(env, "verifier internal error:");
@ -10268,6 +10286,8 @@ enum special_kfunc_type {
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
KF_bpf_percpu_obj_new_impl,
KF_bpf_percpu_obj_drop_impl,
};
BTF_SET_START(special_kfunc_set)
@ -10288,6 +10308,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@ -10310,6 +10332,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@ -11004,7 +11028,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
}
break;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
return -EINVAL;
}
} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
return -EINVAL;
}
} else {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
}
@ -11012,8 +11046,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "allocated object must be referenced\n");
return -EINVAL;
}
if (meta->btf == btf_vmlinux &&
meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
if (meta->btf == btf_vmlinux) {
meta->arg_btf = reg->btf;
meta->arg_btf_id = reg->btf_id;
}
@ -11413,6 +11446,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Only exception is bpf_obj_new_impl */
if (meta.btf != btf_vmlinux ||
(meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
return -EINVAL;
@ -11426,11 +11460,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *struct_meta;
struct btf *ret_btf;
u32 ret_btf_id;
if (unlikely(!bpf_global_ma_set))
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
return -ENOMEM;
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
return -ENOMEM;
if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
@ -11443,24 +11482,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* This may be NULL due to user not supplying a BTF */
if (!ret_btf) {
verbose(env, "bpf_obj_new requires prog BTF\n");
verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
return -EINVAL;
}
ret_t = btf_type_by_id(ret_btf, ret_btf_id);
if (!ret_t || !__btf_type_is_struct(ret_t)) {
verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
return -EINVAL;
}
struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
return -EINVAL;
}
if (struct_meta) {
verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
return -EINVAL;
}
}
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
regs[BPF_REG_0].type |= MEM_PERCPU;
insn_aux->obj_new_size = ret_t->size;
insn_aux->kptr_struct_meta =
btf_find_struct_meta(ret_btf, ret_btf_id);
insn_aux->kptr_struct_meta = struct_meta;
} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
@ -11597,7 +11650,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
insn_aux->kptr_struct_meta =
btf_find_struct_meta(meta.arg_btf,
meta.arg_btf_id);
@ -18266,21 +18320,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)
return 0;
if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
insn_idx);
return -EFAULT;
}
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
insn_buf[1] = addr[0];
insn_buf[2] = addr[1];
insn_buf[3] = *insn;
*cnt = 4;
} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
insn_idx);
return -EFAULT;
}
if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
!kptr_struct_meta) {
verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",