diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index bf1b85941452..d8a16c4bef7f 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -471,13 +471,50 @@ struct_ops callback arg. For example:
 		struct task_struct *acquired;
 
 		acquired = bpf_task_acquire(task);
+		if (acquired)
+			/*
+			 * In a typical program you'd do something like store
+			 * the task in a map, and the map will automatically
+			 * release it later. Here, we release it manually.
+			 */
+			bpf_task_release(acquired);
+		return 0;
+	}
+
+
+References acquired on ``struct task_struct *`` objects are RCU protected.
+Therefore, when in an RCU read region, you can obtain a pointer to a task
+embedded in a map value without having to acquire a reference:
+
+.. code-block:: c
+
+	#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+	private(TASK) static struct task_struct *global;
+
+	/**
+	 * A trivial example showing how to access a task stored
+	 * in a map using RCU.
+	 */
+	SEC("tp_btf/task_newtask")
+	int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
+	{
+		struct task_struct *local_copy;
+
+		bpf_rcu_read_lock();
+		local_copy = global;
+		if (local_copy)
+			/*
+			 * We could also pass local_copy to kfuncs or helper functions here,
+			 * as we're guaranteed that local_copy will be valid until we exit
+			 * the RCU read region below.
+			 */
+			bpf_printk("Global task %s is valid", local_copy->comm);
+		else
+			bpf_printk("No global task found");
+		bpf_rcu_read_unlock();
+
+		/* At this point we can no longer reference local_copy. */
 
-		/*
-		 * In a typical program you'd do something like store
-		 * the task in a map, and the map will automatically
-		 * release it later. Here, we release it manually.
-		 */
-		bpf_task_release(acquired);
 		return 0;
 	}
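The example above reads the task under RCU only. Since ``bpf_task_acquire()`` is now
``KF_RCU`` and may return NULL, a pointer observed inside the read region can also be
upgraded to a full reference that remains valid after ``bpf_rcu_read_unlock()``. A
minimal sketch of that pattern (illustrative only, not part of this patch; it reuses
the ``global`` variable from the example above):

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(task_rcu_upgrade_example, struct task_struct *task, u64 clone_flags)
	{
		struct task_struct *snapshot, *acquired = NULL;

		bpf_rcu_read_lock();
		snapshot = global;
		if (snapshot)
			/* May fail if the task has already dropped its last rcu_users reference. */
			acquired = bpf_task_acquire(snapshot);
		bpf_rcu_read_unlock();

		if (acquired) {
			/* The reference is valid beyond the RCU region and must be released. */
			bpf_printk("Acquired task %s", acquired->comm);
			bpf_task_release(acquired);
		}
		return 0;
	}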
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8980f6859443..6be16db9f188 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/poison.h>
 #include <linux/proc_ns.h>
+#include <linux/sched/task.h>
 #include <linux/security.h>
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
@@ -2013,73 +2014,8 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
  */
 __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
 {
-	return get_task_struct(p);
-}
-
-/**
- * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
- * acquired by this kfunc which is not stored in a map as a kptr, must be
- * released by calling bpf_task_release().
- * @p: The task on which a reference is being acquired.
- */
-__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
-{
-	/* For the time being this function returns NULL, as it's not currently
-	 * possible to safely acquire a reference to a task with RCU protection
-	 * using get_task_struct() and put_task_struct(). This is due to the
-	 * slightly odd mechanics of p->rcu_users, and how task RCU protection
-	 * works.
-	 *
-	 * A struct task_struct is refcounted by two different refcount_t
-	 * fields:
-	 *
-	 * 1. p->usage:     The "true" refcount field which tracks a task's
-	 *		    lifetime. The task is freed as soon as this
-	 *		    refcount drops to 0.
-	 *
-	 * 2. p->rcu_users: An "RCU users" refcount field which is statically
-	 *		    initialized to 2, and is co-located in a union with
-	 *		    a struct rcu_head field (p->rcu). p->rcu_users
-	 *		    essentially encapsulates a single p->usage
-	 *		    refcount, and when p->rcu_users goes to 0, an RCU
-	 *		    callback is scheduled on the struct rcu_head which
-	 *		    decrements the p->usage refcount.
-	 *
-	 * There are two important implications to this task refcounting logic
-	 * described above. The first is that
-	 * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
-	 * after the refcount goes to 0, the RCU callback being scheduled will
-	 * cause the memory backing the refcount to again be nonzero due to the
-	 * fields sharing a union. The other is that we can't rely on RCU to
-	 * guarantee that a task is valid in a BPF program. This is because a
-	 * task could have already transitioned to being in the TASK_DEAD
-	 * state, had its rcu_users refcount go to 0, and its rcu callback
-	 * invoked in which it drops its single p->usage reference. At this
-	 * point the task will be freed as soon as the last p->usage reference
-	 * goes to 0, without waiting for another RCU gp to elapse. The only
-	 * way that a BPF program can guarantee that a task is valid is in this
-	 * scenario is to hold a p->usage refcount itself.
-	 *
-	 * Until we're able to resolve this issue, either by pulling
-	 * p->rcu_users and p->rcu out of the union, or by getting rid of
-	 * p->usage and just using p->rcu_users for refcounting, we'll just
-	 * return NULL here.
-	 */
-	return NULL;
-}
-
-/**
- * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
- * kptr acquired by this kfunc which is not subsequently stored in a map, must
- * be released by calling bpf_task_release().
- * @pp: A pointer to a task kptr on which a reference is being acquired.
- */
-__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
-{
-	/* We must return NULL here until we have clarity on how to properly
-	 * leverage RCU for ensuring a task's lifetime. See the comment above
-	 * in bpf_task_acquire_not_zero() for more details.
-	 */
+	if (refcount_inc_not_zero(&p->rcu_users))
+		return p;
 	return NULL;
 }
 
@@ -2089,7 +2025,7 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
  */
 __bpf_kfunc void bpf_task_release(struct task_struct *p)
 {
-	put_task_struct(p);
+	put_task_struct_rcu_user(p);
 }
 
 #ifdef CONFIG_CGROUPS
@@ -2199,7 +2135,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
 	rcu_read_lock();
 	p = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (p)
-		bpf_task_acquire(p);
+		p = bpf_task_acquire(p);
 	rcu_read_unlock();
 
 	return p;
@@ -2371,9 +2307,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front)
 BTF_ID_FLAGS(func, bpf_list_push_back)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
 BTF_ID_FLAGS(func, bpf_rbtree_add)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 52738f9dcb15..92ae4e8ab87b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4600,6 +4600,7 @@ BTF_SET_START(rcu_protected_types)
 BTF_ID(struct, prog_test_ref_kfunc)
 BTF_ID(struct, cgroup)
 BTF_ID(struct, bpf_cpumask)
+BTF_ID(struct, task_struct)
 BTF_SET_END(rcu_protected_types)
 
 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
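On the release side, ``bpf_task_release()`` now has to drop the same ``p->rcu_users``
counter that ``bpf_task_acquire()`` increments, which is why it switches to
``put_task_struct_rcu_user()``. For context, that helper (from kernel/exit.c) behaves
roughly as sketched below; this is a paraphrase for illustration, not part of the patch:

.. code-block:: c

	void put_task_struct_rcu_user(struct task_struct *task)
	{
		/* When the last RCU user goes away, the final p->usage
		 * reference is dropped from an RCU callback, i.e. only after a
		 * grace period has elapsed. This is what makes it safe for
		 * bpf_task_acquire() to attempt refcount_inc_not_zero() on a
		 * task pointer that is merely RCU protected.
		 */
		if (refcount_dec_and_test(&task->rcu_users))
			call_rcu(&task->rcu, delayed_put_task_struct);
	}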
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index f79fa5bc9a8d..740d5f644b40 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -73,11 +73,12 @@ static const char * const success_tests[] = {
 	"test_task_acquire_release_current",
 	"test_task_acquire_leave_in_map",
 	"test_task_xchg_release",
-	"test_task_get_release",
+	"test_task_map_acquire_release",
 	"test_task_current_acquire_release",
 	"test_task_from_pid_arg",
 	"test_task_from_pid_current",
 	"test_task_from_pid_invalid",
+	"task_kfunc_acquire_trusted_walked",
 };
 
 void test_task_kfunc(void)
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 6a8c88e58df2..14fb01437fb8 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -23,7 +23,7 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
 void bpf_key_put(struct bpf_key *key) __ksym;
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
-struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
 void bpf_task_release(struct task_struct *p) __ksym;
 
 SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
@@ -159,13 +159,8 @@ int task_acquire(void *ctx)
 		goto out;
 
 	/* acquire a reference which can be used outside rcu read lock region */
-	gparent = bpf_task_acquire_not_zero(gparent);
+	gparent = bpf_task_acquire(gparent);
 	if (!gparent)
-		/* Until we resolve the issues with using task->rcu_users, we
-		 * expect bpf_task_acquire_not_zero() to return a NULL task.
-		 * See the comment at the definition of
-		 * bpf_task_acquire_not_zero() for more details.
-		 */
 		goto out;
 
 	(void)bpf_task_storage_get(&map_a, gparent, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index 4c2a4b0e3a25..41f2d44f49cb 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -21,9 +21,10 @@ struct hash_map {
 } __tasks_kfunc_map SEC(".maps");
 
 struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
-struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym;
 void bpf_task_release(struct task_struct *p) __ksym;
 struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
 
 static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
 {
@@ -60,6 +61,9 @@ static inline int tasks_kfunc_map_insert(struct task_struct *p)
 	}
 
 	acquired = bpf_task_acquire(p);
+	if (!acquired)
+		return -ENOENT;
+
 	old = bpf_kptr_xchg(&v->task, acquired);
 	if (old) {
 		bpf_task_release(old);
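With the change to ``tasks_kfunc_map_insert()`` above, the helper gains a new failure
mode: it returns ``-ENOENT`` when ``bpf_task_acquire()`` yields NULL. A hypothetical
caller (illustrative only, not part of the patch) treats that like any other insertion
failure:

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(map_insert_example, struct task_struct *task, u64 clone_flags)
	{
		/* Fails with -ENOENT if the task could not be acquired, or
		 * with the bpf_map_update_elem() error code if no slot could
		 * be reserved for it.
		 */
		if (tasks_kfunc_map_insert(task))
			return 0;

		/* On success the map owns a reference to the task; it is
		 * dropped automatically when the kptr is overwritten or the
		 * map entry is deleted.
		 */
		return 0;
	}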
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index 2c374a7ffece..dcdea3127086 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -40,6 +40,9 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f
 
 	/* Can't invoke bpf_task_acquire() on an untrusted pointer. */
 	acquired = bpf_task_acquire(v->task);
+	if (!acquired)
+		return 0;
+
 	bpf_task_release(acquired);
 
 	return 0;
@@ -53,38 +56,49 @@ int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
 
 	/* Can't invoke bpf_task_acquire() on a random frame pointer. */
 	acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+	if (!acquired)
+		return 0;
+
 	bpf_task_release(acquired);
 
 	return 0;
 }
 
 SEC("kretprobe/free_task")
-__failure __msg("reg type unsupported for arg#0 function")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
 int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
 
+	/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
 	acquired = bpf_task_acquire(task);
-	/* Can't release a bpf_task_acquire()'d task without a NULL check. */
+	if (!acquired)
+		return 0;
 	bpf_task_release(acquired);
 
 	return 0;
 }
 
-SEC("tp_btf/task_newtask")
-__failure __msg("R1 must be referenced or trusted")
-int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
 
-	/* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */
-	acquired = bpf_task_acquire(task->group_leader);
-	bpf_task_release(acquired);
+	bpf_rcu_read_lock();
+	if (!task) {
+		bpf_rcu_read_unlock();
+		return 0;
+	}
+	/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+	acquired = bpf_task_acquire(task);
+	if (acquired)
+		bpf_task_release(acquired);
+	bpf_rcu_read_unlock();
 
 	return 0;
 }
 
-
 SEC("tp_btf/task_newtask")
 __failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
@@ -114,57 +128,6 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_
 	return 0;
 }
 
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
-{
-	struct task_struct *kptr;
-
-	/* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */
-	kptr = bpf_task_kptr_get(&task);
-	if (!kptr)
-		return 0;
-
-	bpf_task_release(kptr);
-
-	return 0;
-}
-
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
-{
-	struct task_struct *kptr, *acquired;
-
-	acquired = bpf_task_acquire(task);
-
-	/* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */
-	kptr = bpf_task_kptr_get(&acquired);
-	bpf_task_release(acquired);
-	if (!kptr)
-		return 0;
-
-	bpf_task_release(kptr);
-
-	return 0;
-}
-
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
-{
-	struct task_struct *kptr;
-
-	/* Cannot use bpf_task_kptr_get() on a NULL pointer. */
-	kptr = bpf_task_kptr_get(NULL);
-	if (!kptr)
-		return 0;
-
-	bpf_task_release(kptr);
-
-	return 0;
-}
-
 SEC("tp_btf/task_newtask")
 __failure __msg("Unreleased reference")
 int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
@@ -186,21 +149,14 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla
 }
 
 SEC("tp_btf/task_newtask")
-__failure __msg("Unreleased reference")
-int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
 {
-	struct task_struct *kptr;
-	struct __tasks_kfunc_map_value *v;
+	struct task_struct *acquired;
 
-	v = insert_lookup_task(task);
-	if (!v)
-		return 0;
-
-	kptr = bpf_task_kptr_get(&v->task);
-	if (!kptr)
-		return 0;
-
-	/* Kptr acquired above is never released. */
+	acquired = bpf_task_acquire(task);
+	/* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
+	bpf_task_release(acquired);
 
 	return 0;
 }
@@ -256,12 +212,13 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
 		return -ENOENT;
 
 	acquired = bpf_task_acquire(task);
+	if (!acquired)
+		return -EEXIST;
 
 	old = bpf_kptr_xchg(&v->task, acquired);
 
 	/* old cannot be passed to bpf_task_release() without a NULL check. */
 	bpf_task_release(old);
-	bpf_task_release(old);
 
 	return 0;
 }
@@ -298,6 +255,9 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
 
 	/* the argument of lsm task_free hook is untrusted. */
 	acquired = bpf_task_acquire(task);
+	if (!acquired)
+		return 0;
+
 	bpf_task_release(acquired);
 	return 0;
 }
@@ -337,3 +297,30 @@ int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool
 	bpf_strncmp(task->comm, 16, "foo");
 	return 0;
 }
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
+{
+	struct task_struct *local;
+	struct __tasks_kfunc_map_value *v;
+
+	if (tasks_kfunc_map_insert(task))
+		return 0;
+
+	v = tasks_kfunc_map_value_lookup(task);
+	if (!v)
+		return 0;
+
+	bpf_rcu_read_lock();
+	local = v->task;
+	if (!local) {
+		bpf_rcu_read_unlock();
+		return 0;
+	}
+	/* Can't release a kptr that's still stored in a map. */
+	bpf_task_release(local);
+	bpf_rcu_read_unlock();
+
+	return 0;
+}
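``task_kfunc_release_in_map`` above shows the pattern the verifier rejects: releasing a
kptr that the map still owns. The accepted counterpart is to take ownership out of the
map with ``bpf_kptr_xchg()`` first, as the existing ``test_task_xchg_release`` success
test does. A sketch of that idiom (illustrative only, assuming the helpers from
task_kfunc_common.h):

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(release_after_xchg_example, struct task_struct *task, u64 clone_flags)
	{
		struct task_struct *owned;
		struct __tasks_kfunc_map_value *v;

		if (tasks_kfunc_map_insert(task))
			return 0;

		v = tasks_kfunc_map_value_lookup(task);
		if (!v)
			return 0;

		/* The xchg transfers ownership out of the map: 'owned' is a
		 * referenced kptr and the map slot is now NULL, so releasing
		 * it is legal.
		 */
		owned = bpf_kptr_xchg(&v->task, NULL);
		if (owned)
			bpf_task_release(owned);

		return 0;
	}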
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
index cfa7f12b84e8..b09371bba204 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -47,7 +47,10 @@ static int test_acquire_release(struct task_struct *task)
 	}
 
 	acquired = bpf_task_acquire(task);
-	bpf_task_release(acquired);
+	if (acquired)
+		bpf_task_release(acquired);
+	else
+		err = 6;
 
 	return 0;
 }
@@ -119,7 +122,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
-int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr;
 	struct __tasks_kfunc_map_value *v;
@@ -140,18 +143,18 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
 		return 0;
 	}
 
-	kptr = bpf_task_kptr_get(&v->task);
-	if (kptr) {
-		/* Until we resolve the issues with using task->rcu_users, we
-		 * expect bpf_task_kptr_get() to return a NULL task. See the
-		 * comment at the definition of bpf_task_acquire_not_zero() for
-		 * more details.
-		 */
-		bpf_task_release(kptr);
+	bpf_rcu_read_lock();
+	kptr = v->task;
+	if (!kptr) {
 		err = 3;
-		return 0;
+	} else {
+		kptr = bpf_task_acquire(kptr);
+		if (!kptr)
+			err = 4;
+		else
+			bpf_task_release(kptr);
 	}
-
+	bpf_rcu_read_unlock();
 	return 0;
 }
 
@@ -166,7 +169,10 @@ int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 cl
 
 	current = bpf_get_current_task_btf();
 	acquired = bpf_task_acquire(current);
-	bpf_task_release(acquired);
+	if (acquired)
+		bpf_task_release(acquired);
+	else
+		err = 1;
 
 	return 0;
 }
@@ -241,3 +247,19 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla
 
 	return 0;
 }
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+{
+	struct task_struct *acquired;
+
+	/* task->group_leader is listed as a trusted, non-NULL field of task struct. */
+	acquired = bpf_task_acquire(task->group_leader);
+	if (acquired)
+		bpf_task_release(acquired);
+	else
+		err = 1;
+
+
+	return 0;
+}
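Taken together, the selftest updates all enforce a single idiom: because
``bpf_task_acquire()`` is now ``KF_RET_NULL``, every call site must NULL-check the
result before releasing or storing it. A condensed sketch of the pattern (illustrative
only, not from the patch):

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(acquire_idiom, struct task_struct *task, u64 clone_flags)
	{
		struct task_struct *acquired;

		acquired = bpf_task_acquire(task);
		/* KF_RET_NULL: the verifier rejects bpf_task_release() on a
		 * possibly-NULL pointer, so this check is mandatory.
		 */
		if (!acquired)
			return 0;

		bpf_task_release(acquired);
		return 0;
	}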