This patch modifies bpf_rbtree_remove to account for possible failure
due to the input rb_node already not being in any collection. The
function can now return NULL, and does when the aforementioned scenario
occurs. As before, on successful removal an owning reference to the
removed node is returned.

Adding KF_RET_NULL to bpf_rbtree_remove's kfunc flags - now
KF_RET_NULL | KF_ACQUIRE - provides the desired verifier semantics:

  * retval must be checked for NULL before use
  * if NULL, retval's ref_obj_id is released
  * retval is a "maybe acquired" owning ref, not a non-owning ref, so it
    will live past the end of the critical section (bpf_spin_unlock),
    and thus can be checked for NULL after the end of the CS

BPF programs must add checks
============================

This does change bpf_rbtree_remove's verifier behavior. BPF program
writers will need to add NULL checks to their programs, but the
resulting UX looks natural:

  bpf_spin_lock(&glock);

  n = bpf_rbtree_first(&ghead);
  if (!n) { /* ... */ }

  res = bpf_rbtree_remove(&ghead, &n->node);

  bpf_spin_unlock(&glock);

  if (!res) /* Newly-added check after this patch */
          return 1;

  n = container_of(res, /* ... */);
  /* Do something else with n */
  bpf_obj_drop(n);
  return 0;

The "if (!res)" check above is the only addition necessary for the above
program to pass verification after this patch.

bpf_rbtree_remove no longer clobbers non-owning refs
====================================================

An issue arises when bpf_rbtree_remove fails, though. Consider this
example:

  struct node_data {
          long key;
          struct bpf_list_node l;
          struct bpf_rb_node r;
          struct bpf_refcount ref;
  };

  long failed_sum;

  void bpf_prog()
  {
          struct node_data *n = bpf_obj_new(/* ... */);
          struct bpf_rb_node *res;

          n->key = 10;

          bpf_spin_lock(&glock);

          bpf_list_push_back(&some_list, &n->l); /* n is now a non-owning ref */
          res = bpf_rbtree_remove(&some_tree, &n->r);
          if (!res)
                  failed_sum += n->key; /* not possible */

          bpf_spin_unlock(&glock);
          /* if (res) { do something useful and drop } ... */
  }

The bpf_rbtree_remove in this example will always fail. Similarly to
bpf_spin_unlock, bpf_rbtree_remove is a non-owning reference
invalidation point: the verifier clobbers all non-owning refs after a
bpf_rbtree_remove call, so the "failed_sum += n->key" line will fail
verification, and in fact there is no good way to get information about
the node which failed to be removed after the invalidation.

This patch removes non-owning reference invalidation from
bpf_rbtree_remove to allow the above usecase to pass verification. The
logic for why this is now possible is as follows:

Before this series, bpf_rbtree_remove couldn't fail and thus assumed
that its input, a non-owning reference, was in the tree. But it's easy
to construct an example where two non-owning references pointing to the
same underlying memory are acquired and passed to rbtree_remove one
after another (see rbtree_api_release_aliasing in
selftests/bpf/progs/rbtree_fail.c). So it was necessary to clobber
non-owning refs to prevent this case and, more generally, to enforce the
"non-owning ref is definitely in some collection" invariant. This series
removes that invariant, and the failure / runtime checking added in this
patch provides a clean way to deal with the aliasing issue - just fail
to remove.

Because the aliasing issue prevented by clobbering non-owning refs is no
longer an issue, this patch removes the invalidate_non_owning_refs call
from verifier handling of bpf_rbtree_remove.
Note that bpf_spin_unlock - the other caller of
invalidate_non_owning_refs - clobbers non-owning refs for a different
reason, so its clobbering behavior remains unchanged.

No BPF program changes are necessary for programs to remain valid as a
result of this clobbering change. A valid program before this patch
passed verification with its non-owning refs having shorter (or equal)
lifetimes due to more aggressive clobbering.

Also, update existing tests to check bpf_rbtree_remove retval for NULL
where necessary, and move rbtree_api_release_aliasing from
progs/rbtree_fail.c to progs/rbtree.c since it's now expected to pass
verification.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-8-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
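For reference, the kfunc-flag change described above amounts to registering
bpf_rbtree_remove with both flags in the kernel's kfunc ID set. A minimal
sketch of such a registration using the BTF_SET8/BTF_ID_FLAGS macros is
shown below; the set name and the neighboring bpf_rbtree_first entry are
illustrative context, not quoted from this patch's diff:

  /* Illustrative registration sketch: bpf_rbtree_remove now returns a
   * "maybe acquired" owning reference, hence KF_ACQUIRE | KF_RET_NULL.
   * The set name here is an example, not the kernel's actual identifier.
   */
  BTF_SET8_START(example_graph_kfunc_ids)
  BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
  BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
  BTF_SET8_END(example_graph_kfunc_ids)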
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_experimental.h"

struct node_data {
	long key;
	long data;
	struct bpf_rb_node node;
};

long less_callback_ran = -1;
long removed_key = -1;
long first_data[2] = {-1, -1};

#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_rb_root groot __contains(node_data, node);

static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
{
	struct node_data *node_a;
	struct node_data *node_b;

	node_a = container_of(a, struct node_data, node);
	node_b = container_of(b, struct node_data, node);
	less_callback_ran = 1;

	return node_a->key < node_b->key;
}

static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
{
	struct node_data *n, *m;

	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 1;
	n->key = 5;

	m = bpf_obj_new(typeof(*m));
	if (!m) {
		bpf_obj_drop(n);
		return 2;
	}
	m->key = 1;

	bpf_spin_lock(&glock);
	bpf_rbtree_add(&groot, &n->node, less);
	bpf_rbtree_add(&groot, &m->node, less);
	bpf_spin_unlock(&glock);

	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 3;
	n->key = 3;

	bpf_spin_lock(&glock);
	bpf_rbtree_add(&groot, &n->node, less);
	bpf_spin_unlock(&glock);
	return 0;
}

SEC("tc")
long rbtree_add_nodes(void *ctx)
{
	return __add_three(&groot, &glock);
}

SEC("tc")
long rbtree_add_and_remove(void *ctx)
{
	struct bpf_rb_node *res = NULL;
	struct node_data *n, *m = NULL;

	n = bpf_obj_new(typeof(*n));
	if (!n)
		goto err_out;
	n->key = 5;

	m = bpf_obj_new(typeof(*m));
	if (!m)
		goto err_out;
	m->key = 3;

	bpf_spin_lock(&glock);
	bpf_rbtree_add(&groot, &n->node, less);
	bpf_rbtree_add(&groot, &m->node, less);
	res = bpf_rbtree_remove(&groot, &n->node);
	bpf_spin_unlock(&glock);

	if (!res)
		return 1;

	n = container_of(res, struct node_data, node);
	removed_key = n->key;
	bpf_obj_drop(n);

	return 0;
err_out:
	if (n)
		bpf_obj_drop(n);
	if (m)
		bpf_obj_drop(m);
	return 1;
}

SEC("tc")
long rbtree_first_and_remove(void *ctx)
{
	struct bpf_rb_node *res = NULL;
	struct node_data *n, *m, *o;

	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 1;
	n->key = 3;
	n->data = 4;

	m = bpf_obj_new(typeof(*m));
	if (!m)
		goto err_out;
	m->key = 5;
	m->data = 6;

	o = bpf_obj_new(typeof(*o));
	if (!o)
		goto err_out;
	o->key = 1;
	o->data = 2;

	bpf_spin_lock(&glock);
	bpf_rbtree_add(&groot, &n->node, less);
	bpf_rbtree_add(&groot, &m->node, less);
	bpf_rbtree_add(&groot, &o->node, less);

	res = bpf_rbtree_first(&groot);
	if (!res) {
		bpf_spin_unlock(&glock);
		return 2;
	}

	o = container_of(res, struct node_data, node);
	first_data[0] = o->data;

	res = bpf_rbtree_remove(&groot, &o->node);
	bpf_spin_unlock(&glock);

	if (!res)
		return 5;

	o = container_of(res, struct node_data, node);
	removed_key = o->key;
	bpf_obj_drop(o);

	bpf_spin_lock(&glock);
	res = bpf_rbtree_first(&groot);
	if (!res) {
		bpf_spin_unlock(&glock);
		return 3;
	}

	o = container_of(res, struct node_data, node);
	first_data[1] = o->data;
	bpf_spin_unlock(&glock);

	return 0;
err_out:
	if (n)
		bpf_obj_drop(n);
	if (m)
		bpf_obj_drop(m);
	return 1;
}

SEC("tc")
long rbtree_api_release_aliasing(void *ctx)
{
	struct node_data *n, *m, *o;
	struct bpf_rb_node *res, *res2;

	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 1;
	n->key = 41;
	n->data = 42;

	bpf_spin_lock(&glock);
	bpf_rbtree_add(&groot, &n->node, less);
	bpf_spin_unlock(&glock);

	bpf_spin_lock(&glock);

	/* m and o point to the same node,
	 * but verifier doesn't know this
	 */
	res = bpf_rbtree_first(&groot);
	if (!res)
		goto err_out;
	o = container_of(res, struct node_data, node);

	res = bpf_rbtree_first(&groot);
	if (!res)
		goto err_out;
	m = container_of(res, struct node_data, node);

	res = bpf_rbtree_remove(&groot, &m->node);
	/* Retval of previous remove returns an owning reference to m,
	 * which is the same node non-owning ref o is pointing at.
	 * We can safely try to remove o as the second rbtree_remove will
	 * return NULL since the node isn't in a tree.
	 *
	 * Previously we relied on the verifier type system + rbtree_remove
	 * invalidating non-owning refs to ensure that rbtree_remove couldn't
	 * fail, but now rbtree_remove does runtime checking so we no longer
	 * invalidate non-owning refs after remove.
	 */
	res2 = bpf_rbtree_remove(&groot, &o->node);

	bpf_spin_unlock(&glock);

	if (res) {
		o = container_of(res, struct node_data, node);
		first_data[0] = o->data;
		bpf_obj_drop(o);
	}
	if (res2) {
		/* The second remove fails, so res2 is null and this doesn't
		 * execute
		 */
		m = container_of(res2, struct node_data, node);
		first_data[1] = m->data;
		bpf_obj_drop(m);
	}
	return 0;

err_out:
	bpf_spin_unlock(&glock);
	return 1;
}

char _license[] SEC("license") = "GPL";
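The programs above are driven from a userspace selftest harness. Below is a
rough, illustrative sketch of how rbtree_add_and_remove could be run through
a libbpf skeleton and its result checked; the skeleton header name, the
64-byte dummy packet, and the expected removed_key value of 5 are assumptions
derived from the program's logic, not taken from the actual prog_tests code:

/* Illustrative userspace runner sketch (not the real selftest harness).
 * Loads the skeleton, runs the tc program once via BPF_PROG_RUN, and
 * reads back the globals the BPF program wrote.
 */
#include <stdio.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "rbtree.skel.h"	/* assumed: generated with bpftool gen skeleton */

int main(void)
{
	struct rbtree *skel;
	char pkt[64] = {};	/* dummy frame; tc test runs need some input data */
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
		.repeat = 1,
	);
	int err;

	skel = rbtree__open_and_load();
	if (!skel)
		return 1;

	err = bpf_prog_test_run_opts(
		bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts);
	if (err || opts.retval != 0)
		fprintf(stderr, "run failed: err=%d retval=%d\n", err, opts.retval);
	else
		printf("removed_key = %ld\n", skel->data->removed_key); /* expect 5 */

	rbtree__destroy(skel);
	return err;
}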