bpf-for-netdev
-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZo+YhgAKCRDbK58LschI
gyutAP9h32BTAQQ6p4AxyXKSQvrQU8Ha0kQLhsBDPRjh/O1QqQD9F26ohaTdJTMZ
PduJdIl521nte7Lk5xHQgkYZ7FnMygM=
=sIFy
-----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-07-11

The following pull-request contains BPF updates for your *net* tree.

We've added 4 non-merge commits during the last 2 day(s) which contain
a total of 4 files changed, 262 insertions(+), 19 deletions(-).

The main changes are:

1) Fixes for a BPF timer lockup and a use-after-free scenario when
   timers are used concurrently, from Kumar Kartikeya Dwivedi.

2) Fix the argument order in the call to bpf_map_kvcalloc() which
   could otherwise lead to a compilation error,
   from Mohammad Shehar Yaar Tausif.

bpf-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add timer lockup selftest
  bpf: Defer work in bpf_timer_cancel_and_free
  bpf: Fail bpf_timer_cancel when callback is being cancelled
  bpf: fix order of args in call to bpf_map_kvcalloc
====================

Link: https://patch.msgid.link/20240711084016.25757-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit a819ff0cf9
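With these patches, bpf_timer_cancel() can return -EDEADLK not only when a callback tries to cancel its own timer but also when two timer callbacks race to cancel each other (see the kernel/bpf/helpers.c hunks below). The following is a minimal, hypothetical BPF-side sketch, not part of this series, of a program that checks that return value instead of assuming cancellation succeeded; the map name timer_map, the program arm_timer, and the global cancel_err are made up for illustration, while the helpers used are the existing bpf_timer API.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative only: handle -EDEADLK from bpf_timer_cancel(). */
#include <linux/bpf.h>
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

struct elem {
        struct bpf_timer t;
};

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct elem);
} timer_map SEC(".maps");       /* hypothetical map */

int cancel_err;                 /* exposed so user space can inspect the result */

static int my_timer_cb(void *map, int *key, struct elem *val)
{
        struct bpf_timer *peer;
        int zero = 0;
        long err;

        peer = bpf_map_lookup_elem(&timer_map, &zero);
        if (!peer)
                return 0;

        err = bpf_timer_cancel(peer);
        if (err == -EDEADLK) {
                /* Cancelling here would wait on a callback that is (or may
                 * be) waiting on us; in this one-map sketch it is the
                 * self-cancel case. Record it and back off instead of
                 * waiting synchronously.
                 */
                cancel_err = err;
        }
        return 0;
}

SEC("tc")
int arm_timer(void *ctx)
{
        struct bpf_timer *t;
        int zero = 0;

        t = bpf_map_lookup_elem(&timer_map, &zero);
        if (!t)
                return 0;

        bpf_timer_init(t, &timer_map, CLOCK_BOOTTIME);
        bpf_timer_set_callback(t, my_timer_cb);
        bpf_timer_start(t, 1, 0);
        return 0;
}

The actual regression test for the cross-CPU case is the new timer_lockup selftest at the end of this diff.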
kernel/bpf/bpf_local_storage.c
@@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
         nbuckets = max_t(u32, 2, nbuckets);
         smap->bucket_log = ilog2(nbuckets);
 
-        smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
-                                         nbuckets, GFP_USER | __GFP_NOWARN);
+        smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
+                                         sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
         if (!smap->buckets) {
                 err = -ENOMEM;
                 goto free_smap;
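Note on the fix above: bpf_map_kvcalloc() takes its sizing arguments in the calloc()/kvcalloc() order, element count first and element size second, after the map pointer, so the old call was passing sizeof(*smap->buckets) as the count. A tiny user-space sketch of the same convention (illustration only, plain calloc(), hypothetical names):

#include <stdlib.h>

struct bucket { void *head; };

static struct bucket *alloc_buckets(unsigned int nbuckets)
{
        /* Count first, element size second, mirroring the corrected call:
         * bpf_map_kvcalloc(&smap->map, nbuckets, sizeof(*smap->buckets), ...)
         */
        return calloc(nbuckets, sizeof(struct bucket));
}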
kernel/bpf/helpers.c
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
         struct bpf_prog *prog;
         void __rcu *callback_fn;
         void *value;
-        struct rcu_head rcu;
+        union {
+                struct rcu_head rcu;
+                struct work_struct delete_work;
+        };
         u64 flags;
 };
 
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
 struct bpf_hrtimer {
         struct bpf_async_cb cb;
         struct hrtimer timer;
+        atomic_t cancelling;
 };
 
 struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
         kfree_rcu(w, cb.rcu);
 }
 
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+        struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+        /* Cancel the timer and wait for callback to complete if it was running.
+         * If hrtimer_cancel() can be safely called it's safe to call
+         * kfree_rcu(t) right after for both preallocated and non-preallocated
+         * maps. The async->cb = NULL was already done and no code path can see
+         * address 't' anymore. Timer if armed for existing bpf_hrtimer before
+         * bpf_timer_cancel_and_free will have been cancelled.
+         */
+        hrtimer_cancel(&t->timer);
+        kfree_rcu(t, cb.rcu);
+}
+
 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
                             enum bpf_async_type type)
 {
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
                 clockid = flags & (MAX_CLOCKS - 1);
                 t = (struct bpf_hrtimer *)cb;
 
+                atomic_set(&t->cancelling, 0);
+                INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
                 hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
                 t->timer.function = bpf_timer_cb;
                 cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
 
 BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
 {
-        struct bpf_hrtimer *t;
+        struct bpf_hrtimer *t, *cur_t;
+        bool inc = false;
         int ret = 0;
 
         if (in_nmi())
@@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
                 ret = -EINVAL;
                 goto out;
         }
-        if (this_cpu_read(hrtimer_running) == t) {
+
+        cur_t = this_cpu_read(hrtimer_running);
+        if (cur_t == t) {
                 /* If bpf callback_fn is trying to bpf_timer_cancel()
                  * its own timer the hrtimer_cancel() will deadlock
-                 * since it waits for callback_fn to finish
+                 * since it waits for callback_fn to finish.
                  */
                 ret = -EDEADLK;
                 goto out;
         }
+
+        /* Only account in-flight cancellations when invoked from a timer
+         * callback, since we want to avoid waiting only if other _callbacks_
+         * are waiting on us, to avoid introducing lockups. Non-callback paths
+         * are ok, since nobody would synchronously wait for their completion.
+         */
+        if (!cur_t)
+                goto drop;
+        atomic_inc(&t->cancelling);
+        /* Need full barrier after relaxed atomic_inc */
+        smp_mb__after_atomic();
+        inc = true;
+        if (atomic_read(&cur_t->cancelling)) {
+                /* We're cancelling timer t, while some other timer callback is
+                 * attempting to cancel us. In such a case, it might be possible
+                 * that timer t belongs to the other callback, or some other
+                 * callback waiting upon it (creating transitive dependencies
+                 * upon us), and we will enter a deadlock if we continue
+                 * cancelling and waiting for it synchronously, since it might
+                 * do the same. Bail!
+                 */
+                ret = -EDEADLK;
+                goto out;
+        }
+drop:
         drop_prog_refcnt(&t->cb);
 out:
         __bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1467,6 +1516,8 @@ out:
          * if it was running.
          */
         ret = ret ?: hrtimer_cancel(&t->timer);
+        if (inc)
+                atomic_dec(&t->cancelling);
         rcu_read_unlock();
         return ret;
 }
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
 
         if (!t)
                 return;
-        /* Cancel the timer and wait for callback to complete if it was running.
-         * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
-         * right after for both preallocated and non-preallocated maps.
-         * The async->cb = NULL was already done and no code path can
-         * see address 't' anymore.
-         *
-         * Check that bpf_map_delete/update_elem() wasn't called from timer
-         * callback_fn. In such case don't call hrtimer_cancel() (since it will
-         * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
-         * return -1). Though callback_fn is still running on this cpu it's
+        /* We check that bpf_map_delete/update_elem() was called from timer
+         * callback_fn. In such case we don't call hrtimer_cancel() (since it
+         * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+         * just return -1). Though callback_fn is still running on this cpu it's
          * safe to do kfree(t) because bpf_timer_cb() read everything it needed
          * from 't'. The bpf subprog callback_fn won't be able to access 't',
          * since async->cb = NULL was already done. The timer will be
          * effectively cancelled because bpf_timer_cb() will return
          * HRTIMER_NORESTART.
+         *
+         * However, it is possible the timer callback_fn calling us armed the
+         * timer _before_ calling us, such that failing to cancel it here will
+         * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+         * Therefore, we _need_ to cancel any outstanding timers before we do
+         * kfree_rcu, even though no more timers can be armed.
+         *
+         * Moreover, we need to schedule work even if timer does not belong to
+         * the calling callback_fn, as on two different CPUs, we can end up in a
+         * situation where both sides run in parallel, try to cancel one
+         * another, and we end up waiting on both sides in hrtimer_cancel
+         * without making forward progress, since timer1 depends on time2
+         * callback to finish, and vice versa.
+         *
+         *   CPU 1 (timer1_cb)                  CPU 2 (timer2_cb)
+         *   bpf_timer_cancel_and_free(timer2)  bpf_timer_cancel_and_free(timer1)
+         *
+         * To avoid these issues, punt to workqueue context when we are in a
+         * timer callback.
          */
-        if (this_cpu_read(hrtimer_running) != t)
-                hrtimer_cancel(&t->timer);
-        kfree_rcu(t, cb.rcu);
+        if (this_cpu_read(hrtimer_running))
+                queue_work(system_unbound_wq, &t->cb.delete_work);
+        else
+                bpf_timer_delete_work(&t->cb.delete_work);
 }
 
 /* This function is called by map_delete/update_elem for individual element and
tools/testing/selftests/bpf/prog_tests/timer_lockup.c (new file, 91 lines)
@@ -0,0 +1,91 @@
// SPDX-License-Identifier: GPL-2.0

#define _GNU_SOURCE
#include <sched.h>
#include <test_progs.h>
#include <pthread.h>
#include <network_helpers.h>

#include "timer_lockup.skel.h"

static long cpu;
static int *timer1_err;
static int *timer2_err;
static bool skip;

volatile int k = 0;

static void *timer_lockup_thread(void *arg)
{
        LIBBPF_OPTS(bpf_test_run_opts, opts,
                .data_in        = &pkt_v4,
                .data_size_in   = sizeof(pkt_v4),
                .repeat         = 1000,
        );
        int i, prog_fd = *(int *)arg;
        cpu_set_t cpuset;

        CPU_ZERO(&cpuset);
        CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
        ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
                                         &cpuset),
                  "cpu affinity");

        for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
                bpf_prog_test_run_opts(prog_fd, &opts);
                /* Skip the test if we can't reproduce the race in a reasonable
                 * amount of time.
                 */
                if (i > 50) {
                        WRITE_ONCE(skip, true);
                        break;
                }
        }

        return NULL;
}

void test_timer_lockup(void)
{
        int timer1_prog, timer2_prog;
        struct timer_lockup *skel;
        pthread_t thrds[2];
        void *ret;

        skel = timer_lockup__open_and_load();
        if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
                return;

        timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
        timer2_prog = bpf_program__fd(skel->progs.timer2_prog);

        timer1_err = &skel->bss->timer1_err;
        timer2_err = &skel->bss->timer2_err;

        if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
                                      &timer1_prog),
                       "pthread_create thread1"))
                goto out;
        if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
                                      &timer2_prog),
                       "pthread_create thread2")) {
                pthread_exit(&thrds[0]);
                goto out;
        }

        pthread_join(thrds[1], &ret);
        pthread_join(thrds[0], &ret);

        if (skip) {
                test__skip();
                goto out;
        }

        if (*timer1_err != -EDEADLK && *timer1_err != 0)
                ASSERT_FAIL("timer1_err bad value");
        if (*timer2_err != -EDEADLK && *timer2_err != 0)
                ASSERT_FAIL("timer2_err bad value");
out:
        timer_lockup__destroy(skel);
        return;
}
tools/testing/selftests/bpf/progs/timer_lockup.c (new file, 87 lines)
@@ -0,0 +1,87 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"

char _license[] SEC("license") = "GPL";

struct elem {
        struct bpf_timer t;
};

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct elem);
} timer1_map SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct elem);
} timer2_map SEC(".maps");

int timer1_err;
int timer2_err;

static int timer_cb1(void *map, int *k, struct elem *v)
{
        struct bpf_timer *timer;
        int key = 0;

        timer = bpf_map_lookup_elem(&timer2_map, &key);
        if (timer)
                timer2_err = bpf_timer_cancel(timer);

        return 0;
}

static int timer_cb2(void *map, int *k, struct elem *v)
{
        struct bpf_timer *timer;
        int key = 0;

        timer = bpf_map_lookup_elem(&timer1_map, &key);
        if (timer)
                timer1_err = bpf_timer_cancel(timer);

        return 0;
}

SEC("tc")
int timer1_prog(void *ctx)
{
        struct bpf_timer *timer;
        int key = 0;

        timer = bpf_map_lookup_elem(&timer1_map, &key);
        if (timer) {
                bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
                bpf_timer_set_callback(timer, timer_cb1);
                bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
        }

        return 0;
}

SEC("tc")
int timer2_prog(void *ctx)
{
        struct bpf_timer *timer;
        int key = 0;

        timer = bpf_map_lookup_elem(&timer2_map, &key);
        if (timer) {
                bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
                bpf_timer_set_callback(timer, timer_cb2);
                bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
        }

        return 0;
}
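Taken together, the selftest arms two timers whose callbacks try to cancel each other's timer, with each timer pinned to a different CPU via BPF_F_TIMER_CPU_PIN and the per-thread CPU affinity set in the prog_tests file. With the kernel/bpf/helpers.c changes above, one side now observes -EDEADLK instead of both sides waiting in hrtimer_cancel() without making forward progress. Assuming the usual BPF selftest runner, the test can be invoked from tools/testing/selftests/bpf with ./test_progs -t timer_lockup.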