diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bfdf40be5360..a75faf437e75 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -175,9 +175,14 @@ struct bpf_map { */ atomic64_t refcnt ____cacheline_aligned; atomic64_t usercnt; - struct work_struct work; + /* rcu is used before freeing and work is only used during freeing */ + union { + struct work_struct work; + struct rcu_head rcu; + }; struct mutex freeze_mutex; atomic64_t writecnt; + bool free_after_mult_rcu_gp; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 0cf4cb685810..caa1a17cbae1 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -102,10 +102,15 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map, void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { - /* ptr->ops->map_free() has to go through one - * rcu grace period by itself. + struct bpf_map *inner_map = ptr; + + /* The inner map may still be used by both non-sleepable and sleepable + * bpf program, so free it after one RCU grace period and one tasks + * trace RCU grace period. */ - bpf_map_put(ptr); + if (need_defer) + WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); + bpf_map_put(inner_map); } u32 bpf_map_fd_sys_lookup_elem(void *ptr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 16affa09db5c..e1bee8cd3404 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -493,6 +493,25 @@ static void bpf_map_put_uref(struct bpf_map *map) } } +static void bpf_map_free_in_work(struct bpf_map *map) +{ + INIT_WORK(&map->work, bpf_map_free_deferred); + schedule_work(&map->work); +} + +static void bpf_map_free_rcu_gp(struct rcu_head *rcu) +{ + bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); +} + +static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_map_free_rcu_gp(rcu); + else + call_rcu(rcu, bpf_map_free_rcu_gp); +} + /* decrement map refcnt and schedule it for freeing via workqueue * (unrelying map implementation ops->map_free() might sleep) */ @@ -502,8 +521,11 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) /* bpf_map_free_id() must be called first */ bpf_map_free_id(map, do_idr_lock); btf_put(map->btf); - INIT_WORK(&map->work, bpf_map_free_deferred); - schedule_work(&map->work); + + if (READ_ONCE(map->free_after_mult_rcu_gp)) + call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); + else + bpf_map_free_in_work(map); } }