diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 6d37a40cd90e..0fe92986412b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -74,6 +74,12 @@ struct bpf_local_storage_elem {
 	struct hlist_node snode;	/* Linked to bpf_local_storage */
 	struct bpf_local_storage __rcu *local_storage;
 	struct rcu_head rcu;
+	bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU
+			    * callbacks? bpf_local_storage_map_free only
+			    * executes rcu_barrier when there are special
+			    * fields, this field remembers that to ensure we
+			    * don't access already freed smap in sdata.
+			    */
 	/* 8 bytes hole */
 	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 58da17ae5124..2bdd722fe293 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -85,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	if (selem) {
 		if (value)
 			copy_map_value(&smap->map, SDATA(selem)->data, value);
+		/* No need to call check_and_init_map_value as memory is zero init */
 		return selem;
 	}
 
@@ -113,10 +114,25 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
 	struct bpf_local_storage_elem *selem;
 
 	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+	/* The can_use_smap bool is set whenever we need to free additional
+	 * fields in selem data before freeing selem. bpf_local_storage_map_free
+	 * only executes rcu_barrier to wait for RCU callbacks when it has
+	 * special fields, hence we can only conditionally dereference smap, as
+	 * by this time the map might have already been freed without waiting
+	 * for our call_rcu callback if it did not have any special fields.
+	 */
+	if (selem->can_use_smap)
+		bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data);
+	kfree(selem);
+}
+
+static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu)
+{
+	/* Free directly if Tasks Trace RCU GP also implies RCU GP */
 	if (rcu_trace_implies_rcu_gp())
-		kfree(selem);
+		bpf_selem_free_rcu(rcu);
 	else
-		kfree_rcu(selem, rcu);
+		call_rcu(rcu, bpf_selem_free_rcu);
 }
 
 /* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -170,9 +186,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
 		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
 
 	if (use_trace_rcu)
-		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu);
 	else
-		kfree_rcu(selem, rcu);
+		call_rcu(&selem->rcu, bpf_selem_free_rcu);
 
 	return free_local_storage;
 }
@@ -240,6 +256,11 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 	hlist_add_head_rcu(&selem->map_node, &b->list);
 	raw_spin_unlock_irqrestore(&b->lock, flags);
+
+	/* If our data will have special fields, smap will wait for us to use
+	 * its record in bpf_selem_free_* RCU callbacks before freeing itself.
+	 */
+	selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record);
 }
 
 void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
@@ -723,6 +744,25 @@ void bpf_local_storage_map_free(struct bpf_map *map,
 	 */
 	synchronize_rcu();
 
+	/* Only delay freeing of smap, buckets are not needed anymore */
 	kvfree(smap->buckets);
+
+	/* When local storage has special fields, callbacks for
+	 * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using
+	 * the map BTF record, we need to execute an RCU barrier to wait for
+	 * them as the record will be freed right after our map_free callback.
+	 */
+	if (!IS_ERR_OR_NULL(smap->map.record)) {
+		rcu_barrier_tasks_trace();
+		/* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+		 * is true, because while call_rcu invocation is skipped in that
+		 * case in bpf_selem_free_tasks_trace_rcu (and all local storage
+		 * maps pass use_trace_rcu = true), there can be call_rcu
+		 * callbacks based on use_trace_rcu = false in the earlier while
+		 * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock
+		 * called from owner's free path.
+		 */
+		rcu_barrier();
+	}
 	bpf_map_area_free(smap);
 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5dfcb5ad0d06..653aeb481c79 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -249,7 +249,18 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
 		struct htab_elem *elem;
 
 		elem = get_htab_elem(htab, i);
-		bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+		if (htab_is_percpu(htab)) {
+			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+			int cpu;
+
+			for_each_possible_cpu(cpu) {
+				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+				cond_resched();
+			}
+		} else {
+			bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+			cond_resched();
+		}
 		cond_resched();
 	}
 }
@@ -759,9 +770,17 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
 static void check_and_free_fields(struct bpf_htab *htab,
 				  struct htab_elem *elem)
 {
-	void *map_value = elem->key + round_up(htab->map.key_size, 8);
+	if (htab_is_percpu(htab)) {
+		void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+		int cpu;
 
-	bpf_obj_free_fields(htab->map.record, map_value);
+		for_each_possible_cpu(cpu)
+			bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+	} else {
+		void *map_value = elem->key + round_up(htab->map.key_size, 8);
+
+		bpf_obj_free_fields(htab->map.record, map_value);
+	}
 }
 
 /* It is called from the bpf_lru_list when the LRU needs to delete
@@ -858,9 +877,9 @@ find_first_elem:
 
 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 {
+	check_and_free_fields(htab, l);
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
-	check_and_free_fields(htab, l);
 	bpf_mem_cache_free(&htab->ma, l);
 }
 
@@ -918,14 +937,13 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 {
 	if (!onallcpus) {
 		/* copy true value_size bytes */
-		memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
 	} else {
 		u32 size = round_up(htab->map.value_size, 8);
 		int off = 0, cpu;
 
 		for_each_possible_cpu(cpu) {
-			bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
-					value + off, size);
+			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
 			off += size;
 		}
 	}
@@ -940,16 +958,14 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 	 * (onallcpus=false always when
coming from bpf prog). */ if (!onallcpus) { - u32 size = round_up(htab->map.value_size, 8); int current_cpu = raw_smp_processor_id(); int cpu; for_each_possible_cpu(cpu) { if (cpu == current_cpu) - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, - size); - else - memset(per_cpu_ptr(pptr, cpu), 0, size); + copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); + else /* Since elem is preallocated, we cannot touch special fields */ + zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); } } else { pcpu_copy_value(htab, pptr, value, onallcpus); @@ -1575,9 +1591,8 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, pptr = htab_elem_get_ptr(l, key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, - per_cpu_ptr(pptr, cpu), - roundup_value_size); + copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(&htab->map, value + off); off += roundup_value_size; } } else { @@ -1772,8 +1787,8 @@ again_nocopy: pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(dst_val + off, - per_cpu_ptr(pptr, cpu), size); + copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(&htab->map, dst_val + off); off += size; } } else { @@ -2046,9 +2061,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) roundup_value_size = round_up(map->value_size, 8); pptr = htab_elem_get_ptr(elem, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(info->percpu_value_buf + off, - per_cpu_ptr(pptr, cpu), - roundup_value_size); + copy_map_value_long(map, info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, info->percpu_value_buf + off); off += roundup_value_size; } ctx.value = info->percpu_value_buf; @@ -2292,8 +2307,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) */ pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, - per_cpu_ptr(pptr, cpu), size); + copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, value + off); off += size; } ret = 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e3fcdc9836a6..eb50025b03c1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1059,9 +1059,15 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, case BPF_KPTR_UNREF: case BPF_KPTR_REF: if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && - map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) { + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_SK_STORAGE && + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && + map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { ret = -EOPNOTSUPP; goto free_map_tab; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a856896e835a..bf580f246a01 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7222,22 +7222,26 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, break; case BPF_MAP_TYPE_SK_STORAGE: if (func_id != BPF_FUNC_sk_storage_get && - func_id != BPF_FUNC_sk_storage_delete) + func_id != BPF_FUNC_sk_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_INODE_STORAGE: if (func_id != 
BPF_FUNC_inode_storage_get && - func_id != BPF_FUNC_inode_storage_delete) + func_id != BPF_FUNC_inode_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_TASK_STORAGE: if (func_id != BPF_FUNC_task_storage_get && - func_id != BPF_FUNC_task_storage_delete) + func_id != BPF_FUNC_task_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_CGRP_STORAGE: if (func_id != BPF_FUNC_cgrp_storage_get && - func_id != BPF_FUNC_cgrp_storage_delete) + func_id != BPF_FUNC_cgrp_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_BLOOM_FILTER: diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 3533a4ecad01..8743df599567 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -4,70 +4,160 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" +#include "rcu_tasks_trace_gp.skel.h" static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, lopts); LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + int key = 0, ret, cpu; struct map_kptr *skel; - int key = 0, ret; - char buf[16]; + char buf[16], *pbuf; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); - ASSERT_OK(ret, "test_map_kptr_ref refcount"); - ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts); + ASSERT_OK(ret, "test_map_kptr_ref1 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval"); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval"); + if (test_run) goto exit; + cpu = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus")) + goto exit; + + pbuf = calloc(cpu, sizeof(buf)); + if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)")) + goto exit; + ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map__update_elem(skel->maps.array_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "array_map update2"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__update_elem(skel->maps.pcpu_array_map, + &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); + ASSERT_OK(ret, "pcpu_array_map update"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - 
ASSERT_OK(ret, "hash_map update"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_malloc_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_malloc_map update"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.lru_hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "lru_hash_map update"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "lru_pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete"); + skel->data->ref--; + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval"); + + free(pbuf); exit: map_kptr__destroy(skel); } -void test_map_kptr(void) +static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) { - if (test__start_subtest("success")) { + long gp_seq = READ_ONCE(rcu->bss->gp_seq); + LIBBPF_OPTS(bpf_test_run_opts, opts); + + if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), + &opts), "do_call_rcu_tasks_trace")) + return -EFAULT; + if (!ASSERT_OK(opts.retval, "opts.retval == 0")) + return -EFAULT; + while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) + sched_yield(); + return 0; +} + +void serial_test_map_kptr(void) +{ + struct rcu_tasks_trace_gp *skel; + + RUN_TESTS(map_kptr_fail); + + skel = rcu_tasks_trace_gp__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) + return; + if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) + goto end; + + if (test__start_subtest("success-map")) { + test_map_kptr_success(true); + + 
ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ test_map_kptr_success(false); - /* Do test_run twice, so that we see refcount going back to 1 - * after we leave it in map from first iteration. - */ + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on synchronous delete elem */ test_map_kptr_success(true); } - RUN_TESTS(map_kptr_fail); +end: + rcu_tasks_trace_gp__destroy(skel); + return; } diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 228ec45365a8..a24d17bc17eb 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -15,6 +15,13 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); +struct pcpu_array_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_array_map SEC(".maps"); + struct hash_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -22,6 +29,13 @@ struct hash_map { __uint(max_entries, 1); } hash_map SEC(".maps"); +struct pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_hash_map SEC(".maps"); + struct hash_malloc_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -30,6 +44,14 @@ struct hash_malloc_map { __uint(map_flags, BPF_F_NO_PREALLOC); } hash_malloc_map SEC(".maps"); +struct pcpu_hash_malloc_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} pcpu_hash_malloc_map SEC(".maps"); + struct lru_hash_map { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, int); @@ -37,6 +59,41 @@ struct lru_hash_map { __uint(max_entries, 1); } lru_hash_map SEC(".maps"); +struct lru_pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_pcpu_hash_map SEC(".maps"); + +struct cgrp_ls_map { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} cgrp_ls_map SEC(".maps"); + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} task_ls_map SEC(".maps"); + +struct inode_ls_map { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} inode_ls_map SEC(".maps"); + +struct sk_ls_map { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} sk_ls_map SEC(".maps"); + #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ struct { \ __uint(type, map_type); \ @@ -160,6 +217,58 @@ int test_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path) +{ + struct map_value *v; + + v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim) +{ + 
struct task_struct *task; + struct map_value *v; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct map_value *v; + + v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("tc") +int test_sk_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { @@ -189,106 +298,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } -SEC("tc") -int test_map_kptr_ref(struct __sk_buff *ctx) +int ref = 1; + +static __always_inline +int test_map_kptr_ref_pre(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; unsigned long arg = 0; - struct map_value *v; - int key = 0, ret; + int ret; p = bpf_kfunc_call_test_acquire(&arg); if (!p) return 1; + ref++; p_st = p->next; - if (p_st->cnt.refs.counter != 2) { + if (p_st->cnt.refs.counter != ref) { ret = 2; goto end; } - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) { + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { ret = 3; goto end; } - - p = bpf_kptr_xchg(&v->ref_ptr, p); - if (p) { - ret = 4; - goto end; - } - if (p_st->cnt.refs.counter != 2) - return 5; + if (p_st->cnt.refs.counter != ref) + return 4; p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); if (!p) - return 6; - if (p_st->cnt.refs.counter != 3) { - ret = 7; + return 5; + ref++; + if (p_st->cnt.refs.counter != ref) { + ret = 6; goto end; } bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 2) - return 8; + ref--; + if (p_st->cnt.refs.counter != ref) + return 7; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 9; + return 8; bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 1) - return 10; + ref--; + if (p_st->cnt.refs.counter != ref) + return 9; p = bpf_kfunc_call_test_acquire(&arg); if (!p) - return 11; + return 10; + ref++; p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 12; + ret = 11; goto end; } - if (p_st->cnt.refs.counter != 2) - return 13; + if (p_st->cnt.refs.counter != ref) + return 12; /* Leave in map */ return 0; end: + ref--; bpf_kfunc_call_test_release(p); return ret; } -SEC("tc") -int test_map_kptr_ref2(struct __sk_buff *ctx) +static __always_inline +int test_map_kptr_ref_post(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; - struct map_value *v; - int key = 0; - - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) - return 1; p_st = v->ref_ptr; - if (!p_st || p_st->cnt.refs.counter != 2) - return 2; + if (!p_st || p_st->cnt.refs.counter != ref) + return 1; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 3; - if (p_st->cnt.refs.counter != 2) { + return 2; + if (p_st->cnt.refs.counter != ref) { bpf_kfunc_call_test_release(p); - return 4; + return 3; } p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { bpf_kfunc_call_test_release(p); - return 5; + return 4; } - if (p_st->cnt.refs.counter != 2) - return 6; + if (p_st->cnt.refs.counter != ref) + return 5; return 0; } +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +#define 
TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref1(struct __sk_buff *ctx) +{ + struct map_value *v, val = {}; + int key = 0, ret; + + bpf_map_update_elem(&hash_map, &key, &val, 0); + bpf_map_update_elem(&hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_hash_map, &key, &val, 0); + + bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0); + bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0); + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); + + return 0; +} + +#undef TEST +#undef TEST_PCPU + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, ret; + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); + + return 0; +} + +#undef TEST +#undef TEST_PCPU + +SEC("tc") +int test_map_kptr_ref3(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long sp = 0; + + p = bpf_kfunc_call_test_acquire(&sp); + if (!p) + return 1; + ref++; + if (p->cnt.refs.counter != ref) { + bpf_kfunc_call_test_release(p); + return 2; + } + bpf_kfunc_call_test_release(p); + ref--; + return 0; +} + +SEC("syscall") +int test_ls_map_kptr_ref1(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (v) + return 150; + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 200; + return test_map_kptr_ref_pre(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref2(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + return test_map_kptr_ref_post(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref_del(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + if (!v->ref_ptr) + return 300; + return bpf_task_storage_delete(&task_ls_map, current); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c new file mode 100644 index 000000000000..df4873558634 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); 
+} task_ls_map SEC(".maps");
+
+long gp_seq;
+
+SEC("syscall")
+int do_call_rcu_tasks_trace(void *ctx)
+{
+	struct task_struct *current;
+	int *v;
+
+	current = bpf_get_current_task_btf();
+	v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!v)
+		return 1;
+	/* Invoke call_rcu_tasks_trace */
+	return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+SEC("kprobe/rcu_tasks_trace_postgp")
+int rcu_tasks_trace_postgp(void *ctx)
+{
+	__sync_add_and_fetch(&gp_seq, 1);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
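
A minimal sketch (not part of the patch) of the usage this series enables: a BPF program that stores a referenced kptr in a task local storage map value and moves it with bpf_kptr_xchg(). It is distilled from the test_ls_map_kptr_ref* selftests above; the __kptr_ref type tag comes from bpf_helpers.h, and prog_test_ref_kfunc with its acquire/release test kfuncs exist only for selftests, so a real user would substitute their own kptr-enabled type. The map and program names here are illustrative.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Referenced kptr embedded in the map value; freeing the storage element
 * now releases it via bpf_obj_free_fields() in the RCU callback added by
 * this patch.
 */
struct map_value {
	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct map_value);
} task_ls SEC(".maps");

/* Test kfuncs used by the selftests in this series (selftest-only). */
extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

SEC("syscall")
int store_kptr_in_task_storage(void *ctx)
{
	struct prog_test_ref_kfunc *p, *old;
	struct task_struct *task;
	struct map_value *v;
	unsigned long sp = 0;

	task = bpf_get_current_task_btf();
	if (!task)
		return 1;
	v = bpf_task_storage_get(&task_ls, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!v)
		return 2;
	p = bpf_kfunc_call_test_acquire(&sp);
	if (!p)
		return 3;
	/* Move the acquired reference into the map; any previous value is
	 * returned and must be released by the program.
	 */
	old = bpf_kptr_xchg(&v->ref_ptr, p);
	if (old)
		bpf_kfunc_call_test_release(old);
	return 0;
}

char _license[] SEC("license") = "GPL";

When the element or the map is torn down, a reference left in ref_ptr is dropped by bpf_obj_free_fields() from bpf_selem_free_rcu(), which is why bpf_local_storage_map_free() now waits with rcu_barrier_tasks_trace()/rcu_barrier() before the map and its BTF record are freed.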