diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 0fe92986412b..6d37a40cd90e 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -74,12 +74,6 @@ struct bpf_local_storage_elem { struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; struct rcu_head rcu; - bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU - * callbacks? bpf_local_storage_map_free only - * executes rcu_barrier when there are special - * fields, this field remembers that to ensure we - * don't access already freed smap in sdata. - */ /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 2bdd722fe293..3d320393a12c 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,30 +109,36 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_rcu(struct rcu_head *rcu) +static void bpf_selem_free_fields_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + /* protected by the rcu_barrier*() */ + smap = rcu_dereference_protected(SDATA(selem)->smap, true); + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + kfree(selem); +} + +static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu) +{ + /* Free directly if Tasks Trace RCU GP also implies RCU GP */ + if (rcu_trace_implies_rcu_gp()) + bpf_selem_free_fields_rcu(rcu); + else + call_rcu(rcu, bpf_selem_free_fields_rcu); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - /* The can_use_smap bool is set whenever we need to free additional - * fields in selem data before freeing selem. bpf_local_storage_map_free - * only executes rcu_barrier to wait for RCU callbacks when it has - * special fields, hence we can only conditionally dereference smap, as - * by this time the map might have already been freed without waiting - * for our call_rcu callback if it did not have any special fields. - */ - if (selem->can_use_smap) - bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data); - kfree(selem); -} - -static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu) -{ - /* Free directly if Tasks Trace RCU GP also implies RCU GP */ if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_rcu(rcu); + kfree(selem); else - call_rcu(rcu, bpf_selem_free_rcu); + kfree_rcu(selem, rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. @@ -145,6 +151,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor { struct bpf_local_storage_map *smap; bool free_local_storage; + struct btf_record *rec; void *owner; smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); @@ -185,10 +192,26 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - if (use_trace_rcu) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu); - else - call_rcu(&selem->rcu, bpf_selem_free_rcu); + /* A different RCU callback is chosen whenever we need to free + * additional fields in selem data before freeing selem. + * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU + * callbacks when it has special fields, hence we can only conditionally + * dereference smap, as by this time the map might have already been + * freed without waiting for our call_rcu callback if it did not have + * any special fields. + */ + rec = smap->map.record; + if (use_trace_rcu) { + if (!IS_ERR_OR_NULL(rec)) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); + else + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + } else { + if (!IS_ERR_OR_NULL(rec)) + call_rcu(&selem->rcu, bpf_selem_free_fields_rcu); + else + kfree_rcu(selem, rcu); + } return free_local_storage; } @@ -256,11 +279,6 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, RCU_INIT_POINTER(SDATA(selem)->smap, smap); hlist_add_head_rcu(&selem->map_node, &b->list); raw_spin_unlock_irqrestore(&b->lock, flags); - - /* If our data will have special fields, smap will wait for us to use - * its record in bpf_selem_free_* RCU callbacks before freeing itself. - */ - selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record); } void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) @@ -748,19 +766,20 @@ void bpf_local_storage_map_free(struct bpf_map *map, kvfree(smap->buckets); /* When local storage has special fields, callbacks for - * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using - * the map BTF record, we need to execute an RCU barrier to wait for - * them as the record will be freed right after our map_free callback. + * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will + * keep using the map BTF record, we need to execute an RCU barrier to + * wait for them as the record will be freed right after our map_free + * callback. */ if (!IS_ERR_OR_NULL(smap->map.record)) { rcu_barrier_tasks_trace(); /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() * is true, because while call_rcu invocation is skipped in that - * case in bpf_selem_free_tasks_trace_rcu (and all local storage - * maps pass use_trace_rcu = true), there can be call_rcu - * callbacks based on use_trace_rcu = false in the earlier while - * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock - * called from owner's free path. + * case in bpf_selem_free_fields_trace_rcu (and all local + * storage maps pass use_trace_rcu = true), there can be + * call_rcu callbacks based on use_trace_rcu = false in the + * while ((selem = ...)) loop above or when owner's free path + * calls bpf_local_storage_unlink_nolock. */ rcu_barrier(); }