diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3db6f6c95489..bca11ac818c1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,7 +190,7 @@ struct bpf_map { atomic64_t usercnt; struct work_struct work; struct mutex freeze_mutex; - u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ + atomic64_t writecnt; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -1387,6 +1387,7 @@ void bpf_map_put(struct bpf_map *map); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); +bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1cad6979a0d0..ecd51a8a8680 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } +static void bpf_map_write_active_inc(struct bpf_map *map) +{ + atomic64_inc(&map->writecnt); +} + +static void bpf_map_write_active_dec(struct bpf_map *map) +{ + atomic64_dec(&map->writecnt); +} + +bool bpf_map_write_active(const struct bpf_map *map) +{ + return atomic64_read(&map->writecnt) != 0; +} + static u32 bpf_map_value_size(const struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -596,11 +611,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt++; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_inc(map); } /* called for all unmapped memory region (including initial) */ @@ -608,11 +620,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt--; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_dec(map); } static const struct vm_operations_struct bpf_map_default_vmops = { @@ -663,7 +672,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) goto out; if (vma->vm_flags & VM_MAYWRITE) - map->writecnt++; + bpf_map_write_active_inc(map); out: mutex_unlock(&map->freeze_mutex); return err; @@ -1122,6 +1131,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; @@ -1157,6 +1167,7 @@ free_value: free_key: kvfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1179,6 +1190,7 @@ static int map_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; @@ -1209,6 +1221,7 @@ static int map_delete_elem(union bpf_attr *attr) out: kvfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1516,6 +1529,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; @@ -1580,6 +1594,7 @@ free_value: free_key: kvfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1607,8 +1622,7 @@ static int map_freeze(const union bpf_attr *attr) } mutex_lock(&map->freeze_mutex); - - if (map->writecnt) { + if (bpf_map_write_active(map)) { err = -EBUSY; goto err_put; } @@ -4143,6 +4157,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd) { + bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; + bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; struct bpf_map *map; int err, ufd; struct fd f; @@ -4155,16 +4172,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if ((cmd == BPF_MAP_LOOKUP_BATCH || - cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && - !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { + if (has_write) + bpf_map_write_active_inc(map); + if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } - - if (cmd != BPF_MAP_LOOKUP_BATCH && - !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -4177,8 +4191,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, BPF_DO_BATCH(map->ops->map_update_batch); else BPF_DO_BATCH(map->ops->map_delete_batch); - err_put: + if (has_write) + bpf_map_write_active_dec(map); fdput(f); return err; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cba37d83451e..2105b57c9c23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3885,7 +3885,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) static bool bpf_map_is_rdonly(const struct bpf_map *map) { - return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; + /* A map is considered read-only if the following condition are true: + * + * 1) BPF program side cannot change any of the map content. The + * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map + * and was set at map creation time. + * 2) The map value(s) have been initialized from user space by a + * loader and then "frozen", such that no new map update/delete + * operations from syscall side are possible for the rest of + * the map's lifetime from that point onwards. + * 3) Any parallel/pending map update/delete operations from syscall + * side have been completed. Only after that point, it's safe to + * assume that map value(s) are immutable. + */ + return (map->map_flags & BPF_F_RDONLY_PROG) && + READ_ONCE(map->frozen) && + !bpf_map_write_active(map); } static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)