From 87ac0d600943994444e24382a87aa19acc4cd3d4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 22:31:43 -0700 Subject: [PATCH 1/4] bpf: fix potential 32-bit overflow when accessing ARRAY map element If BPF array map is bigger than 4GB, element pointer calculation can overflow because both index and elem_size are u32. Fix this everywhere by forcing 64-bit multiplication. Extract this formula into separate small helper and use it consistently in various places. Speculative-preventing formula utilizing index_mask trick is left as is, but explicit u64 casts are added in both places. Fixes: c85d69135a91 ("bpf: move memory size checks to bpf_map_charge_init()") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220715053146.1291891-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index fe40d3b9458f..1d05d63e6fa5 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -156,6 +156,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) return &array->map; } +static void *array_map_elem_ptr(struct bpf_array* array, u32 index) +{ + return array->value + (u64)array->elem_size * index; +} + /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -165,7 +170,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * (index & array->index_mask); + return array->value + (u64)array->elem_size * (index & array->index_mask); } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, @@ -339,7 +344,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, value, map->value_size); } else { val = array->value + - array->elem_size * (index & array->index_mask); + (u64)array->elem_size * (index & array->index_mask); if (map_flags & BPF_F_LOCK) copy_map_value_locked(map, val, value, false); else @@ -408,8 +413,7 @@ static void array_map_free_timers(struct bpf_map *map) return; for (i = 0; i < array->map.max_entries; i++) - bpf_timer_cancel_and_free(array->value + array->elem_size * i + - map->timer_off); + bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off); } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -420,7 +424,7 @@ static void array_map_free(struct bpf_map *map) if (map_value_has_kptrs(map)) { for (i = 0; i < array->map.max_entries; i++) - bpf_map_free_kptrs(map, array->value + array->elem_size * i); + bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); bpf_map_free_kptr_off_tab(map); } @@ -556,7 +560,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -575,7 +579,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) @@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); else - val = array->value + array->elem_size * i; + val = array_map_elem_ptr(array, i); num_elems++; key = i; ret = callback_fn((u64)(long)map, (u64)(long)&key, From d937bc3449fa868cbeaf5c87576f9929b765c1e0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 22:31:44 -0700 Subject: [PATCH 2/4] bpf: make uniform use of array->elem_size everywhere in arraymap.c BPF_MAP_TYPE_ARRAY is rounding value_size to closest multiple of 8 and stores that as array->elem_size for various memory allocations and accesses. But the code tends to re-calculate round_up(map->value_size, 8) in multiple places instead of using array->elem_size. Cleaning this up and making sure we always use array->size to avoid duplication of this (admittedly simple) logic for consistency. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220715053146.1291891-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1d05d63e6fa5..98ee09155151 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -208,7 +208,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; - u32 elem_size = round_up(map->value_size, 8); + u32 elem_size = array->elem_size; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; @@ -277,7 +277,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) * access 'value_size' of them, so copying rounded areas * will not leak any kernel data */ - size = round_up(map->value_size, 8); + size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { @@ -381,7 +381,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, * returned or zeros which were zero-filled by percpu_alloc, * so no kernel data leaks possible */ - size = round_up(map->value_size, 8); + size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { @@ -587,6 +587,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_iter__bpf_map_elem ctx = {}; struct bpf_map *map = info->map; + struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_iter_meta meta; struct bpf_prog *prog; int off = 0, cpu = 0; @@ -607,7 +608,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) ctx.value = v; } else { pptr = v; - size = round_up(map->value_size, 8); + size = array->elem_size; for_each_possible_cpu(cpu) { bpf_long_memcpy(info->percpu_value_buf + off, per_cpu_ptr(pptr, cpu), @@ -637,11 +638,12 @@ static int bpf_iter_init_array_map(void *priv_data, { struct bpf_iter_seq_array_map_info *seq_info = priv_data; struct bpf_map *map = aux->map; + struct bpf_array *array = container_of(map, struct bpf_array, map); void *value_buf; u32 buf_size; if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + buf_size = array->elem_size * num_possible_cpus(); value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); if (!value_buf) return -ENOMEM; @@ -1326,7 +1328,7 @@ static int array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); - u32 elem_size = round_up(map->value_size, 8); + u32 elem_size = array->elem_size; struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; From 63b8ce77b15ebf69c4b0ef4b87451e2626aa3c43 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 22:31:45 -0700 Subject: [PATCH 3/4] bpf: remove obsolete KMALLOC_MAX_SIZE restriction on array map value size Syscall-side map_lookup_elem() and map_update_elem() used to use kmalloc() to allocate temporary buffers of value_size, so KMALLOC_MAX_SIZE limit on value_size made sense to prevent creation of array map that won't be accessible through syscall interface. But this limitation since has been lifted by relying on kvmalloc() in syscall handling code. So remove KMALLOC_MAX_SIZE, which among other things means that it's possible to have BPF global variable sections (.bss, .data, .rodata) bigger than 8MB now. Keep the sanity check to prevent trivial overflows like round_up(map->value_size, 8) and restrict value size to <= INT_MAX (2GB). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220715053146.1291891-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98ee09155151..d3e734bf8056 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -70,10 +70,8 @@ int array_map_alloc_check(union bpf_attr *attr) attr->map_flags & BPF_F_PRESERVE_ELEMS) return -EINVAL; - if (attr->value_size > KMALLOC_MAX_SIZE) - /* if value_size is bigger, the user space won't be able to - * access the elements. - */ + /* avoid overflow on round_up(map->value_size) */ + if (attr->value_size > INT_MAX) return -E2BIG; return 0; From 24316461200502aa5feddaa72dcbb8059503a528 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 22:31:46 -0700 Subject: [PATCH 4/4] selftests/bpf: validate .bss section bigger than 8MB is possible now Add a simple big 16MB array and validate access to the very last byte of it to make sure that kernel supports > KMALLOC_MAX_SIZE value_size for BPF array maps (which are backing .bss in this case). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220715053146.1291891-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/skeleton.c | 2 ++ tools/testing/selftests/bpf/progs/test_skeleton.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 180afd632f4c..99dac5292b41 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -122,6 +122,8 @@ void test_skeleton(void) ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1b1187d2967b..1a4e93f6d9df 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; int read_mostly_var __read_mostly; int out_mostly_var; +char huge_arr[16 * 1024 * 1024]; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -71,6 +73,8 @@ int handler(const void *ctx) out_mostly_var = read_mostly_var; + huge_arr[sizeof(huge_arr) - 1] = 123; + return 0; }