From 0ea7eeec24be5f04ae80d68f5b1ea3a11f49de2f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:52 +0200 Subject: [PATCH 1/3] mm, percpu: add support for __GFP_NOWARN flag Add an option for pcpu_alloc() to support __GFP_NOWARN flag. Currently, we always throw a warning when size or alignment is unsupported (and also dump stack on failed allocation requests). The warning itself is harmless since we return NULL anyway for any failed request, which callers are required to handle anyway. However, it becomes harmful when panic_on_warn is set. The rationale for the WARN() in pcpu_alloc() is that it can be tracked when larger than supported allocation requests are made such that allocations limits can be tweaked if warranted. This makes sense for in-kernel users, however, there are users of pcpu allocator where allocation size is derived from user space requests, e.g. when creating BPF maps. In these cases, the requests should fail gracefully without throwing a splat. The current work-around was to check allocation size against the upper limit of PCPU_MIN_UNIT_SIZE from call-sites for bailing out prior to a call to pcpu_alloc() in order to avoid throwing the WARN(). This is bad in multiple ways since PCPU_MIN_UNIT_SIZE is an implementation detail, and having the checks on call-sites only complicates the code for no good reason. Thus, lets fix it generically by supporting the __GFP_NOWARN flag that users can then use with calling the __alloc_percpu_gfp() helper instead. Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: Mark Rutland Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- mm/percpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index aa121cef76de..a0e0c82c1e4c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1329,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @gfp: allocation flags * * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't - * contain %GFP_KERNEL, the allocation is atomic. + * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN + * then no warning will be triggered on invalid or failed allocation + * requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. @@ -1337,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { + bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; + bool do_warn = !(gfp & __GFP_NOWARN); static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; - bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; int slot, off, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -1361,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || !is_power_of_2(align))) { - WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n", + WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n", size, align); return NULL; } @@ -1482,7 +1485,7 @@ fail_unlock: fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (!is_atomic && warn_limit) { + if (!is_atomic && do_warn && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); dump_stack(); @@ -1507,7 +1510,9 @@ fail: * * Allocate zero-filled percpu area of @size bytes aligned at @align. If * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can - * be called from any context but is a lot more likely to fail. + * be called from any context but is a lot more likely to fail. If @gfp + * has __GFP_NOWARN then no warning will be triggered on invalid or failed + * allocation requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. From 82f8dd28bd3abe181b7a66ea4ea132134d37a400 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:53 +0200 Subject: [PATCH 2/3] bpf: fix splat for illegal devmap percpu allocation It was reported that syzkaller was able to trigger a splat on devmap percpu allocation due to illegal/unsupported allocation request size passed to __alloc_percpu(): [ 70.094249] illegal size (32776) or align (8) for percpu allocation [ 70.094256] ------------[ cut here ]------------ [ 70.094259] WARNING: CPU: 3 PID: 3451 at mm/percpu.c:1365 pcpu_alloc+0x96/0x630 [...] [ 70.094325] Call Trace: [ 70.094328] __alloc_percpu_gfp+0x12/0x20 [ 70.094330] dev_map_alloc+0x134/0x1e0 [ 70.094331] SyS_bpf+0x9bc/0x1610 [ 70.094333] ? selinux_task_setrlimit+0x5a/0x60 [ 70.094334] ? security_task_setrlimit+0x43/0x60 [ 70.094336] entry_SYSCALL_64_fastpath+0x1a/0xa5 This was due to too large max_entries for the map such that we surpassed the upper limit of PCPU_MIN_UNIT_SIZE. It's fine to fail naturally here, so switch to __alloc_percpu_gfp() and pass __GFP_NOWARN instead. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Reported-by: Mark Rutland Reported-by: Shankara Pailoor Reported-by: Richard Weinberger Signed-off-by: Daniel Borkmann Cc: John Fastabend Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e093d9a2c4dd..920428d84da2 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -111,8 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) err = -ENOMEM; /* A per cpu bitfield with a bit per possible net device */ - dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), - __alignof__(unsigned long)); + dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr), + __alignof__(unsigned long), + GFP_KERNEL | __GFP_NOWARN); if (!dtab->flush_needed) goto free_dtab; From bc6d5031b43a2291de638ab9304320b4cae61689 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:54 +0200 Subject: [PATCH 3/3] bpf: do not test for PCPU_MIN_UNIT_SIZE before percpu allocations PCPU_MIN_UNIT_SIZE is an implementation detail of the percpu allocator. Given we support __GFP_NOWARN now, lets just let the allocation request fail naturally instead. The two call sites from BPF mistakenly assumed __GFP_NOWARN would work, so no changes needed to their actual __alloc_percpu_gfp() calls which use the flag already. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 2 +- kernel/bpf/hashtab.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..e2636737b69b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); if (array_size >= U32_MAX - PAGE_SIZE || - elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 431126f31ea3..6533f08d1238 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE) - /* make sure the size for pcpu_alloc() is reasonable */ - goto free_htab; - htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8); if (percpu)