From 7e9c323c52b379d261a72dc7bd38120a761a93cd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 31 Aug 2022 22:54:54 +0800 Subject: [PATCH 1/4] mm/slub: fix to return errno if kmalloc() fails In create_unique_id(), kmalloc(..., GFP_KERNEL) can fail due to out-of-memory; if it fails, return the errno correctly rather than triggering a panic via BUG_ON(): kernel BUG at mm/slub.c:5893! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Call trace: sysfs_slab_add+0x258/0x260 mm/slub.c:5973 __kmem_cache_create+0x60/0x118 mm/slub.c:4899 create_cache mm/slab_common.c:229 [inline] kmem_cache_create_usercopy+0x19c/0x31c mm/slab_common.c:335 kmem_cache_create+0x1c/0x28 mm/slab_common.c:390 f2fs_kmem_cache_create fs/f2fs/f2fs.h:2766 [inline] f2fs_init_xattr_caches+0x78/0xb4 fs/f2fs/xattr.c:808 f2fs_fill_super+0x1050/0x1e0c fs/f2fs/super.c:4149 mount_bdev+0x1b8/0x210 fs/super.c:1400 f2fs_mount+0x44/0x58 fs/f2fs/super.c:4512 legacy_get_tree+0x30/0x74 fs/fs_context.c:610 vfs_get_tree+0x40/0x140 fs/super.c:1530 do_new_mount+0x1dc/0x4e4 fs/namespace.c:3040 path_mount+0x358/0x914 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __arm64_sys_mount+0x2f8/0x408 fs/namespace.c:3568 Cc: Fixes: 81819f0fc8285 ("SLUB core") Reported-by: syzbot+81684812ea68216e08c5@syzkaller.appspotmail.com Reviewed-by: Muchun Song Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Chao Yu Acked-by: David Rientjes Signed-off-by: Vlastimil Babka --- mm/slub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 862dbd9af4f5..e6f3727b9ad2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5890,7 +5890,8 @@ static char *create_unique_id(struct kmem_cache *s) char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); char *p = name; - BUG_ON(!name); + if (!name) + return ERR_PTR(-ENOMEM); *p++ = ':'; /* @@ -5948,6 +5949,8 @@ static int sysfs_slab_add(struct kmem_cache *s) * for the 
symlinks. */ name = create_unique_id(s); + if (IS_ERR(name)) + return PTR_ERR(name); } s->kobj.kset = kset; From 5373b8a09d6e037ee0587cb5d9fe4cc09077deeb Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 13 Sep 2022 19:00:01 -0700 Subject: [PATCH 2/4] kasan: call kasan_malloc() from __kmalloc_*track_caller() We were failing to call kasan_kmalloc() from __kmalloc_*track_caller(), which was causing us to sometimes fail to produce KASAN error reports for allocations made using e.g. devm_kcalloc(), as the KASAN poison was not being initialized. Fix it. Signed-off-by: Peter Collingbourne Cc: # 5.15 Signed-off-by: Vlastimil Babka --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index e6f3727b9ad2..5ba6db62a5ab 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4926,6 +4926,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) /* Honor the call site pointer we received. */ trace_kmalloc(caller, ret, s, size, s->size, gfpflags); + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; } EXPORT_SYMBOL(__kmalloc_track_caller); @@ -4957,6 +4959,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, /* Honor the call site pointer we received. 
*/ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node); + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; } EXPORT_SYMBOL(__kmalloc_node_track_caller); From d71608a877362becdc94191f190902fac1e64d35 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 19 Sep 2022 11:12:41 +0800 Subject: [PATCH 3/4] mm/slab_common: fix possible double free of kmem_cache When doing a slub_debug test, kfence's 'test_memcache_typesafe_by_rcu' kunit test case causes a use-after-free error: BUG: KASAN: use-after-free in kobject_del+0x14/0x30 Read of size 8 at addr ffff888007679090 by task kunit_try_catch/261 CPU: 1 PID: 261 Comm: kunit_try_catch Tainted: G B N 6.0.0-rc5-next-20220916 #17 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x48 print_address_description.constprop.0+0x87/0x2a5 print_report+0x103/0x1ed kasan_report+0xb7/0x140 kobject_del+0x14/0x30 kmem_cache_destroy+0x130/0x170 test_exit+0x1a/0x30 kunit_try_run_case+0xad/0xc0 kunit_generic_run_threadfn_adapter+0x26/0x50 kthread+0x17b/0x1b0 The cause is inside kmem_cache_destroy(): kmem_cache_destroy acquire lock/mutex shutdown_cache schedule_work(kmem_cache_release) (if RCU flag set) release lock/mutex kmem_cache_release (if RCU flag not set) With certain timing, the scheduled work could be run before the subsequent RCU flag check, which can then get a wrong value and lead to a double kmem_cache_release(). 
Fix it by caching the RCU flag inside the protected area, just like 'refcnt'. Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock") Signed-off-by: Feng Tang Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Waiman Long Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 07b948288f84..ccc02573588f 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -475,6 +475,7 @@ void slab_kmem_cache_release(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s) { int refcnt; + bool rcu_set; if (unlikely(!s) || !kasan_check_byte(s)) return; @@ -482,6 +483,8 @@ void kmem_cache_destroy(struct kmem_cache *s) cpus_read_lock(); mutex_lock(&slab_mutex); + rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU; + refcnt = --s->refcount; if (refcnt) goto out_unlock; @@ -492,7 +495,7 @@ void kmem_cache_destroy(struct kmem_cache *s) out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); - if (!refcnt && !(s->flags & SLAB_TYPESAFE_BY_RCU)) + if (!refcnt && !rcu_set) kmem_cache_release(s); } EXPORT_SYMBOL(kmem_cache_destroy); From e45cc288724f0cfd497bb5920bcfa60caa335729 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 19 Sep 2022 18:39:29 +0200 Subject: [PATCH 4/4] mm: slub: fix flush_cpu_slab()/__free_slab() invocations in task context. Commit 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context") moved all flush_cpu_slab() invocations to the global workqueue to avoid a problem related to deactivate_slab()/__free_slab() being called from an IRQ context on PREEMPT_RT kernels. When the flush_all_cpus_locked() function is called from a task context, it may happen that a workqueue with the WQ_MEM_RECLAIM bit set ends up flushing the global workqueue; this will cause a dependency issue. 
workqueue: WQ_MEM_RECLAIM nvme-delete-wq:nvme_delete_ctrl_work [nvme_core] is flushing !WQ_MEM_RECLAIM events:flush_cpu_slab WARNING: CPU: 37 PID: 410 at kernel/workqueue.c:2637 check_flush_dependency+0x10a/0x120 Workqueue: nvme-delete-wq nvme_delete_ctrl_work [nvme_core] RIP: 0010:check_flush_dependency+0x10a/0x120[ 453.262125] Call Trace: __flush_work.isra.0+0xbf/0x220 ? __queue_work+0x1dc/0x420 flush_all_cpus_locked+0xfb/0x120 __kmem_cache_shutdown+0x2b/0x320 kmem_cache_destroy+0x49/0x100 bioset_exit+0x143/0x190 blk_release_queue+0xb9/0x100 kobject_cleanup+0x37/0x130 nvme_fc_ctrl_free+0xc6/0x150 [nvme_fc] nvme_free_ctrl+0x1ac/0x2b0 [nvme_core] Fix this bug by creating a workqueue for the flush operation with the WQ_MEM_RECLAIM bit set. Fixes: 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context") Cc: Signed-off-by: Maurizio Lombardi Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka --- mm/slub.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 5ba6db62a5ab..4b98dff9be8e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -310,6 +310,11 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) */ static nodemask_t slab_nodes; +/* + * Workqueue used for flush_cpu_slab(). 
+ */ +static struct workqueue_struct *flushwq; + /******************************************************************** * Core slab cache functions *******************************************************************/ @@ -2730,7 +2735,7 @@ static void flush_all_cpus_locked(struct kmem_cache *s) INIT_WORK(&sfw->work, flush_cpu_slab); sfw->skip = false; sfw->s = s; - schedule_work_on(cpu, &sfw->work); + queue_work_on(cpu, flushwq, &sfw->work); } for_each_online_cpu(cpu) { @@ -4858,6 +4863,8 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { + flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0); + WARN_ON(!flushwq); } struct kmem_cache *