erofs: rename per-CPU buffers to global buffer pool and make it configurable

Allocating compressed buffers on demand costs extra time for low-latency algorithms (like lz4), so EROFS uses per-CPU buffers to hold compressed data when in-place decompression cannot be fulfilled. However, this is wasteful of memory on a device with hundreds of CPUs, since only a small number of CPUs decompress concurrently most of the time. This patch renames the per-CPU buffers to 'global buffer pool' and makes the number of buffers configurable. This allows two or more CPUs to share a common buffer to reduce memory occupation. Suggested-by: Gao Xiang <xiang@kernel.org> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Signed-off-by: Chunhai Guo <guochunhai@vivo.com> Link: https://lore.kernel.org/r/20240402100036.2673604-1-guochunhai@vivo.com Signed-off-by: Sandeep Dhavale <dhavale@google.com> Link: https://lore.kernel.org/r/20240408215231.3376659-1-dhavale@google.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2024-04-02 04:00:36 -06:00 · 2024-04-02 04:00:36 -06:00 · f36f3010f6
commit f36f3010f6
parent cacd5b04e2
6 changed files with 166 additions and 161 deletions
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@ -3,7 +3,7 @@
 obj-$(CONFIG_EROFS_FS) += erofs.o
 erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o zutil.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
 erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
 	sbi->lz4.max_distance_pages = distance ?
 					DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
 					LZ4_MAX_DISTANCE_PAGES;
-	return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
+	return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
 }
 /*
@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
 docopy:
 	/* Or copy compressed data which can be overlapped to per-CPU buffer */
 	in = rq->in;
-	src = erofs_get_pcpubuf(ctx->inpages);
+	src = z_erofs_get_gbuf(ctx->inpages);
 	if (!src) {
 		DBG_BUGON(1);
 		kunmap_local(inpage);
@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
 	} else if (maptype == 1) {
 		vm_unmap_ram(src, ctx->inpages);
 	} else if (maptype == 2) {
-		erofs_put_pcpubuf(src);
+		z_erofs_put_gbuf(src);
 	} else if (maptype != 3) {
 		DBG_BUGON(1);
 		return -EFAULT;
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@ -463,11 +463,11 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *egrp);
 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
 			    int flags);
-void *erofs_get_pcpubuf(unsigned int requiredpages);
+void *z_erofs_get_gbuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
+void z_erofs_put_gbuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
+int z_erofs_gbuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
+int __init z_erofs_gbuf_init(void);
-void erofs_pcpubuf_exit(void);
+void z_erofs_gbuf_exit(void);
 int erofs_init_managed_cache(struct super_block *sb);
 int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
 #else
@ -477,8 +477,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
 static inline void erofs_exit_shrinker(void) {}
 static inline int z_erofs_init_zip_subsystem(void) { return 0; }
 static inline void z_erofs_exit_zip_subsystem(void) {}
-static inline void erofs_pcpubuf_init(void) {}
+static inline int z_erofs_gbuf_init(void) { return 0; }
-static inline void erofs_pcpubuf_exit(void) {}
+static inline void z_erofs_gbuf_exit(void) {}
 static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
 #endif	/* !CONFIG_EROFS_FS_ZIP */
--- a/fs/erofs/pcpubuf.c
+++ b/fs/erofs/pcpubuf.c
@ -1,148 +0,0 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
 * Copyright (C) Gao Xiang <xiang@kernel.org>
 *
 * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
 * per-CPU virtual memory (in pages) in advance to store such inplace I/O
 * data if inplace decompression is failed (due to unmet inplace margin for
 * example).
 */
 #include "internal.h"
 /* Descriptor of one per-CPU buffer; one instance exists per CPU (erofs_pcb). */
 struct erofs_pcpubuf {
 	raw_spinlock_t lock;	/* taken in erofs_get_pcpubuf(), dropped in erofs_put_pcpubuf() */
 	void *ptr;		/* address returned by vmap() over pages[] */
 	struct page **pages;	/* pages backing the mapping */
 	unsigned int nrpages;	/* number of entries in pages[] */
 };
 /* the per-CPU buffer instances themselves */
 static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
 /*
  * Lock the current CPU's buffer and return its mapped address.
  *
  * Returns NULL if the buffer holds fewer than @requiredpages pages; in that
  * case the lock and the per-CPU reference have already been dropped.
  * Otherwise the lock is still held on return and is released by
  * erofs_put_pcpubuf().
  */
 void *erofs_get_pcpubuf(unsigned int requiredpages)
 	__acquires(pcb->lock)
 {
 	/* paired with put_cpu_var() below or in erofs_put_pcpubuf() */
 	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
 	raw_spin_lock(&pcb->lock);
 	/* check if the per-CPU buffer is too small */
 	if (requiredpages > pcb->nrpages) {
 		raw_spin_unlock(&pcb->lock);
 		put_cpu_var(erofs_pcb);
 		/* (for sparse checker) pretend pcb->lock is still taken */
 		__acquire(pcb->lock);
 		return NULL;
 	}
 	return pcb->ptr;
 }
 /*
  * Release the buffer obtained from erofs_get_pcpubuf().
  * @ptr is only used for a debug check against the current CPU's buffer.
  */
 void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
 {
 	struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
 	DBG_BUGON(pcb->ptr != ptr);
 	raw_spin_unlock(&pcb->lock);
 	/* balances the get_cpu_var() done in erofs_get_pcpubuf() */
 	put_cpu_var(erofs_pcb);
 }
 /* the next step: support per-CPU page buffers hotplug */
 /*
  * Grow the buffer of every possible CPU to @nrpages pages.
  *
  * A request that is not larger than the size recorded in pcb_nrpages
  * returns 0 without touching anything.  For each CPU, a new page array is
  * allocated and vmap()ed, swapped in under pcb->lock, and the previous
  * mapping and pages are released afterwards.
  *
  * Returns 0 on success or -ENOMEM if an allocation or vmap() fails.
  */
 int erofs_pcpubuf_growsize(unsigned int nrpages)
 {
 	static DEFINE_MUTEX(pcb_resize_mutex);	/* serializes resizes */
 	static unsigned int pcb_nrpages;	/* size recorded by the last resize */
 	struct page *pagepool = NULL;
 	int delta, cpu, ret, i;
 	mutex_lock(&pcb_resize_mutex);
 	delta = nrpages - pcb_nrpages;
 	ret = 0;
 	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
 	if (delta <= 0)
 		goto out;
 	for_each_possible_cpu(cpu) {
 		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
 		struct page **pages, **oldpages;
 		void *ptr, *old_ptr;
 		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
 		if (!pages) {
 			ret = -ENOMEM;
 			break;
 		}
 		for (i = 0; i < nrpages; ++i) {
 			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
 			if (!pages[i]) {
 				ret = -ENOMEM;
 				/* 'i' pages were allocated; free exactly those */
 				oldpages = pages;
 				goto free_pagearray;
 			}
 		}
 		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
 		if (!ptr) {
 			ret = -ENOMEM;
 			/* here i == nrpages, so all new pages are freed */
 			oldpages = pages;
 			goto free_pagearray;
 		}
 		/* publish the new buffer and pick up the old one for disposal */
 		raw_spin_lock(&pcb->lock);
 		old_ptr = pcb->ptr;
 		pcb->ptr = ptr;
 		oldpages = pcb->pages;
 		pcb->pages = pages;
 		i = pcb->nrpages;
 		pcb->nrpages = nrpages;
 		raw_spin_unlock(&pcb->lock);
 		if (!oldpages) {
 			DBG_BUGON(old_ptr);
 			continue;
 		}
 		if (old_ptr)
 			vunmap(old_ptr);
 free_pagearray:
 		/* return oldpages[0..i-1] to the local pool, then drop the array */
 		while (i)
 			erofs_pagepool_add(&pagepool, oldpages[--i]);
 		kfree(oldpages);
 		if (ret)
 			break;
 	}
 	/* NOTE(review): also reached after a failure 'break', so the new size is recorded even then */
 	pcb_nrpages = nrpages;
 	erofs_release_pages(&pagepool);
 out:
 	mutex_unlock(&pcb_resize_mutex);
 	return ret;
 }
 /* Initialize the lock of every possible CPU's buffer; no memory is allocated here. */
 void __init erofs_pcpubuf_init(void)
 {
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
 		raw_spin_lock_init(&pcb->lock);
 	}
 }
 /* Unmap and release the buffer of every possible CPU. */
 void erofs_pcpubuf_exit(void)
 {
 	int cpu, i;
 	for_each_possible_cpu(cpu) {
 		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
 		if (pcb->ptr) {
 			vunmap(pcb->ptr);
 			pcb->ptr = NULL;
 		}
 		/* nothing more to free for a CPU whose buffer was never grown */
 		if (!pcb->pages)
 			continue;
 		for (i = 0; i < pcb->nrpages; ++i)
 			if (pcb->pages[i])
 				put_page(pcb->pages[i]);
 		kfree(pcb->pages);
 		pcb->pages = NULL;
 	}
 }
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@ -859,7 +859,10 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto deflate_err;
-	erofs_pcpubuf_init();
+	err = z_erofs_gbuf_init();
 	if (err)
 		goto gbuf_err;
 	err = z_erofs_init_zip_subsystem();
 	if (err)
 		goto zip_err;
@ -879,6 +882,8 @@ fs_err:
 sysfs_err:
 	z_erofs_exit_zip_subsystem();
 zip_err:
 	z_erofs_gbuf_exit();
 gbuf_err:
 	z_erofs_deflate_exit();
 deflate_err:
 	z_erofs_lzma_exit();
@ -902,7 +907,7 @@ static void __exit erofs_module_exit(void)
 	z_erofs_lzma_exit();
 	erofs_exit_shrinker();
 	kmem_cache_destroy(erofs_inode_cachep);
-	erofs_pcpubuf_exit();
+	z_erofs_gbuf_exit();
 }
 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@ -5,6 +5,18 @@
 */
 #include "internal.h"
 /*
  * One global buffer: a virtually contiguous area, set up with vmap() in
  * z_erofs_gbuf_growsize() and handed out by z_erofs_get_gbuf().
  */
 struct z_erofs_gbuf {
 	spinlock_t lock;	/* taken in z_erofs_get_gbuf(), dropped in z_erofs_put_gbuf() */
 	void *ptr;		/* address returned by vmap() over pages[] */
 	struct page **pages;	/* pages backing the mapping */
 	unsigned int nrpages;	/* number of entries in pages[] */
 };
 /* array of z_erofs_gbuf_count buffers, allocated in z_erofs_gbuf_init() */
 static struct z_erofs_gbuf *z_erofs_gbufpool;
 /* number of buffers in the pool; size (in pages) recorded by z_erofs_gbuf_growsize() */
 static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages;
 /* z_erofs_gbuf_count is exposed as module parameter "global_buffers" (mode 0444) */
 module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
 static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
 /* protected by 'erofs_sb_list_lock' */
 static unsigned int shrinker_run_no;
@ -14,6 +26,142 @@ static DEFINE_SPINLOCK(erofs_sb_list_lock);
 static LIST_HEAD(erofs_sb_list);
 static struct shrinker *erofs_shrinker_info;
 /* Map the current CPU number onto one of the z_erofs_gbuf_count pool slots. */
 static unsigned int z_erofs_gbuf_id(void)
 {
 	unsigned int cpu = raw_smp_processor_id();
 	return cpu % z_erofs_gbuf_count;
 }
 /*
  * Lock the global buffer selected by z_erofs_gbuf_id() and return its
  * mapped address.
  *
  * Returns NULL, with the lock already dropped, if that buffer holds fewer
  * than @requiredpages pages.  Otherwise the lock is still held on return
  * and is released by z_erofs_put_gbuf().
  */
 void *z_erofs_get_gbuf(unsigned int requiredpages)
 	__acquires(gbuf->lock)
 {
 	struct z_erofs_gbuf *gbuf = z_erofs_gbufpool + z_erofs_gbuf_id();
 	spin_lock(&gbuf->lock);
 	if (gbuf->nrpages >= requiredpages)
 		return gbuf->ptr;
 	/* the buffer is too small: give the lock back and report failure */
 	spin_unlock(&gbuf->lock);
 	/* (for sparse checker) pretend gbuf->lock is still taken */
 	__acquire(gbuf->lock);
 	return NULL;
 }
 /*
  * Release the global buffer obtained from z_erofs_get_gbuf().
  * @ptr is only used for a debug check against the selected buffer.
  */
 void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
 {
 	struct z_erofs_gbuf *gbuf = z_erofs_gbufpool + z_erofs_gbuf_id();
 	DBG_BUGON(ptr != gbuf->ptr);
 	spin_unlock(&gbuf->lock);
 }
 /*
  * Grow every global buffer so that it holds @nrpages pages.
  *
  * Buffers are never shrunk: a request that is not larger than the recorded
  * size returns 0 without touching anything.  For each buffer, a new page
  * array is allocated and vmap()ed, swapped in under gbuf->lock, and the
  * previous mapping and pages are released afterwards.
  *
  * Returns 0 on success or -ENOMEM if an allocation or vmap() fails.  On
  * failure the recorded size is left unchanged, so a later call with the
  * same @nrpages retries instead of treating a partially grown pool as
  * complete.
  */
 int z_erofs_gbuf_growsize(unsigned int nrpages)
 {
 	static DEFINE_MUTEX(gbuf_resize_mutex);
 	struct page *pagepool = NULL;
 	unsigned int i, j;
 	int ret = 0;
 	mutex_lock(&gbuf_resize_mutex);
 	/* avoid shrinking gbufs, since no idea how many fses rely on */
 	if (nrpages <= z_erofs_gbuf_nrpages)
 		goto out;
 	for (i = 0; i < z_erofs_gbuf_count; ++i) {
 		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
 		struct page **pages, **tmp_pages;
 		void *ptr, *old_ptr = NULL;
 		ret = -ENOMEM;
 		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
 		if (!tmp_pages)
 			break;
 		for (j = 0; j < nrpages; ++j) {
 			tmp_pages[j] = erofs_allocpage(&pagepool, GFP_KERNEL);
 			/* 'j' pages were allocated; free exactly those */
 			if (!tmp_pages[j])
 				goto free_pagearray;
 		}
 		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
 		/* here j == nrpages, so all new pages are freed */
 		if (!ptr)
 			goto free_pagearray;
 		pages = tmp_pages;
 		/* publish the new buffer and pick up the old one for disposal */
 		spin_lock(&gbuf->lock);
 		old_ptr = gbuf->ptr;
 		gbuf->ptr = ptr;
 		tmp_pages = gbuf->pages;
 		gbuf->pages = pages;
 		j = gbuf->nrpages;
 		gbuf->nrpages = nrpages;
 		spin_unlock(&gbuf->lock);
 		ret = 0;
 		if (!tmp_pages) {
 			DBG_BUGON(old_ptr);
 			continue;
 		}
 		if (old_ptr)
 			vunmap(old_ptr);
 free_pagearray:
 		/* return tmp_pages[0..j-1] to the local pool, then drop the array */
 		while (j)
 			erofs_pagepool_add(&pagepool, tmp_pages[--j]);
 		kfree(tmp_pages);
 		if (ret)
 			break;
 	}
 	/* record the new size only once every buffer has actually been grown */
 	if (!ret)
 		z_erofs_gbuf_nrpages = nrpages;
 	erofs_release_pages(&pagepool);
 out:
 	mutex_unlock(&gbuf_resize_mutex);
 	return ret;
 }
 /*
  * Allocate the global buffer pool and initialize the lock of each buffer.
  *
  * A z_erofs_gbuf_count of 0 selects one buffer per possible CPU; larger
  * values are capped at the number of possible CPUs.  The buffers themselves
  * stay empty here.
  *
  * Returns 0 on success or -ENOMEM if the pool cannot be allocated.
  */
 int __init z_erofs_gbuf_init(void)
 {
 	unsigned int ncpus = num_possible_cpus();
 	struct z_erofs_gbuf *gbuf, *end;
 	if (!z_erofs_gbuf_count || z_erofs_gbuf_count > ncpus)
 		z_erofs_gbuf_count = ncpus;
 	z_erofs_gbufpool = kcalloc(z_erofs_gbuf_count,
 			sizeof(*z_erofs_gbufpool), GFP_KERNEL);
 	if (!z_erofs_gbufpool)
 		return -ENOMEM;
 	end = z_erofs_gbufpool + z_erofs_gbuf_count;
 	for (gbuf = z_erofs_gbufpool; gbuf != end; ++gbuf)
 		spin_lock_init(&gbuf->lock);
 	return 0;
 }
 /*
  * Unmap and release every global buffer, then free the pool array itself.
  */
 void z_erofs_gbuf_exit(void)
 {
 	unsigned int i, j;
 	for (i = 0; i < z_erofs_gbuf_count; ++i) {
 		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
 		if (gbuf->ptr) {
 			vunmap(gbuf->ptr);
 			gbuf->ptr = NULL;
 		}
 		/* nothing more to free for a buffer that was never grown */
 		if (!gbuf->pages)
 			continue;
 		/*
 		 * Walk the pages with a separate index: reusing the pool
 		 * index here would make the outer loop resume at the wrong
 		 * slot and leave other buffers unreleased.
 		 */
 		for (j = 0; j < gbuf->nrpages; ++j)
 			if (gbuf->pages[j])
 				put_page(gbuf->pages[j]);
 		kfree(gbuf->pages);
 		gbuf->pages = NULL;
 	}
 	kfree(z_erofs_gbufpool);
 }
 struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
 {
 	struct page *page = *pagepool;