erofs: rename per-CPU buffers to global buffer pool and make it configurable
Allocating compressed buffers on demand is costly for low-latency algorithms (such as LZ4), so EROFS uses per-CPU buffers to hold compressed data when in-place decompression cannot be used. However, this wastes memory on devices with hundreds of CPUs, since only a small number of CPUs usually decompress concurrently.

Rename these buffers to a 'global buffer pool' and make the number of buffers configurable, so that two or more CPUs can share a common buffer and memory usage is reduced.

Suggested-by: Gao Xiang <xiang@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
Link: https://lore.kernel.org/r/20240402100036.2673604-1-guochunhai@vivo.com
Signed-off-by: Sandeep Dhavale <dhavale@google.com>
Link: https://lore.kernel.org/r/20240408215231.3376659-1-dhavale@google.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
commit f36f3010f6
parent cacd5b04e2
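The change boils down to replacing strictly per-CPU buffers with a small pool of shared buffers indexed by CPU id. Below is a rough standalone sketch of that idea, not the kernel code itself: struct buffer, pick_buffer(), NR_CPUS and NR_BUFFERS are invented for this illustration, while the modulo mapping mirrors the z_erofs_gbuf_id() helper added in zutil.c.

#include <stdio.h>

/* Stand-ins for num_possible_cpus() and the configured pool size. */
#define NR_CPUS		8
#define NR_BUFFERS	4	/* e.g. a pool smaller than the CPU count */

struct buffer {
	int id;			/* real code: lock, vmap'd area, backing pages */
};

static struct buffer pool[NR_BUFFERS];

/* Map a CPU onto one of the shared buffers, like z_erofs_gbuf_id(). */
static struct buffer *pick_buffer(int cpu)
{
	return &pool[cpu % NR_BUFFERS];
}

int main(void)
{
	for (int i = 0; i < NR_BUFFERS; i++)
		pool[i].id = i;

	/* CPUs 0 and 4, 1 and 5, ... end up sharing the same buffer. */
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> buffer %d\n", cpu, pick_buffer(cpu)->id);
	return 0;
}

With NR_BUFFERS below NR_CPUS, several CPUs map onto the same slot, which is where the memory saving comes from; in the patch the pool defaults to one buffer per possible CPU and can only be made smaller via the global_buffers parameter introduced in fs/erofs/zutil.c.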
fs/erofs/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_EROFS_FS) += erofs.o
 erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o zutil.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
 erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
fs/erofs/decompressor.c
@@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
 	sbi->lz4.max_distance_pages = distance ?
 			DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
 			LZ4_MAX_DISTANCE_PAGES;
-	return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
+	return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
 }

 /*
@@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
 docopy:
 	/* Or copy compressed data which can be overlapped to per-CPU buffer */
 	in = rq->in;
-	src = erofs_get_pcpubuf(ctx->inpages);
+	src = z_erofs_get_gbuf(ctx->inpages);
 	if (!src) {
 		DBG_BUGON(1);
 		kunmap_local(inpage);
@@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
 	} else if (maptype == 1) {
 		vm_unmap_ram(src, ctx->inpages);
 	} else if (maptype == 2) {
-		erofs_put_pcpubuf(src);
+		z_erofs_put_gbuf(src);
 	} else if (maptype != 3) {
 		DBG_BUGON(1);
 		return -EFAULT;
fs/erofs/internal.h
@@ -463,11 +463,11 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *egrp);
 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
 			    int flags);
-void *erofs_get_pcpubuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
-void erofs_pcpubuf_exit(void);
+void *z_erofs_get_gbuf(unsigned int requiredpages);
+void z_erofs_put_gbuf(void *ptr);
+int z_erofs_gbuf_growsize(unsigned int nrpages);
+int __init z_erofs_gbuf_init(void);
+void z_erofs_gbuf_exit(void);
 int erofs_init_managed_cache(struct super_block *sb);
 int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
 #else
@@ -477,8 +477,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
 static inline void erofs_exit_shrinker(void) {}
 static inline int z_erofs_init_zip_subsystem(void) { return 0; }
 static inline void z_erofs_exit_zip_subsystem(void) {}
-static inline void erofs_pcpubuf_init(void) {}
-static inline void erofs_pcpubuf_exit(void) {}
+static inline int z_erofs_gbuf_init(void) { return 0; }
+static inline void z_erofs_gbuf_exit(void) {}
 static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
 #endif /* !CONFIG_EROFS_FS_ZIP */
fs/erofs/pcpubuf.c (deleted)
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) Gao Xiang <xiang@kernel.org>
- *
- * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
- * per-CPU virtual memory (in pages) in advance to store such inplace I/O
- * data if inplace decompression is failed (due to unmet inplace margin for
- * example).
- */
-#include "internal.h"
-
-struct erofs_pcpubuf {
-	raw_spinlock_t lock;
-	void *ptr;
-	struct page **pages;
-	unsigned int nrpages;
-};
-
-static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
-
-void *erofs_get_pcpubuf(unsigned int requiredpages)
-	__acquires(pcb->lock)
-{
-	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
-
-	raw_spin_lock(&pcb->lock);
-	/* check if the per-CPU buffer is too small */
-	if (requiredpages > pcb->nrpages) {
-		raw_spin_unlock(&pcb->lock);
-		put_cpu_var(erofs_pcb);
-		/* (for sparse checker) pretend pcb->lock is still taken */
-		__acquire(pcb->lock);
-		return NULL;
-	}
-	return pcb->ptr;
-}
-
-void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
-{
-	struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
-
-	DBG_BUGON(pcb->ptr != ptr);
-	raw_spin_unlock(&pcb->lock);
-	put_cpu_var(erofs_pcb);
-}
-
-/* the next step: support per-CPU page buffers hotplug */
-int erofs_pcpubuf_growsize(unsigned int nrpages)
-{
-	static DEFINE_MUTEX(pcb_resize_mutex);
-	static unsigned int pcb_nrpages;
-	struct page *pagepool = NULL;
-	int delta, cpu, ret, i;
-
-	mutex_lock(&pcb_resize_mutex);
-	delta = nrpages - pcb_nrpages;
-	ret = 0;
-	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
-	if (delta <= 0)
-		goto out;
-
-	for_each_possible_cpu(cpu) {
-		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-		struct page **pages, **oldpages;
-		void *ptr, *old_ptr;
-
-		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
-		if (!pages) {
-			ret = -ENOMEM;
-			break;
-		}
-
-		for (i = 0; i < nrpages; ++i) {
-			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
-			if (!pages[i]) {
-				ret = -ENOMEM;
-				oldpages = pages;
-				goto free_pagearray;
-			}
-		}
-		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
-		if (!ptr) {
-			ret = -ENOMEM;
-			oldpages = pages;
-			goto free_pagearray;
-		}
-		raw_spin_lock(&pcb->lock);
-		old_ptr = pcb->ptr;
-		pcb->ptr = ptr;
-		oldpages = pcb->pages;
-		pcb->pages = pages;
-		i = pcb->nrpages;
-		pcb->nrpages = nrpages;
-		raw_spin_unlock(&pcb->lock);
-
-		if (!oldpages) {
-			DBG_BUGON(old_ptr);
-			continue;
-		}
-
-		if (old_ptr)
-			vunmap(old_ptr);
-free_pagearray:
-		while (i)
-			erofs_pagepool_add(&pagepool, oldpages[--i]);
-		kfree(oldpages);
-		if (ret)
-			break;
-	}
-	pcb_nrpages = nrpages;
-	erofs_release_pages(&pagepool);
-out:
-	mutex_unlock(&pcb_resize_mutex);
-	return ret;
-}
-
-void __init erofs_pcpubuf_init(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
-		raw_spin_lock_init(&pcb->lock);
-	}
-}
-
-void erofs_pcpubuf_exit(void)
-{
-	int cpu, i;
-
-	for_each_possible_cpu(cpu) {
-		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
-		if (pcb->ptr) {
-			vunmap(pcb->ptr);
-			pcb->ptr = NULL;
-		}
-		if (!pcb->pages)
-			continue;
-
-		for (i = 0; i < pcb->nrpages; ++i)
-			if (pcb->pages[i])
-				put_page(pcb->pages[i]);
-		kfree(pcb->pages);
-		pcb->pages = NULL;
-	}
-}
fs/erofs/super.c
@@ -859,7 +859,10 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto deflate_err;

-	erofs_pcpubuf_init();
+	err = z_erofs_gbuf_init();
+	if (err)
+		goto gbuf_err;
+
 	err = z_erofs_init_zip_subsystem();
 	if (err)
 		goto zip_err;
@@ -879,6 +882,8 @@ fs_err:
 sysfs_err:
 	z_erofs_exit_zip_subsystem();
 zip_err:
+	z_erofs_gbuf_exit();
+gbuf_err:
 	z_erofs_deflate_exit();
 deflate_err:
 	z_erofs_lzma_exit();
@@ -902,7 +907,7 @@ static void __exit erofs_module_exit(void)
 	z_erofs_lzma_exit();
 	erofs_exit_shrinker();
 	kmem_cache_destroy(erofs_inode_cachep);
-	erofs_pcpubuf_exit();
+	z_erofs_gbuf_exit();
 }

 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
fs/erofs/zutil.c (148 additions)
@@ -5,6 +5,18 @@
  */
 #include "internal.h"

+struct z_erofs_gbuf {
+	spinlock_t lock;
+	void *ptr;
+	struct page **pages;
+	unsigned int nrpages;
+};
+
+static struct z_erofs_gbuf *z_erofs_gbufpool;
+static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages;
+
+module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
+
 static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
 /* protected by 'erofs_sb_list_lock' */
 static unsigned int shrinker_run_no;
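Note: global_buffers is registered with mode 0444, so it is read-only at runtime and can only be set when the module is loaded. Assuming the usual module name erofs (as in mainline), that means something like erofs.global_buffers=4 on the kernel command line for a built-in EROFS, or modprobe erofs global_buffers=4 for a modular build; the current value is then visible under /sys/module/erofs/parameters/global_buffers, and z_erofs_gbuf_init() below clamps it to the number of possible CPUs.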
@@ -14,6 +26,142 @@ static DEFINE_SPINLOCK(erofs_sb_list_lock);
 static LIST_HEAD(erofs_sb_list);
 static struct shrinker *erofs_shrinker_info;

+static unsigned int z_erofs_gbuf_id(void)
+{
+	return raw_smp_processor_id() % z_erofs_gbuf_count;
+}
+
+void *z_erofs_get_gbuf(unsigned int requiredpages)
+	__acquires(gbuf->lock)
+{
+	struct z_erofs_gbuf *gbuf;
+
+	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
+	spin_lock(&gbuf->lock);
+	/* check if the buffer is too small */
+	if (requiredpages > gbuf->nrpages) {
+		spin_unlock(&gbuf->lock);
+		/* (for sparse checker) pretend gbuf->lock is still taken */
+		__acquire(gbuf->lock);
+		return NULL;
+	}
+	return gbuf->ptr;
+}
+
+void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
+{
+	struct z_erofs_gbuf *gbuf;
+
+	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
+	DBG_BUGON(gbuf->ptr != ptr);
+	spin_unlock(&gbuf->lock);
+}
+
+int z_erofs_gbuf_growsize(unsigned int nrpages)
+{
+	static DEFINE_MUTEX(gbuf_resize_mutex);
+	struct page *pagepool = NULL;
+	int delta, ret, i, j;
+
+	mutex_lock(&gbuf_resize_mutex);
+	delta = nrpages - z_erofs_gbuf_nrpages;
+	ret = 0;
+	/* avoid shrinking gbufs, since no idea how many fses rely on */
+	if (delta <= 0)
+		goto out;
+
+	for (i = 0; i < z_erofs_gbuf_count; ++i) {
+		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+		struct page **pages, **tmp_pages;
+		void *ptr, *old_ptr = NULL;
+
+		ret = -ENOMEM;
+		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
+		if (!tmp_pages)
+			break;
+		for (j = 0; j < nrpages; ++j) {
+			tmp_pages[j] = erofs_allocpage(&pagepool, GFP_KERNEL);
+			if (!tmp_pages[j])
+				goto free_pagearray;
+		}
+		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
+		if (!ptr)
+			goto free_pagearray;
+
+		pages = tmp_pages;
+		spin_lock(&gbuf->lock);
+		old_ptr = gbuf->ptr;
+		gbuf->ptr = ptr;
+		tmp_pages = gbuf->pages;
+		gbuf->pages = pages;
+		j = gbuf->nrpages;
+		gbuf->nrpages = nrpages;
+		spin_unlock(&gbuf->lock);
+		ret = 0;
+		if (!tmp_pages) {
+			DBG_BUGON(old_ptr);
+			continue;
+		}
+
+		if (old_ptr)
+			vunmap(old_ptr);
+free_pagearray:
+		while (j)
+			erofs_pagepool_add(&pagepool, tmp_pages[--j]);
+		kfree(tmp_pages);
+		if (ret)
+			break;
+	}
+	z_erofs_gbuf_nrpages = nrpages;
+	erofs_release_pages(&pagepool);
+out:
+	mutex_unlock(&gbuf_resize_mutex);
+	return ret;
+}
+
+int __init z_erofs_gbuf_init(void)
+{
+	unsigned int i = num_possible_cpus();
+
+	if (!z_erofs_gbuf_count)
+		z_erofs_gbuf_count = i;
+	else
+		z_erofs_gbuf_count = min(z_erofs_gbuf_count, i);
+
+	z_erofs_gbufpool = kcalloc(z_erofs_gbuf_count,
+				   sizeof(*z_erofs_gbufpool), GFP_KERNEL);
+	if (!z_erofs_gbufpool)
+		return -ENOMEM;
+
+	for (i = 0; i < z_erofs_gbuf_count; ++i)
+		spin_lock_init(&z_erofs_gbufpool[i].lock);
+	return 0;
+}
+
+void z_erofs_gbuf_exit(void)
+{
+	int i;
+
+	for (i = 0; i < z_erofs_gbuf_count; ++i) {
+		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+
+		if (gbuf->ptr) {
+			vunmap(gbuf->ptr);
+			gbuf->ptr = NULL;
+		}
+
+		if (!gbuf->pages)
+			continue;
+
+		for (i = 0; i < gbuf->nrpages; ++i)
+			if (gbuf->pages[i])
+				put_page(gbuf->pages[i]);
+		kfree(gbuf->pages);
+		gbuf->pages = NULL;
+	}
+	kfree(z_erofs_gbufpool);
+}
+
 struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
 {
 	struct page *page = *pagepool;