erofs: Zstandard compression support
Add Zstandard compression as the 4th supported algorithm since it becomes more popular now and some end users have asked this for quite a while [1][2]. Each EROFS physical cluster contains only one valid standard Zstandard frame as described in [3] so that decompression can be performed on a per-pcluster basis independently. Currently, it just leverages multi-call stream decompression APIs with internal sliding window buffers. One-shot or bufferless decompression could be implemented later for even better performance if needed. [1] https://github.com/erofs/erofs-utils/issues/6 [2] https://lore.kernel.org/r/Y08h+z6CZdnS1XBm@B-P7TQMD6M-0146.lan [3] https://www.rfc-editor.org/rfc/rfc8478.txt Acked-by: Chao Yu <chao@kernel.org> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20240508234453.17896-1-xiang@kernel.org
This commit is contained in:
parent
d69189428d
commit
7c35de4df1
@ -112,6 +112,21 @@ config EROFS_FS_ZIP_DEFLATE
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config EROFS_FS_ZIP_ZSTD
|
||||
bool "EROFS Zstandard compressed data support"
|
||||
depends on EROFS_FS_ZIP
|
||||
select ZSTD_DECOMPRESS
|
||||
help
|
||||
Saying Y here includes support for reading EROFS file systems
|
||||
containing Zstandard compressed data. It gives better compression
|
||||
ratios than the default LZ4 format, while it costs more CPU
|
||||
overhead.
|
||||
|
||||
Zstandard support is an experimental feature for now and so most
|
||||
file systems will be readable without selecting this option.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config EROFS_FS_ONDEMAND
|
||||
bool "EROFS fscache-based on-demand read support"
|
||||
depends on EROFS_FS
|
||||
|
@ -6,4 +6,5 @@ erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
|
||||
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
|
||||
|
@ -90,8 +90,12 @@ int z_erofs_load_lzma_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb, void *data, int size);
|
||||
int z_erofs_load_deflate_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb, void *data, int size);
|
||||
int z_erofs_load_zstd_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb, void *data, int size);
|
||||
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool);
|
||||
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool);
|
||||
int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pgpl);
|
||||
#endif
|
||||
|
@ -399,6 +399,13 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
|
||||
.name = "deflate"
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_ZIP_ZSTD
|
||||
[Z_EROFS_COMPRESSION_ZSTD] = {
|
||||
.config = z_erofs_load_zstd_config,
|
||||
.decompress = z_erofs_zstd_decompress,
|
||||
.name = "zstd"
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
|
||||
|
279
fs/erofs/decompressor_zstd.c
Normal file
279
fs/erofs/decompressor_zstd.c
Normal file
@ -0,0 +1,279 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#include <linux/zstd.h>
|
||||
#include "compress.h"
|
||||
|
||||
struct z_erofs_zstd {
|
||||
struct z_erofs_zstd *next;
|
||||
u8 bounce[PAGE_SIZE];
|
||||
void *wksp;
|
||||
unsigned int wkspsz;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(z_erofs_zstd_lock);
|
||||
static unsigned int z_erofs_zstd_max_dictsize;
|
||||
static unsigned int z_erofs_zstd_nstrms, z_erofs_zstd_avail_strms;
|
||||
static struct z_erofs_zstd *z_erofs_zstd_head;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_zstd_wq);
|
||||
|
||||
module_param_named(zstd_streams, z_erofs_zstd_nstrms, uint, 0444);
|
||||
|
||||
static struct z_erofs_zstd *z_erofs_isolate_strms(bool all)
|
||||
{
|
||||
struct z_erofs_zstd *strm;
|
||||
|
||||
again:
|
||||
spin_lock(&z_erofs_zstd_lock);
|
||||
strm = z_erofs_zstd_head;
|
||||
if (!strm) {
|
||||
spin_unlock(&z_erofs_zstd_lock);
|
||||
wait_event(z_erofs_zstd_wq, READ_ONCE(z_erofs_zstd_head));
|
||||
goto again;
|
||||
}
|
||||
z_erofs_zstd_head = all ? NULL : strm->next;
|
||||
spin_unlock(&z_erofs_zstd_lock);
|
||||
return strm;
|
||||
}
|
||||
|
||||
void z_erofs_zstd_exit(void)
|
||||
{
|
||||
while (z_erofs_zstd_avail_strms) {
|
||||
struct z_erofs_zstd *strm, *n;
|
||||
|
||||
for (strm = z_erofs_isolate_strms(true); strm; strm = n) {
|
||||
n = strm->next;
|
||||
|
||||
kvfree(strm->wksp);
|
||||
kfree(strm);
|
||||
--z_erofs_zstd_avail_strms;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int __init z_erofs_zstd_init(void)
|
||||
{
|
||||
/* by default, use # of possible CPUs instead */
|
||||
if (!z_erofs_zstd_nstrms)
|
||||
z_erofs_zstd_nstrms = num_possible_cpus();
|
||||
|
||||
for (; z_erofs_zstd_avail_strms < z_erofs_zstd_nstrms;
|
||||
++z_erofs_zstd_avail_strms) {
|
||||
struct z_erofs_zstd *strm;
|
||||
|
||||
strm = kzalloc(sizeof(*strm), GFP_KERNEL);
|
||||
if (!strm) {
|
||||
z_erofs_zstd_exit();
|
||||
return -ENOMEM;
|
||||
}
|
||||
spin_lock(&z_erofs_zstd_lock);
|
||||
strm->next = z_erofs_zstd_head;
|
||||
z_erofs_zstd_head = strm;
|
||||
spin_unlock(&z_erofs_zstd_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int z_erofs_load_zstd_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb, void *data, int size)
|
||||
{
|
||||
static DEFINE_MUTEX(zstd_resize_mutex);
|
||||
struct z_erofs_zstd_cfgs *zstd = data;
|
||||
unsigned int dict_size, wkspsz;
|
||||
struct z_erofs_zstd *strm, *head = NULL;
|
||||
void *wksp;
|
||||
|
||||
if (!zstd || size < sizeof(struct z_erofs_zstd_cfgs) || zstd->format) {
|
||||
erofs_err(sb, "unsupported zstd format, size=%u", size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (zstd->windowlog > ilog2(Z_EROFS_ZSTD_MAX_DICT_SIZE) - 10) {
|
||||
erofs_err(sb, "unsupported zstd window log %u", zstd->windowlog);
|
||||
return -EINVAL;
|
||||
}
|
||||
dict_size = 1U << (zstd->windowlog + 10);
|
||||
|
||||
/* in case 2 z_erofs_load_zstd_config() race to avoid deadlock */
|
||||
mutex_lock(&zstd_resize_mutex);
|
||||
if (z_erofs_zstd_max_dictsize >= dict_size) {
|
||||
mutex_unlock(&zstd_resize_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* 1. collect/isolate all streams for the following check */
|
||||
while (z_erofs_zstd_avail_strms) {
|
||||
struct z_erofs_zstd *n;
|
||||
|
||||
for (strm = z_erofs_isolate_strms(true); strm; strm = n) {
|
||||
n = strm->next;
|
||||
strm->next = head;
|
||||
head = strm;
|
||||
--z_erofs_zstd_avail_strms;
|
||||
}
|
||||
}
|
||||
|
||||
/* 2. walk each isolated stream and grow max dict_size if needed */
|
||||
wkspsz = zstd_dstream_workspace_bound(dict_size);
|
||||
for (strm = head; strm; strm = strm->next) {
|
||||
wksp = kvmalloc(wkspsz, GFP_KERNEL);
|
||||
if (!wksp)
|
||||
break;
|
||||
kvfree(strm->wksp);
|
||||
strm->wksp = wksp;
|
||||
strm->wkspsz = wkspsz;
|
||||
}
|
||||
|
||||
/* 3. push back all to the global list and update max dict_size */
|
||||
spin_lock(&z_erofs_zstd_lock);
|
||||
DBG_BUGON(z_erofs_zstd_head);
|
||||
z_erofs_zstd_head = head;
|
||||
spin_unlock(&z_erofs_zstd_lock);
|
||||
z_erofs_zstd_avail_strms = z_erofs_zstd_nstrms;
|
||||
wake_up_all(&z_erofs_zstd_wq);
|
||||
if (!strm)
|
||||
z_erofs_zstd_max_dictsize = dict_size;
|
||||
mutex_unlock(&zstd_resize_mutex);
|
||||
return strm ? -ENOMEM : 0;
|
||||
}
|
||||
|
||||
int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pgpl)
|
||||
{
|
||||
const unsigned int nrpages_out =
|
||||
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
|
||||
const unsigned int nrpages_in =
|
||||
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
|
||||
zstd_dstream *stream;
|
||||
struct super_block *sb = rq->sb;
|
||||
unsigned int insz, outsz, pofs;
|
||||
struct z_erofs_zstd *strm;
|
||||
zstd_in_buffer in_buf = { NULL, 0, 0 };
|
||||
zstd_out_buffer out_buf = { NULL, 0, 0 };
|
||||
u8 *kin, *kout = NULL;
|
||||
bool bounced = false;
|
||||
int no = -1, ni = 0, j = 0, zerr, err;
|
||||
|
||||
/* 1. get the exact compressed size */
|
||||
kin = kmap_local_page(*rq->in);
|
||||
err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
|
||||
min_t(unsigned int, rq->inputsize,
|
||||
sb->s_blocksize - rq->pageofs_in));
|
||||
if (err) {
|
||||
kunmap_local(kin);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* 2. get an available ZSTD context */
|
||||
strm = z_erofs_isolate_strms(false);
|
||||
|
||||
/* 3. multi-call decompress */
|
||||
insz = rq->inputsize;
|
||||
outsz = rq->outputsize;
|
||||
stream = zstd_init_dstream(z_erofs_zstd_max_dictsize, strm->wksp, strm->wkspsz);
|
||||
if (!stream) {
|
||||
err = -EIO;
|
||||
goto failed_zinit;
|
||||
}
|
||||
|
||||
pofs = rq->pageofs_out;
|
||||
in_buf.size = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
|
||||
insz -= in_buf.size;
|
||||
in_buf.src = kin + rq->pageofs_in;
|
||||
do {
|
||||
if (out_buf.size == out_buf.pos) {
|
||||
if (++no >= nrpages_out || !outsz) {
|
||||
erofs_err(sb, "insufficient space for decompressed data");
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
if (kout)
|
||||
kunmap_local(kout);
|
||||
out_buf.size = min_t(u32, outsz, PAGE_SIZE - pofs);
|
||||
outsz -= out_buf.size;
|
||||
if (!rq->out[no]) {
|
||||
rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
|
||||
if (!rq->out[no]) {
|
||||
kout = NULL;
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
set_page_private(rq->out[no],
|
||||
Z_EROFS_SHORTLIVED_PAGE);
|
||||
}
|
||||
kout = kmap_local_page(rq->out[no]);
|
||||
out_buf.dst = kout + pofs;
|
||||
out_buf.pos = 0;
|
||||
pofs = 0;
|
||||
}
|
||||
|
||||
if (in_buf.size == in_buf.pos && insz) {
|
||||
if (++ni >= nrpages_in) {
|
||||
erofs_err(sb, "invalid compressed data");
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
if (kout) /* unlike kmap(), take care of the orders */
|
||||
kunmap_local(kout);
|
||||
kunmap_local(kin);
|
||||
in_buf.size = min_t(u32, insz, PAGE_SIZE);
|
||||
insz -= in_buf.size;
|
||||
kin = kmap_local_page(rq->in[ni]);
|
||||
in_buf.src = kin;
|
||||
in_buf.pos = 0;
|
||||
bounced = false;
|
||||
if (kout) {
|
||||
j = (u8 *)out_buf.dst - kout;
|
||||
kout = kmap_local_page(rq->out[no]);
|
||||
out_buf.dst = kout + j;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle overlapping: Use bounced buffer if the compressed
|
||||
* data is under processing; Or use short-lived pages from the
|
||||
* on-stack pagepool where pages share among the same request
|
||||
* and not _all_ inplace I/O pages are needed to be doubled.
|
||||
*/
|
||||
if (!bounced && rq->out[no] == rq->in[ni]) {
|
||||
memcpy(strm->bounce, in_buf.src, in_buf.size);
|
||||
in_buf.src = strm->bounce;
|
||||
bounced = true;
|
||||
}
|
||||
|
||||
for (j = ni + 1; j < nrpages_in; ++j) {
|
||||
struct page *tmppage;
|
||||
|
||||
if (rq->out[no] != rq->in[j])
|
||||
continue;
|
||||
tmppage = erofs_allocpage(pgpl, rq->gfp);
|
||||
if (!tmppage) {
|
||||
err = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
|
||||
copy_highpage(tmppage, rq->in[j]);
|
||||
rq->in[j] = tmppage;
|
||||
}
|
||||
zerr = zstd_decompress_stream(stream, &out_buf, &in_buf);
|
||||
if (zstd_is_error(zerr) || (!zerr && outsz)) {
|
||||
erofs_err(sb, "failed to decompress in[%u] out[%u]: %s",
|
||||
rq->inputsize, rq->outputsize,
|
||||
zerr ? zstd_get_error_name(zerr) : "unexpected end of stream");
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
} while (outsz || out_buf.pos < out_buf.size);
|
||||
failed:
|
||||
if (kout)
|
||||
kunmap_local(kout);
|
||||
failed_zinit:
|
||||
kunmap_local(kin);
|
||||
/* 4. push back ZSTD stream context to the global list */
|
||||
spin_lock(&z_erofs_zstd_lock);
|
||||
strm->next = z_erofs_zstd_head;
|
||||
z_erofs_zstd_head = strm;
|
||||
spin_unlock(&z_erofs_zstd_lock);
|
||||
wake_up(&z_erofs_zstd_wq);
|
||||
return err;
|
||||
}
|
@ -296,6 +296,7 @@ enum {
|
||||
Z_EROFS_COMPRESSION_LZ4 = 0,
|
||||
Z_EROFS_COMPRESSION_LZMA = 1,
|
||||
Z_EROFS_COMPRESSION_DEFLATE = 2,
|
||||
Z_EROFS_COMPRESSION_ZSTD = 3,
|
||||
Z_EROFS_COMPRESSION_MAX
|
||||
};
|
||||
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
|
||||
@ -322,6 +323,15 @@ struct z_erofs_deflate_cfgs {
|
||||
u8 reserved[5];
|
||||
} __packed;
|
||||
|
||||
/* 6 bytes (+ length field = 8 bytes) */
|
||||
struct z_erofs_zstd_cfgs {
|
||||
u8 format;
|
||||
u8 windowlog; /* windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN(10) */
|
||||
u8 reserved[4];
|
||||
} __packed;
|
||||
|
||||
#define Z_EROFS_ZSTD_MAX_DICT_SIZE Z_EROFS_PCLUSTER_MAX_SIZE
|
||||
|
||||
/*
|
||||
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
|
||||
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
|
||||
|
@ -502,6 +502,14 @@ static inline int z_erofs_deflate_init(void) { return 0; }
|
||||
static inline int z_erofs_deflate_exit(void) { return 0; }
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP_DEFLATE */
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP_ZSTD
|
||||
int __init z_erofs_zstd_init(void);
|
||||
void z_erofs_zstd_exit(void);
|
||||
#else
|
||||
static inline int z_erofs_zstd_init(void) { return 0; }
|
||||
static inline int z_erofs_zstd_exit(void) { return 0; }
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP_ZSTD */
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ONDEMAND
|
||||
int erofs_fscache_register_fs(struct super_block *sb);
|
||||
void erofs_fscache_unregister_fs(struct super_block *sb);
|
||||
|
@ -859,6 +859,10 @@ static int __init erofs_module_init(void)
|
||||
if (err)
|
||||
goto deflate_err;
|
||||
|
||||
err = z_erofs_zstd_init();
|
||||
if (err)
|
||||
goto zstd_err;
|
||||
|
||||
err = z_erofs_gbuf_init();
|
||||
if (err)
|
||||
goto gbuf_err;
|
||||
@ -884,6 +888,8 @@ sysfs_err:
|
||||
zip_err:
|
||||
z_erofs_gbuf_exit();
|
||||
gbuf_err:
|
||||
z_erofs_zstd_exit();
|
||||
zstd_err:
|
||||
z_erofs_deflate_exit();
|
||||
deflate_err:
|
||||
z_erofs_lzma_exit();
|
||||
@ -903,6 +909,7 @@ static void __exit erofs_module_exit(void)
|
||||
|
||||
erofs_exit_sysfs();
|
||||
z_erofs_exit_zip_subsystem();
|
||||
z_erofs_zstd_exit();
|
||||
z_erofs_deflate_exit();
|
||||
z_erofs_lzma_exit();
|
||||
erofs_exit_shrinker();
|
||||
|
@ -550,7 +550,8 @@ static int z_erofs_do_map_blocks(struct inode *inode,
|
||||
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
|
||||
((flags & EROFS_GET_BLOCKS_READMORE) &&
|
||||
(map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
|
||||
map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
|
||||
map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE ||
|
||||
map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) &&
|
||||
map->m_llen >= i_blocksize(inode))) {
|
||||
err = z_erofs_get_extent_decompressedlen(&m);
|
||||
if (!err)
|
||||
|
Loading…
x
Reference in New Issue
Block a user