// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/zlib.h>
#include <linux/zstd.h>

/* Bounce buffer: */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,
		BB_VMAP,
		BB_KMALLOC,
		BB_MEMPOOL,
	}		type;
	int		rw;
};

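/*
 * Allocate a bounce buffer of up to encoded_extent_max bytes: try a plain
 * kmalloc() first, falling back to the preallocated mempool if that fails.
 */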
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->opts.encoded_extent_max);

	b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	BUG();
}

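/* Returns true if the bio's data is physically contiguous in memory: */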
static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	void *expected_start = NULL;

	__bio_for_each_bvec(bv, bio, iter, start) {
		if (expected_start &&
		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
			return false;

		expected_start = page_address(bv.bv_page) +
			bv.bv_offset + bv.bv_len;
	}

	return true;
}

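/*
 * Get a linear view of the bio's data: use it in place if it's already
 * physically contiguous (BB_NONE), else try vmapping the pages (BB_VMAP),
 * else copy into a bounce buffer. For reads the data is copied in here; for
 * writes, callers copy the result back with memcpy_to_bio() when it was
 * bounced.
 */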
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	void *data;

	BUG_ON(start.bi_size > c->opts.encoded_extent_max);

	if (!PageHighMem(bio_iter_page(bio, start)) &&
	    bio_phys_contig(bio, start))
		return (struct bbuf) {
			.b = page_address(bio_iter_page(bio, start)) +
				bio_iter_offset(bio, start),
			.type = BB_NONE, .rw = rw
		};

	/* check if we can map the pages contiguously: */
	__bio_for_each_segment(bv, bio, iter, start) {
		if (iter.bi_size != start.bi_size &&
		    bv.bv_offset)
			goto bounce;

		if (bv.bv_len < iter.bi_size &&
		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
			goto bounce;

		nr_pages++;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}

static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}

static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(buf.b, &c->compression_bounce[buf.rw]);
		break;
	}
}

static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
#ifdef __KERNEL__
	strm->workspace = workspace;
#endif
}

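/*
 * Decompress @src into @dst_data according to crc.compression_type:
 * returns 0 on success, -EIO on any decompression error.
 */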
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_TYPE_lz4_old:
	case BCH_COMPRESSION_TYPE_lz4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_DCtx *ctx;
		size_t real_src_len = le32_to_cpup(src_data.b);

		if (real_src_len > src_len - 4)
			goto err;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
		ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());

		ret = zstd_decompress_dctx(ctx,
				dst_data,	dst_len,
				src_data.b + 4, real_src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}
	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}

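/*
 * Decompress an extent in place, reusing @bio's own pages, and update @crc
 * to describe the now-uncompressed, unchecksummed data.
 */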
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc->compressed_size << 9	> c->opts.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		if (!c->opts.no_data_io)
			bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * XXX: don't have a good way to assert that the bio was allocated with
	 * enough space, we depend on bch2_move_extent doing the right thing
	 */
	bio->bi_iter.bi_size = crc->live_size << 9;

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}

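/*
 * Decompress @src into @dst at @dst_iter: map @dst directly when the full
 * uncompressed size fits the destination iterator, otherwise decompress into
 * a bounce buffer and copy out the portion starting at crc.offset.
 */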
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc.compressed_size << 9	> c->opts.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}

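/*
 * A single compression attempt with the given algorithm and level: returns
 * the compressed size on success, 0 on failure, or a negative value hinting
 * how much of the input would have fit in @dst (see __bio_compress()).
 */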
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    struct bch_compression_opt compression)
{
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];

	switch (compression_type) {
	case BCH_COMPRESSION_TYPE_lz4:
		if (compression.level < LZ4HC_MIN_CLEVEL) {
			int len = src_len;
			int ret = LZ4_compress_destSize(
					src,		dst,
					&len,		dst_len,
					workspace);
			if (len < src_len)
				return -len;

			return ret;
		} else {
			int ret = LZ4_compress_HC(
					src,		dst,
					src_len,	dst_len,
					compression.level,
					workspace);

			return ret ?: -1;
		}
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm,
				  compression.level
				  ? clamp_t(unsigned, compression.level,
					    Z_BEST_SPEED, Z_BEST_COMPRESSION)
				  : Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		/*
		 * rescale:
		 * zstd max compression level is 22, our max level is 15
		 */
		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
		ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);

		/*
		 * ZSTD requires that when we decompress we pass in the exact
		 * compressed size - rounding it up to the nearest sector
		 * doesn't work, so we use the first 4 bytes of the buffer for
		 * that.
		 *
		 * Additionally, the ZSTD code seems to have a bug where it will
		 * write just past the end of the buffer - so subtract a fudge
		 * factor (7 bytes) from the dst buffer size to account for
		 * that.
		 */
		size_t len = zstd_compress_cctx(ctx,
				dst + 4,	dst_len - 4 - 7,
				src,		src_len,
				&params);
		if (zstd_is_error(len))
			return 0;

		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}

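/*
 * Compress @src into @dst: shrink *src_len until the compressed output fits
 * in @dst, pad the result to a block boundary, and return the compression
 * type used - or BCH_COMPRESSION_TYPE_incompressible if compression didn't
 * save at least one block.
 */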
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       struct bch_compression_opt compression)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];
	unsigned pad;
	int ret = 0;

	BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));

	/* If it's only one block, don't bother trying to compress: */
	if (src->bi_iter.bi_size <= c->opts.block_size)
		return BCH_COMPRESSION_TYPE_incompressible;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b,	*dst_len,
				       src_data.b,	*src_len,
				       compression);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(-ret >= *src_len);

		if (ret < 0)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;

		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));

	ret = compression_type;
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
err:
	ret = BCH_COMPRESSION_TYPE_incompressible;
	goto out;
}

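/*
 * Public entry point: clamps @src to encoded_extent_max and @dst to the
 * (clamped) source size before compressing, then restores both iterators.
 */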
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_opt)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;
	unsigned compression_type;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->opts.encoded_extent_max);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len,
			       bch2_compression_decode(compression_opt));

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}

static int __bch2_fs_compress_init(struct bch_fs *, u64);

#define BCH_FEATURE_none	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

#undef BCH_FEATURE_none

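/*
 * Set the superblock feature bit for a compression type the first time it's
 * used, allocating its workspace mempools via __bch2_fs_compress_init().
 */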
static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
{
	int ret = 0;

	if ((c->sb.features & f) == f)
		return 0;

	mutex_lock(&c->sb_lock);

	if ((c->sb.features & f) == f) {
		mutex_unlock(&c->sb_lock);
		return 0;
	}

	ret = __bch2_fs_compress_init(c, c->sb.features|f);
	if (ret) {
		mutex_unlock(&c->sb_lock);
		return ret;
	}

	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_opt)
{
	unsigned compression_type = bch2_compression_decode(compression_opt).type;

	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	return compression_type
		? __bch2_check_set_has_compressed_data(c,
				1ULL << bch2_compression_opt_to_feature[compression_type])
		: 0;
}

void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}

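/*
 * Allocate bounce buffers and per-algorithm workspace mempools for whichever
 * compression features are enabled in @features; pools that already exist are
 * left alone, so this is safe to call again when new features are enabled.
 */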
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
	size_t decompress_workspace_size = 0;
	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
						 c->opts.encoded_extent_max);

	c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);

	struct {
		unsigned			feature;
		enum bch_compression_type	type;
		size_t				compress_workspace;
		size_t				decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
			max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
			0 },
		{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
			c->zstd_workspace_size,
			zstd_dctx_workspace_bound() },
	}, *i;
	bool have_compressed = false;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		have_compressed |= (features & (1 << i->feature)) != 0;

	if (!have_compressed)
		return 0;

	if (!mempool_initialized(&c->compression_bounce[READ]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_read_init;

	if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_write_init;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!(features & (1 << i->feature)))
			continue;

		if (mempool_initialized(&c->compress_workspace[i->type]))
			continue;

		if (mempool_init_kvmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace))
			return -BCH_ERR_ENOMEM_compression_workspace_init;
	}

	if (!mempool_initialized(&c->decompress_workspace) &&
	    mempool_init_kvmalloc_pool(&c->decompress_workspace,
				       1, decompress_workspace_size))
		return -BCH_ERR_ENOMEM_decompression_workspace_init;

	return 0;
}

static u64 compression_opt_to_feature(unsigned v)
{
	unsigned type = bch2_compression_decode(v).type;

	return BIT_ULL(bch2_compression_opt_to_feature[type]);
}

int bch2_fs_compress_init(struct bch_fs *c)
{
	u64 f = c->sb.features;

	f |= compression_opt_to_feature(c->opts.compression);
	f |= compression_opt_to_feature(c->opts.background_compression);

	return __bch2_fs_compress_init(c, f);
}

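/*
 * Parse a compression option string of the form "type" or "type:level" into
 * an encoded bch_compression_opt: a nonzero level is only accepted for an
 * actual compression type, and must be at most 15.
 */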
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
			       struct printbuf *err)
{
	char *val = kstrdup(_val, GFP_KERNEL);
	char *p = val, *type_str, *level_str;
	struct bch_compression_opt opt = { 0 };
	int ret;

	if (!val)
		return -ENOMEM;

	type_str = strsep(&p, ":");
	level_str = p;

	ret = match_string(bch2_compression_opts, -1, type_str);
	if (ret < 0 && err)
		prt_str(err, "invalid compression type");
	if (ret < 0)
		goto err;

	opt.type = ret;

	if (level_str) {
		unsigned level;

		ret = kstrtouint(level_str, 10, &level);
		if (!ret && !opt.type && level)
			ret = -EINVAL;
		if (!ret && level > 15)
			ret = -EINVAL;
		if (ret < 0 && err)
			prt_str(err, "invalid compression level");
		if (ret < 0)
			goto err;

		opt.level = level;
	}

	*res = bch2_compression_encode(opt);
err:
	kfree(val);
	return ret;
}

void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	if (opt.type < BCH_COMPRESSION_OPT_NR)
		prt_str(out, bch2_compression_opts[opt.type]);
	else
		prt_printf(out, "(unknown compression opt %u)", opt.type);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
}

void bch2_opt_compression_to_text(struct printbuf *out,
				  struct bch_fs *c,
				  struct bch_sb *sb,
				  u64 v)
{
	return bch2_compression_opt_to_text(out, v);
}

int bch2_opt_compression_validate(u64 v, struct printbuf *err)
{
	if (!bch2_compression_opt_valid(v)) {
		prt_printf(err, "invalid compression opt %llu", v);
		return -BCH_ERR_invalid_sb_opt_compression;
	}

	return 0;
}