// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "buckets.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "str_hash.h"
#include "subvolume.h"
#include "varint.h"

#include <linux/random.h>

#include <asm/unaligned.h>

const char * const bch2_inode_opts[] = {
#define x(name, ...)	#name,
	BCH_INODE_OPTS()
#undef x
	NULL,
};

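/*
 * Old-style (pre-varint KEY_TYPE_inode) field encoding, kept so old
 * filesystems can still be read: each field is stored as up to two big-endian
 * u64s, with the position of the highest set bit in the first byte giving the
 * encoded length via byte_table[] below; that marker bit is stripped before
 * the value is read.  For example, a first byte of 0xe4 has its top bit set,
 * so the field is a single byte and decodes to 0xe4 ^ 0x80 = 100.
 */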
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };

static int inode_decode_field(const u8 *in, const u8 *end,
			      u64 out[2], unsigned *out_bits)
{
	__be64 be[2] = { 0, 0 };
	unsigned bytes, shift;
	u8 *p;

	if (in >= end)
		return -1;

	if (!*in)
		return -1;

	/*
	 * position of highest set bit indicates number of bytes:
	 * shift = number of bits to remove in high byte:
	 */
	shift	= 8 - __fls(*in);	/* 1 <= shift <= 8 */
	bytes	= byte_table[shift - 1];

	if (in + bytes > end)
		return -1;

	p = (u8 *) be + 16 - bytes;
	memcpy(p, in, bytes);
	*p ^= (1 << 8) >> shift;

	out[0] = be64_to_cpu(be[0]);
	out[1] = be64_to_cpu(be[1]);
	*out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);

	return bytes;
}

static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
					   const struct bch_inode_unpacked *inode)
{
	struct bkey_i_inode_v3 *k = &packed->inode;
	u8 *out = k->v.fields;
	u8 *end = (void *) &packed[1];
	u8 *last_nonzero_field = out;
	unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
	unsigned bytes;
	int ret;

	bkey_inode_v3_init(&packed->inode.k_i);
	packed->inode.k.p.offset	= inode->bi_inum;
	packed->inode.v.bi_journal_seq	= cpu_to_le64(inode->bi_journal_seq);
	packed->inode.v.bi_hash_seed	= inode->bi_hash_seed;
	packed->inode.v.bi_flags	= cpu_to_le64(inode->bi_flags);
	packed->inode.v.bi_sectors	= cpu_to_le64(inode->bi_sectors);
	packed->inode.v.bi_size		= cpu_to_le64(inode->bi_size);
	packed->inode.v.bi_version	= cpu_to_le64(inode->bi_version);
	SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
	SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);

#define x(_name, _bits)							\
	nr_fields++;							\
									\
	if (inode->_name) {						\
		ret = bch2_varint_encode_fast(out, inode->_name);	\
		out += ret;						\
									\
		if (_bits > 64)						\
			*out++ = 0;					\
									\
		last_nonzero_field = out;				\
		last_nonzero_fieldnr = nr_fields;			\
	} else {							\
		*out++ = 0;						\
									\
		if (_bits > 64)						\
			*out++ = 0;					\
	}

	BCH_INODE_FIELDS_v3()
#undef x
	BUG_ON(out > end);

	out = last_nonzero_field;
	nr_fields = last_nonzero_fieldnr;

	bytes = out - (u8 *) &packed->inode.v;
	set_bkey_val_bytes(&packed->inode.k, bytes);
	memset_u64s_tail(&packed->inode.v, 0, bytes);

	SET_INODEv3_NR_FIELDS(&k->v, nr_fields);

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
		struct bch_inode_unpacked unpacked;

		int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i),
					    &unpacked);
		BUG_ON(ret);
		BUG_ON(unpacked.bi_inum		!= inode->bi_inum);
		BUG_ON(unpacked.bi_hash_seed	!= inode->bi_hash_seed);
		BUG_ON(unpacked.bi_sectors	!= inode->bi_sectors);
		BUG_ON(unpacked.bi_size		!= inode->bi_size);
		BUG_ON(unpacked.bi_version	!= inode->bi_version);
		BUG_ON(unpacked.bi_mode		!= inode->bi_mode);

#define x(_name, _bits)	if (unpacked._name != inode->_name)		\
			panic("unpacked %llu should be %llu",		\
			      (u64) unpacked._name, (u64) inode->_name);
		BCH_INODE_FIELDS_v3()
#undef x
	}
}

void bch2_inode_pack(struct bkey_inode_buf *packed,
		     const struct bch_inode_unpacked *inode)
{
	bch2_inode_pack_inlined(packed, inode);
}

static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
					 struct bch_inode_unpacked *unpacked)
{
	const u8 *in = inode.v->fields;
	const u8 *end = bkey_val_end(inode);
	u64 field[2];
	unsigned fieldnr = 0, field_bits;
	int ret;

#define x(_name, _bits)							\
	if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {			\
		unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
		memset((void *) unpacked + offset, 0,			\
		       sizeof(*unpacked) - offset);			\
		return 0;						\
	}								\
									\
	ret = inode_decode_field(in, end, field, &field_bits);		\
	if (ret < 0)							\
		return ret;						\
									\
	if (field_bits > sizeof(unpacked->_name) * 8)			\
		return -1;						\
									\
	unpacked->_name = field[1];					\
	in += ret;

	BCH_INODE_FIELDS_v2()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}

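/*
 * Newer encoding (INODE_NEW_VARINT and KEY_TYPE_inode_v2 onwards): each inode
 * field is one varint, or two varints for fields wider than 64 bits, and
 * trailing all-zero fields are omitted entirely - nr_fields tells us how many
 * were actually written.
 */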
static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
				const u8 *in, const u8 *end,
				unsigned nr_fields)
{
	unsigned fieldnr = 0;
	int ret;
	u64 v[2];

#define x(_name, _bits)							\
	if (fieldnr < nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v[0]);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
									\
		if (_bits > 64) {					\
			ret = bch2_varint_decode_fast(in, end, &v[1]);	\
			if (ret < 0)					\
				return ret;				\
			in += ret;					\
		} else {						\
			v[1] = 0;					\
		}							\
	} else {							\
		v[0] = v[1] = 0;					\
	}								\
									\
	unpacked->_name = v[0];						\
	if (v[1] || v[0] != unpacked->_name)				\
		return -1;						\
	fieldnr++;

	BCH_INODE_FIELDS_v2()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}

static int bch2_inode_unpack_v3(struct bkey_s_c k,
				struct bch_inode_unpacked *unpacked)
{
	struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
	const u8 *in = inode.v->fields;
	const u8 *end = bkey_val_end(inode);
	unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
	unsigned fieldnr = 0;
	int ret;
	u64 v[2];

	unpacked->bi_inum	= inode.k->p.offset;
	unpacked->bi_journal_seq = le64_to_cpu(inode.v->bi_journal_seq);
	unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
	unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
	unpacked->bi_sectors	= le64_to_cpu(inode.v->bi_sectors);
	unpacked->bi_size	= le64_to_cpu(inode.v->bi_size);
	unpacked->bi_version	= le64_to_cpu(inode.v->bi_version);
	unpacked->bi_mode	= INODEv3_MODE(inode.v);

#define x(_name, _bits)							\
	if (fieldnr < nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v[0]);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
									\
		if (_bits > 64) {					\
			ret = bch2_varint_decode_fast(in, end, &v[1]);	\
			if (ret < 0)					\
				return ret;				\
			in += ret;					\
		} else {						\
			v[1] = 0;					\
		}							\
	} else {							\
		v[0] = v[1] = 0;					\
	}								\
									\
	unpacked->_name = v[0];						\
	if (v[1] || v[0] != unpacked->_name)				\
		return -1;						\
	fieldnr++;

	BCH_INODE_FIELDS_v3()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}

static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
					       struct bch_inode_unpacked *unpacked)
{
	memset(unpacked, 0, sizeof(*unpacked));

	switch (k.k->type) {
	case KEY_TYPE_inode: {
		struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);

		unpacked->bi_inum	= inode.k->p.offset;
		unpacked->bi_journal_seq = 0;
		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
		unpacked->bi_flags	= le32_to_cpu(inode.v->bi_flags);
		unpacked->bi_mode	= le16_to_cpu(inode.v->bi_mode);

		if (INODE_NEW_VARINT(inode.v)) {
			return bch2_inode_unpack_v2(unpacked, inode.v->fields,
						    bkey_val_end(inode),
						    INODE_NR_FIELDS(inode.v));
		} else {
			return bch2_inode_unpack_v1(inode, unpacked);
		}
		break;
	}
	case KEY_TYPE_inode_v2: {
		struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);

		unpacked->bi_inum	= inode.k->p.offset;
		unpacked->bi_journal_seq = le64_to_cpu(inode.v->bi_journal_seq);
		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
		unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
		unpacked->bi_mode	= le16_to_cpu(inode.v->bi_mode);

		return bch2_inode_unpack_v2(unpacked, inode.v->fields,
					    bkey_val_end(inode),
					    INODEv2_NR_FIELDS(inode.v));
	}
	default:
		BUG();
	}
}

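/*
 * Unpacking dispatch: KEY_TYPE_inode_v3 is the expected common case; the older
 * key types (KEY_TYPE_inode, KEY_TYPE_inode_v2) go through the noinline
 * slowpath above.
 */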
int bch2_inode_unpack(struct bkey_s_c k,
		      struct bch_inode_unpacked *unpacked)
{
	if (likely(k.k->type == KEY_TYPE_inode_v3))
		return bch2_inode_unpack_v3(k, unpacked);
	return bch2_inode_unpack_slowpath(k, unpacked);
}

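/*
 * Sketch of the usual caller pattern (hypothetical example - "inum" and "now"
 * are assumed to come from the caller): peek the inode, modify the unpacked
 * form, write it back, then commit the transaction as normal:
 *
 *	struct btree_iter iter;
 *	struct bch_inode_unpacked u;
 *	int ret = bch2_inode_peek(trans, &iter, &u, inum, BTREE_ITER_INTENT);
 *	if (!ret) {
 *		u.bi_mtime = u.bi_ctime = now;
 *		ret = bch2_inode_write(trans, &iter, &u);
 *		bch2_trans_iter_exit(trans, &iter);
 *	}
 */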
int bch2_inode_peek(struct btree_trans *trans,
		    struct btree_iter *iter,
		    struct bch_inode_unpacked *inode,
		    subvol_inum inum, unsigned flags)
{
	struct bkey_s_c k;
	u32 snapshot;
	int ret;

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		return ret;

	k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
			       SPOS(0, inum.inum, snapshot),
			       flags|BTREE_ITER_CACHED);
	ret = bkey_err(k);
	if (ret)
		return ret;

	ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
	if (ret)
		goto err;

	ret = bch2_inode_unpack(k, inode);
	if (ret)
		goto err;

	return 0;
err:
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

int bch2_inode_write(struct btree_trans *trans,
		     struct btree_iter *iter,
		     struct bch_inode_unpacked *inode)
{
	struct bkey_inode_buf *inode_p;

	inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
	if (IS_ERR(inode_p))
		return PTR_ERR(inode_p);

	bch2_inode_pack_inlined(inode_p, inode);
	inode_p->inode.k.p.snapshot = iter->snapshot;
	return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
}

struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
{
	struct bch_inode_unpacked u;
	struct bkey_inode_buf *inode_p;
	int ret;

	if (!bkey_is_inode(&k->k))
		return ERR_PTR(-ENOENT);

	inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
	if (IS_ERR(inode_p))
		return ERR_CAST(inode_p);

	ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
	if (ret)
		return ERR_PTR(ret);

	bch2_inode_pack(inode_p, &u);
	return &inode_p->inode.k_i;
}

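/*
 * Validity checks common to all inode key versions; the per-version
 * bch2_inode_*_invalid() wrappers below check their version-specific fields
 * (str_hash type, fields_start) before calling this.
 */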
static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
{
	struct bch_inode_unpacked unpacked;

	if (k.k->p.inode) {
		prt_printf(err, "nonzero k.p.inode");
		return -BCH_ERR_invalid_bkey;
	}

	if (k.k->p.offset < BLOCKDEV_INODE_MAX) {
		prt_printf(err, "fs inode in blockdev range");
		return -BCH_ERR_invalid_bkey;
	}

	if (bch2_inode_unpack(k, &unpacked)) {
		prt_printf(err, "invalid variable length fields");
		return -BCH_ERR_invalid_bkey;
	}

	if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) {
		prt_printf(err, "invalid data checksum type (%u >= %u)",
			   unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1);
		return -BCH_ERR_invalid_bkey;
	}

	if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) {
		prt_printf(err, "invalid compression type (%u >= %u)",
			   unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1);
		return -BCH_ERR_invalid_bkey;
	}

	if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
	    unpacked.bi_nlink != 0) {
		prt_printf(err, "flagged as unlinked but bi_nlink != 0");
		return -BCH_ERR_invalid_bkey;
	}

	if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) {
		prt_printf(err, "subvolume root but not a directory");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
}

int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k,
		       enum bkey_invalid_flags flags,
		       struct printbuf *err)
{
	struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);

	if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
		prt_printf(err, "invalid str hash type (%llu >= %u)",
			   INODE_STR_HASH(inode.v), BCH_STR_HASH_NR);
		return -BCH_ERR_invalid_bkey;
	}

	return __bch2_inode_invalid(k, err);
}

int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags,
			  struct printbuf *err)
{
	struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);

	if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
		prt_printf(err, "invalid str hash type (%llu >= %u)",
			   INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);
		return -BCH_ERR_invalid_bkey;
	}

	return __bch2_inode_invalid(k, err);
}

int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags,
			  struct printbuf *err)
{
	struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);

	if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
	    INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
		prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
			   INODEv3_FIELDS_START(inode.v),
			   INODEv3_FIELDS_START_INITIAL,
			   bkey_val_u64s(inode.k));
		return -BCH_ERR_invalid_bkey;
	}

	if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
		prt_printf(err, "invalid str hash type (%llu >= %u)",
			   INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
		return -BCH_ERR_invalid_bkey;
	}

	return __bch2_inode_invalid(k, err);
}

static void __bch2_inode_unpacked_to_text(struct printbuf *out,
					  struct bch_inode_unpacked *inode)
{
	prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
		   inode->bi_mode, inode->bi_flags,
		   inode->bi_journal_seq,
		   inode->bi_size,
		   inode->bi_sectors,
		   inode->bi_version);

#define x(_name, _bits)						\
	prt_printf(out, " "#_name " %llu", (u64) inode->_name);
	BCH_INODE_FIELDS_v3()
#undef x
}

void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
	prt_printf(out, "inum: %llu ", inode->bi_inum);
	__bch2_inode_unpacked_to_text(out, inode);
}

void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_inode_unpacked inode;

	if (bch2_inode_unpack(k, &inode)) {
		prt_printf(out, "(unpack error)");
		return;
	}

	__bch2_inode_unpacked_to_text(out, &inode);
}

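/*
 * Triggers: the transactional trigger accounts for inodes created or deleted
 * in this transaction via the replicas delta list, while the mark path updates
 * fs_usage->nr_inodes directly when running for gc.  On insert we also stamp
 * the key with the journal sequence number it will be written at.
 */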
int bch2_trans_mark_inode(struct btree_trans *trans,
			  enum btree_id btree_id, unsigned level,
			  struct bkey_s_c old,
			  struct bkey_i *new,
			  unsigned flags)
{
	int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);

	if (nr) {
		int ret = bch2_replicas_deltas_realloc(trans, 0);
		struct replicas_delta_list *d = trans->fs_usage_deltas;

		if (ret)
			return ret;

		d->nr_inodes += nr;
	}

	return 0;
}

int bch2_mark_inode(struct btree_trans *trans,
		    enum btree_id btree_id, unsigned level,
		    struct bkey_s_c old, struct bkey_s_c new,
		    unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct bch_fs_usage *fs_usage;
	u64 journal_seq = trans->journal_res.seq;

	if (flags & BTREE_TRIGGER_INSERT) {
		struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;

		BUG_ON(!journal_seq);
		BUG_ON(new.k->type != KEY_TYPE_inode_v3);

		v->bi_journal_seq = cpu_to_le64(journal_seq);
	}

	if (flags & BTREE_TRIGGER_GC) {
		percpu_down_read(&c->mark_lock);
		preempt_disable();

		fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
		fs_usage->nr_inodes += bkey_is_inode(new.k);
		fs_usage->nr_inodes -= bkey_is_inode(old.k);

		preempt_enable();
		percpu_up_read(&c->mark_lock);
	}
	return 0;
}

int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k,
				  enum bkey_invalid_flags flags,
				  struct printbuf *err)
{
	if (k.k->p.inode) {
		prt_printf(err, "nonzero k.p.inode");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
}

void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
				   struct bkey_s_c k)
{
	struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);

	prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
}

void bch2_inode_init_early(struct bch_fs *c,
			   struct bch_inode_unpacked *inode_u)
{
	enum bch_str_hash_type str_hash =
		bch2_str_hash_opt_to_type(c, c->opts.str_hash);

	memset(inode_u, 0, sizeof(*inode_u));

	/* ick */
	inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET;
	get_random_bytes(&inode_u->bi_hash_seed,
			 sizeof(inode_u->bi_hash_seed));
}

void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
			  uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
			  struct bch_inode_unpacked *parent)
{
	inode_u->bi_mode	= mode;
	inode_u->bi_uid		= uid;
	inode_u->bi_gid		= gid;
	inode_u->bi_dev		= rdev;
	inode_u->bi_atime	= now;
	inode_u->bi_mtime	= now;
	inode_u->bi_ctime	= now;
	inode_u->bi_otime	= now;

	if (parent && parent->bi_mode & S_ISGID) {
		inode_u->bi_gid = parent->bi_gid;
		if (S_ISDIR(mode))
			inode_u->bi_mode |= S_ISGID;
	}

	if (parent) {
#define x(_name, ...)	inode_u->bi_##_name = parent->bi_##_name;
		BCH_INODE_OPTS()
#undef x
	}
}

void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
		     uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		     struct bch_inode_unpacked *parent)
{
	bch2_inode_init_early(c, inode_u);
	bch2_inode_init_late(inode_u, bch2_current_time(c),
			     uid, gid, mode, rdev, parent);
}

static inline u32 bkey_generation(struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_inode:
	case KEY_TYPE_inode_v2:
		BUG();
	case KEY_TYPE_inode_generation:
		return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
	default:
		return 0;
	}
}

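/*
 * Inode number allocation: numbers come from a 31- or 63-bit space depending
 * on the inodes_32bit option.  With shard_inode_numbers, the top
 * inode_shard_bits of that space select a per-cpu shard so concurrent creates
 * don't contend on the same range, and each shard keeps its own allocation
 * hint in c->unused_inode_hints.
 */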
/*
 * This just finds an empty slot:
 */
int bch2_inode_create(struct btree_trans *trans,
		      struct btree_iter *iter,
		      struct bch_inode_unpacked *inode_u,
		      u32 snapshot, u64 cpu)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	u64 min, max, start, pos, *hint;
	int ret = 0;
	unsigned bits = (c->opts.inodes_32bit ? 31 : 63);

	if (c->opts.shard_inode_numbers) {
		bits -= c->inode_shard_bits;

		min = (cpu << bits);
		max = (cpu << bits) | ~(ULLONG_MAX << bits);
		min = max_t(u64, min, BLOCKDEV_INODE_MAX);
		hint = c->unused_inode_hints + cpu;
	} else {
		min = BLOCKDEV_INODE_MAX;
		max = ~(ULLONG_MAX << bits);
		hint = c->unused_inode_hints;
	}

	start = READ_ONCE(*hint);

	if (start >= max || start < min)
		start = min;

	pos = start;
	bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
			     BTREE_ITER_ALL_SNAPSHOTS|
			     BTREE_ITER_INTENT);
again:
	while ((k = bch2_btree_iter_peek(iter)).k &&
	       !(ret = bkey_err(k)) &&
	       bkey_lt(k.k->p, POS(0, max))) {
		if (pos < iter->pos.offset)
			goto found_slot;

		/*
		 * We don't need to iterate over keys in every snapshot once
		 * we've found just one:
		 */
		pos = iter->pos.offset + 1;
		bch2_btree_iter_set_pos(iter, POS(0, pos));
	}

	if (!ret && pos < max)
		goto found_slot;

	if (!ret && start == min)
		ret = -BCH_ERR_ENOSPC_inode_create;

	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ret;
	}

	/* Retry from start */
	pos = start = min;
	bch2_btree_iter_set_pos(iter, POS(0, pos));
	goto again;
found_slot:
	bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ret;
	}

	*hint			= k.k->p.offset;
	inode_u->bi_inum	= k.k->p.offset;
	inode_u->bi_generation	= bkey_generation(k);
	return 0;
}

static int bch2_inode_delete_keys(struct btree_trans *trans,
				  subvol_inum inum, enum btree_id id)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_i delete;
	u32 snapshot;
	int ret = 0;

	/*
	 * We're never going to be deleting partial extents, no need to use an
	 * extent iterator:
	 */
	bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
			     BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);

	while (1) {
		bch2_trans_begin(trans);

		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(&iter, snapshot);

		k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (!k.k)
			break;

		bkey_init(&delete.k);
		delete.k.p = iter.pos;

		ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
		      bch2_trans_commit(trans, NULL, NULL,
					BTREE_INSERT_NOFAIL);
err:
		if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
	struct btree_trans trans;
	struct btree_iter iter = { NULL };
	struct bkey_i_inode_generation delete;
	struct bch_inode_unpacked inode_u;
	struct bkey_s_c k;
	u32 snapshot;
	int ret;

	bch2_trans_init(&trans, c, 0, 1024);

	/*
	 * If this was a directory, there shouldn't be any real dirents left -
	 * but there could be whiteouts (from hash collisions) that we should
	 * delete:
	 *
	 * XXX: the dirent code could ideally delete whiteouts when they're no
	 * longer needed
	 */
	ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?:
	      bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?:
	      bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents);
	if (ret)
		goto err;
retry:
	bch2_trans_begin(&trans);

	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	k = bch2_bkey_get_iter(&trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inum.inum, snapshot),
			       BTREE_ITER_INTENT|BTREE_ITER_CACHED);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!bkey_is_inode(k.k)) {
		bch2_fs_inconsistent(trans.c,
				     "inode %llu:%u not found when deleting",
				     inum.inum, snapshot);
		ret = -EIO;
		goto err;
	}

	bch2_inode_unpack(k, &inode_u);

	bkey_inode_generation_init(&delete.k_i);
	delete.k.p = iter.pos;
	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);

	ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
	      bch2_trans_commit(&trans, NULL, NULL,
				BTREE_INSERT_NOFAIL);
err:
	bch2_trans_iter_exit(&trans, &iter);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	bch2_trans_exit(&trans);
	return ret;
}

int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
				  subvol_inum inum,
				  struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	int ret;

	ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
	if (!ret)
		bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
			    struct bch_inode_unpacked *inode)
{
	return bch2_trans_do(c, NULL, NULL, 0,
		bch2_inode_find_by_inum_trans(&trans, inum, inode));
}

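/*
 * Link count helpers: as implemented below, an inode with no links is
 * represented by the BCH_INODE_UNLINKED flag rather than by bi_nlink == 0, so
 * incrementing from the unlinked state clears the flag and decrementing past
 * bi_nlink == 0 sets it.
 */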
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
{
	if (bi->bi_flags & BCH_INODE_UNLINKED)
		bi->bi_flags &= ~BCH_INODE_UNLINKED;
	else {
		if (bi->bi_nlink == U32_MAX)
			return -EINVAL;

		bi->bi_nlink++;
	}

	return 0;
}

void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
{
	if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) {
		bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
					bi->bi_inum);
		return;
	}

	if (bi->bi_flags & BCH_INODE_UNLINKED) {
		bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
		return;
	}

	if (bi->bi_nlink)
		bi->bi_nlink--;
	else
		bi->bi_flags |= BCH_INODE_UNLINKED;
}

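/*
 * Per-inode options are stored with a +1 bias: 0 means "not set, use the
 * filesystem default", so bch2_inode_opts_to_opts() only copies out fields
 * that are nonzero, subtracting 1 to recover the real option value.
 */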
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
{
	struct bch_opts ret = { 0 };
#define x(_name, _bits)							\
	if (inode->bi_##_name)						\
		opt_set(ret, _name, inode->bi_##_name - 1);
	BCH_INODE_OPTS()
#undef x
	return ret;
}

void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
			 struct bch_inode_unpacked *inode)
{
#define x(_name, _bits)		opts->_name = inode_opt_get(c, inode, _name);
	BCH_INODE_OPTS()
#undef x

	if (opts->nocow)
		opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0;
}