2017-03-16 22:18:50 -08:00
/* SPDX-License-Identifier: GPL-2.0 */
# ifndef _BCACHEFS_BTREE_IO_H
# define _BCACHEFS_BTREE_IO_H
2020-01-07 13:29:32 -05:00
# include "bkey_methods.h"
2017-03-16 22:18:50 -08:00
# include "bset.h"
2019-01-13 16:02:22 -05:00
# include "btree_locking.h"
2020-08-04 23:10:08 -04:00
# include "checksum.h"
2017-03-16 22:18:50 -08:00
# include "extents.h"
# include "io_types.h"
/* Forward declarations of struct types named by declarations in this header. */
struct bch_fs;
struct btree_write;
struct btree;
struct btree_iter;
struct btree_node_read_all;
2017-03-16 22:18:50 -08:00
2020-11-09 13:01:52 -05:00
static inline bool btree_node_dirty ( struct btree * b )
{
return test_bit ( BTREE_NODE_dirty , & b - > flags ) ;
}
static inline void set_btree_node_dirty ( struct bch_fs * c , struct btree * b )
{
if ( ! test_and_set_bit ( BTREE_NODE_dirty , & b - > flags ) )
atomic_inc ( & c - > btree_cache . dirty ) ;
}
static inline void clear_btree_node_dirty ( struct bch_fs * c , struct btree * b )
{
if ( test_and_clear_bit ( BTREE_NODE_dirty , & b - > flags ) )
atomic_dec ( & c - > btree_cache . dirty ) ;
}
2021-07-10 13:44:42 -04:00
static inline unsigned btree_ptr_sectors_written ( struct bkey_i * k )
{
return k - > k . type = = KEY_TYPE_btree_ptr_v2
? le16_to_cpu ( bkey_i_to_btree_ptr_v2 ( k ) - > v . sectors_written )
: 0 ;
}
2017-03-16 22:18:50 -08:00
struct btree_read_bio {
struct bch_fs * c ;
2021-05-21 23:57:37 -04:00
struct btree * b ;
struct btree_node_read_all * ra ;
2017-03-16 22:18:50 -08:00
u64 start_time ;
unsigned have_ioref : 1 ;
2021-05-21 23:57:37 -04:00
unsigned idx : 7 ;
2018-10-02 11:03:39 -04:00
struct extent_ptr_decoded pick ;
2017-03-16 22:18:50 -08:00
struct work_struct work ;
struct bio bio ;
} ;
struct btree_write_bio {
struct work_struct work ;
2021-04-06 15:28:34 -04:00
__BKEY_PADDED ( key , BKEY_BTREE_PTR_VAL_U64s_MAX ) ;
2020-07-25 15:07:37 -04:00
void * data ;
2021-07-10 13:44:42 -04:00
unsigned data_bytes ;
unsigned sector_offset ;
2017-03-16 22:18:50 -08:00
struct bch_write_bio wbio ;
} ;
2021-07-10 23:03:15 -04:00
/* Btree node I/O lock and read/write wait helpers; defined outside this header. */
void bch2_btree_node_io_unlock(struct btree *);
void bch2_btree_node_io_lock(struct btree *);
void __bch2_btree_node_wait_on_read(struct btree *);
void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
2017-03-16 22:18:50 -08:00
static inline bool btree_node_may_write ( struct btree * b )
{
return list_empty_careful ( & b - > write_blocked ) & &
2019-01-13 16:02:22 -05:00
( ! b - > written | | ! b - > will_make_reachable ) ;
2017-03-16 22:18:50 -08:00
}
/* Mode argument accepted by bch2_compact_whiteouts(). */
enum compact_mode {
	COMPACT_LAZY,
	COMPACT_ALL,
};
2019-12-13 13:08:37 -05:00
/* Defined outside this header; takes the filesystem, a node and a compact_mode. */
bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
			    enum compact_mode);
2017-03-16 22:18:50 -08:00
2019-12-13 13:08:37 -05:00
/*
 * Decide whether bset @t of node @b qualifies for lazy compaction.
 *
 * True when the bset holds more than 64 dead u64s and those dead u64s make
 * up more than one third of the bset's total u64s.
 */
static inline bool should_compact_bset_lazy(struct btree *b,
					    struct bset_tree *t)
{
	unsigned total_u64s = bset_u64s(t);
	unsigned dead_u64s = bset_dead_u64s(b, t);

	if (dead_u64s <= 64)
		return false;

	return dead_u64s * 3 > total_u64s;
}
static inline bool bch2_maybe_compact_whiteouts ( struct bch_fs * c , struct btree * b )
{
struct bset_tree * t ;
for_each_bset ( b , t )
if ( should_compact_bset_lazy ( b , t ) )
2019-12-13 13:08:37 -05:00
return bch2_compact_whiteouts ( c , b , COMPACT_LAZY ) ;
2017-03-16 22:18:50 -08:00
return false ;
}
2020-08-04 23:10:08 -04:00
static inline struct nonce btree_nonce ( struct bset * i , unsigned offset )
{
return ( struct nonce ) { {
[ 0 ] = cpu_to_le32 ( offset ) ,
[ 1 ] = ( ( __le32 * ) & i - > seq ) [ 0 ] ,
[ 2 ] = ( ( __le32 * ) & i - > seq ) [ 1 ] ,
[ 3 ] = ( ( __le32 * ) & i - > journal_seq ) [ 0 ] ^ BCH_NONCE_BTREE ,
} } ;
}
static inline void bset_encrypt ( struct bch_fs * c , struct bset * i , unsigned offset )
{
struct nonce nonce = btree_nonce ( i , offset ) ;
if ( ! offset ) {
struct btree_node * bn = container_of ( i , struct btree_node , keys ) ;
unsigned bytes = ( void * ) & bn - > keys - ( void * ) & bn - > flags ;
bch2_encrypt ( c , BSET_CSUM_TYPE ( i ) , nonce , & bn - > flags ,
bytes ) ;
nonce = nonce_add ( nonce , round_up ( bytes , CHACHA_BLOCK_SIZE ) ) ;
}
bch2_encrypt ( c , BSET_CSUM_TYPE ( i ) , nonce , i - > _data ,
vstruct_end ( i ) - ( void * ) i - > _data ) ;
}
2017-03-16 22:18:50 -08:00
/* Btree node sort/read/write entry points; defined outside this header. */
void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);

void bch2_btree_node_drop_keys_outside_node(struct btree *);

void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct btree_trans *, struct btree *);

int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
			      struct btree *, bool);
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
			 const struct bkey_i *, unsigned);

void bch2_btree_complete_write(struct bch_fs *, struct btree *,
			       struct btree_write *);

void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);

void bch2_btree_node_write(struct bch_fs *, struct btree *,
			   enum six_lock_type);
2020-02-08 19:06:31 -05:00
static inline void btree_node_write_if_need ( struct bch_fs * c , struct btree * b ,
enum six_lock_type lock_held )
2017-03-16 22:18:50 -08:00
{
2021-07-11 16:41:14 -04:00
if ( b - > written & &
btree_node_need_write ( b ) & &
btree_node_may_write ( b ) & &
! btree_node_write_in_flight ( b ) )
bch2_btree_node_write ( c , b , lock_held ) ;
2017-03-16 22:18:50 -08:00
}
/*
 * bch2_btree_node_write_cond - flag a dirty node as needing a write while
 * @cond holds, then write it if possible.
 *
 * A cmpxchg retry loop sets BTREE_NODE_need_write in (_b)->flags, but only
 * while BTREE_NODE_dirty is set and @cond evaluates true; if either check
 * fails the loop exits without modifying the flags. In every case
 * btree_node_write_if_need() is then called with SIX_LOCK_read.
 *
 * @_b and @cond are expanded more than once (@cond on every retry), so
 * neither may have side effects.
 *
 * The bit masks are built from 1UL so that they have the same width as the
 * unsigned long flags word they are applied to.
 */
#define bch2_btree_node_write_cond(_c, _b, cond)			\
do {									\
	unsigned long old, new, v = READ_ONCE((_b)->flags);		\
									\
	do {								\
		old = new = v;						\
									\
		if (!(old & (1UL << BTREE_NODE_dirty)) || !(cond))	\
			break;						\
									\
		new |= (1UL << BTREE_NODE_need_write);			\
	} while ((v = cmpxchg(&(_b)->flags, old, new)) != old);		\
									\
	btree_node_write_if_need(_c, _b, SIX_LOCK_read);		\
} while (0)
/* Filesystem-wide flush and reporting entry points; defined outside this header. */
void bch2_btree_flush_all_reads(struct bch_fs *);
void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
2017-03-16 22:18:50 -08:00
2020-01-07 13:29:32 -05:00
static inline void compat_bformat ( unsigned level , enum btree_id btree_id ,
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
unsigned version , unsigned big_endian ,
int write , struct bkey_format * f )
2020-01-07 13:29:32 -05:00
{
if ( version < bcachefs_metadata_version_inode_btree_change & &
2021-02-20 19:27:37 -05:00
btree_id = = BTREE_ID_inodes ) {
2020-01-07 13:29:32 -05:00
swap ( f - > bits_per_field [ BKEY_FIELD_INODE ] ,
f - > bits_per_field [ BKEY_FIELD_OFFSET ] ) ;
swap ( f - > field_offset [ BKEY_FIELD_INODE ] ,
f - > field_offset [ BKEY_FIELD_OFFSET ] ) ;
}
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
if ( version < bcachefs_metadata_version_snapshot & &
( level | | btree_type_has_snapshots ( btree_id ) ) ) {
u64 max_packed =
~ ( ~ 0ULL < < f - > bits_per_field [ BKEY_FIELD_SNAPSHOT ] ) ;
f - > field_offset [ BKEY_FIELD_SNAPSHOT ] = write
? 0
: U32_MAX - max_packed ;
}
2020-01-07 13:29:32 -05:00
}
static inline void compat_bpos ( unsigned level , enum btree_id btree_id ,
unsigned version , unsigned big_endian ,
int write , struct bpos * p )
{
if ( big_endian ! = CPU_BIG_ENDIAN )
bch2_bpos_swab ( p ) ;
if ( version < bcachefs_metadata_version_inode_btree_change & &
2021-02-20 19:27:37 -05:00
btree_id = = BTREE_ID_inodes )
2020-01-07 13:29:32 -05:00
swap ( p - > inode , p - > offset ) ;
}
static inline void compat_btree_node ( unsigned level , enum btree_id btree_id ,
unsigned version , unsigned big_endian ,
int write ,
struct btree_node * bn )
{
if ( version < bcachefs_metadata_version_inode_btree_change & &
btree_node_type_is_extents ( btree_id ) & &
2021-03-04 16:20:16 -05:00
bpos_cmp ( bn - > min_key , POS_MIN ) & &
2020-01-07 13:29:32 -05:00
write )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
bn - > min_key = bpos_nosnap_predecessor ( bn - > min_key ) ;
if ( version < bcachefs_metadata_version_snapshot & &
write )
bn - > max_key . snapshot = 0 ;
2020-01-07 13:29:32 -05:00
compat_bpos ( level , btree_id , version , big_endian , write , & bn - > min_key ) ;
compat_bpos ( level , btree_id , version , big_endian , write , & bn - > max_key ) ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
if ( version < bcachefs_metadata_version_snapshot & &
! write )
bn - > max_key . snapshot = U32_MAX ;
2020-01-07 13:29:32 -05:00
if ( version < bcachefs_metadata_version_inode_btree_change & &
btree_node_type_is_extents ( btree_id ) & &
2021-03-04 16:20:16 -05:00
bpos_cmp ( bn - > min_key , POS_MIN ) & &
2020-01-07 13:29:32 -05:00
! write )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
bn - > min_key = bpos_nosnap_successor ( bn - > min_key ) ;
2020-01-07 13:29:32 -05:00
}
2017-03-16 22:18:50 -08:00
# endif /* _BCACHEFS_BTREE_IO_H */