/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H

#include "btree_cache.h"
#include "btree_locking.h"
#include "btree_update.h"

void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
                                 struct bkey_format *);

#define BTREE_UPDATE_NODES_MAX          ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)

#define BTREE_UPDATE_JOURNAL_RES        (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))

/*
 * Tracks an in progress split/rewrite of a btree node and the update to the
 * parent node:
 *
 * When we split/rewrite a node, we do all the updates in memory without
 * waiting for any writes to complete - we allocate the new node(s) and update
 * the parent node, possibly recursively up to the root.
 *
 * The end result is that we have one or more new nodes being written -
 * possibly several, if there were multiple splits - and then a write (updating
 * an interior node) which will make all these new nodes visible.
 *
 * Additionally, as we split/rewrite nodes we free the old nodes - but the old
 * nodes can't be freed (their space on disk can't be reclaimed) until the
 * update to the interior node that makes the new node visible completes -
 * until then, the old nodes are still reachable on disk.
 */
struct btree_update {
        struct closure                  cl;
        struct bch_fs                   *c;

        struct list_head                list;
        struct list_head                unwritten_list;

        /* What kind of update are we doing? */
        enum {
                BTREE_INTERIOR_NO_UPDATE,
                BTREE_INTERIOR_UPDATING_NODE,
                BTREE_INTERIOR_UPDATING_ROOT,
                BTREE_INTERIOR_UPDATING_AS,
        } mode;

        unsigned                        nodes_written:1;
        unsigned                        took_gc_lock:1;

        enum btree_id                   btree_id;

        struct disk_reservation         disk_res;
        struct journal_preres           journal_preres;

        /*
         * BTREE_INTERIOR_UPDATING_NODE:
         * The update that made the new nodes visible was a regular update to an
         * existing interior node - @b. We can't write out the update to @b
         * until the new nodes we created are finished writing, so we block @b
         * from writing by putting this btree_interior update on the
         * @b->write_blocked list with @write_blocked_list:
         */
        struct btree                    *b;
        struct list_head                write_blocked_list;

        /*
         * We may be freeing nodes that were dirty, and thus had journal entries
         * pinned: we need to transfer the oldest of those pins to the
         * btree_update operation, and release it when the new node(s)
         * are all persistent and reachable:
         */
        struct journal_entry_pin        journal;

        /* Preallocated nodes we reserve when we start the update: */
        struct btree                    *prealloc_nodes[BTREE_UPDATE_NODES_MAX];
        unsigned                        nr_prealloc_nodes;

        /* Nodes being freed: */
        struct keylist                  old_keys;
        u64                             _old_keys[BTREE_UPDATE_NODES_MAX *
                                                  BKEY_BTREE_PTR_VAL_U64s_MAX];

        /* Nodes being added: */
        struct keylist                  new_keys;
        u64                             _new_keys[BTREE_UPDATE_NODES_MAX *
                                                  BKEY_BTREE_PTR_VAL_U64s_MAX];

        /* New nodes, that will be made reachable by this update: */
        struct btree                    *new_nodes[BTREE_UPDATE_NODES_MAX];
        unsigned                        nr_new_nodes;

        struct btree                    *old_nodes[BTREE_UPDATE_NODES_MAX];
        __le64                          old_nodes_seq[BTREE_UPDATE_NODES_MAX];
        unsigned                        nr_old_nodes;

        open_bucket_idx_t               open_buckets[BTREE_UPDATE_NODES_MAX *
                                                     BCH_REPLICAS_MAX];
        open_bucket_idx_t               nr_open_buckets;

        unsigned                        journal_u64s;
        u64                             journal_entries[BTREE_UPDATE_JOURNAL_RES];

        /* Only here to reduce stack usage on recursive splits: */
        struct keylist                  parent_keys;
        /*
         * Enough room for btree_split's keys without realloc - btree node
         * pointers never have crc/compression info, so we only need to account
         * for the pointers for three keys
         */
        u64                             inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
};
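
/*
 * Rough lifecycle sketch of a btree_update, as driven by a node split/rewrite
 * (illustrative only - error handling and locking are omitted, and parent, n,
 * new_format and flags are placeholders, not part of this interface):
 *
 *	struct btree_update *as;
 *
 *	as = bch2_btree_update_start(iter, b->c.level,
 *				     btree_update_reserve_required(c, b), flags);
 *	bch2_btree_interior_update_will_free_node(as, b);
 *	n = __bch2_btree_node_alloc_replacement(as, b, new_format);
 *	bch2_btree_update_add_new_node(as, n);
 *	bch2_keylist_add(&as->parent_keys, &n->key);
 *	bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 *	bch2_btree_update_done(as);
 */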

void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
                                struct btree_iter *);
void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);

void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *);

struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
                                                  struct btree *,
                                                  struct bkey_format);

void bch2_btree_update_done(struct btree_update *);
struct btree_update *
bch2_btree_update_start(struct btree_iter *, unsigned, unsigned, unsigned);

void bch2_btree_interior_update_will_free_node(struct btree_update *,
                                               struct btree *);
void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);

void bch2_btree_insert_node(struct btree_update *, struct btree *,
                            struct btree_iter *, struct keylist *,
                            unsigned);
int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned);

int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
                                  unsigned, unsigned, enum btree_node_sibling);

static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
                                        struct btree_iter *iter,
                                        unsigned level, unsigned flags,
                                        enum btree_node_sibling sib)
{
        struct btree *b;

        if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
                return 0;

        if (!bch2_btree_node_relock(iter, level))
                return 0;

        b = iter->l[level].b;
        if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
                return 0;

        return __bch2_foreground_maybe_merge(c, iter, level, flags, sib);
}

static inline int bch2_foreground_maybe_merge(struct bch_fs *c,
                                              struct btree_iter *iter,
                                              unsigned level,
                                              unsigned flags)
{
        return bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
                                                   btree_prev_sib) ?:
               bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
                                                   btree_next_sib);
}

void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);

static inline unsigned btree_update_reserve_required(struct bch_fs *c,
                                                     struct btree *b)
{
        unsigned depth = btree_node_root(c, b)->c.level + 1;

        /*
         * Number of nodes we might have to allocate in a worst case btree
         * split operation - we split all the way up to the root, then allocate
         * a new root, unless we're already at max depth:
         */
        if (depth < BTREE_MAX_DEPTH)
                return (depth - b->c.level) * 2 + 1;
        else
                return (depth - b->c.level) * 2 - 1;
}
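
/*
 * Worked example of the reserve calculation above: with the root at level 2
 * and @b a leaf at level 0, depth is 3, so (while still below
 * BTREE_MAX_DEPTH) we reserve (3 - 0) * 2 + 1 = 7 nodes - up to two new
 * nodes per level for the splits, plus one for a possible new root.
 */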

static inline void btree_node_reset_sib_u64s(struct btree *b)
{
        b->sib_u64s[0] = b->nr.live_u64s;
        b->sib_u64s[1] = b->nr.live_u64s;
}

static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
{
        return (void *) b->data + btree_bytes(c);
}

static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
                                                            struct btree *b)
{
        return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
}

static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
                                                          struct btree *b)
{
        return btree_data_end(c, b);
}
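
/*
 * Layout note (a sketch of how these helpers fit together): the last
 * b->whiteout_u64s u64s of the node's buffer, ending at btree_data_end(),
 * hold whiteouts that haven't been written out yet; push_whiteout() below
 * grows this region downwards, so free space is whatever remains between the
 * end of the last bset and unwritten_whiteouts_start().
 */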

static inline void *write_block(struct btree *b)
{
        return (void *) b->data + (b->written << 9);
}

static inline bool __btree_addr_written(struct btree *b, void *p)
{
        return p < write_block(b);
}

static inline bool bset_written(struct btree *b, struct bset *i)
{
        return __btree_addr_written(b, i);
}

static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
{
        return __btree_addr_written(b, k);
}

static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
                                                 struct btree *b,
                                                 void *end)
{
        ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
                b->whiteout_u64s;
        ssize_t total = c->opts.btree_node_size << 6;

        /* Always leave one extra u64 for bch2_varint_decode: */
        used++;

        return total - used;
}
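
/*
 * Note on the shift above (assuming btree_node_size is, as elsewhere in
 * bcachefs, a size in 512-byte sectors): << 6 converts sectors to u64s,
 * since 512 / sizeof(u64) == 64. E.g. a 256k node is 512 sectors, giving
 * total = 512 << 6 = 32768 u64s.
 */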

static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
                                                   struct btree *b)
{
        ssize_t remaining = __bch_btree_u64s_remaining(c, b,
                                btree_bkey_last(b, bset_tree_last(b)));

        BUG_ON(remaining < 0);

        if (bset_written(b, btree_bset_last(b)))
                return 0;

        return remaining;
}

#define BTREE_WRITE_SET_U64s_BITS       9

static inline unsigned btree_write_set_buffer(struct btree *b)
{
        /*
         * Could buffer up larger amounts of keys for btrees with larger keys,
         * pending benchmarking:
         */
        return 8 << BTREE_WRITE_SET_U64s_BITS;
}
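
/*
 * That works out to 8 << 9 = 4096 bytes: once the last (unwritten) bset's key
 * data exceeds this, want_new_bset() below will start a new bset, space
 * permitting.
 */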

static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
                                                     struct btree *b)
{
        struct bset_tree *t = bset_tree_last(b);
        struct btree_node_entry *bne = max(write_block(b),
                        (void *) btree_bkey_last(b, bset_tree_last(b)));
        ssize_t remaining_space =
                __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]);

        if (unlikely(bset_written(b, bset(b, t)))) {
                if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
                        return bne;
        } else {
                if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
                    remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3))
                        return bne;
        }

        return NULL;
}

static inline void push_whiteout(struct bch_fs *c, struct btree *b,
                                 struct bpos pos)
{
        struct bkey_packed k;

        BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);

        if (!bkey_pack_pos(&k, pos, b)) {
                struct bkey *u = (void *) &k;

                bkey_init(u);
                u->p = pos;
        }

        k.needs_whiteout = true;

        b->whiteout_u64s += k.u64s;
        bkey_copy(unwritten_whiteouts_start(c, b), &k);
}

/*
 * write lock must be held on @b (else the dirty bset that we were going to
 * insert into could be written out from under us)
 */
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
                                               struct btree *b, unsigned u64s)
{
        if (unlikely(btree_node_need_rewrite(b)))
                return false;

        return u64s <= bch_btree_keys_u64s_remaining(c, b);
}
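
/*
 * Illustrative caller pattern (a sketch, not the actual insert path; keys,
 * iter and flags are placeholders): with the write lock held on @b, check
 * that the update fits and fall back to splitting the leaf if it doesn't:
 *
 *	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys)))
 *		return bch2_btree_split_leaf(c, iter, flags);
 */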

void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);

size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);

void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
                                        struct jset_entry *, struct jset_entry *);

void bch2_fs_btree_interior_update_exit(struct bch_fs *);
int bch2_fs_btree_interior_update_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */