/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H

#include "btree_cache.h"
#include "btree_locking.h"
#include "btree_update.h"

#define BTREE_UPDATE_NODES_MAX		((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)

#define BTREE_UPDATE_JOURNAL_RES	(BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
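
/*
 * Illustrative sizing sketch (not part of the API): assuming, for example,
 * BTREE_MAX_DEPTH == 4, GC_MERGE_NODES == 4 and BKEY_BTREE_PTR_U64s_MAX == 10
 * (values assumed here purely for the arithmetic),
 *
 *	BTREE_UPDATE_NODES_MAX   = (4 - 2) * 2 + 4 = 8 nodes per update
 *	BTREE_UPDATE_JOURNAL_RES = 8 * (10 + 1)    = 88 u64s of journal space
 *
 * i.e. enough journal reservation for one maximum-size btree pointer key,
 * plus one extra u64, per node the update may touch.
 */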

int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);

#define BTREE_UPDATE_MODES()	\
	x(none)			\
	x(node)			\
	x(root)			\
	x(update)

enum btree_update_mode {
#define x(n)	BTREE_UPDATE_##n,
	BTREE_UPDATE_MODES()
#undef x
};
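
/*
 * For reference, the x-macro above expands the enum to (expansion shown as an
 * illustrative sketch):
 *
 *	enum btree_update_mode {
 *		BTREE_UPDATE_none,
 *		BTREE_UPDATE_node,
 *		BTREE_UPDATE_root,
 *		BTREE_UPDATE_update,
 *	};
 */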

/*
 * Tracks an in progress split/rewrite of a btree node and the update to the
 * parent node:
 *
 * When we split/rewrite a node, we do all the updates in memory without
 * waiting for any writes to complete - we allocate the new node(s) and update
 * the parent node, possibly recursively up to the root.
 *
 * The end result is that we have one or more new nodes being written -
 * possibly several, if there were multiple splits - and then a write (updating
 * an interior node) which will make all these new nodes visible.
 *
 * Additionally, as we split/rewrite nodes we free the old nodes - but the old
 * nodes can't be freed (their space on disk can't be reclaimed) until the
 * update to the interior node that makes the new node visible completes -
 * until then, the old nodes are still reachable on disk.
 */
struct btree_update {
	struct closure			cl;
	struct bch_fs			*c;
	u64				start_time;
	unsigned long			ip_started;

	struct list_head		list;
	struct list_head		unwritten_list;

	enum btree_update_mode		mode;
	enum bch_watermark		watermark;
	unsigned			nodes_written:1;
	unsigned			took_gc_lock:1;

	enum btree_id			btree_id;
	unsigned			update_level_start;
	unsigned			update_level_end;

	struct disk_reservation		disk_res;

	/*
	 * BTREE_UPDATE_node:
	 * The update that made the new nodes visible was a regular update to an
	 * existing interior node - @b. We can't write out the update to @b
	 * until the new nodes we created are finished writing, so we block @b
	 * from writing by putting this btree_interior update on the
	 * @b->write_blocked list with @write_blocked_list:
	 */
	struct btree			*b;
	struct list_head		write_blocked_list;

	/*
	 * We may be freeing nodes that were dirty, and thus had journal entries
	 * pinned: we need to transfer the oldest of those pins to the
	 * btree_update operation, and release it when the new node(s)
	 * are all persistent and reachable:
	 */
	struct journal_entry_pin	journal;

	/* Preallocated nodes we reserve when we start the update: */
	struct prealloc_nodes {
		struct btree		*b[BTREE_UPDATE_NODES_MAX];
		unsigned		nr;
	}				prealloc_nodes[2];

	/* Nodes being freed: */
	struct keylist			old_keys;
	u64				_old_keys[BTREE_UPDATE_NODES_MAX *
						  BKEY_BTREE_PTR_U64s_MAX];

	/* Nodes being added: */
	struct keylist			new_keys;
	u64				_new_keys[BTREE_UPDATE_NODES_MAX *
						  BKEY_BTREE_PTR_U64s_MAX];

	/* New nodes, that will be made reachable by this update: */
	struct btree			*new_nodes[BTREE_UPDATE_NODES_MAX];
	unsigned			nr_new_nodes;

	struct btree			*old_nodes[BTREE_UPDATE_NODES_MAX];
	__le64				old_nodes_seq[BTREE_UPDATE_NODES_MAX];
	unsigned			nr_old_nodes;

	open_bucket_idx_t		open_buckets[BTREE_UPDATE_NODES_MAX *
						     BCH_REPLICAS_MAX];
	open_bucket_idx_t		nr_open_buckets;

	unsigned			journal_u64s;
	u64				journal_entries[BTREE_UPDATE_JOURNAL_RES];

	/* Only here to reduce stack usage on recursive splits: */
	struct keylist			parent_keys;
	/*
	 * Enough room for btree_split's keys without realloc - btree node
	 * pointers never have crc/compression info, so we only need to account
	 * for the pointers for three keys
	 */
	u64				inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
};
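
/*
 * Rough lifecycle sketch (illustrative, based on the comments above): when a
 * leaf is split, the new siblings are recorded in @new_nodes/@new_keys, the
 * node being replaced in @old_nodes/@old_keys, and the parent being modified
 * in @b; the update sits on @b's write_blocked list (via @write_blocked_list)
 * until the new nodes finish writing, at which point the parent update can be
 * written out and the old node's space reclaimed.
 */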

struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
						  struct btree_trans *,
						  struct btree *,
						  struct bkey_format);

int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned);

int bch2_btree_increase_depth(struct btree_trans *, btree_path_idx_t, unsigned);

int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t,
				  unsigned, unsigned, enum btree_node_sibling);

static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
					btree_path_idx_t path_idx,
					unsigned level, unsigned flags,
					enum btree_node_sibling sib)
{
	struct btree_path *path = trans->paths + path_idx;
	struct btree *b;

	EBUG_ON(!btree_node_locked(path, level));

	b = path->l[level].b;
	if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold)
		return 0;

	return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, sib);
}

static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
					       btree_path_idx_t path,
					       unsigned level,
					       unsigned flags)
{
	return bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
						   btree_prev_sib) ?:
		bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
						   btree_next_sib);
}
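
/*
 * Note (explanatory, not from the original header): the GNU "a ?: b"
 * shorthand above returns the first operand when it is nonzero, so the merge
 * with the next sibling is only attempted when the previous-sibling call
 * returned 0 (i.e. it did not return an error).
 */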

int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
			    struct btree *, unsigned);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
			       struct btree *, struct bkey_i *,
			       unsigned, bool);
int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
					struct bkey_i *, unsigned, bool);

void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);

void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);

static inline unsigned btree_update_reserve_required(struct bch_fs *c,
						     struct btree *b)
{
	unsigned depth = btree_node_root(c, b)->c.level + 1;

	/*
	 * Number of nodes we might have to allocate in a worst case btree
	 * split operation - we split all the way up to the root, then allocate
	 * a new root, unless we're already at max depth:
	 */
	if (depth < BTREE_MAX_DEPTH)
		return (depth - b->c.level) * 2 + 1;
	else
		return (depth - b->c.level) * 2 - 1;
}
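
/*
 * Worked example (illustrative only): with the root at level 2 and a split
 * starting at a leaf (level 0), depth == 3; if BTREE_MAX_DEPTH is larger than
 * that, the reserve is (3 - 0) * 2 + 1 = 7 nodes - up to two new nodes per
 * level that may split, plus one node for a possible new root.
 */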

static inline void btree_node_reset_sib_u64s(struct btree *b)
{
	b->sib_u64s[0] = b->nr.live_u64s;
	b->sib_u64s[1] = b->nr.live_u64s;
}

static inline void *btree_data_end(struct btree *b)
{
	return (void *) b->data + btree_buf_bytes(b);
}

static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b)
{
	return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s);
}

static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b)
{
	return btree_data_end(b);
}

static inline void *write_block(struct btree *b)
{
	return (void *) b->data + (b->written << 9);
}

static inline bool __btree_addr_written(struct btree *b, void *p)
{
	return p < write_block(b);
}

static inline bool bset_written(struct btree *b, struct bset *i)
{
	return __btree_addr_written(b, i);
}

static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
{
	return __btree_addr_written(b, k);
}
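
/*
 * Explanatory note (not from the original header): b->written is counted in
 * 512-byte sectors (hence the << 9 in write_block()), so an address is
 * considered "written" iff it falls before write_block(b), i.e. it lies
 * within the portion of the node that has already gone to disk.
 */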

static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end)
{
	ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
		b->whiteout_u64s;
	ssize_t total = btree_buf_bytes(b) >> 3;

	/* Always leave one extra u64 for bch2_varint_decode: */
	used++;

	return total - used;
}
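
/*
 * Worked example (illustrative, with assumed numbers): if btree_buf_bytes(b)
 * is 4096, total is 512 u64s; if @end sits 3000 bytes into the buffer and
 * b->whiteout_u64s is 10, then used = 375 + 10 + 1 = 386 and the function
 * returns 512 - 386 = 126 u64s of remaining space.
 */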

static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b)
{
	ssize_t remaining = __bch2_btree_u64s_remaining(b,
				btree_bkey_last(b, bset_tree_last(b)));

	BUG_ON(remaining < 0);

	if (bset_written(b, btree_bset_last(b)))
		return 0;

	return remaining;
}

#define BTREE_WRITE_SET_U64s_BITS	9

static inline unsigned btree_write_set_buffer(struct btree *b)
{
	/*
	 * Could buffer up larger amounts of keys for btrees with larger keys,
	 * pending benchmarking:
	 */
	return 8 << BTREE_WRITE_SET_U64s_BITS;
}
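
/*
 * With BTREE_WRITE_SET_U64s_BITS == 9 this is 8 << 9 = 4096 bytes, i.e. the
 * unwritten bset may grow to roughly 512 u64s worth of keys before
 * want_new_bset() below may prefer starting a new one.
 */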

static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b)
{
	struct bset_tree *t = bset_tree_last(b);
	struct btree_node_entry *bne = max(write_block(b),
			(void *) btree_bkey_last(b, bset_tree_last(b)));
	ssize_t remaining_space =
		__bch2_btree_u64s_remaining(b, bne->keys.start);

	if (unlikely(bset_written(b, bset(b, t)))) {
		if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
			return bne;
	} else {
		if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
		    remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3))
			return bne;
	}

	return NULL;
}
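
/*
 * Summary (explanatory, not from the original header): a new bset is wanted
 * either when the last bset has already been written and at least a block's
 * worth of u64s is still free, or when the current unwritten bset has grown
 * past btree_write_set_buffer() bytes and enough space remains for another
 * buffer's worth of keys; otherwise NULL is returned and the caller keeps
 * appending to the existing bset.
 */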

static inline void push_whiteout(struct btree *b, struct bpos pos)
{
	struct bkey_packed k;

	BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s);
	EBUG_ON(btree_node_just_written(b));

	if (!bkey_pack_pos(&k, pos, b)) {
		struct bkey *u = (void *) &k;

		bkey_init(u);
		u->p = pos;
	}

	k.needs_whiteout = true;

	b->whiteout_u64s += k.u64s;
	bkey_p_copy(unwritten_whiteouts_start(b), &k);
}
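
/*
 * Layout sketch (illustrative): unwritten whiteouts are stacked downwards from
 * the end of the node's buffer, so each push_whiteout() grows b->whiteout_u64s
 * and moves unwritten_whiteouts_start(b) towards the keys:
 *
 *	| bsets / keys ...     | <-- unwritten whiteouts --> | end of buffer
 *	^ b->data               ^ unwritten_whiteouts_start   ^ btree_data_end(b)
 */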

/*
 * write lock must be held on @b (else the dirty bset that we were going to
 * insert into could be written out from under us)
 */
static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s)
{
	if (unlikely(btree_node_need_rewrite(b)))
		return false;

	return u64s <= bch2_btree_keys_u64s_remaining(b);
}

void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);

bool bch2_btree_interior_updates_flush(struct bch_fs *);

void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
					struct jset_entry *, unsigned long);

void bch2_do_pending_node_rewrites(struct bch_fs *);
void bch2_free_pending_node_rewrites(struct bch_fs *);

void bch2_fs_btree_interior_update_exit(struct bch_fs *);
void bch2_fs_btree_interior_update_init_early(struct bch_fs *);
int bch2_fs_btree_interior_update_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */