// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/random.h>

static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
				  struct btree_path *, struct btree *,
				  struct keylist *, unsigned);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);

static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
						enum btree_id btree_id,
						unsigned level,
						struct bpos pos)
{
	struct btree_path *path;

	path = bch2_path_get(trans, btree_id, pos, level + 1, level,
			     BTREE_ITER_NOPRESERVE|
			     BTREE_ITER_INTENT, _RET_IP_);
	path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_);
	bch2_btree_path_downgrade(trans, path);
	__bch2_btree_path_unlock(trans, path);
	return path;
}

/* Debug code: */

/*
 * Verify that child nodes correctly span parent node's range:
 */
static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bpos next_node = b->data->min_key;
	struct btree_node_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_btree_ptr_v2 bp;
	struct bkey unpacked;
	struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

	BUG_ON(!b->c.level);

	if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
		return;

	bch2_btree_node_iter_init_from_start(&iter, b);

	while (1) {
		k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
		if (k.k->type != KEY_TYPE_btree_ptr_v2)
			break;
		bp = bkey_s_c_to_btree_ptr_v2(k);

		if (!bpos_eq(next_node, bp.v->min_key)) {
			bch2_dump_btree_node(c, b);
			bch2_bpos_to_text(&buf1, next_node);
			bch2_bpos_to_text(&buf2, bp.v->min_key);
			panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
		}

		bch2_btree_node_iter_advance(&iter, b);

		if (bch2_btree_node_iter_end(&iter)) {
			if (!bpos_eq(k.k->p, b->key.k.p)) {
				bch2_dump_btree_node(c, b);
				bch2_bpos_to_text(&buf1, b->key.k.p);
				bch2_bpos_to_text(&buf2, k.k->p);
				panic("expected end %s got %s\n", buf1.buf, buf2.buf);
			}
			break;
		}
		next_node = bpos_successor(k.k->p);
	}
#endif
}

/* Calculate ideal packed bkey format for new btree nodes: */
void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
{
	struct bkey_packed *k;
	struct bset_tree *t;
	struct bkey uk;

	for_each_bset(b, t)
		bset_tree_for_each_key(b, t, k)
			if (!bkey_deleted(k)) {
				uk = bkey_unpack_key(b, k);
				bch2_bkey_format_add_key(s, &uk);
			}
}

static struct bkey_format bch2_btree_calc_format(struct btree *b)
{
	struct bkey_format_state s;

	bch2_bkey_format_init(&s);
	bch2_bkey_format_add_pos(&s, b->data->min_key);
	bch2_bkey_format_add_pos(&s, b->data->max_key);
	__bch2_btree_calc_format(&s, b);

	return bch2_bkey_format_done(&s);
}

static size_t btree_node_u64s_with_format(struct btree *b,
					  struct bkey_format *new_f)
{
	struct bkey_format *old_f = &b->format;

	/* stupid integer promotion rules */
	ssize_t delta =
		(((int) new_f->key_u64s - old_f->key_u64s) *
		 (int) b->nr.packed_keys) +
		(((int) new_f->key_u64s - BKEY_U64s) *
		 (int) b->nr.unpacked_keys);

	BUG_ON(delta + b->nr.live_u64s < 0);

	return b->nr.live_u64s + delta;
}
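
/*
 * Worked example for btree_node_u64s_with_format() above (illustrative
 * numbers only, not taken from real nodes): with 100 packed keys, 10
 * unpacked keys, old key_u64s == 3 and new key_u64s == 2, and assuming
 * BKEY_U64s == 5, delta is (2 - 3) * 100 + (2 - 5) * 10 == -130, i.e. the
 * node is projected to shrink by 130 u64s if every key repacks cleanly.
 */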

/**
 * bch2_btree_node_format_fits - check if we could rewrite node with a new format
 *
 * @c:		filesystem handle
 * @b:		btree node to rewrite
 * @new_f:	bkey format to translate keys to
 *
 * Returns: true if all re-packed keys will be able to fit in a new node.
 *
 * Assumes all keys will successfully pack with the new format.
 */
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
				 struct bkey_format *new_f)
{
	size_t u64s = btree_node_u64s_with_format(b, new_f);

	return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
}

/* Btree node freeing/allocation: */

static void __btree_node_free(struct bch_fs *c, struct btree *b)
{
	trace_and_count(c, btree_node_free, c, b);

	BUG_ON(btree_node_write_blocked(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_need_write(b));
	BUG_ON(b == btree_node_root(c, b));
	BUG_ON(b->ob.nr);
	BUG_ON(!list_empty(&b->write_blocked));
	BUG_ON(b->will_make_reachable);

	clear_btree_node_noevict(b);

	mutex_lock(&c->btree_cache.lock);
	list_move(&b->list, &c->btree_cache.freeable);
	mutex_unlock(&c->btree_cache.lock);
}

static void bch2_btree_node_free_inmem(struct btree_trans *trans,
				       struct btree_path *path,
				       struct btree *b)
{
	struct bch_fs *c = trans->c;
	unsigned level = b->c.level;

	bch2_btree_node_lock_write_nofail(trans, path, &b->c);
	bch2_btree_node_hash_remove(&c->btree_cache, b);
	__btree_node_free(c, b);
	six_unlock_write(&b->c.lock);
	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);

	trans_for_each_path(trans, path)
		if (path->l[level].b == b) {
			btree_node_unlock(trans, path, level);
			path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
		}
}

static void bch2_btree_node_free_never_used(struct btree_update *as,
					    struct btree_trans *trans,
					    struct btree *b)
{
	struct bch_fs *c = as->c;
	struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
	struct btree_path *path;
	unsigned level = b->c.level;

	BUG_ON(!list_empty(&b->write_blocked));
	BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));

	b->will_make_reachable = 0;
	closure_put(&as->cl);

	clear_btree_node_will_make_reachable(b);
	clear_btree_node_accessed(b);
	clear_btree_node_dirty_acct(c, b);
	clear_btree_node_need_write(b);

	mutex_lock(&c->btree_cache.lock);
	list_del_init(&b->list);
	bch2_btree_node_hash_remove(&c->btree_cache, b);
	mutex_unlock(&c->btree_cache.lock);

	BUG_ON(p->nr >= ARRAY_SIZE(p->b));
	p->b[p->nr++] = b;

	six_unlock_intent(&b->c.lock);

	trans_for_each_path(trans, path)
		if (path->l[level].b == b) {
			btree_node_unlock(trans, path, level);
			path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
		}
}

static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
					     struct disk_reservation *res,
					     struct closure *cl,
					     bool interior_node,
					     unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct write_point *wp;
	struct btree *b;
	BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
	struct open_buckets obs = { .nr = 0 };
	struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
	unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
		? BTREE_NODE_RESERVE
		: 0;
	int ret;

	mutex_lock(&c->btree_reserve_cache_lock);
	if (c->btree_reserve_cache_nr > nr_reserve) {
		struct btree_alloc *a =
			&c->btree_reserve_cache[--c->btree_reserve_cache_nr];

		obs = a->ob;
		bkey_copy(&tmp.k, &a->k);
		mutex_unlock(&c->btree_reserve_cache_lock);
		goto mem_alloc;
	}
	mutex_unlock(&c->btree_reserve_cache_lock);
retry:
	ret = bch2_alloc_sectors_start_trans(trans,
				      c->opts.metadata_target ?:
				      c->opts.foreground_target,
				      0,
				      writepoint_ptr(&c->btree_write_point),
				      &devs_have,
				      res->nr_replicas,
				      c->opts.metadata_replicas_required,
				      watermark, 0, cl, &wp);
	if (unlikely(ret))
		return ERR_PTR(ret);

	if (wp->sectors_free < btree_sectors(c)) {
		struct open_bucket *ob;
		unsigned i;

		open_bucket_for_each(c, &wp->ptrs, ob, i)
			if (ob->sectors_free < btree_sectors(c))
				ob->sectors_free = 0;

		bch2_alloc_sectors_done(c, wp);
		goto retry;
	}

	bkey_btree_ptr_v2_init(&tmp.k);
	bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);

	bch2_open_bucket_get(c, wp, &obs);
	bch2_alloc_sectors_done(c, wp);
mem_alloc:
	b = bch2_btree_node_mem_alloc(trans, interior_node);
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);

	/* we hold cannibalize_lock: */
	BUG_ON(IS_ERR(b));
	BUG_ON(b->ob.nr);

	bkey_copy(&b->key, &tmp.k);
	b->ob = obs;

	return b;
}

static struct btree *bch2_btree_node_alloc(struct btree_update *as,
					   struct btree_trans *trans,
					   unsigned level)
{
	struct bch_fs *c = as->c;
	struct btree *b;
	struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
	int ret;

	BUG_ON(level >= BTREE_MAX_DEPTH);
	BUG_ON(!p->nr);

	b = p->b[--p->nr];

	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);

	set_btree_node_accessed(b);
	set_btree_node_dirty_acct(c, b);
	set_btree_node_need_write(b);

	bch2_bset_init_first(b, &b->data->keys);
	b->c.level	= level;
	b->c.btree_id	= as->btree_id;
	b->version_ondisk = c->sb.version;

	memset(&b->nr, 0, sizeof(b->nr));
	b->data->magic = cpu_to_le64(bset_magic(c));
	memset(&b->data->_ptr, 0, sizeof(b->data->_ptr));
	b->data->flags = 0;
	SET_BTREE_NODE_ID(b->data, as->btree_id);
	SET_BTREE_NODE_LEVEL(b->data, level);

	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);

		bp->v.mem_ptr		= 0;
		bp->v.seq		= b->data->keys.seq;
		bp->v.sectors_written	= 0;
	}

	SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);

	bch2_btree_build_aux_trees(b);

	ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
	BUG_ON(ret);

	trace_and_count(c, btree_node_alloc, c, b);
	bch2_increment_clock(c, btree_sectors(c), WRITE);
	return b;
}

static void btree_set_min(struct btree *b, struct bpos pos)
{
	if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
		bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
	b->data->min_key = pos;
}

static void btree_set_max(struct btree *b, struct bpos pos)
{
	b->key.k.p = pos;
	b->data->max_key = pos;
}

static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
						       struct btree_trans *trans,
						       struct btree *b)
{
	struct btree *n = bch2_btree_node_alloc(as, trans, b->c.level);
	struct bkey_format format = bch2_btree_calc_format(b);

	/*
	 * The keys might expand with the new format - if they wouldn't fit in
	 * the btree node anymore, use the old format for now:
	 */
	if (!bch2_btree_node_format_fits(as->c, b, &format))
		format = b->format;

	SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);

	btree_set_min(n, b->data->min_key);
	btree_set_max(n, b->data->max_key);

	n->data->format		= format;
	btree_node_set_format(n, format);

	bch2_btree_sort_into(as->c, n, b);

	btree_node_reset_sib_u64s(n);
	return n;
}

static struct btree *__btree_root_alloc(struct btree_update *as,
					struct btree_trans *trans, unsigned level)
{
	struct btree *b = bch2_btree_node_alloc(as, trans, level);

	btree_set_min(b, POS_MIN);
	btree_set_max(b, SPOS_MAX);
	b->data->format = bch2_btree_calc_format(b);

	btree_node_set_format(b, b->data->format);
	bch2_btree_build_aux_trees(b);

	return b;
}
2022-08-21 21:29:43 +03:00
static void bch2_btree_reserve_put ( struct btree_update * as , struct btree_trans * trans )
2017-03-17 09:18:50 +03:00
{
2020-05-25 21:57:06 +03:00
struct bch_fs * c = as - > c ;
2022-03-05 03:16:04 +03:00
struct prealloc_nodes * p ;
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
for ( p = as - > prealloc_nodes ;
p < as - > prealloc_nodes + ARRAY_SIZE ( as - > prealloc_nodes ) ;
p + + ) {
while ( p - > nr ) {
struct btree * b = p - > b [ - - p - > nr ] ;
2017-03-17 09:18:50 +03:00
2022-04-19 04:50:19 +03:00
mutex_lock ( & c - > btree_reserve_cache_lock ) ;
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
if ( c - > btree_reserve_cache_nr <
ARRAY_SIZE ( c - > btree_reserve_cache ) ) {
struct btree_alloc * a =
& c - > btree_reserve_cache [ c - > btree_reserve_cache_nr + + ] ;
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
a - > ob = b - > ob ;
b - > ob . nr = 0 ;
bkey_copy ( & a - > k , & b - > key ) ;
} else {
bch2_open_buckets_put ( c , & b - > ob ) ;
}
2017-03-17 09:18:50 +03:00
2022-04-19 04:50:19 +03:00
mutex_unlock ( & c - > btree_reserve_cache_lock ) ;
2022-08-21 21:29:43 +03:00
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_intent ) ;
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_write ) ;
2022-03-05 03:16:04 +03:00
__btree_node_free ( c , b ) ;
six_unlock_write ( & b - > c . lock ) ;
six_unlock_intent ( & b - > c . lock ) ;
}
2017-03-17 09:18:50 +03:00
}
}
2022-01-10 04:48:31 +03:00
static int bch2_btree_reserve_get ( struct btree_trans * trans ,
struct btree_update * as ,
2022-01-12 07:24:43 +03:00
unsigned nr_nodes [ 2 ] ,
unsigned flags ,
struct closure * cl )
2017-03-17 09:18:50 +03:00
{
2020-05-25 21:57:06 +03:00
struct bch_fs * c = as - > c ;
2017-03-17 09:18:50 +03:00
struct btree * b ;
2022-03-05 03:16:04 +03:00
unsigned interior ;
2022-01-12 07:24:43 +03:00
int ret = 0 ;
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
BUG_ON ( nr_nodes [ 0 ] + nr_nodes [ 1 ] > BTREE_RESERVE_MAX ) ;
2017-03-17 09:18:50 +03:00
	/*
	 * Protects reaping from the btree node cache and using the btree node
	 * open bucket reserve:
	 *
	 * BTREE_INSERT_NOWAIT only applies to btree node allocation, not
	 * blocking on this lock:
	 */
ret = bch2_btree_cache_cannibalize_lock ( c , cl ) ;
2020-05-25 21:57:06 +03:00
if ( ret )
return ret ;
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
for ( interior = 0 ; interior < 2 ; interior + + ) {
struct prealloc_nodes * p = as - > prealloc_nodes + interior ;
while ( p - > nr < nr_nodes [ interior ] ) {
2022-01-10 04:48:31 +03:00
b = __bch2_btree_node_alloc ( trans , & as - > disk_res ,
2022-01-12 07:24:43 +03:00
flags & BTREE_INSERT_NOWAIT ? NULL : cl ,
interior , flags ) ;
2022-03-05 03:16:04 +03:00
if ( IS_ERR ( b ) ) {
ret = PTR_ERR ( b ) ;
goto err ;
}
2017-03-17 09:18:50 +03:00
2022-03-05 03:16:04 +03:00
p - > b [ p - > nr + + ] = b ;
}
2017-03-17 09:18:50 +03:00
}
2022-03-05 03:16:04 +03:00
err :
2017-03-17 09:18:50 +03:00
bch2_btree_cache_cannibalize_unlock ( c ) ;
2020-05-25 21:57:06 +03:00
return ret ;
2017-03-17 09:18:50 +03:00
}
/* Asynchronous interior node update machinery */
2022-08-21 21:29:43 +03:00
static void bch2_btree_update_free ( struct btree_update * as , struct btree_trans * trans )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = as - > c ;
2021-03-31 22:21:37 +03:00
if ( as - > took_gc_lock )
up_read ( & c - > gc_lock ) ;
as - > took_gc_lock = false ;
2020-02-09 03:06:31 +03:00
bch2_journal_pin_drop ( & c - > journal , & as - > journal ) ;
2018-07-17 19:19:14 +03:00
bch2_journal_pin_flush ( & c - > journal , & as - > journal ) ;
2020-05-25 21:57:06 +03:00
bch2_disk_reservation_put ( c , & as - > disk_res ) ;
2022-08-21 21:29:43 +03:00
bch2_btree_reserve_put ( as , trans ) ;
2018-07-17 19:19:14 +03:00
2021-12-10 23:41:38 +03:00
bch2_time_stats_update ( & c - > times [ BCH_TIME_btree_interior_update_total ] ,
as - > start_time ) ;
2020-05-25 21:57:06 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
2020-05-02 23:21:35 +03:00
list_del ( & as - > unwritten_list ) ;
2017-03-17 09:18:50 +03:00
list_del ( & as - > list ) ;
closure_debug_destroy ( & as - > cl ) ;
mempool_free ( as , & c - > btree_interior_update_pool ) ;
2021-12-10 23:41:38 +03:00
	/*
	 * Have to do the wakeup with btree_interior_update_lock still held,
	 * since being on btree_interior_update_list is our ref on @c:
	 */
2017-03-17 09:18:50 +03:00
closure_wake_up ( & c - > btree_interior_update_wait ) ;
2021-12-10 23:41:38 +03:00
mutex_unlock ( & c - > btree_interior_update_lock ) ;
2017-03-17 09:18:50 +03:00
}
2022-04-01 04:44:55 +03:00
static void btree_update_add_key ( struct btree_update * as ,
struct keylist * keys , struct btree * b )
2017-03-17 09:18:50 +03:00
{
2022-04-01 04:44:55 +03:00
struct bkey_i * k = & b - > key ;
BUG_ON ( bch2_keylist_u64s ( keys ) + k - > k . u64s >
2020-05-25 21:57:06 +03:00
ARRAY_SIZE ( as - > _old_keys ) ) ;
2022-04-01 04:44:55 +03:00
bkey_copy ( keys - > top , k ) ;
bkey_i_to_btree_ptr_v2 ( keys - > top ) - > v . mem_ptr = b - > c . level + 1 ;
bch2_keylist_push ( keys ) ;
}
2020-05-25 21:57:06 +03:00
/*
 * The transactional part of an interior btree node update, where we journal the
 * update we did to the interior node and update alloc info:
 */
static int btree_update_nodes_written_trans ( struct btree_trans * trans ,
struct btree_update * as )
2020-05-02 02:56:31 +03:00
{
2020-05-25 21:57:06 +03:00
struct bkey_i * k ;
int ret ;
2022-03-29 23:29:10 +03:00
ret = darray_make_room ( & trans - > extra_journal_entries , as - > journal_u64s ) ;
if ( ret )
return ret ;
memcpy ( & darray_top ( trans - > extra_journal_entries ) ,
as - > journal_entries ,
as - > journal_u64s * sizeof ( u64 ) ) ;
trans - > extra_journal_entries . nr + = as - > journal_u64s ;
2020-05-25 21:57:06 +03:00
trans - > journal_pin = & as - > journal ;
2020-05-02 02:56:31 +03:00
2022-04-14 22:37:16 +03:00
for_each_keylist_key ( & as - > old_keys , k ) {
2022-04-01 04:44:55 +03:00
unsigned level = bkey_i_to_btree_ptr_v2 ( k ) - > v . mem_ptr ;
2022-04-14 22:37:16 +03:00
ret = bch2_trans_mark_old ( trans , as - > btree_id , level , bkey_i_to_s_c ( k ) , 0 ) ;
2020-05-25 21:57:06 +03:00
if ( ret )
return ret ;
2020-05-02 02:56:31 +03:00
}
2022-04-14 22:37:16 +03:00
for_each_keylist_key ( & as - > new_keys , k ) {
2022-04-01 04:44:55 +03:00
unsigned level = bkey_i_to_btree_ptr_v2 ( k ) - > v . mem_ptr ;
2022-04-14 22:37:16 +03:00
ret = bch2_trans_mark_new ( trans , as - > btree_id , level , k , 0 ) ;
2020-05-25 21:57:06 +03:00
if ( ret )
return ret ;
}
return 0 ;
2020-05-02 02:56:31 +03:00
}
2020-05-25 21:57:06 +03:00
static void btree_update_nodes_written ( struct btree_update * as )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = as - > c ;
2022-08-21 21:29:43 +03:00
struct btree * b ;
2023-09-13 00:16:02 +03:00
struct btree_trans * trans = bch2_trans_get ( c ) ;
2020-05-25 21:57:06 +03:00
u64 journal_seq = 0 ;
unsigned i ;
2020-02-09 03:06:31 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
2020-12-03 02:30:06 +03:00
	/*
	 * If we're already in an error state, it might be because a btree node
	 * was never written, and we might be trying to free that same btree
	 * node here, but it won't have been marked as allocated and we'll see
	 * spurious disk usage inconsistencies in the transactional part below
	 * if we don't skip it:
	 */
ret = bch2_journal_error ( & c - > journal ) ;
if ( ret )
goto err ;
2023-09-11 06:33:08 +03:00
	/*
	 * Wait for any in flight writes to finish before we free the old nodes
	 * on disk:
	 */
for ( i = 0 ; i < as - > nr_old_nodes ; i + + ) {
__le64 seq ;
2022-08-21 21:29:43 +03:00
b = as - > old_nodes [ i ] ;
2023-09-13 00:16:02 +03:00
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_read ) ;
2022-08-21 21:29:43 +03:00
seq = b - > data ? b - > data - > keys . seq : 0 ;
six_unlock_read ( & b - > c . lock ) ;
2023-09-11 06:33:08 +03:00
if ( seq = = as - > old_nodes_seq [ i ] )
2022-08-21 21:29:43 +03:00
wait_on_bit_io ( & b - > flags , BTREE_NODE_write_in_flight_inner ,
2021-07-10 20:44:42 +03:00
TASK_UNINTERRUPTIBLE ) ;
2023-09-11 06:33:08 +03:00
}
2017-03-17 09:18:50 +03:00
	/*
	 * We did an update to a parent node where the pointers we added pointed
	 * to child nodes that weren't written yet: now, the child nodes have
	 * been written so we can write out the update to the interior node.
	 */

	/*
	 * We can't call into journal reclaim here: we'd block on the journal
	 * reclaim lock, but we may need to release the open buckets we have
	 * pinned in order for other btree updates to make forward progress, and
	 * journal reclaim does btree updates when flushing bkey_cached entries,
	 * which may require allocations as well.
	 */
2023-09-13 00:16:02 +03:00
ret = commit_do ( trans , & as - > disk_res , & journal_seq ,
2023-06-28 00:32:48 +03:00
BCH_WATERMARK_reclaim |
2022-08-21 21:29:43 +03:00
BTREE_INSERT_NOFAIL |
BTREE_INSERT_NOCHECK_RW |
2023-06-28 00:32:48 +03:00
BTREE_INSERT_JOURNAL_RECLAIM ,
2023-09-13 00:16:02 +03:00
btree_update_nodes_written_trans ( trans , as ) ) ;
bch2_trans_unlock ( trans ) ;
2020-05-25 21:57:06 +03:00
2020-12-03 02:30:06 +03:00
bch2_fs_fatal_err_on ( ret & & ! bch2_journal_error ( & c - > journal ) , c ,
2022-10-22 22:10:28 +03:00
" %s(): error %s " , __func__ , bch2_err_str ( ret ) ) ;
2020-12-03 02:30:06 +03:00
err :
2022-08-21 21:29:43 +03:00
if ( as - > b ) {
2022-09-16 21:42:38 +03:00
struct btree_path * path ;
2022-08-21 21:29:43 +03:00
b = as - > b ;
2023-09-13 00:16:02 +03:00
path = get_unlocked_mut_path ( trans , as - > btree_id , b - > c . level , b - > key . k . p ) ;
2020-05-02 23:21:35 +03:00
		/*
		 * @b is the node we did the final insert into:
		 *
		 * On failure to get a journal reservation, we still have to
		 * unblock the write and allow most of the write path to happen
		 * so that shutdown works, but the i->journal_seq mechanism
		 * won't work to prevent the btree write from being visible (we
		 * didn't get a journal sequence number) - instead
		 * __bch2_btree_node_write() doesn't do the actual write if
		 * we're in journal error state:
		 */
2022-10-03 23:41:17 +03:00
		/*
		 * Ensure transaction is unlocked before using
		 * btree_node_lock_nopath() (the use of which is always suspect,
		 * we need to work on removing this in the future)
		 *
		 * It should be, but get_unlocked_mut_path() -> bch2_path_get()
		 * calls bch2_path_upgrade(), before we call path_make_mut(), so
		 * we may rarely end up with a locked path besides the one we
		 * have here:
		 */
2023-09-13 00:16:02 +03:00
bch2_trans_unlock ( trans ) ;
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_intent ) ;
mark_btree_node_locked ( trans , path , b - > c . level , BTREE_NODE_INTENT_LOCKED ) ;
2023-05-21 06:57:48 +03:00
path - > l [ b - > c . level ] . lock_seq = six_lock_seq ( & b - > c . lock ) ;
2022-09-16 21:42:38 +03:00
path - > l [ b - > c . level ] . b = b ;
2023-09-13 00:16:02 +03:00
bch2_btree_node_lock_write_nofail ( trans , path , & b - > c ) ;
2022-09-16 21:42:38 +03:00
2020-05-25 21:57:06 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
2017-03-17 09:18:50 +03:00
list_del ( & as - > write_blocked_list ) ;
2022-02-27 17:56:33 +03:00
if ( list_empty ( & b - > write_blocked ) )
clear_btree_node_write_blocked ( b ) ;
2019-01-14 00:02:22 +03:00
2020-12-04 00:20:18 +03:00
		/*
		 * Node might have been freed, recheck under
		 * btree_interior_update_lock:
		 */
if ( as - > b = = b ) {
2020-05-25 21:57:06 +03:00
BUG_ON ( ! b - > c . level ) ;
BUG_ON ( ! btree_node_dirty ( b ) ) ;
2020-12-04 00:20:18 +03:00
if ( ! ret ) {
2023-09-13 01:41:22 +03:00
struct bset * last = btree_bset_last ( b ) ;
last - > journal_seq = cpu_to_le64 (
2022-04-01 04:44:55 +03:00
max ( journal_seq ,
2023-09-13 01:41:22 +03:00
le64_to_cpu ( last - > journal_seq ) ) ) ;
2020-12-04 00:20:18 +03:00
bch2_btree_add_journal_pin ( c , b , journal_seq ) ;
} else {
				/*
				 * If we didn't get a journal sequence number we
				 * can't write this btree node, because recovery
				 * won't know to ignore this write:
				 */
set_btree_node_never_write ( b ) ;
}
2020-05-02 23:21:35 +03:00
}
2020-05-25 21:57:06 +03:00
mutex_unlock ( & c - > btree_interior_update_lock ) ;
2022-09-16 21:42:38 +03:00
2023-08-02 03:06:45 +03:00
mark_btree_node_locked_noreset ( path , b - > c . level , BTREE_NODE_INTENT_LOCKED ) ;
2020-05-02 23:21:35 +03:00
six_unlock_write ( & b - > c . lock ) ;
2017-03-17 09:18:50 +03:00
2020-05-25 21:57:06 +03:00
btree_node_write_if_need ( c , b , SIX_LOCK_intent ) ;
2023-09-13 00:16:02 +03:00
btree_node_unlock ( trans , path , b - > c . level ) ;
bch2_path_put ( trans , path , true ) ;
2017-03-17 09:18:50 +03:00
}
2020-02-09 03:06:31 +03:00
bch2_journal_pin_drop ( & c - > journal , & as - > journal ) ;
2020-05-25 21:57:06 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
for ( i = 0 ; i < as - > nr_new_nodes ; i + + ) {
b = as - > new_nodes [ i ] ;
2020-04-08 00:27:12 +03:00
2020-05-02 23:21:35 +03:00
BUG_ON ( b - > will_make_reachable ! = ( unsigned long ) as ) ;
b - > will_make_reachable = 0 ;
2022-02-27 17:56:33 +03:00
clear_btree_node_will_make_reachable ( b ) ;
2020-05-25 21:57:06 +03:00
}
mutex_unlock ( & c - > btree_interior_update_lock ) ;
for ( i = 0 ; i < as - > nr_new_nodes ; i + + ) {
b = as - > new_nodes [ i ] ;
2020-04-08 00:27:12 +03:00
2023-09-13 00:16:02 +03:00
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_read ) ;
2020-05-25 21:57:06 +03:00
btree_node_write_if_need ( c , b , SIX_LOCK_read ) ;
six_unlock_read ( & b - > c . lock ) ;
2020-04-08 00:27:12 +03:00
}
2020-04-04 22:45:06 +03:00
2020-05-25 21:57:06 +03:00
for ( i = 0 ; i < as - > nr_open_buckets ; i + + )
bch2_open_bucket_put ( c , c - > open_buckets + as - > open_buckets [ i ] ) ;
2020-05-02 23:21:35 +03:00
2023-09-13 00:16:02 +03:00
bch2_btree_update_free ( as , trans ) ;
bch2_trans_put ( trans ) ;
2020-05-25 21:57:06 +03:00
}
2020-04-08 00:27:12 +03:00
2020-05-25 21:57:06 +03:00
static void btree_interior_update_work ( struct work_struct * work )
{
struct bch_fs * c =
container_of ( work , struct bch_fs , btree_interior_update_work ) ;
struct btree_update * as ;
2020-04-08 00:27:12 +03:00
2020-05-25 21:57:06 +03:00
while ( 1 ) {
mutex_lock ( & c - > btree_interior_update_lock ) ;
as = list_first_entry_or_null ( & c - > btree_interior_updates_unwritten ,
struct btree_update , unwritten_list ) ;
if ( as & & ! as - > nodes_written )
as = NULL ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
2020-05-02 23:21:35 +03:00
2020-05-25 21:57:06 +03:00
if ( ! as )
break ;
btree_update_nodes_written ( as ) ;
2020-04-08 00:27:12 +03:00
}
2020-05-25 21:57:06 +03:00
}
2023-11-18 03:13:27 +03:00
static CLOSURE_CALLBACK ( btree_update_set_nodes_written )
2020-05-25 21:57:06 +03:00
{
2023-11-18 03:13:27 +03:00
closure_type ( as , struct btree_update , cl ) ;
2020-05-25 21:57:06 +03:00
struct bch_fs * c = as - > c ;
2020-04-08 00:27:12 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
2020-05-25 21:57:06 +03:00
as - > nodes_written = true ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
queue_work ( c - > btree_interior_update_worker , & c - > btree_interior_update_work ) ;
2017-03-17 09:18:50 +03:00
}
/*
 * We're updating @b with pointers to nodes that haven't finished writing yet:
 * block @b from being written until @as completes
 */
static void btree_update_updated_node ( struct btree_update * as , struct btree * b )
{
struct bch_fs * c = as - > c ;
mutex_lock ( & c - > btree_interior_update_lock ) ;
2020-02-09 00:39:37 +03:00
list_add_tail ( & as - > unwritten_list , & c - > btree_interior_updates_unwritten ) ;
2017-03-17 09:18:50 +03:00
BUG_ON ( as - > mode ! = BTREE_INTERIOR_NO_UPDATE ) ;
BUG_ON ( ! btree_node_dirty ( b ) ) ;
2023-02-10 05:13:37 +03:00
BUG_ON ( ! b - > c . level ) ;
2017-03-17 09:18:50 +03:00
2020-02-09 03:06:31 +03:00
as - > mode = BTREE_INTERIOR_UPDATING_NODE ;
as - > b = b ;
2022-02-27 17:56:33 +03:00
set_btree_node_write_blocked ( b ) ;
2017-03-17 09:18:50 +03:00
list_add ( & as - > write_blocked_list , & b - > write_blocked ) ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
}
static void btree_update_reparent ( struct btree_update * as ,
struct btree_update * child )
{
struct bch_fs * c = as - > c ;
2020-02-09 03:06:31 +03:00
lockdep_assert_held ( & c - > btree_interior_update_lock ) ;
2017-03-17 09:18:50 +03:00
child - > b = NULL ;
child - > mode = BTREE_INTERIOR_UPDATING_AS ;
2020-02-09 03:06:31 +03:00
bch2_journal_pin_copy ( & c - > journal , & as - > journal , & child - > journal , NULL ) ;
2017-03-17 09:18:50 +03:00
}
2020-02-09 03:06:31 +03:00
static void btree_update_updated_root ( struct btree_update * as , struct btree * b )
2017-03-17 09:18:50 +03:00
{
2020-05-25 21:57:06 +03:00
struct bkey_i * insert = & b - > key ;
2017-03-17 09:18:50 +03:00
struct bch_fs * c = as - > c ;
BUG_ON ( as - > mode ! = BTREE_INTERIOR_NO_UPDATE ) ;
2020-05-25 21:57:06 +03:00
BUG_ON ( as - > journal_u64s + jset_u64s ( insert - > k . u64s ) >
ARRAY_SIZE ( as - > journal_entries ) ) ;
as - > journal_u64s + =
journal_entry_set ( ( void * ) & as - > journal_entries [ as - > journal_u64s ] ,
BCH_JSET_ENTRY_btree_root ,
b - > c . btree_id , b - > c . level ,
insert , insert - > k . u64s ) ;
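	/*
	 * Note: a new root has no parent node to insert a pointer into, so the
	 * update is staged purely as a BCH_JSET_ENTRY_btree_root journal entry
	 * here and copied into the transaction's journal entries later, in
	 * btree_update_nodes_written_trans():
	 */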
2017-03-17 09:18:50 +03:00
2020-02-09 03:06:31 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
list_add_tail ( & as - > unwritten_list , & c - > btree_interior_updates_unwritten ) ;
2017-03-17 09:18:50 +03:00
2020-02-09 03:06:31 +03:00
as - > mode = BTREE_INTERIOR_UPDATING_ROOT ;
2017-03-17 09:18:50 +03:00
mutex_unlock ( & c - > btree_interior_update_lock ) ;
}
2020-05-25 21:57:06 +03:00
/*
 * bch2_btree_update_add_new_node:
 *
 * This causes @as to wait on @b to be written, before it gets to
 * bch2_btree_update_nodes_written
 *
 * Additionally, it sets b->will_make_reachable to prevent any additional writes
 * to @b from happening besides the first until @b is reachable on disk
 *
 * And it adds @b to the list of @as's new nodes, so that we can update sector
 * counts in bch2_btree_update_nodes_written:
 */
2021-08-25 04:30:06 +03:00
static void bch2_btree_update_add_new_node ( struct btree_update * as , struct btree * b )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = as - > c ;
2020-05-25 21:57:06 +03:00
closure_get ( & as - > cl ) ;
2017-03-17 09:18:50 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
BUG_ON ( as - > nr_new_nodes > = ARRAY_SIZE ( as - > new_nodes ) ) ;
BUG_ON ( b - > will_make_reachable ) ;
as - > new_nodes [ as - > nr_new_nodes + + ] = b ;
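	/*
	 * will_make_reachable doubles as a tagged pointer: the low bit records
	 * that @b holds a ref on as->cl (taken above), the remaining bits are
	 * the btree_update pointer itself - see btree_update_drop_new_node():
	 */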
b - > will_make_reachable = 1UL | ( unsigned long ) as ;
2022-02-27 17:56:33 +03:00
set_btree_node_will_make_reachable ( b ) ;
2017-03-17 09:18:50 +03:00
mutex_unlock ( & c - > btree_interior_update_lock ) ;
2020-05-25 21:57:06 +03:00
2022-04-14 22:37:16 +03:00
btree_update_add_key ( as , & as - > new_keys , b ) ;
2022-10-01 07:34:02 +03:00
if ( b - > key . k . type = = KEY_TYPE_btree_ptr_v2 ) {
unsigned bytes = vstruct_end ( & b - > data - > keys ) - ( void * ) b - > data ;
unsigned sectors = round_up ( bytes , block_bytes ( c ) ) > > 9 ;
bkey_i_to_btree_ptr_v2 ( & b - > key ) - > v . sectors_written =
cpu_to_le16 ( sectors ) ;
}
2017-03-17 09:18:50 +03:00
}
2020-05-25 21:57:06 +03:00
/*
 * returns true if @b was a new node
 */
2017-03-17 09:18:50 +03:00
static void btree_update_drop_new_node ( struct bch_fs * c , struct btree * b )
{
struct btree_update * as ;
unsigned long v ;
unsigned i ;
mutex_lock ( & c - > btree_interior_update_lock ) ;
2020-05-25 21:57:06 +03:00
	/*
	 * When b->will_make_reachable != 0, it owns a ref on as->cl that's
	 * dropped when it gets written by bch2_btree_complete_write - the
	 * xchg() is for synchronization with bch2_btree_complete_write:
	 */
2017-03-17 09:18:50 +03:00
v = xchg ( & b - > will_make_reachable , 0 ) ;
2022-02-27 17:56:33 +03:00
clear_btree_node_will_make_reachable ( b ) ;
2017-03-17 09:18:50 +03:00
as = ( struct btree_update * ) ( v & ~ 1UL ) ;
if ( ! as ) {
mutex_unlock ( & c - > btree_interior_update_lock ) ;
return ;
}
for ( i = 0 ; i < as - > nr_new_nodes ; i + + )
if ( as - > new_nodes [ i ] = = b )
goto found ;
BUG ( ) ;
found :
array_remove_item ( as - > new_nodes , as - > nr_new_nodes , i ) ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
if ( v & 1 )
closure_put ( & as - > cl ) ;
}
2021-08-25 04:30:06 +03:00
static void bch2_btree_update_get_open_buckets ( struct btree_update * as , struct btree * b )
2017-03-17 09:18:50 +03:00
{
2020-05-25 21:57:06 +03:00
while ( b - > ob . nr )
as - > open_buckets [ as - > nr_open_buckets + + ] =
b - > ob . v [ - - b - > ob . nr ] ;
2017-03-17 09:18:50 +03:00
}
/*
 * @b is being split/rewritten: it may have pointers to not-yet-written btree
 * nodes and thus outstanding btree_updates - redirect @b's
 * btree_updates to point to this btree_update:
 */
2021-08-25 04:30:06 +03:00
static void bch2_btree_interior_update_will_free_node ( struct btree_update * as ,
2022-04-01 04:44:55 +03:00
struct btree * b )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = as - > c ;
struct btree_update * p , * n ;
struct btree_write * w ;
set_btree_node_dying ( b ) ;
if ( btree_node_fake ( b ) )
return ;
mutex_lock ( & c - > btree_interior_update_lock ) ;
	/*
	 * Does this node have any btree_update operations preventing
	 * it from being written?
	 *
	 * If so, redirect them to point to this btree_update: we can
	 * write out our new nodes, but we won't make them visible until those
	 * operations complete
	 */
list_for_each_entry_safe ( p , n , & b - > write_blocked , write_blocked_list ) {
2020-06-09 23:25:07 +03:00
list_del_init ( & p - > write_blocked_list ) ;
2017-03-17 09:18:50 +03:00
btree_update_reparent ( as , p ) ;
2019-01-14 00:02:22 +03:00
		/*
		 * for flush_held_btree_writes() waiting on updates to flush or
		 * nodes to be writeable:
		 */
closure_wake_up ( & c - > btree_interior_update_wait ) ;
2017-03-17 09:18:50 +03:00
}
2022-02-26 19:10:20 +03:00
clear_btree_node_dirty_acct ( c , b ) ;
2017-03-17 09:18:50 +03:00
clear_btree_node_need_write ( b ) ;
2023-02-10 05:13:37 +03:00
clear_btree_node_write_blocked ( b ) ;
2017-03-17 09:18:50 +03:00
	/*
	 * Does this node have unwritten data that has a pin on the journal?
	 *
	 * If so, transfer that pin to the btree_update operation -
	 * note that if we're freeing multiple nodes, we only need to keep the
	 * oldest pin of any of the nodes we're freeing. We'll release the pin
	 * when the new nodes are persistent and reachable on disk:
	 */
2020-02-09 03:06:31 +03:00
w = btree_current_write ( b ) ;
bch2_journal_pin_copy ( & c - > journal , & as - > journal , & w - > journal , NULL ) ;
2017-03-17 09:18:50 +03:00
bch2_journal_pin_drop ( & c - > journal , & w - > journal ) ;
w = btree_prev_write ( b ) ;
2020-02-09 03:06:31 +03:00
bch2_journal_pin_copy ( & c - > journal , & as - > journal , & w - > journal , NULL ) ;
2017-03-17 09:18:50 +03:00
bch2_journal_pin_drop ( & c - > journal , & w - > journal ) ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
2020-05-25 21:57:06 +03:00
	/*
	 * Is this a node that isn't reachable on disk yet?
	 *
	 * Nodes that aren't reachable yet have writes blocked until they're
	 * reachable - now that we've cancelled any pending writes and moved
	 * things waiting on that write to wait on this update, we can drop this
	 * node from the list of nodes that the other update is making
	 * reachable, prior to freeing it:
	 */
btree_update_drop_new_node ( c , b ) ;
2022-04-14 22:37:16 +03:00
btree_update_add_key ( as , & as - > old_keys , b ) ;
2021-04-20 00:17:34 +03:00
2023-09-11 06:33:08 +03:00
as - > old_nodes [ as - > nr_old_nodes ] = b ;
as - > old_nodes_seq [ as - > nr_old_nodes ] = b - > data - > keys . seq ;
as - > nr_old_nodes + + ;
2017-03-17 09:18:50 +03:00
}
2022-08-21 21:29:43 +03:00
static void bch2_btree_update_done ( struct btree_update * as , struct btree_trans * trans )
2017-03-17 09:18:50 +03:00
{
2021-12-10 23:41:38 +03:00
struct bch_fs * c = as - > c ;
u64 start_time = as - > start_time ;
2017-03-17 09:18:50 +03:00
BUG_ON ( as - > mode = = BTREE_INTERIOR_NO_UPDATE ) ;
2021-03-31 22:21:37 +03:00
if ( as - > took_gc_lock )
up_read ( & as - > c - > gc_lock ) ;
as - > took_gc_lock = false ;
2022-08-21 21:29:43 +03:00
bch2_btree_reserve_put ( as , trans ) ;
2017-03-17 09:18:50 +03:00
2021-05-23 00:37:25 +03:00
continue_at ( & as - > cl , btree_update_set_nodes_written ,
as - > c - > btree_interior_update_worker ) ;
2021-12-10 23:41:38 +03:00
bch2_time_stats_update ( & c - > times [ BCH_TIME_btree_interior_update_foreground ] ,
start_time ) ;
2017-03-17 09:18:50 +03:00
}
2021-08-25 04:30:06 +03:00
static struct btree_update *
2021-08-30 22:18:31 +03:00
bch2_btree_update_start ( struct btree_trans * trans , struct btree_path * path ,
2022-03-05 03:15:46 +03:00
unsigned level , bool split , unsigned flags )
2017-03-17 09:18:50 +03:00
{
2020-04-06 04:49:17 +03:00
struct bch_fs * c = trans - > c ;
2017-03-17 09:18:50 +03:00
struct btree_update * as ;
2021-12-10 23:41:38 +03:00
u64 start_time = local_clock ( ) ;
2020-06-10 03:54:36 +03:00
int disk_res_flags = ( flags & BTREE_INSERT_NOFAIL )
2020-05-25 21:57:06 +03:00
? BCH_DISK_RESERVATION_NOFAIL : 0 ;
2022-01-12 07:24:43 +03:00
unsigned nr_nodes [ 2 ] = { 0 , 0 } ;
2022-03-05 03:15:46 +03:00
unsigned update_level = level ;
2023-06-28 00:32:48 +03:00
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK ;
2020-06-10 03:54:36 +03:00
int ret = 0 ;
2022-07-07 07:37:46 +03:00
u32 restart_count = trans - > restart_count ;
2020-05-25 21:57:06 +03:00
2021-08-30 22:18:31 +03:00
BUG_ON ( ! path - > should_be_locked ) ;
2021-06-15 01:16:10 +03:00
2023-06-28 00:32:48 +03:00
if ( watermark = = BCH_WATERMARK_copygc )
watermark = BCH_WATERMARK_btree_copygc ;
if ( watermark < BCH_WATERMARK_btree )
watermark = BCH_WATERMARK_btree ;
flags & = ~ BCH_WATERMARK_MASK ;
flags | = watermark ;
2022-03-05 03:15:46 +03:00
while ( 1 ) {
2022-03-05 03:16:04 +03:00
nr_nodes [ ! ! update_level ] + = 1 + split ;
2022-03-05 03:15:46 +03:00
update_level + + ;
2022-09-28 01:57:34 +03:00
ret = bch2_btree_path_upgrade ( trans , path , update_level + 1 ) ;
if ( ret )
return ERR_PTR ( ret ) ;
2022-03-05 03:15:46 +03:00
2022-09-28 01:57:34 +03:00
if ( ! btree_path_node ( path , update_level ) ) {
/* Allocating new root? */
nr_nodes [ 1 ] + = split ;
update_level = BTREE_MAX_DEPTH ;
break ;
}
2022-03-05 03:15:46 +03:00
2023-11-27 02:31:11 +03:00
		/*
		 * Always check for space for two keys, even if we won't have to
		 * split at prior level - it might have been a merge instead:
		 */
2022-09-28 01:57:34 +03:00
if ( bch2_btree_node_insert_fits ( c , path - > l [ update_level ] . b ,
2023-11-27 02:31:11 +03:00
BKEY_BTREE_PTR_U64s_MAX * 2 ) )
2022-09-28 01:57:34 +03:00
break ;
2020-05-25 21:57:06 +03:00
2023-06-17 06:30:02 +03:00
split = path - > l [ update_level ] . b - > nr . live_u64s > BTREE_SPLIT_THRESHOLD ( c ) ;
2022-09-28 01:57:34 +03:00
}
2021-03-31 22:21:37 +03:00
if ( flags & BTREE_INSERT_GC_LOCK_HELD )
lockdep_assert_held ( & c - > gc_lock ) ;
else if ( ! down_read_trylock ( & c - > gc_lock ) ) {
2023-05-29 01:06:27 +03:00
ret = drop_locks_do ( trans , ( down_read ( & c - > gc_lock ) , 0 ) ) ;
2022-07-18 06:06:38 +03:00
if ( ret ) {
2021-03-31 22:21:37 +03:00
up_read ( & c - > gc_lock ) ;
2022-07-18 06:06:38 +03:00
return ERR_PTR ( ret ) ;
2021-03-31 22:21:37 +03:00
}
}
2023-05-29 01:02:38 +03:00
as = mempool_alloc ( & c - > btree_interior_update_pool , GFP_NOFS ) ;
2020-05-25 21:57:06 +03:00
memset ( as , 0 , sizeof ( * as ) ) ;
closure_init ( & as - > cl , NULL ) ;
as - > c = c ;
2021-12-10 23:41:38 +03:00
as - > start_time = start_time ;
2020-05-25 21:57:06 +03:00
as - > mode = BTREE_INTERIOR_NO_UPDATE ;
2021-03-31 22:21:37 +03:00
as - > took_gc_lock = ! ( flags & BTREE_INSERT_GC_LOCK_HELD ) ;
2021-08-30 22:18:31 +03:00
as - > btree_id = path - > btree_id ;
2022-09-28 01:57:34 +03:00
as - > update_level = update_level ;
2020-05-25 21:57:06 +03:00
INIT_LIST_HEAD ( & as - > list ) ;
INIT_LIST_HEAD ( & as - > unwritten_list ) ;
INIT_LIST_HEAD ( & as - > write_blocked_list ) ;
bch2_keylist_init ( & as - > old_keys , as - > _old_keys ) ;
bch2_keylist_init ( & as - > new_keys , as - > _new_keys ) ;
bch2_keylist_init ( & as - > parent_keys , as - > inline_keys ) ;
2017-03-17 09:18:50 +03:00
2021-07-13 23:12:00 +03:00
mutex_lock ( & c - > btree_interior_update_lock ) ;
list_add_tail ( & as - > list , & c - > btree_interior_update_list ) ;
mutex_unlock ( & c - > btree_interior_update_lock ) ;
	/*
	 * We don't want to allocate if we're in an error state, that can cause
	 * deadlock on emergency shutdown due to open buckets getting stuck in
	 * the btree_reserve_cache after allocator shutdown has cleared it out.
	 * This check needs to come after adding us to the btree_interior_update
	 * list but before calling bch2_btree_reserve_get, to synchronize with
	 * __bch2_fs_read_only().
	 */
ret = bch2_journal_error ( & c - > journal ) ;
if ( ret )
goto err ;
2020-05-25 21:57:06 +03:00
ret = bch2_disk_reservation_get ( c , & as - > disk_res ,
2022-03-05 03:16:04 +03:00
( nr_nodes [ 0 ] + nr_nodes [ 1 ] ) * btree_sectors ( c ) ,
2020-05-25 21:57:06 +03:00
c - > opts . metadata_replicas ,
disk_res_flags ) ;
if ( ret )
goto err ;
2017-03-17 09:18:50 +03:00
2022-01-10 04:48:31 +03:00
ret = bch2_btree_reserve_get ( trans , as , nr_nodes , flags , NULL ) ;
2022-09-23 04:27:42 +03:00
if ( bch2_err_matches ( ret , ENOSPC ) | |
bch2_err_matches ( ret , ENOMEM ) ) {
2022-01-12 07:24:43 +03:00
struct closure cl ;
2017-03-17 09:18:50 +03:00
2023-07-10 18:17:56 +03:00
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
if ( bch2_err_matches ( ret , ENOSPC ) & &
( flags & BTREE_INSERT_JOURNAL_RECLAIM ) & &
watermark ! = BCH_WATERMARK_reclaim ) {
ret = - BCH_ERR_journal_reclaim_would_deadlock ;
goto err ;
}
2022-01-12 07:24:43 +03:00
closure_init_stack ( & cl ) ;
2021-03-31 22:21:37 +03:00
2022-01-12 07:24:43 +03:00
do {
2022-01-10 04:48:31 +03:00
ret = bch2_btree_reserve_get ( trans , as , nr_nodes , flags , & cl ) ;
2022-09-24 04:00:24 +03:00
bch2_trans_unlock ( trans ) ;
2022-01-12 07:24:43 +03:00
closure_sync ( & cl ) ;
2022-12-13 23:17:40 +03:00
} while ( bch2_err_matches ( ret , BCH_ERR_operation_blocked ) ) ;
2021-03-31 22:21:37 +03:00
}
2022-01-12 07:24:43 +03:00
if ( ret ) {
2023-02-02 06:51:51 +03:00
trace_and_count ( c , btree_reserve_get_fail , trans - > fn ,
_RET_IP_ , nr_nodes [ 0 ] + nr_nodes [ 1 ] , ret ) ;
2022-01-12 07:24:43 +03:00
goto err ;
}
2021-03-31 22:21:37 +03:00
2022-07-18 06:06:38 +03:00
ret = bch2_trans_relock ( trans ) ;
if ( ret )
2022-01-12 07:24:43 +03:00
goto err ;
2022-07-07 07:37:46 +03:00
bch2_trans_verify_not_restarted ( trans , restart_count ) ;
2022-01-12 07:24:43 +03:00
return as ;
err :
2022-08-21 21:29:43 +03:00
bch2_btree_update_free ( as , trans ) ;
2020-05-25 21:57:06 +03:00
return ERR_PTR ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* Btree root updates: */
2020-05-25 21:57:06 +03:00
static void bch2_btree_set_root_inmem ( struct bch_fs * c , struct btree * b )
2017-03-17 09:18:50 +03:00
{
/* Root nodes cannot be reaped */
mutex_lock ( & c - > btree_cache . lock ) ;
list_del_init ( & b - > list ) ;
mutex_unlock ( & c - > btree_cache . lock ) ;
mutex_lock ( & c - > btree_root_lock ) ;
BUG_ON ( btree_node_root ( c , b ) & &
2020-06-06 19:28:01 +03:00
( b - > c . level < btree_node_root ( c , b ) - > c . level | |
2017-03-17 09:18:50 +03:00
! btree_node_dying ( btree_node_root ( c , b ) ) ) ) ;
2023-06-29 05:09:13 +03:00
bch2_btree_id_root ( c , b - > c . btree_id ) - > b = b ;
2017-03-17 09:18:50 +03:00
mutex_unlock ( & c - > btree_root_lock ) ;
bch2_recalc_btree_reserve ( c ) ;
}
2021-08-25 04:30:06 +03:00
static void bch2_btree_set_root ( struct btree_update * as ,
struct btree_trans * trans ,
2021-08-30 22:18:31 +03:00
struct btree_path * path ,
2021-08-25 04:30:06 +03:00
struct btree * b )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = as - > c ;
struct btree * old ;
2022-08-27 19:48:36 +03:00
trace_and_count ( c , btree_node_set_root , c , b ) ;
2017-03-17 09:18:50 +03:00
old = btree_node_root ( c , b ) ;
	/*
	 * Ensure no one is using the old root while we switch to the
	 * new root:
	 */
2022-09-04 04:09:54 +03:00
bch2_btree_node_lock_write_nofail ( trans , path , & old - > c ) ;
2017-03-17 09:18:50 +03:00
2020-05-25 21:57:06 +03:00
bch2_btree_set_root_inmem ( c , b ) ;
2017-03-17 09:18:50 +03:00
2020-02-09 03:06:31 +03:00
btree_update_updated_root ( as , b ) ;
2017-03-17 09:18:50 +03:00
	/*
	 * Unlock old root after new root is visible:
	 *
	 * The new root isn't persistent, but that's ok: we still have
	 * an intent lock on the new root, and any updates that would
	 * depend on the new root would have to update the new root.
	 */
2021-08-30 22:18:31 +03:00
bch2_btree_node_unlock_write ( trans , path , old ) ;
2017-03-17 09:18:50 +03:00
}
/* Interior node updates: */
2021-08-25 04:30:06 +03:00
static void bch2_insert_fixup_btree_ptr ( struct btree_update * as ,
struct btree_trans * trans ,
2021-08-30 22:18:31 +03:00
struct btree_path * path ,
2021-08-25 04:30:06 +03:00
struct btree * b ,
struct btree_node_iter * node_iter ,
struct bkey_i * insert )
2017-03-17 09:18:50 +03:00
{
2020-11-16 22:16:42 +03:00
struct bch_fs * c = as - > c ;
2017-03-17 09:18:50 +03:00
struct bkey_packed * k ;
2022-04-04 00:50:01 +03:00
struct printbuf buf = PRINTBUF ;
2022-11-18 00:03:15 +03:00
unsigned long old , new , v ;
2020-11-16 22:16:42 +03:00
2021-07-10 20:44:42 +03:00
BUG_ON ( insert - > k . type = = KEY_TYPE_btree_ptr_v2 & &
! btree_ptr_sectors_written ( insert ) ) ;
2021-12-26 04:07:00 +03:00
if ( unlikely ( ! test_bit ( JOURNAL_REPLAY_DONE , & c - > journal . flags ) ) )
bch2_journal_key_overwritten ( c , b - > c . btree_id , b - > c . level , insert - > k . p ) ;
2022-04-04 04:50:25 +03:00
if ( bch2_bkey_invalid ( c , bkey_i_to_s_c ( insert ) ,
btree_node_type ( b ) , WRITE , & buf ) ? :
2023-10-25 03:44:36 +03:00
bch2_bkey_in_btree_node ( c , b , bkey_i_to_s_c ( insert ) , & buf ) ) {
2022-04-04 00:50:01 +03:00
printbuf_reset ( & buf ) ;
2023-02-04 05:01:40 +03:00
prt_printf ( & buf , " inserting invalid bkey \n " ) ;
2022-02-25 21:18:19 +03:00
bch2_bkey_val_to_text ( & buf , c , bkey_i_to_s_c ( insert ) ) ;
2023-02-04 05:01:40 +03:00
prt_printf ( & buf , " \n " ) ;
2022-04-04 04:50:25 +03:00
bch2_bkey_invalid ( c , bkey_i_to_s_c ( insert ) ,
btree_node_type ( b ) , WRITE , & buf ) ;
2023-10-25 03:44:36 +03:00
bch2_bkey_in_btree_node ( c , b , bkey_i_to_s_c ( insert ) , & buf ) ;
2022-04-04 00:50:01 +03:00
bch2_fs_inconsistent ( c , " %s " , buf . buf ) ;
2020-11-16 22:16:42 +03:00
dump_stack ( ) ;
}
2017-03-17 09:18:50 +03:00
2020-03-31 23:23:43 +03:00
BUG_ON ( as - > journal_u64s + jset_u64s ( insert - > k . u64s ) >
ARRAY_SIZE ( as - > journal_entries ) ) ;
2020-05-25 21:57:06 +03:00
as - > journal_u64s + =
journal_entry_set ( ( void * ) & as - > journal_entries [ as - > journal_u64s ] ,
BCH_JSET_ENTRY_btree_keys ,
b - > c . btree_id , b - > c . level ,
insert , insert - > k . u64s ) ;
2017-03-17 09:18:50 +03:00
while ( ( k = bch2_btree_node_iter_peek_all ( node_iter , b ) ) & &
2020-01-07 06:25:09 +03:00
bkey_iter_pos_cmp ( b , k , & insert - > k . p ) < 0 )
2017-03-17 09:18:50 +03:00
bch2_btree_node_iter_advance ( node_iter , b ) ;
2021-08-30 22:18:31 +03:00
bch2_btree_bset_insert_key ( trans , path , b , node_iter , insert ) ;
2022-02-26 19:10:20 +03:00
set_btree_node_dirty_acct ( c , b ) ;
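	/*
	 * Flag the node as needing an interior-type write; a cmpxchg loop is
	 * used below so concurrent updates to b->flags aren't clobbered:
	 */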
2022-11-18 00:03:15 +03:00
v = READ_ONCE ( b - > flags ) ;
do {
old = new = v ;
new & = ~ BTREE_WRITE_TYPE_MASK ;
new | = BTREE_WRITE_interior ;
new | = 1 < < BTREE_NODE_need_write ;
} while ( ( v = cmpxchg ( & b - > flags , old , new ) ) ! = old ) ;
2022-04-04 00:50:01 +03:00
printbuf_exit ( & buf ) ;
2017-03-17 09:18:50 +03:00
}
2021-04-24 02:25:27 +03:00
static void
2021-08-25 04:30:06 +03:00
__bch2_btree_insert_keys_interior ( struct btree_update * as ,
struct btree_trans * trans ,
2021-08-30 22:18:31 +03:00
struct btree_path * path ,
2021-08-25 04:30:06 +03:00
struct btree * b ,
struct btree_node_iter node_iter ,
struct keylist * keys )
2021-04-24 02:25:27 +03:00
{
struct bkey_i * insert = bch2_keylist_front ( keys ) ;
struct bkey_packed * k ;
BUG_ON ( btree_node_type ( b ) ! = BKEY_TYPE_btree ) ;
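	/*
	 * Back the node iterator up past any existing keys >= the position of
	 * the first key we're inserting, so the inserts below land in order:
	 */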
while ( ( k = bch2_btree_node_iter_prev_all ( & node_iter , b ) ) & &
( bkey_cmp_left_packed ( b , k , & insert - > k . p ) > = 0 ) )
;
while ( ! bch2_keylist_empty ( keys ) ) {
2023-09-13 01:41:22 +03:00
insert = bch2_keylist_front ( keys ) ;
2022-11-16 05:52:12 +03:00
2023-09-13 01:41:22 +03:00
if ( bpos_gt ( insert - > k . p , b - > key . k . p ) )
2022-11-16 05:52:12 +03:00
break ;
2023-09-13 01:41:22 +03:00
bch2_insert_fixup_btree_ptr ( as , trans , path , b , & node_iter , insert ) ;
2021-04-24 02:25:27 +03:00
bch2_keylist_pop_front ( keys ) ;
}
}
2017-03-17 09:18:50 +03:00
/*
 * Move keys from n1 (original replacement node, now lower node) to n2 (higher
 * node)
 */
2022-11-16 05:52:12 +03:00
static void __btree_split_node ( struct btree_update * as ,
struct btree_trans * trans ,
struct btree * b ,
struct btree * n [ 2 ] )
2017-03-17 09:18:50 +03:00
{
2022-11-16 05:52:12 +03:00
struct bkey_packed * k ;
struct bpos n1_pos = POS_MIN ;
struct btree_node_iter iter ;
struct bset * bsets [ 2 ] ;
struct bkey_format_state format [ 2 ] ;
struct bkey_packed * out [ 2 ] ;
struct bkey uk ;
unsigned u64s , n1_u64s = ( b - > nr . live_u64s * 3 ) / 5 ;
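	/* n1_u64s: aim to put roughly 3/5 of the live u64s in the lower node */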
int i ;
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
for ( i = 0 ; i < 2 ; i + + ) {
BUG_ON ( n [ i ] - > nsets ! = 1 ) ;
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
bsets [ i ] = btree_bset_first ( n [ i ] ) ;
out [ i ] = bsets [ i ] - > start ;
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
SET_BTREE_NODE_SEQ ( n [ i ] - > data , BTREE_NODE_SEQ ( b - > data ) + 1 ) ;
bch2_bkey_format_init ( & format [ i ] ) ;
}
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
u64s = 0 ;
for_each_btree_node_key ( b , k , & iter ) {
if ( bkey_deleted ( k ) )
continue ;
i = u64s > = n1_u64s ;
u64s + = k - > u64s ;
uk = bkey_unpack_key ( b , k ) ;
if ( ! i )
n1_pos = uk . p ;
bch2_bkey_format_add_key ( & format [ i ] , & uk ) ;
}
2019-11-10 07:50:52 +03:00
2022-11-16 05:52:12 +03:00
btree_set_min ( n [ 0 ] , b - > data - > min_key ) ;
btree_set_max ( n [ 0 ] , n1_pos ) ;
btree_set_min ( n [ 1 ] , bpos_successor ( n1_pos ) ) ;
btree_set_max ( n [ 1 ] , b - > data - > max_key ) ;
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
for ( i = 0 ; i < 2 ; i + + ) {
bch2_bkey_format_add_pos ( & format [ i ] , n [ i ] - > data - > min_key ) ;
bch2_bkey_format_add_pos ( & format [ i ] , n [ i ] - > data - > max_key ) ;
2017-03-17 09:18:50 +03:00
2022-11-16 05:52:12 +03:00
n [ i ] - > data - > format = bch2_bkey_format_done ( & format [ i ] ) ;
btree_node_set_format ( n [ i ] , n [ i ] - > data - > format ) ;
2017-03-17 09:18:50 +03:00
}
	u64s = 0;
	for_each_btree_node_key(b, k, &iter) {
		if (bkey_deleted(k))
			continue;

		i = u64s >= n1_u64s;
		u64s += k->u64s;

		if (bch2_bkey_transform(&n[i]->format, out[i], bkey_packed(k)
					? &b->format : &bch2_bkey_format_current, k))
			out[i]->format = KEY_FORMAT_LOCAL_BTREE;
		else
			bch2_bkey_unpack(b, (void *) out[i], k);

		out[i]->needs_whiteout = false;

		btree_keys_account_key_add(&n[i]->nr, 0, out[i]);
		out[i] = bkey_p_next(out[i]);
	}

	for (i = 0; i < 2; i++) {
		bsets[i]->u64s = cpu_to_le16((u64 *) out[i] - bsets[i]->_data);

		BUG_ON(!bsets[i]->u64s);

		set_btree_bset_end(n[i], n[i]->set);

		btree_node_reset_sib_u64s(n[i]);

		bch2_verify_btree_nr_keys(n[i]);

		if (b->c.level)
			btree_node_interior_verify(as->c, n[i]);
	}
}

/*
 * For updates to interior nodes, we've got to do the insert before we split
 * because the stuff we're inserting has to be inserted atomically. Post split,
 * the keys might have to go in different nodes and the split would no longer
 * be atomic.
 *
 * Worse, if the insert is from btree node coalescing, if we do the insert
 * after we do the split (and pick the pivot) - the pivot we pick might be
 * between nodes that were coalesced, and thus in the middle of a child node
 * post coalescing:
 */
static void btree_split_insert_keys(struct btree_update *as,
				    struct btree_trans *trans,
				    struct btree_path *path,
				    struct btree *b,
				    struct keylist *keys)
{
	if (!bch2_keylist_empty(keys) &&
	    bpos_le(bch2_keylist_front(keys)->k.p, b->data->max_key)) {
		struct btree_node_iter node_iter;

		bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);

		__bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);

		btree_node_interior_verify(as->c, b);
	}
}
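
/*
 * btree_split - replace @b with new node(s): split into two nodes if it's
 * over the split threshold, otherwise compact it into a single replacement
 * node. @keys (if non NULL) are inserted into the replacement node(s) before
 * they are made visible, then the parent is updated (or a new root allocated
 * if there was no parent).
 */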
static int btree_split(struct btree_update *as, struct btree_trans *trans,
		       struct btree_path *path, struct btree *b,
		       struct keylist *keys, unsigned flags)
{
	struct bch_fs *c = as->c;
	struct btree *parent = btree_node_parent(path, b);
	struct btree *n1, *n2 = NULL, *n3 = NULL;
	struct btree_path *path1 = NULL, *path2 = NULL;
	u64 start_time = local_clock();
	int ret = 0;

	BUG_ON(!parent && (b != btree_node_root(c, b)));
	BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1));

	bch2_btree_interior_update_will_free_node(as, b);
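
	/*
	 * If the node has grown past the split threshold, split it into two
	 * new nodes (possibly adding a new root); otherwise just rewrite it
	 * into a single, compacted replacement node:
	 */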
	if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
		struct btree *n[2];

		trace_and_count(c, btree_node_split, c, b);

		n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
		n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);

		__btree_split_node(as, trans, b, n);

		if (keys) {
			btree_split_insert_keys(as, trans, path, n1, keys);
			btree_split_insert_keys(as, trans, path, n2, keys);
			BUG_ON(!bch2_keylist_empty(keys));
		}

		bch2_btree_build_aux_trees(n2);
		bch2_btree_build_aux_trees(n1);

		bch2_btree_update_add_new_node(as, n1);
		bch2_btree_update_add_new_node(as, n2);
		six_unlock_write(&n2->c.lock);
		six_unlock_write(&n1->c.lock);

		path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
		six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
		mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
		bch2_btree_path_level_init(trans, path1, n1);

		path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
		six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
		mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
		bch2_btree_path_level_init(trans, path2, n2);

		/*
		 * Note that on recursive parent_keys == keys, so we
		 * can't start adding new keys to parent_keys before emptying it
		 * out (which we did with btree_split_insert_keys() above)
		 */
		bch2_keylist_add(&as->parent_keys, &n1->key);
		bch2_keylist_add(&as->parent_keys, &n2->key);

		if (!parent) {
			/* Depth increases, make a new root */
			n3 = __btree_root_alloc(as, trans, b->c.level + 1);

			bch2_btree_update_add_new_node(as, n3);
			six_unlock_write(&n3->c.lock);

			path2->locks_want++;
			BUG_ON(btree_node_locked(path2, n3->c.level));
			six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
			mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED);
			bch2_btree_path_level_init(trans, path2, n3);

			n3->sib_u64s[0] = U16_MAX;
			n3->sib_u64s[1] = U16_MAX;

			btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
		}
	} else {
		trace_and_count(c, btree_node_compact, c, b);

		n1 = bch2_btree_node_alloc_replacement(as, trans, b);

		if (keys) {
			btree_split_insert_keys(as, trans, path, n1, keys);
			BUG_ON(!bch2_keylist_empty(keys));
		}

		bch2_btree_build_aux_trees(n1);
		bch2_btree_update_add_new_node(as, n1);
		six_unlock_write(&n1->c.lock);

		path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
		six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
		mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
		bch2_btree_path_level_init(trans, path1, n1);

		if (parent)
			bch2_keylist_add(&as->parent_keys, &n1->key);
	}

	/* New nodes all written, now make them visible: */

	if (parent) {
		/* Split a non root node */
		ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
		if (ret)
			goto err;
	} else if (n3) {
		bch2_btree_set_root(as, trans, path, n3);
	} else {
		/* Root filled up but didn't need to be split */
		bch2_btree_set_root(as, trans, path, n1);
	}

	if (n3) {
		bch2_btree_update_get_open_buckets(as, n3);
		bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
	}
	if (n2) {
		bch2_btree_update_get_open_buckets(as, n2);
		bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
	}
	bch2_btree_update_get_open_buckets(as, n1);
	bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);

	/*
	 * The old node must be freed (in memory) _before_ unlocking the new
	 * nodes - else another thread could re-acquire a read lock on the old
	 * node after another thread has locked and updated the new node, thus
	 * seeing stale data:
	 */
	bch2_btree_node_free_inmem(trans, path, b);

	if (n3)
		bch2_trans_node_add(trans, n3);
	if (n2)
		bch2_trans_node_add(trans, n2);
	bch2_trans_node_add(trans, n1);

	if (n3)
		six_unlock_intent(&n3->c.lock);
	if (n2)
		six_unlock_intent(&n2->c.lock);
	six_unlock_intent(&n1->c.lock);
out:
	if (path2) {
		__bch2_btree_path_unlock(trans, path2);
		bch2_path_put(trans, path2, true);
	}
	if (path1) {
		__bch2_btree_path_unlock(trans, path1);
		bch2_path_put(trans, path1, true);
	}

	bch2_trans_verify_locks(trans);

	bch2_time_stats_update(&c->times[n2
			       ? BCH_TIME_btree_node_split
			       : BCH_TIME_btree_node_compact],
			       start_time);

	return ret;
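
	/*
	 * Error path: the new nodes were never made reachable, so free them
	 * without writing them out:
	 */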
err:
	if (n3)
		bch2_btree_node_free_never_used(as, trans, n3);
	if (n2)
		bch2_btree_node_free_never_used(as, trans, n2);
	bch2_btree_node_free_never_used(as, trans, n1);
	goto out;
}

static void
bch2_btree_insert_keys_interior(struct btree_update *as,
				struct btree_trans *trans,
				struct btree_path *path,
				struct btree *b,
				struct keylist *keys)
{
	struct btree_path *linked;

	__bch2_btree_insert_keys_interior(as, trans, path, b,
					  path->l[b->c.level].iter, keys);

	btree_update_updated_node(as, b);

	trans_for_each_path_with_node(trans, b, linked)
		bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);

	bch2_trans_verify_paths(trans);
}

/**
 * bch2_btree_insert_node - insert bkeys into a given btree node
 *
 * @as:		btree_update object
 * @trans:	btree_trans object
 * @path:	path that points to current node
 * @b:		node to insert keys into
 * @keys:	list of keys to insert
 * @flags:	transaction commit flags
 *
 * Returns: 0 on success, typically transaction restart error on failure
 *
 * Inserts as many keys as it can into a given btree node, splitting it if full.
 * If a split occurred, this function will return early. This can only happen
 * for leaf nodes -- inserts into interior nodes have to be atomic.
 */
static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
				  struct btree_path *path, struct btree *b,
				  struct keylist *keys, unsigned flags)
{
	struct bch_fs *c = as->c;
	int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
	int old_live_u64s = b->nr.live_u64s;
	int live_u64s_added, u64s_added;
	int ret;

	lockdep_assert_held(&c->gc_lock);
	BUG_ON(!btree_node_intent_locked(path, b->c.level));
	BUG_ON(!b->c.level);
	BUG_ON(!as || as->b);
	bch2_verify_keylist_sorted(keys);

	ret = bch2_btree_node_lock_write(trans, path, &b->c);
	if (ret)
		return ret;

	bch2_btree_node_prep_for_write(trans, path, b);
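
	/*
	 * If all the keys won't fit, drop the write lock and go split the
	 * node instead; otherwise do the insert with the node write locked:
	 */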
	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
		bch2_btree_node_unlock_write(trans, path, b);
		goto split;
	}

	btree_node_interior_verify(c, b);

	bch2_btree_insert_keys_interior(as, trans, path, b, keys);

	live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
	u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;

	if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
	if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);

	if (u64s_added > live_u64s_added &&
	    bch2_maybe_compact_whiteouts(c, b))
		bch2_trans_node_reinit_iter(trans, b);

	bch2_btree_node_unlock_write(trans, path, b);

	btree_node_interior_verify(c, b);

	return 0;
split:
	/*
	 * We could attempt to avoid the transaction restart, by calling
	 * bch2_btree_path_upgrade() and allocating more nodes:
	 */
	if (b->c.level >= as->update_level) {
		trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
	}

	return btree_split(as, trans, path, b, keys, flags);
}

int bch2_btree_split_leaf(struct btree_trans *trans,
			  struct btree_path *path,
			  unsigned flags)
{
	struct btree *b = path_l(path)->b;
	struct btree_update *as;
	unsigned l;
	int ret = 0;

	as = bch2_btree_update_start(trans, path, path->level,
				     true, flags);
	if (IS_ERR(as))
		return PTR_ERR(as);

	ret = btree_split(as, trans, path, b, NULL, flags);
	if (ret) {
		bch2_btree_update_free(as, trans);
		return ret;
	}

	bch2_btree_update_done(as, trans);

	for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++)
		ret = bch2_foreground_maybe_merge(trans, path, l, flags);

	return ret;
}
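
/*
 * Try to merge the node at @level with its sibling in direction @sib, if the
 * combined node would be small enough; otherwise record the size estimate in
 * sib_u64s so the merge isn't retried immediately.
 */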
int __bch2_foreground_maybe_merge(struct btree_trans *trans,
				  struct btree_path *path,
				  unsigned level,
				  unsigned flags,
				  enum btree_node_sibling sib)
{
	struct bch_fs *c = trans->c;
	struct btree_path *sib_path = NULL, *new_path = NULL;
	struct btree_update *as;
	struct bkey_format_state new_s;
	struct bkey_format new_f;
	struct bkey_i delete;
	struct btree *b, *m, *n, *prev, *next, *parent;
	struct bpos sib_pos;
	size_t sib_u64s;
	u64 start_time = local_clock();
	int ret = 0;

	BUG_ON(!path->should_be_locked);
	BUG_ON(!btree_node_locked(path, level));

	b = path->l[level].b;

	if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
	    (sib == btree_next_sib && bpos_eq(b->data->max_key, SPOS_MAX))) {
		b->sib_u64s[sib] = U16_MAX;
		return 0;
	}

	sib_pos = sib == btree_prev_sib
		? bpos_predecessor(b->data->min_key)
		: bpos_successor(b->data->max_key);

	sib_path = bch2_path_get(trans, path->btree_id, sib_pos,
				 U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_);
	ret = bch2_btree_path_traverse(trans, sib_path, false);
	if (ret)
		goto err;

	btree_path_set_should_be_locked(sib_path);

	m = sib_path->l[level].b;

	if (btree_node_parent(path, b) !=
	    btree_node_parent(sib_path, m)) {
		b->sib_u64s[sib] = U16_MAX;
		goto out;
	}

	if (sib == btree_prev_sib) {
		prev = m;
		next = b;
	} else {
		prev = b;
		next = m;
	}

	if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

		bch2_bpos_to_text(&buf1, prev->data->max_key);
		bch2_bpos_to_text(&buf2, next->data->min_key);

		bch_err(c,
			"%s(): btree topology error:\n"
			"prev ends at %s\n"
			"next starts at %s",
			__func__, buf1.buf, buf2.buf);
		printbuf_exit(&buf1);
		printbuf_exit(&buf2);
		bch2_topology_error(c);
		ret = -EIO;
		goto err;
	}
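
	/*
	 * Estimate how large a single node holding both b and m would be in
	 * the combined key format; only merge if that (after hysteresis) is
	 * at or below the foreground merge threshold:
	 */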
	bch2_bkey_format_init(&new_s);
	bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
	__bch2_btree_calc_format(&new_s, prev);
	__bch2_btree_calc_format(&new_s, next);
	bch2_bkey_format_add_pos(&new_s, next->data->max_key);
	new_f = bch2_bkey_format_done(&new_s);

	sib_u64s = btree_node_u64s_with_format(b, &new_f) +
		btree_node_u64s_with_format(m, &new_f);

	if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
		sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
		sib_u64s /= 2;
		sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
	}

	sib_u64s = min(sib_u64s, btree_max_u64s(c));
	sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
	b->sib_u64s[sib] = sib_u64s;

	if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
		goto out;

	parent = btree_node_parent(path, b);
	as = bch2_btree_update_start(trans, path, level, false,
				     BTREE_INSERT_NOFAIL|flags);
	ret = PTR_ERR_OR_ZERO(as);
	if (ret)
		goto err;

	trace_and_count(c, btree_node_merge, c, b);

	bch2_btree_interior_update_will_free_node(as, b);
	bch2_btree_interior_update_will_free_node(as, m);

	n = bch2_btree_node_alloc(as, trans, b->c.level);

	SET_BTREE_NODE_SEQ(n->data,
			   max(BTREE_NODE_SEQ(b->data),
			       BTREE_NODE_SEQ(m->data)) + 1);

	btree_set_min(n, prev->data->min_key);
	btree_set_max(n, next->data->max_key);

	n->data->format = new_f;
	btree_node_set_format(n, new_f);

	bch2_btree_sort_into(c, n, prev);
	bch2_btree_sort_into(c, n, next);

	bch2_btree_build_aux_trees(n);
	bch2_btree_update_add_new_node(as, n);
	six_unlock_write(&n->c.lock);

	new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
	six_lock_increment(&n->c.lock, SIX_LOCK_intent);
	mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
	bch2_btree_path_level_init(trans, new_path, n);

	bkey_init(&delete.k);
	delete.k.p = prev->key.k.p;
	bch2_keylist_add(&as->parent_keys, &delete);
	bch2_keylist_add(&as->parent_keys, &n->key);

	bch2_trans_verify_paths(trans);

	ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
	if (ret)
		goto err_free_update;

	bch2_trans_verify_paths(trans);

	bch2_btree_update_get_open_buckets(as, n);
	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);

	bch2_btree_node_free_inmem(trans, path, b);
	bch2_btree_node_free_inmem(trans, sib_path, m);

	bch2_trans_node_add(trans, n);

	bch2_trans_verify_paths(trans);

	six_unlock_intent(&n->c.lock);

	bch2_btree_update_done(as, trans);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
out:
err:
	if (new_path)
		bch2_path_put(trans, new_path, true);
	bch2_path_put(trans, sib_path, true);
	bch2_trans_verify_locks(trans);
	return ret;
err_free_update:
	bch2_btree_node_free_never_used(as, trans, n);
	bch2_btree_update_free(as, trans);
	goto out;
}

int bch2_btree_node_rewrite(struct btree_trans *trans,
			    struct btree_iter *iter,
			    struct btree *b,
			    unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct btree_path *new_path = NULL;
	struct btree *n, *parent;
	struct btree_update *as;
	int ret;

	flags |= BTREE_INSERT_NOFAIL;

	parent = btree_node_parent(iter->path, b);
	as = bch2_btree_update_start(trans, iter->path, b->c.level,
				     false, flags);
	ret = PTR_ERR_OR_ZERO(as);
	if (ret)
		goto out;

	bch2_btree_interior_update_will_free_node(as, b);

	n = bch2_btree_node_alloc_replacement(as, trans, b);

	bch2_btree_build_aux_trees(n);
	bch2_btree_update_add_new_node(as, n);
	six_unlock_write(&n->c.lock);

	new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
	six_lock_increment(&n->c.lock, SIX_LOCK_intent);
	mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
	bch2_btree_path_level_init(trans, new_path, n);

	trace_and_count(c, btree_node_rewrite, c, b);

	if (parent) {
		bch2_keylist_add(&as->parent_keys, &n->key);
		ret = bch2_btree_insert_node(as, trans, iter->path, parent,
					     &as->parent_keys, flags);
		if (ret)
			goto err;
	} else {
		bch2_btree_set_root(as, trans, iter->path, n);
	}

	bch2_btree_update_get_open_buckets(as, n);
	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);

	bch2_btree_node_free_inmem(trans, iter->path, b);

	bch2_trans_node_add(trans, n);
	six_unlock_intent(&n->c.lock);

	bch2_btree_update_done(as, trans);
out:
	if (new_path)
		bch2_path_put(trans, new_path, true);
	bch2_trans_downgrade(trans);
	return ret;
err:
	bch2_btree_node_free_never_used(as, trans, n);
	bch2_btree_update_free(as, trans);
	goto out;
}
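
/*
 * Async btree node rewrites: a rewrite request records the btree, level,
 * position and node sequence number, so the worker can look the node up again
 * later and skip the rewrite if the node has since been replaced.
 */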
struct async_btree_rewrite {
	struct bch_fs		*c;
	struct work_struct	work;
	struct list_head	list;
	enum btree_id		btree_id;
	unsigned		level;
	struct bpos		pos;
	__le64			seq;
};

static int async_btree_node_rewrite_trans(struct btree_trans *trans,
					  struct async_btree_rewrite *a)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	int ret;

	bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
				  BTREE_MAX_DEPTH, a->level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto out;

	if (!b || b->data->keys.seq != a->seq) {
		struct printbuf buf = PRINTBUF;

		if (b)
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		else
			prt_str(&buf, "(null)");

		bch_info(c, "%s: node to rewrite not found; searching for seq %llu, got\n%s",
			 __func__, a->seq, buf.buf);
		printbuf_exit(&buf);
		goto out;
	}

	ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static void async_btree_node_rewrite_work(struct work_struct *work)
{
	struct async_btree_rewrite *a =
		container_of(work, struct async_btree_rewrite, work);
	struct bch_fs *c = a->c;
	int ret;

	ret = bch2_trans_do(c, NULL, NULL, 0,
			    async_btree_node_rewrite_trans(trans, a));
	if (ret)
		bch_err_fn(c, ret);
	bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
	kfree(a);
}

void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{
	struct async_btree_rewrite *a;
	int ret;

	a = kmalloc(sizeof(*a), GFP_NOFS);
	if (!a) {
		bch_err(c, "%s: error allocating memory", __func__);
		return;
	}

	a->c		= c;
	a->btree_id	= b->c.btree_id;
	a->level	= b->c.level;
	a->pos		= b->key.k.p;
	a->seq		= b->data->keys.seq;
	INIT_WORK(&a->work, async_btree_node_rewrite_work);

	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
		mutex_lock(&c->pending_node_rewrites_lock);
		list_add(&a->list, &c->pending_node_rewrites);
		mutex_unlock(&c->pending_node_rewrites_lock);
		return;
	}

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
		if (test_bit(BCH_FS_STARTED, &c->flags)) {
			bch_err(c, "%s: error getting c->writes ref", __func__);
			kfree(a);
			return;
		}

		ret = bch2_fs_read_write_early(c);
		if (ret) {
			bch_err_msg(c, ret, "going read-write");
			kfree(a);
			return;
		}

		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
	}

	queue_work(c->btree_interior_update_worker, &a->work);
}
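
/*
 * Rewrite requests that arrive before BCH_FS_MAY_GO_RW is set are parked on
 * c->pending_node_rewrites; they are either queued once the filesystem goes
 * read-write or freed if it never does:
 */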
void bch2_do_pending_node_rewrites(struct bch_fs *c)
{
	struct async_btree_rewrite *a, *n;

	mutex_lock(&c->pending_node_rewrites_lock);
	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
		list_del(&a->list);

		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
		queue_work(c->btree_interior_update_worker, &a->work);
	}
	mutex_unlock(&c->pending_node_rewrites_lock);
}

void bch2_free_pending_node_rewrites(struct bch_fs *c)
{
	struct async_btree_rewrite *a, *n;

	mutex_lock(&c->pending_node_rewrites_lock);
	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
		list_del(&a->list);

		kfree(a);
	}
	mutex_unlock(&c->pending_node_rewrites_lock);
}

static int __bch2_btree_node_update_key(struct btree_trans *trans,
					struct btree_iter *iter,
					struct btree *b, struct btree *new_hash,
					struct bkey_i *new_key,
					unsigned commit_flags,
					bool skip_triggers)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter2 = { NULL };
	struct btree *parent;
	int ret;

	if (!skip_triggers) {
		ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
					  bkey_i_to_s_c(&b->key), 0);
		if (ret)
			return ret;

		ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
					  new_key, 0);
		if (ret)
			return ret;
	}

	if (new_hash) {
		bkey_copy(&new_hash->key, new_key);
		ret = bch2_btree_node_hash_insert(&c->btree_cache,
				new_hash, b->c.level, b->c.btree_id);
		BUG_ON(ret);
	}
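
	/*
	 * If the node has a parent, the new key is inserted as an update to
	 * the parent node; if it's the root, the new root key goes straight
	 * into the journal as a btree_root entry:
	 */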
	parent = btree_node_parent(iter->path, b);
	if (parent) {
		bch2_trans_copy_iter(&iter2, iter);

		iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
				iter2.flags & BTREE_ITER_INTENT,
				_THIS_IP_);

		BUG_ON(iter2.path->level != b->c.level);
		BUG_ON(!bpos_eq(iter2.path->pos, new_key->k.p));

		btree_path_set_level_up(trans, iter2.path);

		trans->paths_sorted = false;

		ret = bch2_btree_iter_traverse(&iter2) ?:
		      bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);
		if (ret)
			goto err;
	} else {
		BUG_ON(btree_node_root(c, b) != b);

		ret = darray_make_room(&trans->extra_journal_entries,
				       jset_u64s(new_key->k.u64s));
		if (ret)
			return ret;

		journal_entry_set((void *) &darray_top(trans->extra_journal_entries),
				  BCH_JSET_ENTRY_btree_root,
				  b->c.btree_id, b->c.level,
				  new_key, new_key->k.u64s);
		trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s);
	}

	ret = bch2_trans_commit(trans, NULL, NULL, commit_flags);
	if (ret)
		goto err;

	bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c);

	if (new_hash) {
		mutex_lock(&c->btree_cache.lock);
		bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
		bch2_btree_node_hash_remove(&c->btree_cache, b);

		bkey_copy(&b->key, new_key);
		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
		BUG_ON(ret);
		mutex_unlock(&c->btree_cache.lock);
	} else {
		bkey_copy(&b->key, new_key);
	}

	bch2_btree_node_unlock_write(trans, iter->path, b);
out:
	bch2_trans_iter_exit(trans, &iter2);
	return ret;
err:
	if (new_hash) {
		mutex_lock(&c->btree_cache.lock);
		bch2_btree_node_hash_remove(&c->btree_cache, b);
		mutex_unlock(&c->btree_cache.lock);
	}
	goto out;
}

int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *iter,
			       struct btree *b, struct bkey_i *new_key,
			       unsigned commit_flags, bool skip_triggers)
{
	struct bch_fs *c = trans->c;
	struct btree *new_hash = NULL;
	struct btree_path *path = iter->path;
	struct closure cl;
	int ret = 0;

	ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1);
	if (ret)
		return ret;

	closure_init_stack(&cl);

	/*
	 * check btree_ptr_hash_val() after @b is locked by
	 * btree_iter_traverse():
	 */
	if (btree_ptr_hash_val(new_key) != b->hash_val) {
		ret = bch2_btree_cache_cannibalize_lock(c, &cl);
		if (ret) {
			ret = drop_locks_do(trans, (closure_sync(&cl), 0));
			if (ret)
				return ret;
		}

		new_hash = bch2_btree_node_mem_alloc(trans, false);
	}

	path->intent_ref++;
	ret = __bch2_btree_node_update_key(trans, iter, b, new_hash, new_key,
					   commit_flags, skip_triggers);
	--path->intent_ref;

	if (new_hash) {
		mutex_lock(&c->btree_cache.lock);
		list_move(&new_hash->list, &c->btree_cache.freeable);
		mutex_unlock(&c->btree_cache.lock);

		six_unlock_write(&new_hash->c.lock);
		six_unlock_intent(&new_hash->c.lock);
	}
	closure_sync(&cl);
	bch2_btree_cache_cannibalize_unlock(c);
	return ret;
}
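
/*
 * Variant of bch2_btree_node_update_key() for callers that don't already have
 * an iterator to @b: looks the node up again, checks it hasn't been freed, and
 * drops any pointers in @new_key for devices the existing key doesn't have.
 */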
int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
					struct btree *b, struct bkey_i *new_key,
					unsigned commit_flags, bool skip_triggers)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
				  BTREE_MAX_DEPTH, b->c.level,
				  BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter);
	if (ret)
		goto out;

	/* has node been freed? */
	if (iter.path->l[b->c.level].b != b) {
		/* node has been freed: */
		BUG_ON(!btree_node_dying(b));
		goto out;
	}

	BUG_ON(!btree_node_hashed(b));

	struct bch_extent_ptr *ptr;
	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));

	ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
					 commit_flags, skip_triggers);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* Init code: */

/*
 * Only for filesystem bringup, when first reading the btree roots or allocating
 * btree roots when initializing a new filesystem:
 */
void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
{
	BUG_ON(btree_node_root(c, b));

	bch2_btree_set_root_inmem(c, b);
}

static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
{
	struct bch_fs *c = trans->c;
	struct closure cl;
	struct btree *b;
	int ret;

	closure_init_stack(&cl);

	do {
		ret = bch2_btree_cache_cannibalize_lock(c, &cl);
		closure_sync(&cl);
	} while (ret);

	b = bch2_btree_node_mem_alloc(trans, false);
	bch2_btree_cache_cannibalize_unlock(c);

	set_btree_node_fake(b);
	set_btree_node_need_rewrite(b);
	b->c.level	= 0;
	b->c.btree_id	= id;

	bkey_btree_ptr_init(&b->key);
	b->key.k.p = SPOS_MAX;
	*((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id;

	bch2_bset_init_first(b, &b->data->keys);
	bch2_btree_build_aux_trees(b);

	b->data->flags = 0;
	btree_set_min(b, POS_MIN);
	btree_set_max(b, SPOS_MAX);
	b->data->format = bch2_btree_calc_format(b);
	btree_node_set_format(b, b->data->format);

	ret = bch2_btree_node_hash_insert(&c->btree_cache, b,
					  b->c.level, b->c.btree_id);
	BUG_ON(ret);

	bch2_btree_set_root_inmem(c, b);

	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
	return 0;
}

void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
{
	bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
}

void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct btree_update *as;

	mutex_lock(&c->btree_interior_update_lock);
	list_for_each_entry(as, &c->btree_interior_update_list, list)
		prt_printf(out, "%p m %u w %u r %u j %llu\n",
			   as,
			   as->mode,
			   as->nodes_written,
			   closure_nr_remaining(&as->cl),
			   as->journal.seq);
	mutex_unlock(&c->btree_interior_update_lock);
}

static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
	bool ret;

	mutex_lock(&c->btree_interior_update_lock);
	ret = !list_empty(&c->btree_interior_update_list);
	mutex_unlock(&c->btree_interior_update_lock);

	return ret;
}

bool bch2_btree_interior_updates_flush(struct bch_fs *c)
{
	bool ret = bch2_btree_interior_updates_pending(c);

	if (ret)
		closure_wait_event(&c->btree_interior_update_wait,
				   !bch2_btree_interior_updates_pending(c));

	return ret;
}

void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry)
{
	struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);

	mutex_lock(&c->btree_root_lock);

	r->level = entry->level;
	r->alive = true;
	bkey_copy(&r->key, (struct bkey_i *) entry->start);

	mutex_unlock(&c->btree_root_lock);
}
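
/*
 * At journal write time, each live btree root (except those in @skip) is
 * appended as a BCH_JSET_ENTRY_btree_root entry:
 */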
struct jset_entry *
bch2_btree_roots_to_journal_entries(struct bch_fs *c,
				    struct jset_entry *end,
				    unsigned long skip)
{
	unsigned i;

	mutex_lock(&c->btree_root_lock);

	for (i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (r->alive && !test_bit(i, &skip)) {
			journal_entry_set(end, BCH_JSET_ENTRY_btree_root,
					  i, r->level, &r->key, r->key.k.u64s);
			end = vstruct_next(end);
		}
	}

	mutex_unlock(&c->btree_root_lock);

	return end;
}

void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
	if (c->btree_interior_update_worker)
		destroy_workqueue(c->btree_interior_update_worker);
	mempool_exit(&c->btree_interior_update_pool);
}

void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
{
	mutex_init(&c->btree_reserve_cache_lock);
	INIT_LIST_HEAD(&c->btree_interior_update_list);
	INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
	mutex_init(&c->btree_interior_update_lock);
	INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);

	INIT_LIST_HEAD(&c->pending_node_rewrites);
	mutex_init(&c->pending_node_rewrites_lock);
}

int bch2_fs_btree_interior_update_init(struct bch_fs *c)
{
	c->btree_interior_update_worker =
		alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
	if (!c->btree_interior_update_worker)
		return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;

	if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
				      sizeof(struct btree_update)))
		return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;

	return 0;
}