// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "extent_update.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "recovery.h"
#include "subvolume.h"
#include "replicas.h"
#include "trace.h"

#include <linux/prefetch.h>
#include <linux/sort.h>

static int __must_check
bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
			  struct bkey_i *, enum btree_update_flags);

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id, r->btree_id) ?:
		 cmp_int(l->cached, r->cached) ?:
		 -cmp_int(l->level, r->level) ?:
		 bpos_cmp(l->k->k.p, r->k->k.p);
}

static inline struct btree_path_level *insert_l(struct btree_insert_entry *i)
{
	return i->path->l + i->level;
}

static inline bool same_leaf_as_prev(struct btree_trans *trans,
				     struct btree_insert_entry *i)
{
	return i != trans->updates &&
		insert_l(&i[0])->b == insert_l(&i[-1])->b;
}

static inline bool same_leaf_as_next(struct btree_trans *trans,
				     struct btree_insert_entry *i)
{
	return i + 1 < trans->updates + trans->nr_updates &&
		insert_l(&i[0])->b == insert_l(&i[1])->b;
}

static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
						  struct btree_path *path,
						  struct btree *b)
{
	struct bch_fs *c = trans->c;

	if (path->cached)
		return;

	if (unlikely(btree_node_just_written(b)) &&
	    bch2_btree_post_write_cleanup(c, b))
		bch2_trans_node_reinit_iter(trans, b);

	/*
	 * If the last bset has been written, or if it's gotten too big - start
	 * a new bset to insert into:
	 */
	if (want_new_bset(c, b))
		bch2_btree_init_next(trans, b);
}

void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
				     struct btree_path *path,
				     struct btree *b)
{
	bch2_btree_node_lock_write_nofail(trans, path, &b->c);
	bch2_btree_node_prep_for_write(trans, path, b);
}

/* Inserting into a given leaf node (last stage of insert): */

/* Handle overwrites and do insert, for non extents: */
bool bch2_btree_bset_insert_key(struct btree_trans *trans,
				struct btree_path *path,
				struct btree *b,
				struct btree_node_iter *node_iter,
				struct bkey_i *insert)
{
	struct bkey_packed *k;
	unsigned clobber_u64s = 0, new_u64s = 0;

	EBUG_ON(btree_node_just_written(b));
	EBUG_ON(bset_written(b, btree_bset_last(b)));
	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
	EBUG_ON(bpos_cmp(insert->k.p, b->data->min_key) < 0);
	EBUG_ON(bpos_cmp(insert->k.p, b->data->max_key) > 0);
	EBUG_ON(insert->k.u64s >
		bch_btree_keys_u64s_remaining(trans->c, b));

	k = bch2_btree_node_iter_peek_all(node_iter, b);
	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
		k = NULL;

	/* @k is the key being overwritten/deleted, if any: */
	EBUG_ON(k && bkey_deleted(k));

	/* Deleting, but not found? nothing to do: */
	if (bkey_deleted(&insert->k) && !k)
		return false;

	if (bkey_deleted(&insert->k)) {
		/* Deleting: */
		btree_account_key_drop(b, k);
		k->type = KEY_TYPE_deleted;

		if (k->needs_whiteout)
			push_whiteout(trans->c, b, insert->k.p);
		k->needs_whiteout = false;

		if (k >= btree_bset_last(b)->start) {
			clobber_u64s = k->u64s;
			bch2_bset_delete(b, k, clobber_u64s);
			goto fix_iter;
		} else {
			bch2_btree_path_fix_key_modified(trans, b, k);
		}

		return true;
	}

	if (k) {
		/* Overwriting: */
		btree_account_key_drop(b, k);
		k->type = KEY_TYPE_deleted;

		insert->k.needs_whiteout = k->needs_whiteout;
		k->needs_whiteout = false;

		if (k >= btree_bset_last(b)->start) {
			clobber_u64s = k->u64s;
			goto overwrite;
		} else {
			bch2_btree_path_fix_key_modified(trans, b, k);
		}
	}

	k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
overwrite:
	bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
	new_u64s = k->u64s;
fix_iter:
	if (clobber_u64s != new_u64s)
		bch2_btree_node_iter_fix(trans, path, b, node_iter, k,
					 clobber_u64s, new_u64s);
	return true;
}
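
/*
 * Journal pin flush callback for btree node writes: if the node is still
 * dirty and still pinning journal sequence number @seq for write index @i,
 * flag it as needing a write and issue that write so the pin can be dropped.
 */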
static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
			      unsigned i, u64 seq)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct btree_write *w = container_of(pin, struct btree_write, journal);
	struct btree *b = container_of(w, struct btree, writes[i]);
	struct btree_trans trans;
	unsigned long old, new, v;
	unsigned idx = w - b->writes;

	bch2_trans_init(&trans, c, 0, 0);

	btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
	v = READ_ONCE(b->flags);

	do {
		old = new = v;

		if (!(old & (1 << BTREE_NODE_dirty)) ||
		    !!(old & (1 << BTREE_NODE_write_idx)) != idx ||
		    w->journal.seq != seq)
			break;

		new |= 1 << BTREE_NODE_need_write;
	} while ((v = cmpxchg(&b->flags, old, new)) != old);

	btree_node_write_if_need(c, b, SIX_LOCK_read);
	six_unlock_read(&b->c.lock);

	bch2_trans_exit(&trans);
	return 0;
}

static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 0, seq);
}

static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 1, seq);
}

inline void bch2_btree_add_journal_pin(struct bch_fs *c,
				       struct btree *b, u64 seq)
{
	struct btree_write *w = btree_current_write(b);

	bch2_journal_pin_add(&c->journal, seq, &w->journal,
			     btree_node_write_idx(b) == 0
			     ? btree_node_flush0
			     : btree_node_flush1);
}

/**
 * btree_insert_key_leaf - insert a key into a leaf node
 */
static void btree_insert_key_leaf(struct btree_trans *trans,
				  struct btree_insert_entry *insert)
{
	struct bch_fs *c = trans->c;
	struct btree *b = insert_l(insert)->b;
	struct bset_tree *t = bset_tree_last(b);
	struct bset *i = bset(b, t);
	int old_u64s = bset_u64s(t);
	int old_live_u64s = b->nr.live_u64s;
	int live_u64s_added, u64s_added;

	if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
					&insert_l(insert)->iter, insert->k)))
		return;

	i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
					 le64_to_cpu(i->journal_seq)));

	bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);

	if (unlikely(!btree_node_dirty(b)))
		set_btree_node_dirty_acct(c, b);

	live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
	u64s_added = (int) bset_u64s(t) - old_u64s;

	if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
	if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);

	if (u64s_added > live_u64s_added &&
	    bch2_maybe_compact_whiteouts(c, b))
		bch2_trans_node_reinit_iter(trans, b);
}

/* Cached btree updates: */

/* Normal update interface: */

static inline void btree_insert_entry_checks(struct btree_trans *trans,
					     struct btree_insert_entry *i)
{
	BUG_ON(bpos_cmp(i->k->k.p, i->path->pos));
	BUG_ON(i->cached	!= i->path->cached);
	BUG_ON(i->level		!= i->path->level);
	BUG_ON(i->btree_id	!= i->path->btree_id);
	EBUG_ON(!i->level &&
		!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
		test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
		i->k->k.p.snapshot &&
		bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot));
}

static noinline int
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
				   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	int ret;

	bch2_trans_unlock(trans);

	ret = bch2_journal_preres_get(&c->journal,
			&trans->journal_preres, u64s, 0);
	if (ret)
		return ret;

	ret = bch2_trans_relock(trans);
	if (ret) {
		trace_and_count(c, trans_restart_journal_preres_get, trans, trace_ip, 0);
		return ret;
	}

	return 0;
}

static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
					     unsigned flags)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
				   trans->journal_u64s,
				   flags |
				   (trans->flags & JOURNAL_WATERMARK_MASK));

	return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
}

#define JSET_ENTRY_LOG_U64s		4

static noinline void journal_transaction_name(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct journal *j = &c->journal;
	struct jset_entry *entry =
		bch2_journal_add_entry(j, &trans->journal_res,
				       BCH_JSET_ENTRY_log, 0, 0,
				       JSET_ENTRY_LOG_U64s);
	struct jset_entry_log *l =
		container_of(entry, struct jset_entry_log, entry);

	strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
}
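
/*
 * Space checks for the two kinds of update targets: a btree node must have
 * room for @u64s more u64s; a key cache entry may need its buffer
 * reallocated, which forces a transaction restart since pointers into the
 * old buffer go stale.
 */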
static inline enum btree_insert_ret
btree_key_can_insert(struct btree_trans *trans,
		     struct btree *b,
		     unsigned u64s)
{
	struct bch_fs *c = trans->c;

	if (!bch2_btree_node_insert_fits(c, b, u64s))
		return BTREE_INSERT_BTREE_NODE_FULL;

	return BTREE_INSERT_OK;
}

static enum btree_insert_ret
btree_key_can_insert_cached(struct btree_trans *trans,
			    struct btree_path *path,
			    unsigned u64s)
{
	struct bch_fs *c = trans->c;
	struct bkey_cached *ck = (void *) path->l[0].b;
	unsigned old_u64s = ck->u64s, new_u64s;
	struct bkey_i *new_k;

	EBUG_ON(path->level);

	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
	    bch2_btree_key_cache_must_wait(c) &&
	    !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
		return BTREE_INSERT_NEED_JOURNAL_RECLAIM;

	/*
	 * bch2_varint_decode can read past the end of the buffer by at most 7
	 * bytes (it won't be used):
	 */
	u64s += 1;

	if (u64s <= ck->u64s)
		return BTREE_INSERT_OK;

	new_u64s	= roundup_pow_of_two(u64s);
	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
	if (!new_k) {
		bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
			bch2_btree_ids[path->btree_id], new_u64s);
		return -ENOMEM;
	}

	ck->u64s	= new_u64s;
	ck->k		= new_k;
	/*
	 * Keys returned by peek() are no longer valid pointers, so we need a
	 * transaction restart:
	 */
	trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
	return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
}

/* Triggers: */
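
/*
 * Atomic ("mem") triggers run at commit time with nodes write locked: they
 * call bch2_mark_key() on the old and new keys so in-memory accounting (and
 * gc state, when BTREE_TRIGGER_GC is set) stays up to date.
 */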
static int run_one_mem_trigger(struct btree_trans *trans,
			       struct btree_insert_entry *i,
			       unsigned flags)
{
	struct bkey_s_c old = { &i->old_k, i->old_v };
	struct bkey_i *new = i->k;
	int ret;

	if (unlikely(flags & BTREE_TRIGGER_NORUN))
		return 0;

	if (!btree_node_type_needs_gc(i->btree_id))
		return 0;

	if (bch2_bkey_ops[old.k->type].atomic_trigger ==
	    bch2_bkey_ops[i->k->k.type].atomic_trigger &&
	    ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
		ret   = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
				BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
	} else {
		struct bkey		_deleted = KEY(0, 0, 0);
		struct bkey_s_c		deleted = (struct bkey_s_c) { &_deleted, NULL };

		_deleted.p = i->path->pos;

		ret   = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new),
				BTREE_TRIGGER_INSERT|flags) ?:
			bch2_mark_key(trans, old, deleted,
				BTREE_TRIGGER_OVERWRITE|flags);
	}

	return ret;
}
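
/*
 * Transactional triggers may emit further btree updates; returns 1 if a
 * trigger actually ran (so the caller loops again), 0 if there was nothing
 * to do, or a negative error code.
 */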
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
				 bool overwrite)
{
	/*
	 * Transactional triggers create new btree_insert_entries, so we can't
	 * pass them a pointer to a btree_insert_entry, that memory is going to
	 * move:
	 */
	struct bkey old_k = i->old_k;
	struct bkey_s_c old = { &old_k, i->old_v };

	if ((i->flags & BTREE_TRIGGER_NORUN) ||
	    !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
		return 0;

	if (!i->insert_trigger_run &&
	    !i->overwrite_trigger_run &&
	    bch2_bkey_ops[old.k->type].trans_trigger ==
	    bch2_bkey_ops[i->k->k.type].trans_trigger &&
	    ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
		i->overwrite_trigger_run = true;
		i->insert_trigger_run = true;
		return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
					   BTREE_TRIGGER_INSERT|
					   BTREE_TRIGGER_OVERWRITE|
					   i->flags) ?: 1;
	} else if (overwrite && !i->overwrite_trigger_run) {
		i->overwrite_trigger_run = true;
		return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1;
	} else if (!overwrite && !i->insert_trigger_run) {
		i->insert_trigger_run = true;
		return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1;
	} else {
		return 0;
	}
}
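
/*
 * Run transactional triggers for every queued update to @btree_id, making
 * separate overwrite and insert passes and repeating each pass until no
 * trigger generates further updates.
 */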
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
			      struct btree_insert_entry *btree_id_start)
{
	struct btree_insert_entry *i;
	bool trans_trigger_run;
	int ret, overwrite;

	for (overwrite = 1; overwrite >= 0; --overwrite) {
		/*
		 * Running triggers will append more updates to the list of updates as
		 * we're walking it:
		 */
		do {
			trans_trigger_run = false;

			for (i = btree_id_start;
			     i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
			     i++) {
				if (i->btree_id != btree_id)
					continue;

				ret = run_one_trans_trigger(trans, i, overwrite);
				if (ret < 0)
					return ret;
				if (ret)
					trans_trigger_run = true;
			}
		} while (trans_trigger_run);
	}

	return 0;
}
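
/*
 * Run all transactional triggers for the updates queued in @trans, btree by
 * btree, with the alloc btree handled last.
 */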
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
	struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates;
	unsigned btree_id = 0;
	int ret = 0;

	/*
	 * For a given btree, this algorithm runs insert triggers before
	 * overwrite triggers: this is so that when extents are being moved
	 * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
	 * they are re-added.
	 */
	for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
		if (btree_id == BTREE_ID_alloc)
			continue;

		while (btree_id_start < trans->updates + trans->nr_updates &&
		       btree_id_start->btree_id < btree_id)
			btree_id_start++;

		ret = run_btree_triggers(trans, btree_id, btree_id_start);
		if (ret)
			return ret;
	}

	trans_for_each_update(trans, i) {
		if (i->btree_id > BTREE_ID_alloc)
			break;
		if (i->btree_id == BTREE_ID_alloc) {
			ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
			if (ret)
				return ret;
			break;
		}
	}

	trans_for_each_update(trans, i)
		BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) &&
		       (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
		       (!i->insert_trigger_run || !i->overwrite_trigger_run));

	return 0;
}

static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	int ret = 0;

	trans_for_each_update(trans, i) {
		/*
		 * XXX: synchronization of cached update triggers with gc
		 * XXX: synchronization of interior node updates with gc
		 */
		BUG_ON(i->cached || i->level);

		if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) {
			ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
			if (ret)
				break;
		}
	}

	return ret;
}
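
/*
 * Core of the commit path, called with the relevant leaf nodes write locked:
 * runs commit hooks, verifies every update still fits, takes the journal
 * reservation, runs mem/gc triggers, journals the updates and finally does
 * the btree (or key cache) insertions.
 */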
static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans,
			       struct btree_insert_entry **stopped_at,
			       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	struct btree_trans_commit_hook *h;
	unsigned u64s = 0;
	bool marking = false;
	int ret;

	if (race_fault()) {
		trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
		return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
	}

	/*
	 * Check if the insert will fit in the leaf node with the write lock
	 * held, otherwise another thread could write the node changing the
	 * amount of space available:
	 */

	prefetch(&trans->c->journal.flags);

	h = trans->hooks;
	while (h) {
		ret = h->fn(trans, h);
		if (ret)
			return ret;
		h = h->next;
	}

	trans_for_each_update(trans, i) {
		/* Multiple inserts might go to same leaf: */
		if (!same_leaf_as_prev(trans, i))
			u64s = 0;

		u64s += i->k->k.u64s;
		ret = !i->cached
			? btree_key_can_insert(trans, insert_l(i)->b, u64s)
			: btree_key_can_insert_cached(trans, i->path, u64s);
		if (ret) {
			*stopped_at = i;
			return ret;
		}

		if (btree_node_type_needs_gc(i->bkey_type))
			marking = true;

		/*
		 * Revalidate before calling mem triggers - XXX, ugly:
		 *
		 * - successful btree node splits don't cause transaction
		 *   restarts and will have invalidated the pointer to the bkey
		 *   value
		 * - btree_node_lock_for_insert() -> btree_node_prep_for_write()
		 *   when it has to resort
		 * - btree_key_can_insert_cached() when it has to reallocate
		 *
		 * Ugly because we currently have no way to tell if the
		 * pointer's been invalidated, which means it's debatable
		 * whether we should be stashing the old key at all.
		 */
		i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;

		if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
							    i->k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}

	/*
	 * Don't get journal reservation until after we know insert will
	 * succeed:
	 */
	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		ret = bch2_trans_journal_res_get(trans,
				JOURNAL_RES_GET_NONBLOCK);
		if (ret)
			return ret;

		if (unlikely(trans->journal_transaction_names))
			journal_transaction_name(trans);
	} else {
		trans->journal_res.seq = c->journal.replay_journal_seq;
	}

	if (unlikely(trans->extra_journal_entries.nr)) {
		memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
				  trans->extra_journal_entries.data,
				  trans->extra_journal_entries.nr);

		trans->journal_res.offset	+= trans->extra_journal_entries.nr;
		trans->journal_res.u64s		-= trans->extra_journal_entries.nr;
	}

	/*
	 * Not allowed to fail after we've gotten our journal reservation - we
	 * have to use it:
	 */
	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
		if (bch2_journal_seq_verify)
			trans_for_each_update(trans, i)
				i->k->k.version.lo = trans->journal_res.seq;
		else if (bch2_inject_invalid_keys)
			trans_for_each_update(trans, i)
				i->k->k.version = MAX_VERSION;
	}

	if (trans->fs_usage_deltas &&
	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
		return BTREE_INSERT_NEED_MARK_REPLICAS;

	trans_for_each_update(trans, i)
		if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
			ret = run_one_mem_trigger(trans, i, i->flags);
			if (ret)
				return ret;
		}

	if (unlikely(c->gc_pos.phase)) {
		ret = bch2_trans_commit_run_gc_triggers(trans);
		if (ret)
			return ret;
	}

	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		trans_for_each_update(trans, i) {
			struct journal *j = &c->journal;
			struct jset_entry *entry;

			if (i->key_cache_already_flushed)
				continue;

			if (i->flags & BTREE_UPDATE_NOJOURNAL)
				continue;

			if (trans->journal_transaction_names) {
				entry = bch2_journal_add_entry(j, &trans->journal_res,
						       BCH_JSET_ENTRY_overwrite,
						       i->btree_id, i->level,
						       i->old_k.u64s);
				bkey_reassemble(&entry->start[0],
						(struct bkey_s_c) { &i->old_k, i->old_v });
			}

			entry = bch2_journal_add_entry(j, &trans->journal_res,
					       BCH_JSET_ENTRY_btree_keys,
					       i->btree_id, i->level,
					       i->k->k.u64s);
			bkey_copy(&entry->start[0], i->k);
		}

		if (trans->journal_seq)
			*trans->journal_seq = trans->journal_res.seq;
	}

	trans_for_each_update(trans, i) {
		i->k->k.needs_whiteout = false;

		if (!i->cached)
			btree_insert_key_leaf(trans, i);
		else if (!i->key_cache_already_flushed)
			bch2_btree_insert_key_cached(trans, i->path, i->k);
		else {
			bch2_btree_key_cache_drop(trans, i->path);
			btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
		}
	}

	return ret;
}

static inline void path_upgrade_readers(struct btree_trans *trans, struct btree_path *path)
{
	unsigned l;

	for (l = 0; l < BTREE_MAX_DEPTH; l++)
		if (btree_node_read_locked(path, l))
			BUG_ON(!bch2_btree_node_upgrade(trans, path, l));
}

static inline void upgrade_readers(struct btree_trans *trans, struct btree_path *path)
{
	struct btree *b = path_l(path)->b;
	unsigned l;

	do {
		for (l = 0; l < BTREE_MAX_DEPTH; l++)
			if (btree_node_read_locked(path, l))
				path_upgrade_readers(trans, path);
	} while ((path = prev_btree_path(trans, path)) &&
		 path_l(path)->b == b);
}

/*
 * Check for nodes that we have both read and intent locks on, and upgrade the
 * readers to intent:
 */
static inline void normalize_read_intent_locks(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i, nr_read = 0, nr_intent = 0;

	trans_for_each_path_inorder(trans, path, i) {
		struct btree_path *next = i + 1 < trans->nr_sorted
			? trans->paths + trans->sorted[i + 1]
			: NULL;

		switch (btree_node_locked_type(path, path->level)) {
		case BTREE_NODE_READ_LOCKED:
			nr_read++;
			break;
		case BTREE_NODE_INTENT_LOCKED:
			nr_intent++;
			break;
		}

		if (!next || path_l(path)->b != path_l(next)->b) {
			if (nr_read && nr_intent)
				upgrade_readers(trans, path);

			nr_read = nr_intent = 0;
		}
	}

	bch2_trans_verify_locks(trans);
}

static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path_inorder(trans, path, i) {
		//if (path == pos)
		//	break;

		if (btree_node_read_locked(path, path->level) &&
		    !bch2_btree_path_upgrade(trans, path, path->level + 1))
			return true;
	}

	return false;
}
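
/*
 * Take write locks on all leaf nodes we're inserting into; on failure, drop
 * whatever we've locked and restart the transaction to avoid deadlock.
 */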
static inline int trans_lock_write(struct btree_trans *trans)
{
	struct btree_insert_entry *i;
	int ret;

	trans_for_each_update(trans, i) {
		if (same_leaf_as_prev(trans, i))
			continue;

		/*
		 * six locks are unfair, and read locks block while a thread
		 * wants a write lock: thus, we need to tell the cycle detector
		 * we have a write lock _before_ taking the lock:
		 */
		mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write);

		if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
			if (have_conflicting_read_lock(trans, i->path))
				goto fail;

			ret = btree_node_lock_type(trans, i->path,
						   &insert_l(i)->b->c,
						   i->path->pos, i->level,
						   SIX_LOCK_write, NULL, NULL);
			BUG_ON(ret);
		}

		bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
	}

	return 0;
fail:
	mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent);

	while (--i >= trans->updates) {
		if (same_leaf_as_prev(trans, i))
			continue;

		bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
	}

	trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
	return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
}

static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{
	struct btree_insert_entry *i;

	trans_for_each_update(trans, i)
		bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
}

/*
 * Get journal reservation, take write locks, and attempt to do btree update(s):
 */
static inline int do_bch2_trans_commit(struct btree_trans *trans,
				       struct btree_insert_entry **stopped_at,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	struct printbuf buf = PRINTBUF;
	int ret, u64s_delta = 0;
	int rw = (trans->flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;

	trans_for_each_update(trans, i) {
		if (bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
				      i->bkey_type, rw, &buf)) {
			printbuf_reset(&buf);
			prt_printf(&buf, "invalid bkey on insert from %s -> %ps",
				   trans->fn, (void *) i->ip_allocated);
			prt_newline(&buf);
			printbuf_indent_add(&buf, 2);

			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
			prt_newline(&buf);

			bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
					  i->bkey_type, rw, &buf);

			bch2_trans_inconsistent(trans, "%s", buf.buf);
			printbuf_exit(&buf);
			return -EINVAL;
		}
		btree_insert_entry_checks(trans, i);
	}

	printbuf_exit(&buf);

	trans_for_each_update(trans, i) {
		if (i->cached)
			continue;

		u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
		u64s_delta -= i->old_btree_u64s;

		if (!same_leaf_as_next(trans, i)) {
			if (u64s_delta <= 0) {
				ret = bch2_foreground_maybe_merge(trans, i->path,
							i->level, trans->flags);
				if (unlikely(ret))
					return ret;
			}

			u64s_delta = 0;
		}
	}

	ret = bch2_journal_preres_get(&c->journal,
			&trans->journal_preres, trans->journal_preres_u64s,
			JOURNAL_RES_GET_NONBLOCK|
			(trans->flags & JOURNAL_WATERMARK_MASK));
	if (unlikely(ret == -EAGAIN))
		ret = bch2_trans_journal_preres_get_cold(trans,
						trans->journal_preres_u64s, trace_ip);
	if (unlikely(ret))
		return ret;

	normalize_read_intent_locks(trans);

	ret = trans_lock_write(trans);
	if (unlikely(ret))
		return ret;

	ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);

	if (!ret && unlikely(trans->journal_replay_not_finished))
		bch2_drop_overwrites_from_journal(trans);

	trans_for_each_update(trans, i)
		if (!same_leaf_as_prev(trans, i))
			bch2_btree_node_unlock_write_inlined(trans, i->path,
							insert_l(i)->b);

	if (!ret && trans->journal_pin)
		bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
				     trans->journal_pin, NULL);

	/*
	 * Drop journal reservation after dropping write locks, since dropping
	 * the journal reservation may kick off a journal write:
	 */
	bch2_journal_res_put(&c->journal, &trans->journal_res);

	if (unlikely(ret))
		return ret;

	bch2_trans_downgrade(trans);

	return 0;
}
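
/*
 * Wait condition for BTREE_INSERT_NEED_JOURNAL_RECLAIM below: done once the
 * journal has errored or the key cache no longer needs to wait; otherwise
 * kick journal reclaim and keep waiting.
 */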
static int journal_reclaim_wait_done(struct bch_fs *c)
{
	int ret = bch2_journal_error(&c->journal) ?:
		!bch2_btree_key_cache_must_wait(c);

	if (!ret)
		journal_reclaim_kick(&c->journal);

	return ret;
}
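
/*
 * Handle a commit failure: depending on the error, split the leaf, mark
 * replicas, wait for a journal reservation or for journal reclaim, then
 * return 0 to retry the commit or an error/restart to give up.
 */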
static noinline
int bch2_trans_commit_error(struct btree_trans *trans,
			    struct btree_insert_entry *i,
			    int ret, unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;

	switch (ret) {
	case BTREE_INSERT_BTREE_NODE_FULL:
		ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
		break;
	case BTREE_INSERT_NEED_MARK_REPLICAS:
		bch2_trans_unlock(trans);

		ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
		if (ret)
			break;

		ret = bch2_trans_relock(trans);
		if (ret)
			trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip);
		break;
	case BTREE_INSERT_NEED_JOURNAL_RES:
		bch2_trans_unlock(trans);

		if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
		    !(trans->flags & JOURNAL_WATERMARK_reserved)) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			break;
		}

		ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
		if (ret)
			break;

		ret = bch2_trans_relock(trans);
		if (ret)
			trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip);
		break;
	case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
		bch2_trans_unlock(trans);

		trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);

		wait_event_freezable(c->journal.reclaim_wait,
				     (ret = journal_reclaim_wait_done(c)));
		if (ret < 0)
			break;

		ret = bch2_trans_relock(trans);
		if (ret)
			trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip);
		break;
	default:
		BUG_ON(ret >= 0);
		break;
	}

	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);

	BUG_ON(ret == -ENOSPC &&
	       !(trans->flags & BTREE_INSERT_NOWAIT) &&
	       (trans->flags & BTREE_INSERT_NOFAIL));

	return ret;
}

static noinline int
bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	int ret;

	if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) ||
	    test_bit(BCH_FS_STARTED, &c->flags))
		return -EROFS;

	bch2_trans_unlock(trans);

	ret =   bch2_fs_read_write_early(c) ?:
		bch2_trans_relock(trans);
	if (ret)
		return ret;

	percpu_ref_get(&c->writes);
	return 0;
}

/*
 * This is for updates done in the early part of fsck - btree_gc - before we've
 * gone RW. We only add the new key to the list of keys for journal replay to
 * do.
 */
static noinline int
do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	int ret = 0;

	trans_for_each_update(trans, i) {
		ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
		if (ret)
			break;
	}

	return ret;
}
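
/*
 * Commit the updates queued in @trans: run triggers, size the journal
 * (pre)reservation, upgrade the paths we're inserting through, then call
 * do_bch2_trans_commit(), retrying via bch2_trans_commit_error() on failure.
 */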
int __bch2_trans_commit(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i = NULL;
	unsigned u64s;
	int ret = 0;

	if (!trans->nr_updates &&
	    !trans->extra_journal_entries.nr)
		goto out_reset;

	if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
		lockdep_assert_held(&c->gc_lock);

	ret = bch2_trans_commit_run_triggers(trans);
	if (ret)
		goto out_reset;

	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
		ret = do_bch2_trans_commit_to_journal_replay(trans);
		goto out_reset;
	}

	if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
	    unlikely(!percpu_ref_tryget_live(&c->writes))) {
		ret = bch2_trans_commit_get_rw_cold(trans);
		if (ret)
			goto out_reset;
	}

	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));

	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));

	trans->journal_u64s		= trans->extra_journal_entries.nr;
	trans->journal_preres_u64s	= 0;

	trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);

	if (trans->journal_transaction_names)
		trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

	trans_for_each_update(trans, i) {
		BUG_ON(!i->path->should_be_locked);

		if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
			trace_and_count(c, trans_restart_upgrade, trans, _RET_IP_, i->path);
			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
			goto out;
		}

		BUG_ON(!btree_node_intent_locked(i->path, i->level));

		if (i->key_cache_already_flushed)
			continue;

		/* we're going to journal the key being updated: */
		u64s = jset_u64s(i->k->k.u64s);
		if (i->cached &&
		    likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
			trans->journal_preres_u64s += u64s;

		if (i->flags & BTREE_UPDATE_NOJOURNAL)
			continue;

		trans->journal_u64s += u64s;

		/* and we're also going to log the overwrite: */
		if (trans->journal_transaction_names)
			trans->journal_u64s += jset_u64s(i->old_k.u64s);
	}

	if (trans->extra_journal_res) {
		ret = bch2_disk_reservation_add(c, trans->disk_res,
				trans->extra_journal_res,
				(trans->flags & BTREE_INSERT_NOFAIL)
				? BCH_DISK_RESERVATION_NOFAIL : 0);
		if (ret)
			goto err;
	}
retry:
	BUG_ON(trans->restarted);
	memset(&trans->journal_res, 0, sizeof(trans->journal_res));

	ret = do_bch2_trans_commit(trans, &i, _RET_IP_);

	/* make sure we didn't drop or screw up locks: */
	bch2_trans_verify_locks(trans);

	if (ret)
		goto err;

	trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
	bch2_journal_preres_put(&c->journal, &trans->journal_preres);

	if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
		percpu_ref_put(&c->writes);
out_reset:
	bch2_trans_reset_updates(trans);

	if (trans->fs_usage_deltas) {
		trans->fs_usage_deltas->used = 0;
		memset((void *) trans->fs_usage_deltas +
		       offsetof(struct replicas_delta_list, memset_start), 0,
		       (void *) &trans->fs_usage_deltas->memset_end -
		       (void *) &trans->fs_usage_deltas->memset_start);
	}

	return ret;
err:
	ret = bch2_trans_commit_error(trans, i, ret, _RET_IP_);
	if (ret)
		goto out;

	goto retry;
}
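
/*
 * Helper for extent merging: returns >0 if a key at @pos also exists in an
 * ancestor snapshot (only checked when pos.snapshot has children) - the
 * callers skip merging in that case - 0 otherwise, or a negative error.
 */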
static int check_pos_snapshot_overwritten(struct btree_trans *trans,
					  enum btree_id id,
					  struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	if (!btree_type_has_snapshots(id))
		return 0;

	if (!snapshot_t(c, pos.snapshot)->children[0])
		return 0;

	bch2_trans_iter_init(trans, &iter, id, pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while (1) {
		k = bch2_btree_iter_prev(&iter);
		ret = bkey_err(k);
		if (ret)
			break;

		if (!k.k)
			break;

		if (bkey_cmp(pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
			ret = 1;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
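
/*
 * Try to merge @insert with the existing key @k it abuts: if the merge
 * succeeds (and the position isn't also visible in other snapshots), delete
 * the existing key and continue with the merged key via *insert.
 */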
static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
				       struct bkey_i **insert,
				       enum btree_update_flags flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *update;
	int ret;

	update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	bkey_reassemble(update, k);

	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret =   check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p) ?:
		check_pos_snapshot_overwritten(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret =   check_pos_snapshot_overwritten(trans, iter->btree_id, insert->k.p) ?:
		check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	bch2_bkey_merge(c, bkey_i_to_s(insert), k);
	return 0;
}
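
/*
 * Extent updates are implemented in terms of regular (non extent) updates:
 * overlapping extents are trimmed, split or whited out as needed, and
 * front/back merging is attempted, before the final key is queued with
 * bch2_trans_update().
 */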
2022-01-08 21:22:31 -05:00
int bch2_trans_update_extent ( struct btree_trans * trans ,
struct btree_iter * orig_iter ,
struct bkey_i * insert ,
enum btree_update_flags flags )
2021-08-25 01:03:25 -04:00
{
2021-08-30 15:18:31 -04:00
struct btree_iter iter , update_iter ;
2021-08-25 01:03:25 -04:00
struct bpos start = bkey_start_pos ( & insert - > k ) ;
struct bkey_i * update ;
struct bkey_s_c k ;
enum btree_id btree_id = orig_iter - > btree_id ;
int ret = 0 , compressed_sectors ;
2021-08-30 15:18:31 -04:00
bch2_trans_iter_init ( trans , & iter , btree_id , start ,
BTREE_ITER_INTENT |
BTREE_ITER_WITH_UPDATES |
BTREE_ITER_NOT_EXTENTS ) ;
2022-03-11 12:31:52 -05:00
k = bch2_btree_iter_peek_upto ( & iter , POS ( insert - > k . p . inode , U64_MAX ) ) ;
2021-08-25 01:03:25 -04:00
if ( ( ret = bkey_err ( k ) ) )
goto err ;
if ( ! k . k )
goto out ;
if ( ! bkey_cmp ( k . k - > p , bkey_start_pos ( & insert - > k ) ) ) {
if ( bch2_bkey_maybe_mergable ( k . k , & insert - > k ) ) {
2021-08-30 15:18:31 -04:00
ret = extent_front_merge ( trans , & iter , k , & insert , flags ) ;
2021-08-25 01:03:25 -04:00
if ( ret )
2021-10-21 14:33:31 -04:00
goto err ;
2021-08-25 01:03:25 -04:00
}
goto next ;
}
while ( bkey_cmp ( insert - > k . p , bkey_start_pos ( k . k ) ) > 0 ) {
2021-02-02 17:09:10 -05:00
bool front_split = bkey_cmp ( bkey_start_pos ( k . k ) , start ) < 0 ;
bool back_split = bkey_cmp ( k . k - > p , insert - > k . p ) > 0 ;
2021-08-25 01:03:25 -04:00
/*
* If we ' re going to be splitting a compressed extent , note it
* so that __bch2_trans_commit ( ) can increase our disk
* reservation :
*/
2021-02-02 17:09:10 -05:00
if ( ( ( front_split & & back_split ) | |
( ( front_split | | back_split ) & & k . k - > p . snapshot ! = insert - > k . p . snapshot ) ) & &
2021-08-25 01:03:25 -04:00
( compressed_sectors = bch2_bkey_sectors_compressed ( k ) ) )
trans - > extra_journal_res + = compressed_sectors ;
2021-02-02 17:09:10 -05:00
if ( front_split ) {
2021-08-25 01:03:25 -04:00
update = bch2_trans_kmalloc ( trans , bkey_bytes ( k . k ) ) ;
if ( ( ret = PTR_ERR_OR_ZERO ( update ) ) )
goto err ;
bkey_reassemble ( update , k ) ;
bch2_cut_back ( start , update ) ;
2021-08-30 15:18:31 -04:00
bch2_trans_iter_init ( trans , & update_iter , btree_id , update - > k . p ,
BTREE_ITER_NOT_EXTENTS |
2021-02-02 17:09:10 -05:00
BTREE_ITER_ALL_SNAPSHOTS |
BTREE_ITER_INTENT ) ;
ret = bch2_btree_iter_traverse ( & update_iter ) ? :
bch2_trans_update ( trans , & update_iter , update ,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE |
flags ) ;
bch2_trans_iter_exit ( trans , & update_iter ) ;
if ( ret )
goto err ;
}
if ( k . k - > p . snapshot ! = insert - > k . p . snapshot & &
( front_split | | back_split ) ) {
update = bch2_trans_kmalloc ( trans , bkey_bytes ( k . k ) ) ;
if ( ( ret = PTR_ERR_OR_ZERO ( update ) ) )
goto err ;
bkey_reassemble ( update , k ) ;
bch2_cut_front ( start , update ) ;
bch2_cut_back ( insert - > k . p , update ) ;
bch2_trans_iter_init ( trans , & update_iter , btree_id , update - > k . p ,
BTREE_ITER_NOT_EXTENTS |
BTREE_ITER_ALL_SNAPSHOTS |
2021-08-30 15:18:31 -04:00
BTREE_ITER_INTENT ) ;
ret = bch2_btree_iter_traverse ( & update_iter ) ? :
bch2_trans_update ( trans , & update_iter , update ,
2021-08-25 01:03:25 -04:00
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE |
flags ) ;
2021-08-30 15:18:31 -04:00
bch2_trans_iter_exit ( trans , & update_iter ) ;
2021-08-25 01:03:25 -04:00
if ( ret )
goto err ;
}
if ( bkey_cmp ( k . k - > p , insert - > k . p ) < = 0 ) {
2021-02-02 17:09:10 -05:00
update = bch2_trans_kmalloc ( trans , sizeof ( * update ) ) ;
if ( ( ret = PTR_ERR_OR_ZERO ( update ) ) )
goto err ;
bkey_init ( & update - > k ) ;
update - > k . p = k . k - > p ;
if ( insert - > k . p . snapshot ! = k . k - > p . snapshot ) {
update - > k . p . snapshot = insert - > k . p . snapshot ;
update - > k . type = KEY_TYPE_whiteout ;
}
bch2_trans_iter_init ( trans , & update_iter , btree_id , update - > k . p ,
BTREE_ITER_NOT_EXTENTS |
BTREE_ITER_INTENT ) ;
ret = bch2_btree_iter_traverse ( & update_iter ) ? :
bch2_trans_update ( trans , & update_iter , update ,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE |
flags ) ;
bch2_trans_iter_exit ( trans , & update_iter ) ;
2021-08-25 01:03:25 -04:00
if ( ret )
goto err ;
}
2021-02-02 17:09:10 -05:00
if ( back_split ) {
2021-08-25 01:03:25 -04:00
update = bch2_trans_kmalloc ( trans , bkey_bytes ( k . k ) ) ;
if ( ( ret = PTR_ERR_OR_ZERO ( update ) ) )
goto err ;
bkey_reassemble ( update , k ) ;
bch2_cut_front ( insert - > k . p , update ) ;
2022-01-08 21:22:31 -05:00
ret = bch2_trans_update_by_path ( trans , iter . path , update ,
2021-02-02 17:09:10 -05:00
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE |
flags ) ;
2021-09-06 15:38:12 -04:00
if ( ret )
goto err ;
2021-08-25 01:03:25 -04:00
goto out ;
}
next :
2022-03-11 12:31:52 -05:00
bch2_btree_iter_advance ( & iter ) ;
k = bch2_btree_iter_peek_upto ( & iter , POS ( insert - > k . p . inode , U64_MAX ) ) ;
2021-08-25 01:03:25 -04:00
if ( ( ret = bkey_err ( k ) ) )
goto err ;
if ( ! k . k )
goto out ;
}
	if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
		ret = extent_back_merge(trans, &iter, insert, k);
		if (ret)
			goto err;
	}
out:
	if (!bkey_deleted(&insert->k)) {
		/*
		 * Rewinding iterators is expensive: get a new one and the one
		 * that points to the start of insert will be cloned from:
		 */
		bch2_trans_iter_exit(trans, &iter);
		bch2_trans_iter_init(trans, &iter, btree_id, insert->k.p,
				     BTREE_ITER_NOT_EXTENTS|
				     BTREE_ITER_INTENT);
		ret   = bch2_btree_iter_traverse(&iter) ?:
			bch2_trans_update(trans, &iter, insert, flags);
	}
err:
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
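
/*
 * Worked example (illustrative only, not taken from a real trace): the
 * extents btree holds one extent covering sectors [0, 128) and we insert
 * [32, 64) in the same snapshot. The overlap loop above queues, in order:
 *
 *   - a front split: a copy of the old extent trimmed to [0, 32)
 *   - a back split:  a copy trimmed to [64, 128), queued at the old key's
 *     position so it replaces the original [0, 128) key
 *
 * and the tail of the function then queues the new [32, 64) extent itself.
 * If instead the old extent had been [0, 48) - i.e. it ends inside the new
 * extent - the bkey_cmp(k.k->p, insert->k.p) <= 0 branch would queue a
 * deletion (or a whiteout, when the old key belongs to a different snapshot)
 * for the old key instead of a back split.
 */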

/*
 * When deleting, check if we need to emit a whiteout (because we're
 * overwriting something in an ancestor snapshot)
 */
static int need_whiteout_for_snapshot(struct btree_trans *trans,
				      enum btree_id btree_id, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot = pos.snapshot;
	int ret;

	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
		return 0;

	pos.snapshot++;

	for_each_btree_key_norestart(trans, iter, btree_id, pos,
			   BTREE_ITER_ALL_SNAPSHOTS|
			   BTREE_ITER_NOPRESERVE, k, ret) {
		if (bkey_cmp(k.k->p, pos))
			break;

		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
					      k.k->p.snapshot)) {
			ret = !bkey_whiteout(k.k);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
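
/*
 * Example (illustrative): snapshot 2 is a child of snapshot 1, and the btree
 * contains a key at POS(0, 10) in snapshot 1. Deleting POS(0, 10) from
 * snapshot 2 can't just drop a key - there is nothing to drop in snapshot 2 -
 * it has to leave a whiteout at SPOS(0, 10, 2), otherwise lookups in snapshot
 * 2 would keep falling through to the ancestor's key. bch2_trans_update()
 * below calls this helper for deletions on BTREE_ITER_FILTER_SNAPSHOTS
 * iterators and rewrites the deletion to KEY_TYPE_whiteout when it returns
 * true.
 */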

static int __must_check
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
				struct bkey_i *k, enum btree_update_flags flags,
				unsigned long ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i, n;
	int ret = 0;

	BUG_ON(!path->should_be_locked);
	BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
	BUG_ON(bpos_cmp(k->k.p, path->pos));

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
		.cached		= path->cached,
		.path		= path,
		.k		= k,
		.ip_allocated	= ip,
	};

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
#endif

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	trans_for_each_update(trans, i)
		if (btree_insert_entry_cmp(&n, i) <= 0)
			break;

	if (i < trans->updates + trans->nr_updates &&
	    !btree_insert_entry_cmp(&n, i)) {
		BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);

		bch2_path_put(trans, i->path, true);
		i->flags	= n.flags;
		i->cached	= n.cached;
		i->k		= n.k;
		i->path		= n.path;
		i->ip_allocated	= n.ip_allocated;
	} else {
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

		i->old_v = bch2_btree_path_peek_slot(path, &i->old_k).v;
		i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

		if (unlikely(trans->journal_replay_not_finished)) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}
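
	/*
	 * Example (illustrative): if a transaction does
	 *
	 *	bch2_trans_update(trans, &iter, &a, 0);
	 *	bch2_trans_update(trans, &iter, &b, 0);
	 *
	 * at the same position, the second call takes the existing-entry
	 * branch above: the queued entry keeps the old_k/old_v captured by
	 * the first call but now points at b, so only b is written at commit
	 * time and the update list stays sorted with one entry per position.
	 */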

	__btree_path_get(i->path, true);

	/*
	 * If a key is present in the key cache, it must also exist in the
	 * btree - this is necessary for cache coherency. When iterating over
	 * a btree that's cached in the key cache, the btree iter code checks
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
	if (path->cached &&
	    bkey_deleted(&i->old_k) &&
	    !(flags & BTREE_UPDATE_NO_KEY_CACHE_COHERENCY)) {
		struct btree_path *btree_path;

		i->key_cache_already_flushed = true;
		i->flags |= BTREE_TRIGGER_NORUN;

		btree_path = bch2_path_get(trans, path->btree_id, path->pos,
					   1, 0, BTREE_ITER_INTENT);

		ret = bch2_btree_path_traverse(trans, btree_path, 0);
		if (ret)
			goto err;

		btree_path_set_should_be_locked(btree_path);
		ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
err:
		bch2_path_put(trans, btree_path, true);
	}

	return ret;
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
			  struct bkey_i *k, enum btree_update_flags flags)
{
	return bch2_trans_update_by_path_trace(trans, path, k, flags, _RET_IP_);
}

int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_path *path = iter->update_path ?: iter->path;
	struct bkey_cached *ck;
	int ret;

	if (iter->flags & BTREE_ITER_IS_EXTENTS)
		return bch2_trans_update_extent(trans, iter, k, flags);

	if (bkey_deleted(&k->k) &&
	    !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
		if (unlikely(ret < 0))
			return ret;

		if (ret)
			k->k.type = KEY_TYPE_whiteout;
	}

	/*
	 * Ensure that updates to cached btrees go to the key cache:
	 */
	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    !path->cached &&
	    !path->level &&
	    btree_id_cached(trans->c, path->btree_id)) {
		if (!iter->key_cache_path ||
		    !iter->key_cache_path->should_be_locked ||
		    bpos_cmp(iter->key_cache_path->pos, k->k.p)) {
			if (!iter->key_cache_path)
				iter->key_cache_path =
					bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
						      BTREE_ITER_INTENT|BTREE_ITER_CACHED);

			iter->key_cache_path =
				bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
							iter->flags & BTREE_ITER_INTENT);

			ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
						       BTREE_ITER_CACHED);
			if (unlikely(ret))
				return ret;

			ck = (void *) iter->key_cache_path->l[0].b;

			if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
				trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
				return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
			}

			btree_path_set_should_be_locked(iter->key_cache_path);
		}

		path = iter->key_cache_path;
	}

	return bch2_trans_update_by_path(trans, path, k, flags);
}
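
/*
 * Usage sketch (a minimal illustration, not copied from a real caller -
 * BTREE_ID_xattrs, pos and new_key are placeholders): a typical transactional
 * update traverses an intent iterator, queues the key, then commits:
 *
 *	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
 *			     BTREE_ITER_INTENT);
 *	ret = bch2_btree_iter_traverse(&iter) ?:
 *	      bch2_trans_update(trans, &iter, &new_key, 0) ?:
 *	      bch2_trans_commit(trans, NULL, NULL, 0);
 *	bch2_trans_iter_exit(trans, &iter);
 *
 * Nothing hits the btree until bch2_trans_commit(); bch2_trans_update() only
 * queues the key on trans->updates via bch2_trans_update_by_path().
 */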

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next		= trans->hooks;
	trans->hooks	= h;
}
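
/*
 * Usage sketch (illustrative; my_hook and my_commit_fn are placeholders, and
 * this assumes the callback member declared in struct btree_trans_commit_hook
 * is the fn pointer): callers embed the hook in their own state, set the
 * callback and register it before committing, and the hook runs as part of
 * this transaction's commit:
 *
 *	struct my_hook {
 *		struct btree_trans_commit_hook	h;
 *		struct bpos			pos;
 *	};
 *
 *	hook->h.fn = my_commit_fn;
 *	bch2_trans_commit_hook(trans, &hook->h);
 *
 * Because hooks are pushed onto the front of trans->hooks, the most recently
 * registered hook sits at the head of the list.
 */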

int __bch2_btree_insert(struct btree_trans *trans,
			enum btree_id id, struct bkey_i *k)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_INTENT);
	ret   = bch2_btree_iter_traverse(&iter) ?:
		bch2_trans_update(trans, &iter, k, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a key into the given btree
 * @c:			pointer to struct bch_fs
 * @id:			btree to insert into
 * @k:			key to insert
 * @disk_res:		disk reservation for the commit, may be NULL
 * @journal_seq:	optional pointer to a journal sequence number
 * @flags:		transaction commit flags
 *
 * Runs __bch2_btree_insert() in its own transaction and commits it.
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
		      struct bkey_i *k,
		      struct disk_reservation *disk_res,
		      u64 *journal_seq, int flags)
{
	return bch2_trans_do(c, disk_res, journal_seq, flags,
			     __bch2_btree_insert(&trans, id, k));
}
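
/*
 * Usage sketch (illustrative; BTREE_ID_inodes and new_key are placeholders):
 * for a one-off insert outside of any existing transaction, given an already
 * initialized key:
 *
 *	ret = bch2_btree_insert(c, BTREE_ID_inodes, &new_key->k_i,
 *				NULL, NULL, 0);
 *
 * Callers that are already inside a btree_trans should use
 * __bch2_btree_insert() (or bch2_trans_update() directly) so the insert
 * commits atomically with the rest of the transaction.
 */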

int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
				unsigned len, unsigned update_flags)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = iter->pos;
	bch2_key_resize(&k->k, len);

	return bch2_trans_update(trans, iter, k, update_flags);
}

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned update_flags)
{
	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}

int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  unsigned update_flags,
				  u64 *journal_seq)
{
	u32 restart_count = trans->restart_count;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
retry:
	while ((k = bch2_btree_iter_peek(&iter)).k &&
	       !(ret = bkey_err(k) ?:
		 btree_trans_too_many_iters(trans)) &&
	       bkey_cmp(iter.pos, end) < 0) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_IS_EXTENTS) {
			unsigned max_sectors =
				KEY_SIZE_MAX & (~0 << trans->c->block_bits);

			/* create the biggest key we can */
			bch2_key_resize(&delete.k, max_sectors);
			bch2_cut_back(end, &delete);

			ret = bch2_extent_trim_atomic(trans, &iter, &delete);
			if (ret)
				break;
		}

		ret   = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
			bch2_trans_commit(trans, &disk_res, journal_seq,
					  BTREE_INSERT_NOFAIL);
		bch2_disk_reservation_put(trans->c, &disk_res);
		if (ret)
			break;
	}

	if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		bch2_trans_begin(trans);
		ret = 0;
		goto retry;
	}

	bch2_trans_iter_exit(trans, &iter);

	if (!ret && trans_was_restarted(trans, restart_count))
		ret = -BCH_ERR_transaction_restart_nested;

	return ret;
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    unsigned update_flags,
			    u64 *journal_seq)
{
	int ret = bch2_trans_run(c,
			bch2_btree_delete_range_trans(&trans, id, start, end,
						      update_flags, journal_seq));
	if (ret == -BCH_ERR_transaction_restart_nested)
		ret = 0;
	return ret;
}
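
/*
 * Usage sketch (illustrative; the btree id and inode number are
 * placeholders): because the range is half open, deleting every key
 * belonging to inode 42 in the xattrs btree looks like:
 *
 *	ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
 *				      POS(42, 0),
 *				      POS(42, U64_MAX),
 *				      0, NULL);
 *
 * A key at exactly POS(42, U64_MAX) would not be deleted; callers that need
 * the final position included must extend end past it.
 */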

int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
{
	unsigned len = strlen(msg);
	unsigned u64s = DIV_ROUND_UP(len, sizeof(u64));
	struct jset_entry_log *l;
	int ret;

	ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s));
	if (ret)
		return ret;

	l = (void *) &darray_top(trans->extra_journal_entries);
	l->entry.u64s		= cpu_to_le16(u64s);
	l->entry.btree_id	= 0;
	l->entry.level		= 1;
	l->entry.type		= BCH_JSET_ENTRY_log;
	l->entry.pad[0]		= 0;
	l->entry.pad[1]		= 0;
	l->entry.pad[2]		= 0;
	memcpy(l->d, msg, len);
	while (len & 7)
		l->d[len++] = '\0';

	trans->extra_journal_entries.nr += jset_u64s(u64s);
	return 0;
}
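
/*
 * Worked example (illustrative): logging the message "hello" gives len = 5,
 * so u64s = DIV_ROUND_UP(5, 8) = 1 and the entry payload is a single u64.
 * memcpy() copies the five bytes and the while loop zero-fills bytes 5..7,
 * padding the payload to the next 8 byte boundary before the entry is queued
 * on trans->extra_journal_entries.
 */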