// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
#include "extent_update.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "replicas.h"
#include "trace.h"

#include <linux/prefetch.h>
#include <linux/sort.h>
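
/*
 * Pending transaction updates are kept sorted: ascending by btree ID, then
 * descending by level, then ascending by key position, as implemented by the
 * comparator below:
 */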
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id, r->btree_id) ?:
		-cmp_int(l->level, r->level) ?:
		 bpos_cmp(l->k->k.p, r->k->k.p);
}

static inline bool same_leaf_as_prev(struct btree_trans *trans,
				     struct btree_insert_entry *i)
{
	return i != trans->updates &&
		iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
}
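
/*
 * Take the write lock on a leaf we're about to insert into; this may also
 * finish up a write that just completed, and start a new bset if the current
 * one is full or has already been written:
 */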
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
					    struct btree_iter *iter)
{
	bch2_btree_node_lock_write(b, iter);

	if (btree_iter_type(iter) == BTREE_ITER_CACHED)
		return;

	if (unlikely(btree_node_just_written(b)) &&
	    bch2_btree_post_write_cleanup(c, b))
		bch2_btree_iter_reinit_node(iter, b);

	/*
	 * If the last bset has been written, or if it's gotten too big - start
	 * a new bset to insert into:
	 */
	if (want_new_bset(c, b))
		bch2_btree_init_next(c, b, iter);
}

/* Inserting into a given leaf node (last stage of insert): */

/* Handle overwrites and do insert, for non extents: */
bool bch2_btree_bset_insert_key(struct btree_iter *iter,
				struct btree *b,
				struct btree_node_iter *node_iter,
				struct bkey_i *insert)
{
	struct bkey_packed *k;
	unsigned clobber_u64s = 0, new_u64s = 0;

	EBUG_ON(btree_node_just_written(b));
	EBUG_ON(bset_written(b, btree_bset_last(b)));
	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
	EBUG_ON(bpos_cmp(insert->k.p, b->data->min_key) < 0);
	EBUG_ON(bpos_cmp(insert->k.p, b->data->max_key) > 0);
	EBUG_ON(insert->k.u64s >
		bch_btree_keys_u64s_remaining(iter->trans->c, b));
	EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);

	k = bch2_btree_node_iter_peek_all(node_iter, b);
	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
		k = NULL;

	/* @k is the key being overwritten/deleted, if any: */
	EBUG_ON(k && bkey_deleted(k));

	/* Deleting, but not found? nothing to do: */
	if (bkey_deleted(&insert->k) && !k)
		return false;

	if (bkey_deleted(&insert->k)) {
		/* Deleting: */
		btree_account_key_drop(b, k);
		k->type = KEY_TYPE_deleted;

		if (k->needs_whiteout)
			push_whiteout(iter->trans->c, b, insert->k.p);
		k->needs_whiteout = false;

		if (k >= btree_bset_last(b)->start) {
			clobber_u64s = k->u64s;
			bch2_bset_delete(b, k, clobber_u64s);
			goto fix_iter;
		} else {
			bch2_btree_iter_fix_key_modified(iter, b, k);
		}

		return true;
	}

	if (k) {
		/* Overwriting: */
		btree_account_key_drop(b, k);
		k->type = KEY_TYPE_deleted;

		insert->k.needs_whiteout = k->needs_whiteout;
		k->needs_whiteout = false;

		if (k >= btree_bset_last(b)->start) {
			clobber_u64s = k->u64s;
			goto overwrite;
		} else {
			bch2_btree_iter_fix_key_modified(iter, b, k);
		}
	}

	k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
overwrite:
	bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
	new_u64s = k->u64s;
fix_iter:
	if (clobber_u64s != new_u64s)
		bch2_btree_node_iter_fix(iter, b, node_iter, k,
					 clobber_u64s, new_u64s);
	return true;
}

static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
			      unsigned i, u64 seq)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct btree_write *w = container_of(pin, struct btree_write, journal);
	struct btree *b = container_of(w, struct btree, writes[i]);

	btree_node_lock_type(c, b, SIX_LOCK_read);
	bch2_btree_node_write_cond(c, b,
		(btree_current_write(b) == w && w->journal.seq == seq));
	six_unlock_read(&b->c.lock);
	return 0;
}

static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 0, seq);
}

static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 1, seq);
}
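
/*
 * Each btree node has two btree_write structures (writes[0] and writes[1]),
 * so that a new write can be set up while a previous one is still in flight;
 * the journal pin's flush callback has to match the write the keys went into:
 */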
inline void bch2_btree_add_journal_pin(struct bch_fs *c,
				       struct btree *b, u64 seq)
{
	struct btree_write *w = btree_current_write(b);

	bch2_journal_pin_add(&c->journal, seq, &w->journal,
			     btree_node_write_idx(b) == 0
			     ? btree_node_flush0
			     : btree_node_flush1);
}

/**
 * btree_insert_key_leaf - insert one key into a leaf node
 */
static bool btree_insert_key_leaf(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_i *insert)
{
	struct bch_fs *c = trans->c;
	struct btree *b = iter_l(iter)->b;
	struct bset_tree *t = bset_tree_last(b);
	struct bset *i = bset(b, t);
	int old_u64s = bset_u64s(t);
	int old_live_u64s = b->nr.live_u64s;
	int live_u64s_added, u64s_added;

	EBUG_ON(!iter->level &&
		!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));

	if (unlikely(!bch2_btree_bset_insert_key(iter, b,
					&iter_l(iter)->iter, insert)))
		return false;

	i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
					 le64_to_cpu(i->journal_seq)));

	bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);

	if (unlikely(!btree_node_dirty(b)))
		set_btree_node_dirty(c, b);

	live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
	u64s_added = (int) bset_u64s(t) - old_u64s;

	if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
	if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
		b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);

	if (u64s_added > live_u64s_added &&
	    bch2_maybe_compact_whiteouts(c, b))
		bch2_btree_iter_reinit_node(iter, b);

	trace_btree_insert_key(c, b, insert);
	return true;
}

/* Cached btree updates: */

/* Normal update interface: */

static inline void btree_insert_entry_checks(struct btree_trans *trans,
					     struct btree_insert_entry *i)
{
	BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
	BUG_ON(i->level		!= i->iter->level);
	BUG_ON(i->btree_id	!= i->iter->btree_id);
}

static noinline int
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
				   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	int ret;

	bch2_trans_unlock(trans);

	ret = bch2_journal_preres_get(&c->journal,
			&trans->journal_preres, u64s, 0);
	if (ret)
		return ret;

	if (!bch2_trans_relock(trans)) {
		trace_trans_restart_journal_preres_get(trans->ip, trace_ip);
		return -EINTR;
	}

	return 0;
}

static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
					     unsigned flags)
{
	struct bch_fs *c = trans->c;
	int ret;

	if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
		flags |= JOURNAL_RES_GET_RESERVED;

	ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
				   trans->journal_u64s, flags);

	return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
}

static enum btree_insert_ret
btree_key_can_insert(struct btree_trans *trans,
		     struct btree_iter *iter,
		     unsigned u64s)
{
	struct bch_fs *c = trans->c;
	struct btree *b = iter_l(iter)->b;

	if (!bch2_btree_node_insert_fits(c, b, u64s))
		return BTREE_INSERT_BTREE_NODE_FULL;

	return BTREE_INSERT_OK;
}

static enum btree_insert_ret
btree_key_can_insert_cached(struct btree_trans *trans,
			    struct btree_iter *iter,
			    unsigned u64s)
{
	struct bkey_cached *ck = (void *) iter->l[0].b;
	unsigned new_u64s;
	struct bkey_i *new_k;

	BUG_ON(iter->level);

	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
	    bch2_btree_key_cache_must_wait(trans->c) &&
	    !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
		return BTREE_INSERT_NEED_JOURNAL_RECLAIM;

	/*
	 * bch2_varint_decode can read past the end of the buffer by at most 7
	 * bytes (it won't be used):
	 */
	u64s += 1;

	if (u64s <= ck->u64s)
		return BTREE_INSERT_OK;

	new_u64s	= roundup_pow_of_two(u64s);
	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
	if (!new_k)
		return -ENOMEM;

	ck->u64s	= new_u64s;
	ck->k		= new_k;
	return BTREE_INSERT_OK;
}

static inline void do_btree_insert_one(struct btree_trans *trans,
				       struct btree_insert_entry *i)
{
	struct bch_fs *c = trans->c;
	struct journal *j = &c->journal;
	bool did_work;

	EBUG_ON(trans->journal_res.ref !=
		!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));

	i->k->k.needs_whiteout = false;

	did_work = (btree_iter_type(i->iter) != BTREE_ITER_CACHED)
		? btree_insert_key_leaf(trans, i->iter, i->k)
		: bch2_btree_insert_key_cached(trans, i->iter, i->k);
	if (!did_work)
		return;

	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		bch2_journal_add_keys(j, &trans->journal_res,
				      i->btree_id,
				      i->level,
				      i->k);

		bch2_journal_set_has_inode(j, &trans->journal_res,
					   i->k->k.p.inode);

		if (trans->journal_seq)
			*trans->journal_seq = trans->journal_res.seq;
	}
}

static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
{
	__bch2_btree_iter_unlock(iter);
}

static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;

	trans_for_each_update(trans, i) {
		/*
		 * XXX: synchronization of cached update triggers with gc
		 */
		BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED);

		if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
			bch2_mark_update(trans, i->iter, i->k, NULL,
					 i->trigger_flags|BTREE_TRIGGER_GC);
	}
}

static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans,
			       struct btree_insert_entry **stopped_at,
			       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	struct btree_trans_commit_hook *h;
	unsigned u64s = 0;
	bool marking = false;
	int ret;

	if (race_fault()) {
		trace_trans_restart_fault_inject(trans->ip, trace_ip);
		return -EINTR;
	}

	/*
	 * Check if the insert will fit in the leaf node with the write lock
	 * held, otherwise another thread could write the node changing the
	 * amount of space available:
	 */

	prefetch(&trans->c->journal.flags);

	h = trans->hooks;
	while (h) {
		ret = h->fn(trans, h);
		if (ret)
			return ret;
		h = h->next;
	}

	trans_for_each_update(trans, i) {
		/* Multiple inserts might go to same leaf: */
		if (!same_leaf_as_prev(trans, i))
			u64s = 0;

		u64s += i->k->k.u64s;
		ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED
			? btree_key_can_insert(trans, i->iter, u64s)
			: btree_key_can_insert_cached(trans, i->iter, u64s);
		if (ret) {
			*stopped_at = i;
			return ret;
		}

		if (btree_node_type_needs_gc(i->bkey_type))
			marking = true;
	}

	if (marking) {
		percpu_down_read(&c->mark_lock);
	}

	/* Must be called under mark_lock: */
	if (marking && trans->fs_usage_deltas &&
	    !bch2_replicas_delta_list_marked(c, trans->fs_usage_deltas)) {
		ret = BTREE_INSERT_NEED_MARK_REPLICAS;
		goto err;
	}

	/*
	 * Don't get journal reservation until after we know insert will
	 * succeed:
	 */
	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		ret = bch2_trans_journal_res_get(trans,
				JOURNAL_RES_GET_NONBLOCK);
		if (ret)
			goto err;
	} else {
		trans->journal_res.seq = c->journal.replay_journal_seq;
	}

	if (unlikely(trans->extra_journal_entry_u64s)) {
		memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
				  trans->extra_journal_entries,
				  trans->extra_journal_entry_u64s);

		trans->journal_res.offset	+= trans->extra_journal_entry_u64s;
		trans->journal_res.u64s		-= trans->extra_journal_entry_u64s;
	}

	/*
	 * Not allowed to fail after we've gotten our journal reservation - we
	 * have to use it:
	 */
	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
		if (bch2_journal_seq_verify)
			trans_for_each_update(trans, i)
				i->k->k.version.lo = trans->journal_res.seq;
		else if (bch2_inject_invalid_keys)
			trans_for_each_update(trans, i)
				i->k->k.version = MAX_VERSION;
	}

	trans_for_each_update(trans, i)
		if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
			bch2_mark_update(trans, i->iter, i->k,
					 NULL, i->trigger_flags);

	if (marking && trans->fs_usage_deltas)
		bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas);

	if (unlikely(c->gc_pos.phase))
		bch2_trans_mark_gc(trans);

	trans_for_each_update(trans, i)
		do_btree_insert_one(trans, i);
err:
	if (marking) {
		percpu_up_read(&c->mark_lock);
	}

	return ret;
}

static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree_iter *iter)
{
	struct btree_insert_entry *i;
	struct btree *b = iter_l(iter)->b;
	struct bkey_s_c old;
	int u64s_delta = 0;
	int ret;

	/*
	 * Inserting directly into interior nodes is an uncommon operation with
	 * various weird edge cases: also, a lot of things about
	 * BTREE_ITER_NODES iters need to be audited
	 */
	if (unlikely(btree_iter_type(iter) != BTREE_ITER_KEYS))
		return 0;

	BUG_ON(iter->level);

	trans_for_each_update(trans, i) {
		if (iter_l(i->iter)->b != b)
			continue;

		old = bch2_btree_iter_peek_slot(i->iter);
		ret = bkey_err(old);
		if (ret)
			return ret;

		u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
		u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
	}

	return u64s_delta <= 0
		? (bch2_foreground_maybe_merge(trans->c, iter, iter->level,
				trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
		: 0;
}

/*
 * Get journal reservation, take write locks, and attempt to do btree update(s):
 */
static inline int do_bch2_trans_commit(struct btree_trans *trans,
				       struct btree_insert_entry **stopped_at,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	struct btree_iter *iter;
	int ret;

	trans_for_each_update(trans, i) {
		struct btree *b;

		BUG_ON(!btree_node_intent_locked(i->iter, i->level));

		if (btree_iter_type(i->iter) == BTREE_ITER_CACHED)
			continue;

		b = iter_l(i->iter)->b;
		if (b->sib_u64s[0] < c->btree_foreground_merge_threshold ||
		    b->sib_u64s[1] < c->btree_foreground_merge_threshold) {
			ret = maybe_do_btree_merge(trans, i->iter);
			if (unlikely(ret))
				return ret;
		}
	}

	trans_for_each_update(trans, i)
		BUG_ON(!btree_node_intent_locked(i->iter, i->level));

	ret = bch2_journal_preres_get(&c->journal,
			&trans->journal_preres, trans->journal_preres_u64s,
			JOURNAL_RES_GET_NONBLOCK|
			((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
			 ? JOURNAL_RES_GET_RESERVED : 0));
	if (unlikely(ret == -EAGAIN))
		ret = bch2_trans_journal_preres_get_cold(trans,
					trans->journal_preres_u64s, trace_ip);
	if (unlikely(ret))
		return ret;

	/*
	 * Can't be holding any read locks when we go to take write locks:
	 * another thread could be holding an intent lock on the same node we
	 * have a read lock on, and it'll block trying to take a write lock
	 * (because we hold a read lock) and it could be blocking us by holding
	 * its own read lock (while we're trying to take write locks).
	 *
	 * note - this must be done after bch2_trans_journal_preres_get_cold()
	 * or anything else that might call bch2_trans_relock(), since that
	 * would just retake the read locks:
	 */
	trans_for_each_iter(trans, iter) {
		if (iter->nodes_locked != iter->nodes_intent_locked) {
			if (btree_iter_keep(trans, iter)) {
				if (!bch2_btree_iter_upgrade(iter, 1)) {
					trace_trans_restart_upgrade(trans->ip, trace_ip,
								    iter->btree_id,
								    &iter->real_pos);
					return -EINTR;
				}
			} else {
				bch2_btree_iter_unlock_noinline(iter);
			}
		}
	}

	trans_for_each_update(trans, i) {
		const char *invalid = bch2_bkey_invalid(c,
				bkey_i_to_s_c(i->k), i->bkey_type);
		if (invalid) {
			char buf[200];

			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
			bch_err(c, "invalid bkey %s on insert: %s\n", buf, invalid);
			bch2_fatal_error(c);
		}
		btree_insert_entry_checks(trans, i);
	}
	bch2_btree_trans_verify_locks(trans);

	trans_for_each_update(trans, i)
		if (!same_leaf_as_prev(trans, i))
			bch2_btree_node_lock_for_insert(c,
					iter_l(i->iter)->b, i->iter);

	ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);

	trans_for_each_update(trans, i)
		if (!same_leaf_as_prev(trans, i))
			bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
							     i->iter);

	if (!ret && trans->journal_pin)
		bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
				     trans->journal_pin, NULL);

	/*
	 * Drop journal reservation after dropping write locks, since dropping
	 * the journal reservation may kick off a journal write:
	 */
	bch2_journal_res_put(&c->journal, &trans->journal_res);

	if (unlikely(ret))
		return ret;

	bch2_trans_downgrade(trans);

	return 0;
}
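
/*
 * Returns a negative journal error, nonzero when the btree key cache no
 * longer needs to wait on journal reclaim, and zero (after kicking reclaim)
 * when the caller should keep waiting:
 */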
static int journal_reclaim_wait_done(struct bch_fs *c)
{
	int ret = bch2_journal_error(&c->journal) ?:
		!bch2_btree_key_cache_must_wait(c);

	if (!ret)
		journal_reclaim_kick(&c->journal);
	return ret;
}

static noinline
int bch2_trans_commit_error(struct btree_trans *trans,
			    struct btree_insert_entry *i,
			    int ret, unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	unsigned flags = trans->flags;

	/*
	 * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
	 * update; if we haven't done anything yet it doesn't apply
	 */
	flags &= ~BTREE_INSERT_NOUNLOCK;

	switch (ret) {
	case BTREE_INSERT_BTREE_NODE_FULL:
		ret = bch2_btree_split_leaf(c, i->iter, flags);

		/*
		 * if the split succeeded without dropping locks the insert will
		 * still be atomic (what the caller peeked() and is overwriting
		 * won't have changed)
		 */
#if 0
		/*
		 * XXX:
		 * split -> btree node merging (of parent node) might still drop
		 * locks when we're not passing it BTREE_INSERT_NOUNLOCK
		 *
		 * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
		 * will inhibit merging - but we don't have a reliable way yet
		 * (do we?) of checking if we dropped locks in this path
		 */
		if (!ret)
			goto retry;
#endif

		/*
		 * don't care if we got ENOSPC because we told split it
		 * couldn't block:
		 */
		if (!ret ||
		    ret == -EINTR ||
		    (flags & BTREE_INSERT_NOUNLOCK)) {
			trace_trans_restart_btree_node_split(trans->ip, trace_ip,
							     i->iter->btree_id,
							     &i->iter->real_pos);
			ret = -EINTR;
		}
		break;
	case BTREE_INSERT_ENOSPC:
		BUG_ON(flags & BTREE_INSERT_NOFAIL);
		ret = -ENOSPC;
		break;
	case BTREE_INSERT_NEED_MARK_REPLICAS:
		bch2_trans_unlock(trans);

		ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
		if (ret)
			return ret;

		if (bch2_trans_relock(trans))
			return 0;

		trace_trans_restart_mark_replicas(trans->ip, trace_ip);
		ret = -EINTR;
		break;
	case BTREE_INSERT_NEED_JOURNAL_RES:
		bch2_trans_unlock(trans);

		if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
		    !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
			return -EAGAIN;

		ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
		if (ret)
			return ret;

		if (bch2_trans_relock(trans))
			return 0;

		trace_trans_restart_journal_res_get(trans->ip, trace_ip);
		ret = -EINTR;
		break;
	case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
		bch2_trans_unlock(trans);

		trace_trans_blocked_journal_reclaim(trans->ip, trace_ip);

		wait_event_freezable(c->journal.reclaim_wait,
				     (ret = journal_reclaim_wait_done(c)));
		if (ret < 0)
			return ret;

		if (bch2_trans_relock(trans))
			return 0;

		trace_trans_restart_journal_reclaim(trans->ip, trace_ip);
		ret = -EINTR;
		break;
	default:
		BUG_ON(ret >= 0);
		break;
	}

	BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL));

	return ret;
}

static noinline int
bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	int ret;

	if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
		return -EROFS;

	bch2_trans_unlock(trans);

	ret = bch2_fs_read_write_early(c);
	if (ret)
		return ret;

	percpu_ref_get(&c->writes);
	return 0;
}

static int __btree_delete_at(struct btree_trans *trans, enum btree_id btree_id,
			     struct bpos pos, unsigned trigger_flags)
{
	struct btree_iter *iter;
	struct bkey_i *update;
	int ret;

	update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
	if ((ret = PTR_ERR_OR_ZERO(update)))
		return ret;

	bkey_init(&update->k);
	update->k.p = pos;

	iter = bch2_trans_get_iter(trans, btree_id, pos,
				   BTREE_ITER_NOT_EXTENTS|
				   BTREE_ITER_INTENT);
	bch2_trans_update(trans, iter, update, trigger_flags);
	bch2_trans_iter_put(trans, iter);
	return 0;
}

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct bkey_s_c k,
				       struct btree_insert_entry *i)
{
	struct bch_fs *c = trans->c;
	struct bpos l_pos = k.k->p;
	struct bkey_i *update;
	int ret;

	update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	bkey_reassemble(update, k);

	if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(i->k))) {
		ret = __btree_delete_at(trans, i->btree_id, l_pos,
					i->trigger_flags);
		if (ret)
			return ret;

		i->k = update;
	}

	return 0;
}
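
/*
 * Extent updates overwrite whatever they overlap with, so before an extent
 * can be inserted any existing overlapping extents must be trimmed, split or
 * deleted.  E.g. (illustrative, offsets in sectors): inserting [20,30) on top
 * of an existing extent [10,40) leaves a front fragment [10,20) (via
 * bch2_cut_back()) and a back fragment [30,40) (via bch2_cut_front()); an
 * existing extent entirely covered by the new one is simply deleted:
 */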
static int extent_handle_overwrites(struct btree_trans *trans,
				    struct btree_insert_entry *i)
{
	struct bch_fs *c = trans->c;
	struct btree_iter *iter, *update_iter;
	struct bpos start = bkey_start_pos(&i->k->k);
	struct bkey_i *update;
	struct bkey_s_c k;
	int ret = 0;

	iter = bch2_trans_get_iter(trans, i->btree_id, start,
				   BTREE_ITER_INTENT|
				   BTREE_ITER_WITH_UPDATES|
				   BTREE_ITER_NOT_EXTENTS);
	k = bch2_btree_iter_peek(iter);
	if (!k.k || (ret = bkey_err(k)))
		goto out;

	if (!bkey_cmp(k.k->p, bkey_start_pos(&i->k->k))) {
		if (bch2_bkey_maybe_mergable(k.k, &i->k->k)) {
			ret = extent_front_merge(trans, k, i);
			if (ret)
				goto out;
		}

		goto next;
	}

	while (bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) > 0) {
		if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
			update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
			if ((ret = PTR_ERR_OR_ZERO(update)))
				goto out;

			bkey_reassemble(update, k);

			bch2_cut_back(start, update);

			update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
							  BTREE_ITER_NOT_EXTENTS|
							  BTREE_ITER_INTENT);
			bch2_trans_update(trans, update_iter, update, i->trigger_flags);
			bch2_trans_iter_put(trans, update_iter);
		}

		if (bkey_cmp(k.k->p, i->k->k.p) <= 0) {
			ret = __btree_delete_at(trans, i->btree_id, k.k->p,
						i->trigger_flags);
			if (ret)
				goto out;
		}

		if (bkey_cmp(k.k->p, i->k->k.p) > 0) {
			update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
			if ((ret = PTR_ERR_OR_ZERO(update)))
				goto out;

			bkey_reassemble(update, k);
			bch2_cut_front(i->k->k.p, update);

			update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
							  BTREE_ITER_NOT_EXTENTS|
							  BTREE_ITER_INTENT);
			bch2_trans_update(trans, update_iter, update,
					  i->trigger_flags);
			bch2_trans_iter_put(trans, update_iter);
			goto out;
		}
next:
		k = bch2_btree_iter_next(iter);
		if (!k.k || (ret = bkey_err(k)))
			goto out;
	}

	if (bch2_bkey_maybe_mergable(&i->k->k, k.k))
		bch2_bkey_merge(c, bkey_i_to_s(i->k), k);
out:
	bch2_trans_iter_put(trans, iter);

	return ret;
}

int __bch2_trans_commit(struct btree_trans *trans)
{
	struct btree_insert_entry *i = NULL;
	struct btree_iter *iter;
	bool trans_trigger_run;
	unsigned u64s, reset_flags = 0;
	int ret = 0;

	if (!trans->nr_updates)
		goto out_reset;

	if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
		lockdep_assert_held(&trans->c->gc_lock);

	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));

	trans->journal_u64s		= trans->extra_journal_entry_u64s;
	trans->journal_preres_u64s	= 0;

	if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
	    unlikely(!percpu_ref_tryget(&trans->c->writes))) {
		ret = bch2_trans_commit_get_rw_cold(trans);
		if (ret)
			goto out_reset;
	}

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
		    !(i->trigger_flags & BTREE_TRIGGER_NORUN))
			bch2_btree_key_cache_verify_clean(trans,
					i->btree_id, i->k->k.p);
#endif

	/*
	 * Running triggers will append more updates to the list of updates as
	 * we're walking it:
	 */
	do {
		trans_trigger_run = false;

		trans_for_each_update(trans, i) {
			if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
			    !i->trans_triggers_run) {
				i->trans_triggers_run = true;
				trans_trigger_run = true;

				ret = bch2_trans_mark_update(trans, i->iter, i->k,
							     i->trigger_flags);
				if (unlikely(ret)) {
					if (ret == -EINTR)
						trace_trans_restart_mark(trans->ip, _RET_IP_,
									 i->iter->btree_id,
									 &i->iter->pos);
					goto out;
				}
			}
		}
	} while (trans_trigger_run);

	trans_for_each_update(trans, i) {
		ret = bch2_btree_iter_traverse(i->iter);
		if (unlikely(ret)) {
			trace_trans_restart_traverse(trans->ip, _RET_IP_,
						     i->iter->btree_id,
						     &i->iter->pos);
			goto out;
		}

		if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
			trace_trans_restart_upgrade(trans->ip, _RET_IP_,
						    i->iter->btree_id,
						    &i->iter->pos);
			ret = -EINTR;
			goto out;
		}

		BUG_ON(!btree_node_intent_locked(i->iter, i->level));

		u64s = jset_u64s(i->k->k.u64s);
		if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
		    likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
			trans->journal_preres_u64s += u64s;
		trans->journal_u64s += u64s;
	}
retry:
	memset(&trans->journal_res, 0, sizeof(trans->journal_res));

	ret = do_bch2_trans_commit(trans, &i, _RET_IP_);

	/* make sure we didn't drop or screw up locks: */
	bch2_btree_trans_verify_locks(trans);

	if (ret)
		goto err;

	trans_for_each_iter(trans, iter)
		if (btree_iter_live(trans, iter) &&
		    (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
			bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
out:
	bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);

	if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
		percpu_ref_put(&trans->c->writes);
out_reset:
	if (!ret)
		reset_flags |= TRANS_RESET_NOTRAVERSE;
	if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK))
		reset_flags |= TRANS_RESET_NOUNLOCK;
	bch2_trans_reset(trans, reset_flags);

	return ret;
err:
	ret = bch2_trans_commit_error(trans, i, ret, _RET_IP_);
	if (ret)
		goto out;

	goto retry;
}

int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
		      struct bkey_i *k, enum btree_trigger_flags flags)
{
	struct btree_insert_entry *i, n = (struct btree_insert_entry) {
		.trigger_flags	= flags,
		.bkey_type	= __btree_node_type(iter->level, iter->btree_id),
		.btree_id	= iter->btree_id,
		.level		= iter->level,
		.iter		= iter,
		.k		= k
	};
	bool is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0;
	int ret = 0;

	BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);

#ifdef CONFIG_BCACHEFS_DEBUG
	BUG_ON(bkey_cmp(iter->pos,
			is_extent ? bkey_start_pos(&k->k) : k->k.p));

	trans_for_each_update(trans, i) {
		BUG_ON(bkey_cmp(i->iter->pos, i->k->k.p));

		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
	}
#endif

	if (is_extent) {
		ret = bch2_extent_can_insert(trans, n.iter, n.k);
		if (ret)
			return ret;

		ret = extent_handle_overwrites(trans, &n);
		if (ret)
			return ret;

		iter->pos_after_commit = k->k.p;
		iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;

		if (bkey_deleted(&n.k->k))
			return 0;

		n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p,
					     BTREE_ITER_INTENT|
					     BTREE_ITER_NOT_EXTENTS);
		bch2_trans_iter_put(trans, n.iter);
	}

	BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);

	n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	trans_for_each_update(trans, i)
		if (btree_insert_entry_cmp(&n, i) <= 0)
			break;

	if (i < trans->updates + trans->nr_updates &&
	    !btree_insert_entry_cmp(&n, i)) {
		BUG_ON(i->trans_triggers_run);
		*i = n;
	} else
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

	return 0;
}

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}
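
/*
 * Hooks run from bch2_trans_commit_write_locked(), with the journal
 * reservation held and the relevant btree nodes write locked; a nonzero
 * return value aborts the commit.  A minimal sketch of a caller (my_hook_fn
 * is a hypothetical name; the hook must stay live until the commit returns):
 *
 *	static int my_hook_fn(struct btree_trans *trans,
 *			      struct btree_trans_commit_hook *h)
 *	{
 *		return 0;
 *	}
 *
 *	struct btree_trans_commit_hook hook = { .fn = my_hook_fn };
 *
 *	bch2_trans_commit_hook(trans, &hook);
 *	ret = bch2_trans_commit(trans, NULL, NULL, 0);
 */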

int __bch2_btree_insert(struct btree_trans *trans,
			enum btree_id id, struct bkey_i *k)
{
	struct btree_iter *iter;
	int ret;

	iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k),
				   BTREE_ITER_INTENT);

	ret = bch2_trans_update(trans, iter, k, 0);
	bch2_trans_iter_put(trans, iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a single key into a given btree
 * @c:			pointer to struct bch_fs
 * @id:			btree to insert into
 * @k:			key to insert
 * @disk_res:		disk reservation, or NULL
 * @journal_seq:	if non-NULL, filled in with the journal sequence number
 * @flags:		BTREE_INSERT_* transaction commit flags
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
		      struct bkey_i *k,
		      struct disk_reservation *disk_res,
		      u64 *journal_seq, int flags)
{
	return bch2_trans_do(c, disk_res, journal_seq, flags,
			     __bch2_btree_insert(&trans, id, k));
}

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned flags)
{
	struct bkey_i k;

	bkey_init(&k.k);
	k.k.p = iter->pos;

	return  bch2_trans_update(trans, iter, &k, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BTREE_INSERT_NOFAIL|flags);
}

int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  u64 *journal_seq)
{
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret = 0;

	iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT);
retry:
	while ((k = bch2_btree_iter_peek(iter)).k &&
	       !(ret = bkey_err(k)) &&
	       bkey_cmp(iter->pos, end) < 0) {
		struct bkey_i delete;

		bch2_trans_begin(trans);

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter->pos;

		if (btree_node_type_is_extents(iter->btree_id)) {
			unsigned max_sectors =
				KEY_SIZE_MAX & (~0 << trans->c->block_bits);

			/* create the biggest key we can */
			bch2_key_resize(&delete.k, max_sectors);
			bch2_cut_back(end, &delete);

			ret = bch2_extent_trim_atomic(&delete, iter);
			if (ret)
				break;
		}

		ret   = bch2_trans_update(trans, iter, &delete, 0) ?:
			bch2_trans_commit(trans, NULL, journal_seq,
					  BTREE_INSERT_NOFAIL);
		if (ret)
			break;

		bch2_trans_cond_resched(trans);
	}

	if (ret == -EINTR) {
		ret = 0;
		goto retry;
	}

	bch2_trans_iter_free(trans, iter);
	return ret;
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    u64 *journal_seq)
{
	return bch2_trans_do(c, NULL, journal_seq, 0,
			     bch2_btree_delete_range_trans(&trans, id, start, end, journal_seq));
}
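
/*
 * Example (an illustrative sketch, assuming the usual POS() constructor and
 * lowercase btree ID enum): delete every extent belonging to inode 42:
 *
 *	ret = bch2_btree_delete_range(c, BTREE_ID_extents,
 *				      POS(42, 0), POS(42 + 1, 0),
 *				      NULL);
 */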