2017-03-16 22:18:50 -08:00
// SPDX-License-Identifier: GPL-2.0
# include "bcachefs.h"
# include "bkey_methods.h"
2020-12-17 15:08:58 -05:00
# include "bkey_buf.h"
2017-03-16 22:18:50 -08:00
# include "btree_cache.h"
# include "btree_iter.h"
2019-03-07 19:46:10 -05:00
# include "btree_key_cache.h"
2017-03-16 22:18:50 -08:00
# include "btree_locking.h"
2020-03-05 18:44:59 -05:00
# include "btree_update.h"
2017-03-16 22:18:50 -08:00
# include "debug.h"
2021-03-19 20:29:11 -04:00
# include "error.h"
2017-03-16 22:18:50 -08:00
# include "extents.h"
2019-03-07 19:46:10 -05:00
# include "journal.h"
2021-04-03 20:29:05 -04:00
# include "replicas.h"
2017-03-16 22:18:50 -08:00
# include "trace.h"
# include <linux/prefetch.h>
2021-03-21 18:09:02 -04:00
static void btree_iter_set_search_pos ( struct btree_iter * , struct bpos ) ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and always U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
static inline struct bpos bkey_successor ( struct btree_iter * iter , struct bpos p )
{
EBUG_ON ( btree_iter_type ( iter ) = = BTREE_ITER_NODES ) ;
/* Are we iterating over keys in all snapshots? */
if ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) {
p = bpos_successor ( p ) ;
} else {
p = bpos_nosnap_successor ( p ) ;
p . snapshot = iter - > snapshot ;
}
return p ;
}
static inline struct bpos bkey_predecessor ( struct btree_iter * iter , struct bpos p )
{
EBUG_ON ( btree_iter_type ( iter ) = = BTREE_ITER_NODES ) ;
/* Are we iterating over keys in all snapshots? */
if ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) {
p = bpos_predecessor ( p ) ;
} else {
p = bpos_nosnap_predecessor ( p ) ;
p . snapshot = iter - > snapshot ;
}
return p ;
}
2017-03-16 22:18:50 -08:00
static inline bool is_btree_node ( struct btree_iter * iter , unsigned l )
{
return l < BTREE_MAX_DEPTH & &
2019-05-14 14:08:23 -04:00
( unsigned long ) iter - > l [ l ] . b > = 128 ;
2017-03-16 22:18:50 -08:00
}
2020-01-06 22:25:09 -05:00
static inline struct bpos btree_iter_search_key ( struct btree_iter * iter )
2018-08-21 16:30:14 -04:00
{
2020-01-06 22:25:09 -05:00
struct bpos pos = iter - > pos ;
2018-08-21 17:38:41 -04:00
2020-01-06 22:25:09 -05:00
if ( ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
bkey_cmp ( pos , POS_MAX ) )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
pos = bkey_successor ( iter , pos ) ;
2020-01-06 22:25:09 -05:00
return pos ;
2018-08-21 16:30:14 -04:00
}
2020-03-02 13:38:19 -05:00
static inline bool btree_iter_pos_before_node ( struct btree_iter * iter ,
struct btree * b )
{
2021-03-04 16:20:16 -05:00
return bpos_cmp ( iter - > real_pos , b - > data - > min_key ) < 0 ;
2020-03-02 13:38:19 -05:00
}
static inline bool btree_iter_pos_after_node ( struct btree_iter * iter ,
struct btree * b )
{
2021-03-04 16:20:16 -05:00
return bpos_cmp ( b - > key . k . p , iter - > real_pos ) < 0 ;
2020-03-02 13:38:19 -05:00
}
static inline bool btree_iter_pos_in_node ( struct btree_iter * iter ,
struct btree * b )
{
return iter - > btree_id = = b - > c . btree_id & &
! btree_iter_pos_before_node ( iter , b ) & &
! btree_iter_pos_after_node ( iter , b ) ;
}
2017-03-16 22:18:50 -08:00
/* Btree node locking: */
/* Out-of-line wrapper around bch2_btree_node_unlock_write_inlined(). */
void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
{
	bch2_btree_node_unlock_write_inlined(b, iter);
}
void __bch2_btree_node_lock_write ( struct btree * b , struct btree_iter * iter )
{
struct btree_iter * linked ;
unsigned readers = 0 ;
2019-08-12 14:35:34 -04:00
EBUG_ON ( ! btree_node_intent_locked ( iter , b - > c . level ) ) ;
2017-03-16 22:18:50 -08:00
2019-03-27 22:03:30 -04:00
trans_for_each_iter ( iter - > trans , linked )
2020-06-06 12:28:01 -04:00
if ( linked - > l [ b - > c . level ] . b = = b & &
btree_node_read_locked ( linked , b - > c . level ) )
2017-03-16 22:18:50 -08:00
readers + + ;
/*
* Must drop our read locks before calling six_lock_write ( ) -
* six_unlock ( ) won ' t do wakeups until the reader count
* goes to 0 , and it ' s safe because we have the node intent
* locked :
*/
2021-03-23 23:52:27 -04:00
if ( ! b - > c . lock . readers )
atomic64_sub ( __SIX_VAL ( read_lock , readers ) ,
& b - > c . lock . state . counter ) ;
else
this_cpu_sub ( * b - > c . lock . readers , readers ) ;
2019-03-25 17:06:42 -04:00
btree_node_lock_type ( iter - > trans - > c , b , SIX_LOCK_write ) ;
2021-03-23 23:52:27 -04:00
if ( ! b - > c . lock . readers )
atomic64_add ( __SIX_VAL ( read_lock , readers ) ,
& b - > c . lock . state . counter ) ;
else
this_cpu_add ( * b - > c . lock . readers , readers ) ;
2017-03-16 22:18:50 -08:00
}
bool __bch2_btree_node_relock ( struct btree_iter * iter , unsigned level )
{
struct btree * b = btree_iter_node ( iter , level ) ;
int want = __btree_lock_want ( iter , level ) ;
2019-05-14 14:08:23 -04:00
if ( ! is_btree_node ( iter , level ) )
2017-03-16 22:18:50 -08:00
return false ;
if ( race_fault ( ) )
return false ;
2019-05-14 14:08:23 -04:00
if ( six_relock_type ( & b - > c . lock , want , iter - > l [ level ] . lock_seq ) | |
( btree_node_lock_seq_matches ( iter , b , level ) & &
2020-06-12 14:58:07 -04:00
btree_node_lock_increment ( iter - > trans , b , level , want ) ) ) {
2019-05-14 14:08:23 -04:00
mark_btree_node_locked ( iter , level , want ) ;
return true ;
} else {
2017-03-16 22:18:50 -08:00
return false ;
2019-05-14 14:08:23 -04:00
}
2017-03-16 22:18:50 -08:00
}
static bool bch2_btree_node_upgrade ( struct btree_iter * iter , unsigned level )
{
struct btree * b = iter - > l [ level ] . b ;
EBUG_ON ( btree_lock_want ( iter , level ) ! = BTREE_NODE_INTENT_LOCKED ) ;
if ( ! is_btree_node ( iter , level ) )
return false ;
if ( btree_node_intent_locked ( iter , level ) )
return true ;
if ( race_fault ( ) )
return false ;
if ( btree_node_locked ( iter , level )
2020-06-06 12:28:01 -04:00
? six_lock_tryupgrade ( & b - > c . lock )
: six_relock_type ( & b - > c . lock , SIX_LOCK_intent , iter - > l [ level ] . lock_seq ) )
2017-03-16 22:18:50 -08:00
goto success ;
2019-05-14 14:08:23 -04:00
if ( btree_node_lock_seq_matches ( iter , b , level ) & &
2020-06-12 14:58:07 -04:00
btree_node_lock_increment ( iter - > trans , b , level , BTREE_NODE_INTENT_LOCKED ) ) {
2017-03-16 22:18:50 -08:00
btree_node_unlock ( iter , level ) ;
goto success ;
}
return false ;
success :
mark_btree_node_intent_locked ( iter , level ) ;
return true ;
}
/*
 * Relock (or, if @upgrade, upgrade) every level from iter->level up to
 * iter->locks_want, stopping early at the first level with no node.
 *
 * @upgrade:	use bch2_btree_node_upgrade() instead of bch2_btree_node_relock()
 * @trace:	emit a tracepoint for each level that fails
 *
 * On failure the iterator is marked BTREE_ITER_NEED_TRAVERSE and every level
 * from the highest failed level down to 0 is unlocked and has its node pointer
 * replaced with BTREE_ITER_NO_NODE_GET_LOCKS.
 *
 * Returns true if iter->uptodate ends up below BTREE_ITER_NEED_RELOCK.
 */
static inline bool btree_iter_get_locks(struct btree_iter *iter,
					bool upgrade, bool trace)
{
	unsigned l = iter->level;
	int fail_idx = -1;

	do {
		if (!btree_iter_node(iter, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(iter, l)
		      : bch2_btree_node_relock(iter, l))) {
			if (trace)
				(upgrade
				 ? trace_node_upgrade_fail
				 : trace_node_relock_fail)(l, iter->l[l].lock_seq,
						is_btree_node(iter, l)
						? 0
						: (unsigned long) iter->l[l].b,
						is_btree_node(iter, l)
						? iter->l[l].b->c.lock.state.seq
						: 0);

			fail_idx = l;
			btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
		}

		l++;
	} while (l < iter->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_iter_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	while (fail_idx >= 0) {
		btree_node_unlock(iter, fail_idx);
		iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
		--fail_idx;
	}

	if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
		iter->uptodate = BTREE_ITER_NEED_PEEK;

	bch2_btree_trans_verify_locks(iter->trans);

	return iter->uptodate < BTREE_ITER_NEED_RELOCK;
}
2020-06-15 19:53:46 -04:00
static struct bpos btree_node_pos ( struct btree_bkey_cached_common * _b ,
enum btree_iter_type type )
{
return type ! = BTREE_ITER_CACHED
? container_of ( _b , struct btree , c ) - > key . k . p
: container_of ( _b , struct bkey_cached , c ) - > key . pos ;
}
2017-03-16 22:18:50 -08:00
/* Slowpath: */
bool __bch2_btree_node_lock ( struct btree * b , struct bpos pos ,
2020-06-12 14:58:07 -04:00
unsigned level , struct btree_iter * iter ,
2020-06-12 22:29:48 -04:00
enum six_lock_type type ,
2020-10-28 14:17:46 -04:00
six_lock_should_sleep_fn should_sleep_fn , void * p ,
unsigned long ip )
2017-03-16 22:18:50 -08:00
{
2020-06-12 14:58:07 -04:00
struct btree_trans * trans = iter - > trans ;
2020-10-28 14:17:46 -04:00
struct btree_iter * linked , * deadlock_iter = NULL ;
2020-06-12 22:29:48 -04:00
u64 start_time = local_clock ( ) ;
2020-10-28 14:17:46 -04:00
unsigned reason = 9 ;
2021-03-31 14:42:36 -04:00
bool ret ;
2017-03-16 22:18:50 -08:00
2018-07-24 16:42:27 -04:00
/* Check if it's safe to block: */
2020-06-12 14:58:07 -04:00
trans_for_each_iter ( trans , linked ) {
2017-03-16 22:18:50 -08:00
if ( ! linked - > nodes_locked )
continue ;
/*
* Can ' t block taking an intent lock if we have _any_ nodes read
* locked :
*
* - Our read lock blocks another thread with an intent lock on
* the same node from getting a write lock , and thus from
* dropping its intent lock
*
* - And the other thread may have multiple nodes intent locked :
* both the node we want to intent lock , and the node we
* already have read locked - deadlock :
*/
if ( type = = SIX_LOCK_intent & &
linked - > nodes_locked ! = linked - > nodes_intent_locked ) {
2021-04-14 13:26:15 -04:00
deadlock_iter = linked ;
reason = 1 ;
2017-03-16 22:18:50 -08:00
}
2020-11-05 20:49:08 -05:00
if ( linked - > btree_id ! = iter - > btree_id ) {
if ( linked - > btree_id > iter - > btree_id ) {
deadlock_iter = linked ;
reason = 3 ;
}
continue ;
}
/*
* Within the same btree , cached iterators come before non
* cached iterators :
*/
if ( btree_iter_is_cached ( linked ) ! = btree_iter_is_cached ( iter ) ) {
if ( btree_iter_is_cached ( iter ) ) {
deadlock_iter = linked ;
reason = 4 ;
}
continue ;
}
2017-03-16 22:18:50 -08:00
/*
* Interior nodes must be locked before their descendants : if
* another iterator has possible descendants locked of the node
* we ' re about to lock , it must have the ancestors locked too :
*/
2020-11-05 20:49:08 -05:00
if ( level > __fls ( linked - > nodes_locked ) ) {
2021-04-14 13:26:15 -04:00
deadlock_iter = linked ;
reason = 5 ;
2020-06-12 14:58:07 -04:00
}
/* Must lock btree nodes in key order: */
2020-11-05 20:49:08 -05:00
if ( btree_node_locked ( linked , level ) & &
2021-03-04 16:20:16 -05:00
bpos_cmp ( pos , btree_node_pos ( ( void * ) linked - > l [ level ] . b ,
2020-10-28 14:17:46 -04:00
btree_iter_type ( linked ) ) ) < = 0 ) {
deadlock_iter = linked ;
2020-11-05 20:49:08 -05:00
reason = 7 ;
2020-10-28 14:17:46 -04:00
}
2017-03-16 22:18:50 -08:00
}
2020-10-28 14:17:46 -04:00
if ( unlikely ( deadlock_iter ) ) {
trace_trans_restart_would_deadlock ( iter - > trans - > ip , ip ,
2021-04-14 13:26:15 -04:00
trans - > in_traverse_all , reason ,
2020-10-28 14:17:46 -04:00
deadlock_iter - > btree_id ,
btree_iter_type ( deadlock_iter ) ,
2021-04-14 13:26:15 -04:00
& deadlock_iter - > real_pos ,
2020-10-28 14:17:46 -04:00
iter - > btree_id ,
2021-04-14 13:26:15 -04:00
btree_iter_type ( iter ) ,
& pos ) ;
2019-04-23 00:10:08 -04:00
return false ;
}
2018-07-12 23:30:45 -04:00
2020-06-12 22:29:48 -04:00
if ( six_trylock_type ( & b - > c . lock , type ) )
return true ;
2021-03-31 14:42:36 -04:00
# ifdef CONFIG_BCACHEFS_DEBUG
trans - > locking_iter_idx = iter - > idx ;
trans - > locking_pos = pos ;
trans - > locking_btree_id = iter - > btree_id ;
trans - > locking_level = level ;
trans - > locking = b ;
# endif
2020-06-12 22:29:48 -04:00
2021-03-31 14:42:36 -04:00
ret = six_lock_type ( & b - > c . lock , type , should_sleep_fn , p ) = = 0 ;
# ifdef CONFIG_BCACHEFS_DEBUG
trans - > locking = NULL ;
# endif
if ( ret )
bch2_time_stats_update ( & trans - > c - > times [ lock_to_time_stat ( type ) ] ,
start_time ) ;
return ret ;
2017-03-16 22:18:50 -08:00
}
/* Btree iterator locking: */
# ifdef CONFIG_BCACHEFS_DEBUG
2020-03-15 16:15:08 -04:00
static void bch2_btree_iter_verify_locks ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
unsigned l ;
2020-06-12 22:29:48 -04:00
if ( ! ( iter - > trans - > iters_linked & ( 1ULL < < iter - > idx ) ) ) {
BUG_ON ( iter - > nodes_locked ) ;
return ;
}
2019-03-07 19:46:10 -05:00
for ( l = 0 ; is_btree_node ( iter , l ) ; l + + ) {
2017-03-16 22:18:50 -08:00
if ( iter - > uptodate > = BTREE_ITER_NEED_RELOCK & &
! btree_node_locked ( iter , l ) )
continue ;
BUG_ON ( btree_lock_want ( iter , l ) ! =
btree_node_locked_type ( iter , l ) ) ;
}
}
2018-11-23 05:19:25 -05:00
2019-03-27 22:03:30 -04:00
void bch2_btree_trans_verify_locks ( struct btree_trans * trans )
2018-11-23 05:19:25 -05:00
{
2019-03-27 22:03:30 -04:00
struct btree_iter * iter ;
2018-11-23 05:19:25 -05:00
2020-12-01 23:11:53 -05:00
trans_for_each_iter ( trans , iter )
2019-03-27 22:03:30 -04:00
bch2_btree_iter_verify_locks ( iter ) ;
2018-11-23 05:19:25 -05:00
}
2020-03-15 16:15:08 -04:00
# else
/* No-op stub used when CONFIG_BCACHEFS_DEBUG is not defined. */
static inline void bch2_btree_iter_verify_locks ( struct btree_iter * iter ) { }
2017-03-16 22:18:50 -08:00
# endif
__flatten
2019-03-07 19:46:10 -05:00
bool bch2_btree_iter_relock ( struct btree_iter * iter , bool trace )
2017-03-16 22:18:50 -08:00
{
2019-11-04 15:56:04 -05:00
return btree_iter_get_locks ( iter , false , trace ) ;
2017-03-16 22:18:50 -08:00
}
bool __bch2_btree_iter_upgrade ( struct btree_iter * iter ,
unsigned new_locks_want )
{
2021-04-16 14:29:26 -04:00
struct btree_iter * linked ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( iter - > locks_want > = new_locks_want ) ;
iter - > locks_want = new_locks_want ;
2021-04-16 14:29:26 -04:00
if ( btree_iter_get_locks ( iter , true , true ) )
return true ;
/*
* XXX : this is ugly - we ' d prefer to not be mucking with other
* iterators in the btree_trans here .
*
* On failure to upgrade the iterator , setting iter - > locks_want and
* calling get_locks ( ) is sufficient to make bch2_btree_iter_traverse ( )
* get the locks we want on transaction restart .
*
* But if this iterator was a clone , on transaction restart what we did
* to this iterator isn ' t going to be preserved .
*
* Possibly we could add an iterator field for the parent iterator when
* an iterator is a copy - for now , we ' ll just upgrade any other
* iterators with the same btree id .
*
* The code below used to be needed to ensure ancestor nodes get locked
* before interior nodes - now that ' s handled by
* bch2_btree_iter_traverse_all ( ) .
*/
trans_for_each_iter ( iter - > trans , linked )
if ( linked ! = iter & &
btree_iter_type ( linked ) = = btree_iter_type ( iter ) & &
linked - > btree_id = = iter - > btree_id & &
linked - > locks_want < new_locks_want ) {
linked - > locks_want = new_locks_want ;
btree_iter_get_locks ( linked , true , false ) ;
}
return false ;
2017-03-16 22:18:50 -08:00
}
void __bch2_btree_iter_downgrade ( struct btree_iter * iter ,
2021-04-02 21:29:05 -04:00
unsigned new_locks_want )
2017-03-16 22:18:50 -08:00
{
2021-04-02 21:29:05 -04:00
unsigned l ;
2017-03-16 22:18:50 -08:00
2021-04-02 21:29:05 -04:00
EBUG_ON ( iter - > locks_want < new_locks_want ) ;
2017-03-16 22:18:50 -08:00
2021-04-02 21:29:05 -04:00
iter - > locks_want = new_locks_want ;
while ( iter - > nodes_locked & &
( l = __fls ( iter - > nodes_locked ) ) > = iter - > locks_want ) {
if ( l > iter - > level ) {
btree_node_unlock ( iter , l ) ;
} else {
if ( btree_node_intent_locked ( iter , l ) ) {
six_lock_downgrade ( & iter - > l [ l ] . b - > c . lock ) ;
iter - > nodes_intent_locked ^ = 1 < < l ;
2017-03-16 22:18:50 -08:00
}
2021-04-02 21:29:05 -04:00
break ;
2017-03-16 22:18:50 -08:00
}
}
2018-11-23 05:19:25 -05:00
2019-03-27 22:03:30 -04:00
bch2_btree_trans_verify_locks ( iter - > trans ) ;
2017-03-16 22:18:50 -08:00
}
2020-06-08 13:26:48 -04:00
/* Call bch2_btree_iter_downgrade() on every iterator in @trans. */
void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_iter *iter;

	trans_for_each_iter(trans, iter)
		bch2_btree_iter_downgrade(iter);
}
2019-05-15 09:47:40 -04:00
/* Btree transaction locking: */
bool bch2_trans_relock ( struct btree_trans * trans )
2019-03-27 22:03:30 -04:00
{
struct btree_iter * iter ;
trans_for_each_iter ( trans , iter )
2021-04-12 14:00:07 -04:00
if ( ! bch2_btree_iter_relock ( iter , true ) ) {
2021-03-31 21:44:55 -04:00
trace_trans_restart_relock ( trans - > ip ) ;
2021-03-30 20:35:46 -04:00
return false ;
2021-03-31 21:44:55 -04:00
}
2021-03-30 20:35:46 -04:00
return true ;
2019-03-27 22:03:30 -04:00
}
2019-05-15 09:47:40 -04:00
void bch2_trans_unlock ( struct btree_trans * trans )
2019-03-27 22:03:30 -04:00
{
struct btree_iter * iter ;
trans_for_each_iter ( trans , iter )
__bch2_btree_iter_unlock ( iter ) ;
}
2017-03-16 22:18:50 -08:00
/* Btree iterator: */
# ifdef CONFIG_BCACHEFS_DEBUG
2020-06-15 19:53:46 -04:00
static void bch2_btree_iter_verify_cached ( struct btree_iter * iter )
{
struct bkey_cached * ck ;
bool locked = btree_node_locked ( iter , 0 ) ;
if ( ! bch2_btree_node_relock ( iter , 0 ) )
return ;
ck = ( void * ) iter - > l [ 0 ] . b ;
BUG_ON ( ck - > key . btree_id ! = iter - > btree_id | |
bkey_cmp ( ck - > key . pos , iter - > pos ) ) ;
if ( ! locked )
btree_node_unlock ( iter , 0 ) ;
}
2020-02-18 16:17:55 -05:00
static void bch2_btree_iter_verify_level ( struct btree_iter * iter ,
unsigned level )
2017-03-16 22:18:50 -08:00
{
2021-03-20 22:13:30 -04:00
struct btree_iter_level * l ;
struct btree_node_iter tmp ;
bool locked ;
2020-02-18 16:17:55 -05:00
struct bkey_packed * p , * k ;
2021-03-04 15:20:22 -05:00
char buf1 [ 100 ] , buf2 [ 100 ] , buf3 [ 100 ] ;
2020-02-18 16:17:55 -05:00
const char * msg ;
2017-03-16 22:18:50 -08:00
2020-11-02 18:20:44 -05:00
if ( ! bch2_debug_check_iterators )
2019-03-28 01:51:47 -04:00
return ;
2021-03-20 22:13:30 -04:00
l = & iter - > l [ level ] ;
tmp = l - > iter ;
locked = btree_node_locked ( iter , level ) ;
2020-06-15 19:53:46 -04:00
if ( btree_iter_type ( iter ) = = BTREE_ITER_CACHED ) {
if ( ! level )
bch2_btree_iter_verify_cached ( iter ) ;
return ;
}
2020-02-18 16:17:55 -05:00
BUG_ON ( iter - > level < iter - > min_depth ) ;
if ( ! btree_iter_node ( iter , level ) )
return ;
if ( ! bch2_btree_node_relock ( iter , level ) )
2016-07-21 19:05:06 -08:00
return ;
2021-02-07 20:16:21 -05:00
BUG_ON ( ! btree_iter_pos_in_node ( iter , l - > b ) ) ;
2020-02-18 16:17:55 -05:00
/*
* node iterators don ' t use leaf node iterator :
*/
if ( btree_iter_type ( iter ) = = BTREE_ITER_NODES & &
level < = iter - > min_depth )
goto unlock ;
2020-03-02 13:38:19 -05:00
2020-02-18 16:17:55 -05:00
bch2_btree_node_iter_verify ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
/*
* For interior nodes , the iterator will have skipped past
* deleted keys :
2016-07-21 19:05:06 -08:00
*
* For extents , the iterator may have skipped past deleted keys ( but not
* whiteouts )
2017-03-16 22:18:50 -08:00
*/
2020-02-18 16:17:55 -05:00
p = level | | btree_node_type_is_extents ( iter - > btree_id )
2021-02-19 23:41:40 -05:00
? bch2_btree_node_iter_prev ( & tmp , l - > b )
2020-02-18 16:17:55 -05:00
: bch2_btree_node_iter_prev_all ( & tmp , l - > b ) ;
k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
2021-02-11 21:57:32 -05:00
if ( p & & bkey_iter_pos_cmp ( l - > b , p , & iter - > real_pos ) > = 0 ) {
2020-02-18 16:17:55 -05:00
msg = " before " ;
goto err ;
2017-03-16 22:18:50 -08:00
}
2021-02-11 21:57:32 -05:00
if ( k & & bkey_iter_pos_cmp ( l - > b , k , & iter - > real_pos ) < 0 ) {
2020-02-18 16:17:55 -05:00
msg = " after " ;
goto err ;
}
unlock :
if ( ! locked )
btree_node_unlock ( iter , level ) ;
return ;
err :
strcpy ( buf2 , " (none) " ) ;
2021-03-04 15:20:22 -05:00
strcpy ( buf3 , " (none) " ) ;
bch2_bpos_to_text ( & PBUF ( buf1 ) , iter - > real_pos ) ;
2020-02-18 16:17:55 -05:00
if ( p ) {
struct bkey uk = bkey_unpack_key ( l - > b , p ) ;
2021-03-04 15:20:22 -05:00
bch2_bkey_to_text ( & PBUF ( buf2 ) , & uk ) ;
2020-02-18 16:17:55 -05:00
}
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
if ( k ) {
struct bkey uk = bkey_unpack_key ( l - > b , k ) ;
2021-03-04 15:20:22 -05:00
bch2_bkey_to_text ( & PBUF ( buf3 ) , & uk ) ;
2017-03-16 22:18:50 -08:00
}
2020-02-18 16:17:55 -05:00
panic ( " iterator should be %s key at level %u: \n "
2021-03-04 15:20:22 -05:00
" iter pos %s \n "
2020-02-18 16:17:55 -05:00
" prev key %s \n "
" cur key %s \n " ,
2021-03-04 15:20:22 -05:00
msg , level , buf1 , buf2 , buf3 ) ;
2017-03-16 22:18:50 -08:00
}
2020-02-18 16:17:55 -05:00
static void bch2_btree_iter_verify ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
enum btree_iter_type type = btree_iter_type ( iter ) ;
2020-02-18 16:17:55 -05:00
unsigned i ;
2017-03-16 22:18:50 -08:00
2021-02-11 21:57:32 -05:00
EBUG_ON ( iter - > btree_id > = BTREE_ID_NR ) ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
BUG_ON ( ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) & &
iter - > pos . snapshot ! = iter - > snapshot ) ;
BUG_ON ( ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) ;
BUG_ON ( type = = BTREE_ITER_NODES & &
! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) ;
BUG_ON ( type ! = BTREE_ITER_NODES & &
( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) & &
! btree_type_has_snapshots ( iter - > btree_id ) ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_locks ( iter ) ;
2020-02-18 16:17:55 -05:00
for ( i = 0 ; i < BTREE_MAX_DEPTH ; i + + )
bch2_btree_iter_verify_level ( iter , i ) ;
}
2021-02-11 21:57:32 -05:00
static void bch2_btree_iter_verify_entry_exit ( struct btree_iter * iter )
{
enum btree_iter_type type = btree_iter_type ( iter ) ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
BUG_ON ( ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) & &
iter - > pos . snapshot ! = iter - > snapshot ) ;
2021-02-11 21:57:32 -05:00
BUG_ON ( ( type = = BTREE_ITER_KEYS | |
type = = BTREE_ITER_CACHED ) & &
( bkey_cmp ( iter - > pos , bkey_start_pos ( & iter - > k ) ) < 0 | |
bkey_cmp ( iter - > pos , iter - > k . p ) > 0 ) ) ;
}
2020-02-18 16:17:55 -05:00
void bch2_btree_trans_verify_iters ( struct btree_trans * trans , struct btree * b )
{
struct btree_iter * iter ;
2020-11-02 18:20:44 -05:00
if ( ! bch2_debug_check_iterators )
2019-03-28 01:51:47 -04:00
return ;
2020-02-18 16:17:55 -05:00
trans_for_each_iter_with_node ( trans , b , iter )
bch2_btree_iter_verify_level ( iter , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
}
2016-07-21 19:05:06 -08:00
# else
2020-03-15 16:15:08 -04:00
/* No-op stubs used when CONFIG_BCACHEFS_DEBUG is not defined: */
static inline void bch2_btree_iter_verify_level ( struct btree_iter * iter , unsigned l ) { }
static inline void bch2_btree_iter_verify ( struct btree_iter * iter ) { }
2021-02-11 21:57:32 -05:00
/* No-op stub used when CONFIG_BCACHEFS_DEBUG is not defined. */
static inline void bch2_btree_iter_verify_entry_exit ( struct btree_iter * iter ) { }
2016-07-21 19:05:06 -08:00
2017-03-16 22:18:50 -08:00
# endif
2019-08-20 17:43:47 -04:00
/*
 * Point the node iterator's entry for bset @t at key @k.
 *
 * If the iterator already has an entry whose end matches @t's end offset,
 * that entry is updated and the iterator re-sorted; otherwise a new entry
 * running from @k to the end of @t is pushed.
 */
static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}
2019-10-02 09:56:39 -04:00
static void __bch2_btree_iter_fix_key_modified ( struct btree_iter * iter ,
2020-01-06 22:25:09 -05:00
struct btree * b ,
struct bkey_packed * where )
2019-10-02 09:56:39 -04:00
{
2020-01-06 22:25:09 -05:00
struct btree_iter_level * l = & iter - > l [ b - > c . level ] ;
2019-10-02 09:56:39 -04:00
2020-01-06 22:25:09 -05:00
if ( where ! = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) )
return ;
2021-02-11 21:57:32 -05:00
if ( bkey_iter_pos_cmp ( l - > b , where , & iter - > real_pos ) < 0 )
2020-01-06 22:25:09 -05:00
bch2_btree_node_iter_advance ( & l - > iter , l - > b ) ;
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_PEEK ) ;
2019-10-02 09:56:39 -04:00
}
void bch2_btree_iter_fix_key_modified ( struct btree_iter * iter ,
struct btree * b ,
struct bkey_packed * where )
{
struct btree_iter * linked ;
trans_for_each_iter_with_node ( iter - > trans , b , linked ) {
__bch2_btree_iter_fix_key_modified ( linked , b , where ) ;
2020-02-18 16:17:55 -05:00
bch2_btree_iter_verify_level ( linked , b - > c . level ) ;
2019-10-02 09:56:39 -04:00
}
}
2017-03-16 22:18:50 -08:00
static void __bch2_btree_node_iter_fix ( struct btree_iter * iter ,
struct btree * b ,
struct btree_node_iter * node_iter ,
struct bset_tree * t ,
struct bkey_packed * where ,
unsigned clobber_u64s ,
unsigned new_u64s )
{
const struct bkey_packed * end = btree_bkey_last ( b , t ) ;
struct btree_node_iter_set * set ;
unsigned offset = __btree_node_key_to_offset ( b , where ) ;
int shift = new_u64s - clobber_u64s ;
2016-07-21 19:05:06 -08:00
unsigned old_end = t - > end_offset - shift ;
2019-09-07 19:17:40 -04:00
unsigned orig_iter_pos = node_iter - > data [ 0 ] . k ;
bool iter_current_key_modified =
orig_iter_pos > = offset & &
orig_iter_pos < = offset + clobber_u64s ;
2017-03-16 22:18:50 -08:00
btree_node_iter_for_each ( node_iter , set )
if ( set - > end = = old_end )
goto found ;
/* didn't find the bset in the iterator - might have to readd it: */
if ( new_u64s & &
2021-02-11 21:57:32 -05:00
bkey_iter_pos_cmp ( b , where , & iter - > real_pos ) > = 0 ) {
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_push ( node_iter , b , where , end ) ;
2019-09-07 19:17:40 -04:00
goto fixup_done ;
} else {
/* Iterator is after key that changed */
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
}
found :
2016-07-21 19:05:06 -08:00
set - > end = t - > end_offset ;
2017-03-16 22:18:50 -08:00
/* Iterator hasn't gotten to the key that changed yet: */
if ( set - > k < offset )
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
if ( new_u64s & &
2021-02-11 21:57:32 -05:00
bkey_iter_pos_cmp ( b , where , & iter - > real_pos ) > = 0 ) {
2017-03-16 22:18:50 -08:00
set - > k = offset ;
} else if ( set - > k < offset + clobber_u64s ) {
set - > k = offset + new_u64s ;
if ( set - > k = = set - > end )
bch2_btree_node_iter_set_drop ( node_iter , set ) ;
} else {
2019-09-07 19:17:40 -04:00
/* Iterator is after key that changed */
2017-03-16 22:18:50 -08:00
set - > k = ( int ) set - > k + shift ;
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
}
bch2_btree_node_iter_sort ( node_iter , b ) ;
2019-09-07 19:17:40 -04:00
fixup_done :
if ( node_iter - > data [ 0 ] . k ! = orig_iter_pos )
iter_current_key_modified = true ;
2018-12-06 12:01:29 -05:00
2017-03-16 22:18:50 -08:00
/*
2019-08-20 17:43:47 -04:00
* When a new key is added , and the node iterator now points to that
* key , the iterator might have skipped past deleted keys that should
* come after the key the iterator now points to . We have to rewind to
2019-09-07 19:17:40 -04:00
* before those deleted keys - otherwise
* bch2_btree_node_iter_prev_all ( ) breaks :
2017-03-16 22:18:50 -08:00
*/
2019-08-20 17:43:47 -04:00
if ( ! bch2_btree_node_iter_end ( node_iter ) & &
2019-09-07 19:17:40 -04:00
iter_current_key_modified & &
2019-08-20 17:43:47 -04:00
( b - > c . level | |
2020-01-31 13:26:05 -05:00
btree_node_type_is_extents ( iter - > btree_id ) ) ) {
2019-08-20 17:43:47 -04:00
struct bset_tree * t ;
struct bkey_packed * k , * k2 , * p ;
k = bch2_btree_node_iter_peek_all ( node_iter , b ) ;
2017-03-16 22:18:50 -08:00
for_each_bset ( b , t ) {
2019-08-20 17:43:47 -04:00
bool set_pos = false ;
if ( node_iter - > data [ 0 ] . end = = t - > end_offset )
2017-03-16 22:18:50 -08:00
continue ;
2019-08-20 17:43:47 -04:00
k2 = bch2_btree_node_iter_bset_pos ( node_iter , b , t ) ;
while ( ( p = bch2_bkey_prev_all ( b , t , k2 ) ) & &
bkey_iter_cmp ( b , k , p ) < 0 ) {
k2 = p ;
set_pos = true ;
2017-03-16 22:18:50 -08:00
}
2019-08-20 17:43:47 -04:00
if ( set_pos )
btree_node_iter_set_set_pos ( node_iter ,
b , t , k2 ) ;
2017-03-16 22:18:50 -08:00
}
}
2019-08-20 17:43:47 -04:00
2019-09-07 19:17:40 -04:00
if ( ! b - > c . level & &
node_iter = = & iter - > l [ 0 ] . iter & &
2020-02-18 16:17:55 -05:00
iter_current_key_modified )
2019-09-07 19:17:40 -04:00
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_PEEK ) ;
2017-03-16 22:18:50 -08:00
}
void bch2_btree_node_iter_fix ( struct btree_iter * iter ,
2018-08-11 19:12:05 -04:00
struct btree * b ,
struct btree_node_iter * node_iter ,
struct bkey_packed * where ,
unsigned clobber_u64s ,
unsigned new_u64s )
2017-03-16 22:18:50 -08:00
{
2018-08-11 19:12:05 -04:00
struct bset_tree * t = bch2_bkey_to_bset_inlined ( b , where ) ;
2017-03-16 22:18:50 -08:00
struct btree_iter * linked ;
2019-09-19 16:07:41 -04:00
if ( node_iter ! = & iter - > l [ b - > c . level ] . iter ) {
2017-03-16 22:18:50 -08:00
__bch2_btree_node_iter_fix ( iter , b , node_iter , t ,
2019-09-19 16:07:41 -04:00
where , clobber_u64s , new_u64s ) ;
2020-02-18 16:17:55 -05:00
2020-11-02 18:20:44 -05:00
if ( bch2_debug_check_iterators )
2020-02-18 16:17:55 -05:00
bch2_btree_node_iter_verify ( node_iter , b ) ;
2019-09-19 16:07:41 -04:00
}
2017-03-16 22:18:50 -08:00
2019-09-19 16:07:41 -04:00
trans_for_each_iter_with_node ( iter - > trans , b , linked ) {
2017-03-16 22:18:50 -08:00
__bch2_btree_node_iter_fix ( linked , b ,
2019-09-19 16:07:41 -04:00
& linked - > l [ b - > c . level ] . iter , t ,
where , clobber_u64s , new_u64s ) ;
2020-02-18 16:17:55 -05:00
bch2_btree_iter_verify_level ( linked , b - > c . level ) ;
2019-09-19 16:07:41 -04:00
}
2017-03-16 22:18:50 -08:00
}
static inline struct bkey_s_c __btree_iter_unpack ( struct btree_iter * iter ,
struct btree_iter_level * l ,
struct bkey * u ,
struct bkey_packed * k )
{
struct bkey_s_c ret ;
if ( unlikely ( ! k ) ) {
/*
* signal to bch2_btree_iter_peek_slot ( ) that we ' re currently at
* a hole
*/
2018-11-01 15:10:01 -04:00
u - > type = KEY_TYPE_deleted ;
2017-03-16 22:18:50 -08:00
return bkey_s_c_null ;
}
ret = bkey_disassemble ( l - > b , k , u ) ;
2020-11-02 18:20:44 -05:00
if ( bch2_debug_check_bkeys )
2019-03-25 17:06:42 -04:00
bch2_bkey_debugcheck ( iter - > trans - > c , l - > b , ret ) ;
2017-03-16 22:18:50 -08:00
return ret ;
}
/* peek_all() doesn't skip deleted keys */
2021-03-21 19:22:58 -04:00
static inline struct bkey_s_c btree_iter_level_peek_all ( struct btree_iter * iter ,
struct btree_iter_level * l ,
struct bkey * u )
2017-03-16 22:18:50 -08:00
{
return __btree_iter_unpack ( iter , l , u ,
bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ) ;
}
2021-03-21 19:22:58 -04:00
static inline struct bkey_s_c btree_iter_level_peek ( struct btree_iter * iter ,
struct btree_iter_level * l )
2017-03-16 22:18:50 -08:00
{
2021-03-21 19:32:01 -04:00
struct bkey_s_c k = __btree_iter_unpack ( iter , l , & iter - > k ,
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ) ;
2021-03-21 19:32:01 -04:00
iter - > real_pos = k . k ? k . k - > p : l - > b - > key . k . p ;
return k ;
2017-03-16 22:18:50 -08:00
}
2021-03-21 19:22:58 -04:00
static inline struct bkey_s_c btree_iter_level_prev ( struct btree_iter * iter ,
struct btree_iter_level * l )
2019-09-07 17:17:21 -04:00
{
2021-03-21 19:32:01 -04:00
struct bkey_s_c k = __btree_iter_unpack ( iter , l , & iter - > k ,
2019-09-07 17:17:21 -04:00
bch2_btree_node_iter_prev ( & l - > iter , l - > b ) ) ;
2021-03-21 19:32:01 -04:00
iter - > real_pos = k . k ? k . k - > p : l - > b - > data - > min_key ;
return k ;
2019-09-07 17:17:21 -04:00
}
2018-08-21 16:30:14 -04:00
static inline bool btree_iter_advance_to_pos ( struct btree_iter * iter ,
struct btree_iter_level * l ,
int max_advance )
2017-03-16 22:18:50 -08:00
{
2018-08-21 16:30:14 -04:00
struct bkey_packed * k ;
int nr_advanced = 0 ;
while ( ( k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ) & &
2021-02-11 21:57:32 -05:00
bkey_iter_pos_cmp ( l - > b , k , & iter - > real_pos ) < 0 ) {
2018-08-21 16:30:14 -04:00
if ( max_advance > 0 & & nr_advanced > = max_advance )
return false ;
bch2_btree_node_iter_advance ( & l - > iter , l - > b ) ;
nr_advanced + + ;
}
return true ;
2017-03-16 22:18:50 -08:00
}
/*
* Verify that iterator for parent node points to child node :
*/
static void btree_iter_verify_new_node ( struct btree_iter * iter , struct btree * b )
{
struct btree_iter_level * l ;
unsigned plevel ;
bool parent_locked ;
struct bkey_packed * k ;
if ( ! IS_ENABLED ( CONFIG_BCACHEFS_DEBUG ) )
return ;
2020-06-06 12:28:01 -04:00
plevel = b - > c . level + 1 ;
2017-03-16 22:18:50 -08:00
if ( ! btree_iter_node ( iter , plevel ) )
return ;
parent_locked = btree_node_locked ( iter , plevel ) ;
if ( ! bch2_btree_node_relock ( iter , plevel ) )
return ;
l = & iter - > l [ plevel ] ;
k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
if ( ! k | |
bkey_deleted ( k ) | |
bkey_cmp_left_packed ( l - > b , k , & b - > key . k . p ) ) {
2021-03-04 15:20:22 -05:00
char buf1 [ 100 ] ;
char buf2 [ 100 ] ;
char buf3 [ 100 ] ;
char buf4 [ 100 ] ;
2017-03-16 22:18:50 -08:00
struct bkey uk = bkey_unpack_key ( b , k ) ;
2020-12-06 16:30:02 -05:00
bch2_dump_btree_node ( iter - > trans - > c , l - > b ) ;
2021-03-04 15:20:22 -05:00
bch2_bpos_to_text ( & PBUF ( buf1 ) , iter - > real_pos ) ;
bch2_bkey_to_text ( & PBUF ( buf2 ) , & uk ) ;
bch2_bpos_to_text ( & PBUF ( buf3 ) , b - > data - > min_key ) ;
bch2_bpos_to_text ( & PBUF ( buf3 ) , b - > data - > max_key ) ;
2020-12-06 16:30:02 -05:00
panic ( " parent iter doesn't point to new node: \n "
2021-03-04 15:20:22 -05:00
" iter pos %s %s \n "
2020-12-06 16:30:02 -05:00
" iter key %s \n "
2021-03-04 15:20:22 -05:00
" new node %s-%s \n " ,
bch2_btree_ids [ iter - > btree_id ] , buf1 ,
buf2 , buf3 , buf4 ) ;
2017-03-16 22:18:50 -08:00
}
if ( ! parent_locked )
2020-06-06 12:28:01 -04:00
btree_node_unlock ( iter , b - > c . level + 1 ) ;
2017-03-16 22:18:50 -08:00
}
static inline void __btree_iter_init ( struct btree_iter * iter ,
2018-08-21 16:30:14 -04:00
unsigned level )
2017-03-16 22:18:50 -08:00
{
2018-08-21 16:30:14 -04:00
struct btree_iter_level * l = & iter - > l [ level ] ;
2021-02-11 21:57:32 -05:00
bch2_btree_node_iter_init ( & l - > iter , l - > b , & iter - > real_pos ) ;
2017-03-16 22:18:50 -08:00
2020-12-11 12:02:48 -05:00
/*
* Iterators to interior nodes should always be pointed at the first non
* whiteout :
*/
if ( level )
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_PEEK ) ;
}
static inline void btree_iter_node_set ( struct btree_iter * iter ,
struct btree * b )
{
2019-03-07 19:46:10 -05:00
BUG_ON ( btree_iter_type ( iter ) = = BTREE_ITER_CACHED ) ;
2017-03-16 22:18:50 -08:00
btree_iter_verify_new_node ( iter , b ) ;
EBUG_ON ( ! btree_iter_pos_in_node ( iter , b ) ) ;
2020-06-06 12:28:01 -04:00
EBUG_ON ( b - > c . lock . state . seq & 1 ) ;
2017-03-16 22:18:50 -08:00
2020-06-06 12:28:01 -04:00
iter - > l [ b - > c . level ] . lock_seq = b - > c . lock . state . seq ;
iter - > l [ b - > c . level ] . b = b ;
__btree_iter_init ( iter , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
}
/*
* A btree node is being replaced - update the iterator to point to the new
* node :
*/
void bch2_btree_iter_node_replace ( struct btree_iter * iter , struct btree * b )
{
enum btree_node_locked_type t ;
struct btree_iter * linked ;
2019-03-27 22:03:30 -04:00
trans_for_each_iter ( iter - > trans , linked )
2019-03-07 19:46:10 -05:00
if ( btree_iter_type ( linked ) ! = BTREE_ITER_CACHED & &
btree_iter_pos_in_node ( linked , b ) ) {
2017-03-16 22:18:50 -08:00
/*
* bch2_btree_iter_node_drop ( ) has already been called -
* the old node we ' re replacing has already been
* unlocked and the pointer invalidated
*/
2020-06-06 12:28:01 -04:00
BUG_ON ( btree_node_locked ( linked , b - > c . level ) ) ;
2017-03-16 22:18:50 -08:00
2020-06-06 12:28:01 -04:00
t = btree_lock_want ( linked , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
if ( t ! = BTREE_NODE_UNLOCKED ) {
2020-06-06 12:28:01 -04:00
six_lock_increment ( & b - > c . lock , ( enum six_lock_type ) t ) ;
mark_btree_node_locked ( linked , b - > c . level , ( enum six_lock_type ) t ) ;
2017-03-16 22:18:50 -08:00
}
btree_iter_node_set ( linked , b ) ;
}
}
void bch2_btree_iter_node_drop ( struct btree_iter * iter , struct btree * b )
{
struct btree_iter * linked ;
2020-06-06 12:28:01 -04:00
unsigned level = b - > c . level ;
2017-03-16 22:18:50 -08:00
2019-03-27 22:03:30 -04:00
trans_for_each_iter ( iter - > trans , linked )
2017-03-16 22:18:50 -08:00
if ( linked - > l [ level ] . b = = b ) {
2021-03-31 16:43:50 -04:00
btree_node_unlock ( linked , level ) ;
2019-05-14 14:08:23 -04:00
linked - > l [ level ] . b = BTREE_ITER_NO_NODE_DROP ;
2017-03-16 22:18:50 -08:00
}
}
/*
* A btree node has been modified in such a way as to invalidate iterators - fix
* them :
*/
void bch2_btree_iter_reinit_node ( struct btree_iter * iter , struct btree * b )
{
struct btree_iter * linked ;
2019-03-27 22:03:30 -04:00
trans_for_each_iter_with_node ( iter - > trans , b , linked )
2020-06-06 12:28:01 -04:00
__btree_iter_init ( linked , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
}
2020-06-12 22:29:48 -04:00
/*
 * Lock callback used by btree_iter_lock_root(): @p points at the btree root
 * pointer. Returns 0 while the node owning @lock is still that root, -1
 * otherwise.
 */
static int lock_root_check_fn(struct six_lock *lock, void *p)
{
	struct btree **rootp = p;
	struct btree *node = container_of(lock, struct btree, c.lock);

	if (node != *rootp)
		return -1;

	return 0;
}
2017-03-16 22:18:50 -08:00
static inline int btree_iter_lock_root ( struct btree_iter * iter ,
2020-10-28 14:17:46 -04:00
unsigned depth_want ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2019-03-25 17:06:42 -04:00
struct bch_fs * c = iter - > trans - > c ;
2020-06-12 22:29:48 -04:00
struct btree * b , * * rootp = & c - > btree_roots [ iter - > btree_id ] . b ;
2017-03-16 22:18:50 -08:00
enum six_lock_type lock_type ;
unsigned i ;
EBUG_ON ( iter - > nodes_locked ) ;
while ( 1 ) {
2020-06-12 22:29:48 -04:00
b = READ_ONCE ( * rootp ) ;
2020-06-06 12:28:01 -04:00
iter - > level = READ_ONCE ( b - > c . level ) ;
2017-03-16 22:18:50 -08:00
if ( unlikely ( iter - > level < depth_want ) ) {
/*
* the root is at a lower depth than the depth we want :
* got to the end of the btree , or we ' re walking nodes
* greater than some depth and there are no nodes > =
* that depth
*/
iter - > level = depth_want ;
2019-05-14 14:08:23 -04:00
for ( i = iter - > level ; i < BTREE_MAX_DEPTH ; i + + )
iter - > l [ i ] . b = NULL ;
2018-11-21 02:59:07 -05:00
return 1 ;
2017-03-16 22:18:50 -08:00
}
lock_type = __btree_lock_want ( iter , iter - > level ) ;
if ( unlikely ( ! btree_node_lock ( b , POS_MAX , iter - > level ,
2020-06-12 22:29:48 -04:00
iter , lock_type ,
2020-10-28 14:17:46 -04:00
lock_root_check_fn , rootp ,
trace_ip ) ) )
2017-03-16 22:18:50 -08:00
return - EINTR ;
2020-06-12 22:29:48 -04:00
if ( likely ( b = = READ_ONCE ( * rootp ) & &
2020-06-06 12:28:01 -04:00
b - > c . level = = iter - > level & &
2017-03-16 22:18:50 -08:00
! race_fault ( ) ) ) {
for ( i = 0 ; i < iter - > level ; i + + )
2019-05-14 14:08:23 -04:00
iter - > l [ i ] . b = BTREE_ITER_NO_NODE_LOCK_ROOT ;
2017-03-16 22:18:50 -08:00
iter - > l [ iter - > level ] . b = b ;
2019-05-14 14:08:23 -04:00
for ( i = iter - > level + 1 ; i < BTREE_MAX_DEPTH ; i + + )
iter - > l [ i ] . b = NULL ;
2017-03-16 22:18:50 -08:00
mark_btree_node_locked ( iter , iter - > level , lock_type ) ;
btree_iter_node_set ( iter , b ) ;
return 0 ;
}
2020-06-06 12:28:01 -04:00
six_unlock_type ( & b - > c . lock , lock_type ) ;
2017-03-16 22:18:50 -08:00
}
}
noinline
static void btree_iter_prefetch ( struct btree_iter * iter )
{
2019-03-25 17:06:42 -04:00
struct bch_fs * c = iter - > trans - > c ;
2017-03-16 22:18:50 -08:00
struct btree_iter_level * l = & iter - > l [ iter - > level ] ;
struct btree_node_iter node_iter = l - > iter ;
struct bkey_packed * k ;
2020-12-17 15:08:58 -05:00
struct bkey_buf tmp ;
2019-03-25 17:06:42 -04:00
unsigned nr = test_bit ( BCH_FS_STARTED , & c - > flags )
2017-03-16 22:18:50 -08:00
? ( iter - > level > 1 ? 0 : 2 )
: ( iter - > level > 1 ? 1 : 16 ) ;
bool was_locked = btree_node_locked ( iter , iter - > level ) ;
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_init ( & tmp ) ;
2017-03-16 22:18:50 -08:00
while ( nr ) {
if ( ! bch2_btree_node_relock ( iter , iter - > level ) )
2020-12-17 15:08:58 -05:00
break ;
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_advance ( & node_iter , l - > b ) ;
k = bch2_btree_node_iter_peek ( & node_iter , l - > b ) ;
if ( ! k )
break ;
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_unpack ( & tmp , c , l - > b , k ) ;
2021-01-11 16:11:02 -05:00
bch2_btree_node_prefetch ( c , iter , tmp . k , iter - > btree_id ,
iter - > level - 1 ) ;
2017-03-16 22:18:50 -08:00
}
if ( ! was_locked )
btree_node_unlock ( iter , iter - > level ) ;
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_exit ( & tmp , c ) ;
2017-03-16 22:18:50 -08:00
}
2020-02-24 15:25:00 -05:00
static noinline void btree_node_mem_ptr_set ( struct btree_iter * iter ,
unsigned plevel , struct btree * b )
{
struct btree_iter_level * l = & iter - > l [ plevel ] ;
bool locked = btree_node_locked ( iter , plevel ) ;
struct bkey_packed * k ;
struct bch_btree_ptr_v2 * bp ;
if ( ! bch2_btree_node_relock ( iter , plevel ) )
return ;
k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
BUG_ON ( k - > type ! = KEY_TYPE_btree_ptr_v2 ) ;
bp = ( void * ) bkeyp_val ( & l - > b - > format , k ) ;
bp - > mem_ptr = ( unsigned long ) b ;
if ( ! locked )
btree_node_unlock ( iter , plevel ) ;
}
2020-10-28 14:17:46 -04:00
static __always_inline int btree_iter_down ( struct btree_iter * iter ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2019-03-25 17:06:42 -04:00
struct bch_fs * c = iter - > trans - > c ;
2017-03-16 22:18:50 -08:00
struct btree_iter_level * l = & iter - > l [ iter - > level ] ;
struct btree * b ;
unsigned level = iter - > level - 1 ;
enum six_lock_type lock_type = __btree_lock_want ( iter , level ) ;
2020-12-17 15:08:58 -05:00
struct bkey_buf tmp ;
int ret ;
2017-03-16 22:18:50 -08:00
2019-10-23 19:50:01 -04:00
EBUG_ON ( ! btree_node_locked ( iter , iter - > level ) ) ;
2017-03-16 22:18:50 -08:00
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_init ( & tmp ) ;
bch2_bkey_buf_unpack ( & tmp , c , l - > b ,
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ) ;
2020-12-17 15:08:58 -05:00
b = bch2_btree_node_get ( c , iter , tmp . k , level , lock_type , trace_ip ) ;
ret = PTR_ERR_OR_ZERO ( b ) ;
if ( unlikely ( ret ) )
goto err ;
2017-03-16 22:18:50 -08:00
mark_btree_node_locked ( iter , level , lock_type ) ;
btree_iter_node_set ( iter , b ) ;
2020-12-17 15:08:58 -05:00
if ( tmp . k - > k . type = = KEY_TYPE_btree_ptr_v2 & &
unlikely ( b ! = btree_node_mem_ptr ( tmp . k ) ) )
2020-02-24 15:25:00 -05:00
btree_node_mem_ptr_set ( iter , level + 1 , b ) ;
2017-03-16 22:18:50 -08:00
if ( iter - > flags & BTREE_ITER_PREFETCH )
btree_iter_prefetch ( iter ) ;
iter - > level = level ;
2020-12-17 15:08:58 -05:00
err :
bch2_bkey_buf_exit ( & tmp , c ) ;
return ret ;
2017-03-16 22:18:50 -08:00
}
2020-10-28 14:17:46 -04:00
static int btree_iter_traverse_one ( struct btree_iter * , unsigned long ) ;
2017-03-16 22:18:50 -08:00
2019-03-07 19:46:10 -05:00
static int __btree_iter_traverse_all ( struct btree_trans * trans , int ret )
2017-03-16 22:18:50 -08:00
{
2019-03-27 22:54:42 -04:00
struct bch_fs * c = trans - > c ;
2019-05-24 14:33:16 -04:00
struct btree_iter * iter ;
2019-03-27 22:54:42 -04:00
u8 sorted [ BTREE_ITER_MAX ] ;
2021-04-14 13:26:15 -04:00
int i , nr_sorted = 0 ;
bool relock_fail ;
2019-03-27 22:54:42 -04:00
2019-03-07 19:46:10 -05:00
if ( trans - > in_traverse_all )
return - EINTR ;
trans - > in_traverse_all = true ;
retry_all :
nr_sorted = 0 ;
2021-04-14 13:26:15 -04:00
relock_fail = false ;
2019-03-07 19:46:10 -05:00
2021-04-14 13:26:15 -04:00
trans_for_each_iter ( trans , iter ) {
if ( ! bch2_btree_iter_relock ( iter , true ) )
relock_fail = true ;
2019-03-07 19:46:10 -05:00
sorted [ nr_sorted + + ] = iter - > idx ;
2021-04-14 13:26:15 -04:00
}
if ( ! relock_fail ) {
trans - > in_traverse_all = false ;
return 0 ;
}
2019-03-27 22:54:42 -04:00
# define btree_iter_cmp_by_idx(_l, _r) \
2020-10-26 14:45:20 -04:00
btree_iter_lock_cmp ( & trans - > iters [ _l ] , & trans - > iters [ _r ] )
2019-03-27 22:54:42 -04:00
bubble_sort ( sorted , nr_sorted , btree_iter_cmp_by_idx ) ;
# undef btree_iter_cmp_by_idx
2021-04-14 13:26:15 -04:00
for ( i = nr_sorted - 2 ; i > = 0 ; - - i ) {
struct btree_iter * iter1 = trans - > iters + sorted [ i ] ;
struct btree_iter * iter2 = trans - > iters + sorted [ i + 1 ] ;
if ( iter1 - > btree_id = = iter2 - > btree_id & &
iter1 - > locks_want < iter2 - > locks_want )
__bch2_btree_iter_upgrade ( iter1 , iter2 - > locks_want ) ;
else if ( ! iter1 - > locks_want & & iter2 - > locks_want )
__bch2_btree_iter_upgrade ( iter1 , 1 ) ;
}
2019-05-15 09:47:40 -04:00
bch2_trans_unlock ( trans ) ;
2020-10-28 14:17:46 -04:00
cond_resched ( ) ;
2017-03-16 22:18:50 -08:00
2019-03-28 00:07:24 -04:00
if ( unlikely ( ret = = - ENOMEM ) ) {
2017-03-16 22:18:50 -08:00
struct closure cl ;
closure_init_stack ( & cl ) ;
do {
ret = bch2_btree_cache_cannibalize_lock ( c , & cl ) ;
closure_sync ( & cl ) ;
} while ( ret ) ;
}
2019-03-28 00:07:24 -04:00
if ( unlikely ( ret = = - EIO ) ) {
2019-04-04 21:28:16 -04:00
trans - > error = true ;
2019-03-28 00:07:24 -04:00
goto out ;
}
BUG_ON ( ret & & ret ! = - EINTR ) ;
2017-03-16 22:18:50 -08:00
/* Now, redo traversals in correct order: */
2019-03-27 22:54:42 -04:00
for ( i = 0 ; i < nr_sorted ; i + + ) {
2019-03-07 19:46:10 -05:00
unsigned idx = sorted [ i ] ;
/*
* sucessfully traversing one iterator can cause another to be
* unlinked , in btree_key_cache_fill ( )
*/
if ( ! ( trans - > iters_linked & ( 1ULL < < idx ) ) )
continue ;
2017-03-16 22:18:50 -08:00
2020-10-28 14:17:46 -04:00
ret = btree_iter_traverse_one ( & trans - > iters [ idx ] , _THIS_IP_ ) ;
2019-03-27 22:54:42 -04:00
if ( ret )
goto retry_all ;
}
2017-03-16 22:18:50 -08:00
2020-03-18 13:40:28 -04:00
if ( hweight64 ( trans - > iters_live ) > 1 )
ret = - EINTR ;
else
trans_for_each_iter ( trans , iter )
if ( iter - > flags & BTREE_ITER_KEEP_UNTIL_COMMIT ) {
ret = - EINTR ;
break ;
}
2017-03-16 22:18:50 -08:00
out :
bch2_btree_cache_cannibalize_unlock ( c ) ;
2019-03-07 19:46:10 -05:00
trans - > in_traverse_all = false ;
2021-04-14 13:26:15 -04:00
trace_trans_traverse_all ( trans - > ip ) ;
2017-03-16 22:18:50 -08:00
return ret ;
2019-03-28 00:07:24 -04:00
}
2017-03-16 22:18:50 -08:00
2019-03-28 00:07:24 -04:00
/*
 * Re-traverse all iterators in @trans with no triggering error; see
 * __btree_iter_traverse_all() for the return value.
 */
int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{
	int ret = __btree_iter_traverse_all(trans, 0);

	return ret;
}
2019-09-07 19:19:57 -04:00
static inline bool btree_iter_good_node ( struct btree_iter * iter ,
unsigned l , int check_pos )
{
if ( ! is_btree_node ( iter , l ) | |
! bch2_btree_node_relock ( iter , l ) )
return false ;
2021-03-21 17:09:55 -04:00
if ( check_pos < 0 & & btree_iter_pos_before_node ( iter , iter - > l [ l ] . b ) )
2019-09-07 19:19:57 -04:00
return false ;
2021-03-21 17:09:55 -04:00
if ( check_pos > 0 & & btree_iter_pos_after_node ( iter , iter - > l [ l ] . b ) )
2019-09-07 19:19:57 -04:00
return false ;
return true ;
}
static inline unsigned btree_iter_up_until_good_node ( struct btree_iter * iter ,
int check_pos )
2017-03-16 22:18:50 -08:00
{
unsigned l = iter - > level ;
while ( btree_iter_node ( iter , l ) & &
2019-09-07 19:19:57 -04:00
! btree_iter_good_node ( iter , l , check_pos ) ) {
2017-03-16 22:18:50 -08:00
btree_node_unlock ( iter , l ) ;
2019-05-14 14:08:23 -04:00
iter - > l [ l ] . b = BTREE_ITER_NO_NODE_UP ;
2017-03-16 22:18:50 -08:00
l + + ;
}
return l ;
}
/*
* This is the main state machine for walking down the btree - walks down to a
* specified depth
*
* Returns 0 on success , - EIO on error ( error reading in a btree node ) .
*
* On error , caller ( peek_node ( ) / peek_key ( ) ) must return NULL ; the error is
2019-05-10 16:09:17 -04:00
* stashed in the iterator and returned from bch2_trans_exit ( ) .
2017-03-16 22:18:50 -08:00
*/
2020-10-28 14:17:46 -04:00
static int btree_iter_traverse_one ( struct btree_iter * iter ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
unsigned depth_want = iter - > level ;
2019-11-04 15:56:04 -05:00
/*
* if we need interior nodes locked , call btree_iter_relock ( ) to make
* sure we walk back up enough that we lock them :
*/
if ( iter - > uptodate = = BTREE_ITER_NEED_RELOCK | |
iter - > locks_want > 1 )
bch2_btree_iter_relock ( iter , false ) ;
2019-03-07 19:46:10 -05:00
if ( btree_iter_type ( iter ) = = BTREE_ITER_CACHED )
return bch2_btree_iter_traverse_cached ( iter ) ;
2019-11-04 15:56:04 -05:00
if ( iter - > uptodate < BTREE_ITER_NEED_RELOCK )
2017-03-16 22:18:50 -08:00
return 0 ;
2019-03-07 19:46:10 -05:00
if ( unlikely ( iter - > level > = BTREE_MAX_DEPTH ) )
return 0 ;
2019-09-07 19:19:57 -04:00
iter - > level = btree_iter_up_until_good_node ( iter , 0 ) ;
2017-03-16 22:18:50 -08:00
/*
* Note : iter - > nodes [ iter - > level ] may be temporarily NULL here - that
* would indicate to other code that we got to the end of the btree ,
* here it indicates that relocking the root failed - it ' s critical that
* btree_iter_lock_root ( ) comes next and that it can ' t fail
*/
while ( iter - > level > depth_want ) {
int ret = btree_iter_node ( iter , iter - > level )
2020-10-28 14:17:46 -04:00
? btree_iter_down ( iter , trace_ip )
: btree_iter_lock_root ( iter , depth_want , trace_ip ) ;
2017-03-16 22:18:50 -08:00
if ( unlikely ( ret ) ) {
2018-11-21 02:59:07 -05:00
if ( ret = = 1 )
return 0 ;
2017-03-16 22:18:50 -08:00
iter - > level = depth_want ;
2019-03-07 19:46:10 -05:00
if ( ret = = - EIO ) {
iter - > flags | = BTREE_ITER_ERROR ;
iter - > l [ iter - > level ] . b =
BTREE_ITER_NO_NODE_ERROR ;
} else {
iter - > l [ iter - > level ] . b =
BTREE_ITER_NO_NODE_DOWN ;
}
2017-03-16 22:18:50 -08:00
return ret ;
}
}
iter - > uptodate = BTREE_ITER_NEED_PEEK ;
2016-07-21 19:05:06 -08:00
2020-02-18 16:17:55 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
return 0 ;
}
2021-03-23 21:22:50 -04:00
/*
 * Traverse a single iterator; if the transaction resched check or the
 * traversal fails, fall back to re-traversing all iterators in the
 * transaction, passing the error along.
 */
static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	int ret = bch2_trans_cond_resched(trans);

	if (!ret)
		ret = btree_iter_traverse_one(iter, _RET_IP_);

	if (unlikely(ret))
		ret = __btree_iter_traverse_all(trans, ret);

	return ret;
}
2021-03-23 21:22:50 -04:00
/*
 * Note:
 * bch2_btree_iter_traverse() is for external users, btree_iter_traverse() is
 * for internal btree iterator users
 *
 * bch2_btree_iter_traverse sets iter->real_pos to iter->pos,
 * btree_iter_traverse() does not:
 *
 * Only does work when iter->uptodate is at or beyond BTREE_ITER_NEED_RELOCK;
 * otherwise returns 0 immediately.
 */
static inline int __must_check
btree_iter_traverse(struct btree_iter *iter)
{
	if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
		return 0;

	return __bch2_btree_iter_traverse(iter);
}
/*
 * External entry point: set the iterator's search position from
 * btree_iter_search_key(), then traverse if needed.
 */
int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
	struct bpos search = btree_iter_search_key(iter);

	btree_iter_set_search_pos(iter, search);

	return btree_iter_traverse(iter);
}
2017-03-16 22:18:50 -08:00
/* Iterate across nodes (leaf and interior nodes) */
struct btree * bch2_btree_iter_peek_node ( struct btree_iter * iter )
{
struct btree * b ;
int ret ;
2020-06-15 19:53:46 -04:00
EBUG_ON ( btree_iter_type ( iter ) ! = BTREE_ITER_NODES ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2017-03-16 22:18:50 -08:00
if ( ret )
return NULL ;
b = btree_iter_node ( iter , iter - > level ) ;
if ( ! b )
return NULL ;
2021-03-04 16:20:16 -05:00
BUG_ON ( bpos_cmp ( b - > key . k . p , iter - > pos ) < 0 ) ;
2017-03-16 22:18:50 -08:00
2021-02-11 21:57:32 -05:00
iter - > pos = iter - > real_pos = b - > key . k . p ;
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
return b ;
}
2020-02-18 16:17:55 -05:00
struct btree * bch2_btree_iter_next_node ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
struct btree * b ;
int ret ;
2020-06-15 19:53:46 -04:00
EBUG_ON ( btree_iter_type ( iter ) ! = BTREE_ITER_NODES ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
/* already got to end? */
if ( ! btree_iter_node ( iter , iter - > level ) )
return NULL ;
2019-04-04 21:53:12 -04:00
bch2_trans_cond_resched ( iter - > trans ) ;
2021-03-21 18:09:02 -04:00
btree_node_unlock ( iter , iter - > level ) ;
iter - > l [ iter - > level ] . b = BTREE_ITER_NO_NODE_UP ;
iter - > level + + ;
2017-03-16 22:18:50 -08:00
2021-03-21 18:09:02 -04:00
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_TRAVERSE ) ;
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2017-03-16 22:18:50 -08:00
if ( ret )
return NULL ;
/* got to end? */
b = btree_iter_node ( iter , iter - > level ) ;
if ( ! b )
return NULL ;
2021-03-04 16:20:16 -05:00
if ( bpos_cmp ( iter - > pos , b - > key . k . p ) < 0 ) {
2017-03-16 22:18:50 -08:00
/*
* Haven ' t gotten to the end of the parent node : go back down to
* the next child node
*/
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
btree_iter_set_search_pos ( iter , bpos_successor ( iter - > pos ) ) ;
2017-03-16 22:18:50 -08:00
2021-03-21 18:09:02 -04:00
/* Unlock to avoid screwing up our lock invariants: */
btree_node_unlock ( iter , iter - > level ) ;
2017-03-16 22:18:50 -08:00
2021-03-21 18:09:02 -04:00
iter - > level = iter - > min_depth ;
2017-03-16 22:18:50 -08:00
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_TRAVERSE ) ;
2021-03-21 18:09:02 -04:00
bch2_btree_iter_verify ( iter ) ;
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2017-03-16 22:18:50 -08:00
if ( ret )
return NULL ;
b = iter - > l [ iter - > level ] . b ;
}
2021-02-11 21:57:32 -05:00
iter - > pos = iter - > real_pos = b - > key . k . p ;
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
return b ;
}
/* Iterate across keys (in leaf nodes only) */
2021-03-16 01:52:55 -04:00
static void btree_iter_set_search_pos ( struct btree_iter * iter , struct bpos new_pos )
2017-03-16 22:18:50 -08:00
{
2021-03-04 16:20:16 -05:00
int cmp = bpos_cmp ( new_pos , iter - > real_pos ) ;
2020-02-18 16:17:55 -05:00
unsigned l = iter - > level ;
if ( ! cmp )
goto out ;
2021-03-16 01:52:55 -04:00
iter - > real_pos = new_pos ;
2019-03-07 19:46:10 -05:00
if ( unlikely ( btree_iter_type ( iter ) = = BTREE_ITER_CACHED ) ) {
btree_node_unlock ( iter , 0 ) ;
iter - > l [ 0 ] . b = BTREE_ITER_NO_NODE_UP ;
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_TRAVERSE ) ;
return ;
}
2020-02-18 16:17:55 -05:00
l = btree_iter_up_until_good_node ( iter , cmp ) ;
2017-03-16 22:18:50 -08:00
2019-09-07 19:19:57 -04:00
if ( btree_iter_node ( iter , l ) ) {
2017-03-16 22:18:50 -08:00
/*
* We might have to skip over many keys , or just a few : try
* advancing the node iterator , and if we have to skip over too
* many keys just reinit it ( or if we ' re rewinding , since that
* is expensive ) .
*/
2018-08-21 16:30:14 -04:00
if ( cmp < 0 | |
2019-09-07 19:19:57 -04:00
! btree_iter_advance_to_pos ( iter , & iter - > l [ l ] , 8 ) )
__btree_iter_init ( iter , l ) ;
2017-03-16 22:18:50 -08:00
/* Don't leave it locked if we're not supposed to: */
2019-09-07 19:19:57 -04:00
if ( btree_lock_want ( iter , l ) = = BTREE_NODE_UNLOCKED )
btree_node_unlock ( iter , l ) ;
2017-03-16 22:18:50 -08:00
}
2020-02-18 16:17:55 -05:00
out :
if ( l ! = iter - > level )
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_TRAVERSE ) ;
else
btree_iter_set_dirty ( iter , BTREE_ITER_NEED_PEEK ) ;
2020-01-31 13:23:18 -05:00
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
}
2021-03-21 16:55:25 -04:00
inline bool bch2_btree_iter_advance ( struct btree_iter * iter )
2021-02-07 21:28:58 -05:00
{
2021-02-07 21:11:49 -05:00
struct bpos pos = iter - > k . p ;
2021-03-04 16:20:16 -05:00
bool ret = bpos_cmp ( pos , POS_MAX ) ! = 0 ;
2021-02-07 21:11:49 -05:00
2021-02-11 21:57:32 -05:00
if ( ret & & ! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
pos = bkey_successor ( iter , pos ) ;
2021-02-07 21:11:49 -05:00
bch2_btree_iter_set_pos ( iter , pos ) ;
2021-02-11 21:57:32 -05:00
return ret ;
2021-02-07 21:11:49 -05:00
}
2021-03-21 16:55:25 -04:00
inline bool bch2_btree_iter_rewind ( struct btree_iter * iter )
2021-02-07 21:11:49 -05:00
{
struct bpos pos = bkey_start_pos ( & iter - > k ) ;
2021-03-04 16:20:16 -05:00
bool ret = bpos_cmp ( pos , POS_MIN ) ! = 0 ;
2021-02-07 21:11:49 -05:00
2021-02-11 21:57:32 -05:00
if ( ret & & ! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
pos = bkey_predecessor ( iter , pos ) ;
2021-02-07 21:11:49 -05:00
bch2_btree_iter_set_pos ( iter , pos ) ;
2021-02-11 21:57:32 -05:00
return ret ;
2021-02-07 21:28:58 -05:00
}
2019-09-07 19:19:57 -04:00
static inline bool btree_iter_set_pos_to_next_leaf ( struct btree_iter * iter )
{
2021-02-07 18:52:13 -05:00
struct bpos next_pos = iter - > l [ 0 ] . b - > key . k . p ;
2021-03-04 16:20:16 -05:00
bool ret = bpos_cmp ( next_pos , POS_MAX ) ! = 0 ;
2019-09-07 19:19:57 -04:00
2021-02-11 21:57:32 -05:00
/*
* Typically , we don ' t want to modify iter - > pos here , since that
* indicates where we searched from - unless we got to the end of the
* btree , in that case we want iter - > pos to reflect that :
*/
if ( ret )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
btree_iter_set_search_pos ( iter , bpos_successor ( next_pos ) ) ;
2021-02-11 21:57:32 -05:00
else
bch2_btree_iter_set_pos ( iter , POS_MAX ) ;
2019-09-07 19:19:57 -04:00
2020-02-18 16:17:55 -05:00
return ret ;
2019-09-07 19:19:57 -04:00
}
static inline bool btree_iter_set_pos_to_prev_leaf ( struct btree_iter * iter )
{
2021-02-07 18:52:13 -05:00
struct bpos next_pos = iter - > l [ 0 ] . b - > data - > min_key ;
2021-03-04 16:20:16 -05:00
bool ret = bpos_cmp ( next_pos , POS_MIN ) ! = 0 ;
2019-09-07 19:19:57 -04:00
2021-02-11 21:57:32 -05:00
if ( ret )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
btree_iter_set_search_pos ( iter , bpos_predecessor ( next_pos ) ) ;
2021-02-11 21:57:32 -05:00
else
bch2_btree_iter_set_pos ( iter , POS_MIN ) ;
2019-09-07 19:19:57 -04:00
2020-02-18 16:17:55 -05:00
return ret ;
2019-09-07 19:19:57 -04:00
}
2021-03-21 19:43:31 -04:00
static struct bkey_i * btree_trans_peek_updates ( struct btree_trans * trans ,
enum btree_id btree_id , struct bpos pos )
{
struct btree_insert_entry * i ;
trans_for_each_update2 ( trans , i )
if ( ( cmp_int ( btree_id , i - > iter - > btree_id ) ? :
bkey_cmp ( pos , i - > k - > k . p ) ) < = 0 ) {
if ( btree_id = = i - > iter - > btree_id )
return i - > k ;
break ;
}
return NULL ;
}
/*
 * Common implementation of bch2_btree_iter_peek() and
 * bch2_btree_iter_peek_with_updates().
 *
 * Starting from btree_iter_search_key(iter), returns the first non-deleted
 * key found, walking on to following leaves as needed. When @with_updates
 * is set, a pending update from the transaction (see
 * btree_trans_peek_updates()) whose position is at or before
 * iter->real_pos is returned in place of the key read from the btree.
 *
 * Returns an error key if traversal fails, or bkey_s_c_null at the end of
 * the btree.
 */
static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool with_updates)
{
	struct bpos search_key = btree_iter_search_key(iter);
	struct bkey_i *pending;
	struct bkey_s_c k;
	int err;

	EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
	bch2_btree_iter_verify(iter);
	bch2_btree_iter_verify_entry_exit(iter);
retry:
	pending = with_updates
		? btree_trans_peek_updates(iter->trans, iter->btree_id, search_key)
		: NULL;
	btree_iter_set_search_pos(iter, search_key);

	for (;;) {
		err = btree_iter_traverse(iter);
		if (unlikely(err))
			return bkey_s_c_err(err);

		k = btree_iter_level_peek(iter, &iter->l[0]);

		if (pending &&
		    bpos_cmp(pending->k.p, iter->real_pos) <= 0)
			k = bkey_i_to_s_c(pending);

		if (unlikely(!k.k)) {
			/* Nothing left in this leaf: move on to the next one */
			if (!btree_iter_set_pos_to_next_leaf(iter))
				return bkey_s_c_null;
			continue;
		}

		if (!bkey_deleted(k.k))
			break;

		/* Skip the deleted key and restart the search after it */
		search_key = bkey_successor(iter, k.k->p);
		goto retry;
	}

	/*
	 * iter->pos should be monotonically increasing, and always be equal to
	 * the key we just returned - except extents can straddle iter->pos:
	 */
	if (iter->flags & BTREE_ITER_IS_EXTENTS) {
		if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
			iter->pos = bkey_start_pos(k.k);
	} else {
		iter->pos = k.k->p;
	}

	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);
	return k;
}
2021-03-21 19:43:31 -04:00
/**
* bch2_btree_iter_peek : returns first key greater than or equal to iterator ' s
* current position
*/
struct bkey_s_c bch2_btree_iter_peek ( struct btree_iter * iter )
{
return __btree_iter_peek ( iter , false ) ;
}
2019-09-07 19:19:57 -04:00
/**
* bch2_btree_iter_next : returns first key greater than iterator ' s current
* position
*/
2017-03-16 22:18:50 -08:00
struct bkey_s_c bch2_btree_iter_next ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_advance ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2019-09-07 19:19:57 -04:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek ( iter ) ;
2017-03-16 22:18:50 -08:00
}
2020-03-05 18:44:59 -05:00
struct bkey_s_c bch2_btree_iter_peek_with_updates ( struct btree_iter * iter )
{
2021-03-21 19:43:31 -04:00
return __btree_iter_peek ( iter , true ) ;
2020-03-05 18:44:59 -05:00
}
struct bkey_s_c bch2_btree_iter_next_with_updates ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_advance ( iter ) )
2020-03-05 18:44:59 -05:00
return bkey_s_c_null ;
return bch2_btree_iter_peek_with_updates ( iter ) ;
}
2019-09-07 17:17:21 -04:00
/**
* bch2_btree_iter_peek_prev : returns first key less than or equal to
* iterator ' s current position
*/
struct bkey_s_c bch2_btree_iter_peek_prev ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
struct btree_iter_level * l = & iter - > l [ 0 ] ;
struct bkey_s_c k ;
int ret ;
2020-06-15 19:53:46 -04:00
EBUG_ON ( btree_iter_type ( iter ) ! = BTREE_ITER_KEYS ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
bch2_btree_iter_verify_entry_exit ( iter ) ;
btree_iter_set_search_pos ( iter , iter - > pos ) ;
2017-03-16 22:18:50 -08:00
while ( 1 ) {
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2021-02-11 21:57:32 -05:00
if ( unlikely ( ret ) ) {
k = bkey_s_c_err ( ret ) ;
goto no_key ;
}
2017-03-16 22:18:50 -08:00
2021-03-21 19:22:58 -04:00
k = btree_iter_level_peek ( iter , l ) ;
2021-02-07 21:11:49 -05:00
if ( ! k . k | |
( ( iter - > flags & BTREE_ITER_IS_EXTENTS )
2021-03-04 22:40:41 -05:00
? bkey_cmp ( bkey_start_pos ( k . k ) , iter - > pos ) > = 0
: bkey_cmp ( bkey_start_pos ( k . k ) , iter - > pos ) > 0 ) )
2021-03-21 19:22:58 -04:00
k = btree_iter_level_prev ( iter , l ) ;
2019-09-07 17:17:21 -04:00
if ( likely ( k . k ) )
2017-03-16 22:18:50 -08:00
break ;
2021-02-11 21:57:32 -05:00
if ( ! btree_iter_set_pos_to_prev_leaf ( iter ) ) {
k = bkey_s_c_null ;
goto no_key ;
}
2019-09-07 17:17:21 -04:00
}
2017-03-16 22:18:50 -08:00
2021-03-04 22:40:41 -05:00
EBUG_ON ( bkey_cmp ( bkey_start_pos ( k . k ) , iter - > pos ) > 0 ) ;
2021-02-07 21:11:49 -05:00
/* Extents can straddle iter->pos: */
2021-03-04 22:40:41 -05:00
if ( bkey_cmp ( k . k - > p , iter - > pos ) < 0 )
2021-02-07 21:11:49 -05:00
iter - > pos = k . k - > p ;
2021-02-11 21:57:32 -05:00
out :
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
return k ;
2021-02-11 21:57:32 -05:00
no_key :
/*
2021-03-21 19:22:58 -04:00
* btree_iter_level_peek ( ) may have set iter - > k to a key we didn ' t want , and
2021-02-11 21:57:32 -05:00
* then we errored going to the previous leaf - make sure it ' s
* consistent with iter - > pos :
*/
bkey_init ( & iter - > k ) ;
iter - > k . p = iter - > pos ;
goto out ;
2017-03-16 22:18:50 -08:00
}
2019-09-07 17:17:21 -04:00
/**
* bch2_btree_iter_prev : returns first key less than iterator ' s current
* position
*/
struct bkey_s_c bch2_btree_iter_prev ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_rewind ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2019-09-07 17:17:21 -04:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek_prev ( iter ) ;
2019-09-07 17:17:21 -04:00
}
2017-03-16 22:18:50 -08:00
static inline struct bkey_s_c
2016-07-21 19:05:06 -08:00
__bch2_btree_iter_peek_slot_extents ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
struct bkey_s_c k ;
2021-02-10 16:13:57 -05:00
struct bpos pos , next_start ;
2017-03-16 22:18:50 -08:00
2020-03-13 21:41:22 -04:00
/* keys & holes can't span inode numbers: */
if ( iter - > pos . offset = = KEY_OFFSET_MAX ) {
if ( iter - > pos . inode = = KEY_INODE_MAX )
return bkey_s_c_null ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
bch2_btree_iter_set_pos ( iter , bkey_successor ( iter , iter - > pos ) ) ;
2020-03-13 21:41:22 -04:00
}
2017-03-16 22:18:50 -08:00
2021-02-10 16:13:57 -05:00
pos = iter - > pos ;
k = bch2_btree_iter_peek ( iter ) ;
iter - > pos = pos ;
2020-01-06 22:25:09 -05:00
2021-02-10 16:13:57 -05:00
if ( bkey_err ( k ) )
2020-01-06 22:25:09 -05:00
return k ;
2016-07-21 19:05:06 -08:00
2021-02-10 16:13:57 -05:00
if ( k . k & & bkey_cmp ( bkey_start_pos ( k . k ) , iter - > pos ) < = 0 )
return k ;
2016-07-21 19:05:06 -08:00
2021-02-10 16:13:57 -05:00
next_start = k . k ? bkey_start_pos ( k . k ) : POS_MAX ;
2016-07-21 19:05:06 -08:00
2021-02-10 16:13:57 -05:00
bkey_init ( & iter - > k ) ;
iter - > k . p = iter - > pos ;
bch2_key_resize ( & iter - > k ,
2016-07-21 19:05:06 -08:00
min_t ( u64 , KEY_SIZE_MAX ,
2021-02-10 16:13:57 -05:00
( next_start . inode = = iter - > pos . inode
? next_start . offset
2016-07-21 19:05:06 -08:00
: KEY_OFFSET_MAX ) -
2021-02-10 16:13:57 -05:00
iter - > pos . offset ) ) ;
2016-07-21 19:05:06 -08:00
2021-02-10 16:13:57 -05:00
EBUG_ON ( ! iter - > k . size ) ;
2017-03-16 22:18:50 -08:00
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2016-07-21 19:05:06 -08:00
return ( struct bkey_s_c ) { & iter - > k , NULL } ;
}
2017-03-16 22:18:50 -08:00
2020-03-13 21:41:22 -04:00
struct bkey_s_c bch2_btree_iter_peek_slot ( struct btree_iter * iter )
2016-07-21 19:05:06 -08:00
{
struct btree_iter_level * l = & iter - > l [ 0 ] ;
struct bkey_s_c k ;
2020-03-13 21:41:22 -04:00
int ret ;
2020-06-15 19:53:46 -04:00
EBUG_ON ( btree_iter_type ( iter ) ! = BTREE_ITER_KEYS ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
bch2_btree_iter_verify_entry_exit ( iter ) ;
btree_iter_set_search_pos ( iter , btree_iter_search_key ( iter ) ) ;
2020-03-13 21:41:22 -04:00
2021-02-10 16:13:57 -05:00
if ( iter - > flags & BTREE_ITER_IS_EXTENTS )
return __bch2_btree_iter_peek_slot_extents ( iter ) ;
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2020-03-13 21:41:22 -04:00
if ( unlikely ( ret ) )
return bkey_s_c_err ( ret ) ;
2017-03-16 22:18:50 -08:00
2021-03-21 19:22:58 -04:00
k = btree_iter_level_peek_all ( iter , l , & iter - > k ) ;
2017-03-16 22:18:50 -08:00
2020-03-02 13:38:19 -05:00
EBUG_ON ( k . k & & bkey_deleted ( k . k ) & & bkey_cmp ( k . k - > p , iter - > pos ) = = 0 ) ;
2017-03-16 22:18:50 -08:00
2020-03-02 13:38:19 -05:00
if ( ! k . k | | bkey_cmp ( iter - > pos , k . k - > p ) ) {
2016-07-21 19:05:06 -08:00
/* hole */
bkey_init ( & iter - > k ) ;
iter - > k . p = iter - > pos ;
2019-08-17 15:17:09 -04:00
k = ( struct bkey_s_c ) { & iter - > k , NULL } ;
2016-07-21 19:05:06 -08:00
}
2019-08-17 15:17:09 -04:00
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2019-08-17 15:17:09 -04:00
return k ;
2017-03-16 22:18:50 -08:00
}
struct bkey_s_c bch2_btree_iter_next_slot ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_advance ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek_slot ( iter ) ;
2017-03-16 22:18:50 -08:00
}
2021-03-02 22:45:28 -05:00
struct bkey_s_c bch2_btree_iter_prev_slot ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_rewind ( iter ) )
2021-03-02 22:45:28 -05:00
return bkey_s_c_null ;
return bch2_btree_iter_peek_slot ( iter ) ;
}
2019-03-07 19:46:10 -05:00
struct bkey_s_c bch2_btree_iter_peek_cached ( struct btree_iter * iter )
{
struct bkey_cached * ck ;
int ret ;
2020-06-15 19:53:46 -04:00
EBUG_ON ( btree_iter_type ( iter ) ! = BTREE_ITER_CACHED ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2019-03-07 19:46:10 -05:00
2021-03-23 21:22:50 -04:00
ret = btree_iter_traverse ( iter ) ;
2019-03-07 19:46:10 -05:00
if ( unlikely ( ret ) )
return bkey_s_c_err ( ret ) ;
ck = ( void * ) iter - > l [ 0 ] . b ;
EBUG_ON ( iter - > btree_id ! = ck - > key . btree_id | |
bkey_cmp ( iter - > pos , ck - > key . pos ) ) ;
BUG_ON ( ! ck - > valid ) ;
return bkey_i_to_s_c ( ck - > k ) ;
}
2019-03-25 17:06:42 -04:00
static inline void bch2_btree_iter_init ( struct btree_trans * trans ,
2021-02-19 20:44:55 -05:00
struct btree_iter * iter , enum btree_id btree_id )
2017-03-16 22:18:50 -08:00
{
2019-03-25 17:06:42 -04:00
struct bch_fs * c = trans - > c ;
2017-03-16 22:18:50 -08:00
unsigned i ;
2019-03-25 17:06:42 -04:00
iter - > trans = trans ;
2017-03-16 22:18:50 -08:00
iter - > uptodate = BTREE_ITER_NEED_TRAVERSE ;
iter - > btree_id = btree_id ;
2021-05-16 21:53:55 -06:00
iter - > real_pos = POS_MIN ;
2019-03-25 15:10:15 -04:00
iter - > level = 0 ;
2020-02-18 16:17:55 -05:00
iter - > min_depth = 0 ;
2021-02-19 20:44:55 -05:00
iter - > locks_want = 0 ;
2017-03-16 22:18:50 -08:00
iter - > nodes_locked = 0 ;
iter - > nodes_intent_locked = 0 ;
for ( i = 0 ; i < ARRAY_SIZE ( iter - > l ) ; i + + )
2020-02-18 19:29:33 -05:00
iter - > l [ i ] . b = BTREE_ITER_NO_NODE_INIT ;
2017-03-16 22:18:50 -08:00
prefetch ( c - > btree_roots [ btree_id ] . b ) ;
}
/* new transactional stuff: */
2019-03-25 15:34:48 -04:00
static inline void __bch2_trans_iter_free ( struct btree_trans * trans ,
unsigned idx )
{
2019-03-27 22:46:52 -04:00
__bch2_btree_iter_unlock ( & trans - > iters [ idx ] ) ;
2019-03-25 15:34:48 -04:00
trans - > iters_linked & = ~ ( 1ULL < < idx ) ;
trans - > iters_live & = ~ ( 1ULL < < idx ) ;
trans - > iters_touched & = ~ ( 1ULL < < idx ) ;
}
2019-09-26 22:21:39 -04:00
int bch2_trans_iter_put ( struct btree_trans * trans ,
struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
2020-02-18 14:27:10 -05:00
int ret ;
if ( IS_ERR_OR_NULL ( iter ) )
return 0 ;
2020-02-26 15:39:46 -05:00
BUG_ON ( trans - > iters + iter - > idx ! = iter ) ;
2021-03-19 20:40:31 -04:00
BUG_ON ( ! btree_iter_live ( trans , iter ) ) ;
2020-02-26 15:39:46 -05:00
2020-02-18 14:27:10 -05:00
ret = btree_iter_err ( iter ) ;
2019-03-25 15:10:15 -04:00
2019-09-26 22:21:39 -04:00
if ( ! ( trans - > iters_touched & ( 1ULL < < iter - > idx ) ) & &
! ( iter - > flags & BTREE_ITER_KEEP_UNTIL_COMMIT ) )
__bch2_trans_iter_free ( trans , iter - > idx ) ;
trans - > iters_live & = ~ ( 1ULL < < iter - > idx ) ;
2019-03-25 15:10:15 -04:00
return ret ;
2019-03-25 15:34:48 -04:00
}
2017-03-16 22:18:50 -08:00
2019-09-26 22:21:39 -04:00
/*
 * Put @iter after first marking it as no longer needed
 * (set_btree_iter_dontneed()). NULL and ERR_PTR iterators are accepted and
 * ignored (returns 0); otherwise returns whatever bch2_trans_iter_put()
 * returns.
 */
int bch2_trans_iter_free(struct btree_trans *trans,
			 struct btree_iter *iter)
{
	if (!IS_ERR_OR_NULL(iter)) {
		set_btree_iter_dontneed(trans, iter);
		return bch2_trans_iter_put(trans, iter);
	}

	return 0;
}
2020-12-01 23:11:53 -05:00
noinline __cold
static void btree_trans_iter_alloc_fail ( struct btree_trans * trans )
2017-03-16 22:18:50 -08:00
{
2019-03-07 23:13:39 -05:00
2020-12-01 23:11:53 -05:00
struct btree_iter * iter ;
2020-12-09 13:34:42 -05:00
struct btree_insert_entry * i ;
2021-03-04 15:20:22 -05:00
char buf [ 100 ] ;
2018-07-12 23:30:45 -04:00
2020-12-01 23:11:53 -05:00
trans_for_each_iter ( trans , iter )
2021-03-15 21:18:50 -04:00
printk ( KERN_ERR " iter: btree %s pos %s%s%s%s %pS \n " ,
2020-12-01 23:11:53 -05:00
bch2_btree_ids [ iter - > btree_id ] ,
2021-03-04 15:20:22 -05:00
( bch2_bpos_to_text ( & PBUF ( buf ) , iter - > pos ) , buf ) ,
2021-02-20 22:19:34 -05:00
btree_iter_live ( trans , iter ) ? " live " : " " ,
2020-12-01 23:11:53 -05:00
( trans - > iters_touched & ( 1ULL < < iter - > idx ) ) ? " touched " : " " ,
iter - > flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep " : " " ,
( void * ) iter - > ip_allocated ) ;
2020-12-09 13:34:42 -05:00
trans_for_each_update ( trans , i ) {
char buf [ 300 ] ;
bch2_bkey_val_to_text ( & PBUF ( buf ) , trans - > c , bkey_i_to_s_c ( i - > k ) ) ;
printk ( KERN_ERR " update: btree %s %s \n " ,
bch2_btree_ids [ i - > iter - > btree_id ] , buf ) ;
}
2020-12-01 23:11:53 -05:00
panic ( " trans iter oveflow \n " ) ;
2017-03-16 22:18:50 -08:00
}
2019-09-22 19:35:12 -04:00
static struct btree_iter * btree_trans_iter_alloc ( struct btree_trans * trans )
2019-03-25 22:43:26 -04:00
{
2020-12-01 23:11:53 -05:00
unsigned idx ;
2019-09-26 22:21:39 -04:00
2020-12-01 23:11:53 -05:00
if ( unlikely ( trans - > iters_linked = =
~ ( ( ~ 0ULL < < 1 ) < < ( BTREE_ITER_MAX - 1 ) ) ) )
btree_trans_iter_alloc_fail ( trans ) ;
2019-03-25 22:43:26 -04:00
2020-12-01 23:11:53 -05:00
idx = __ffs64 ( ~ trans - > iters_linked ) ;
2019-03-27 23:14:38 -04:00
2020-12-01 23:11:53 -05:00
trans - > iters_linked | = 1ULL < < idx ;
trans - > iters [ idx ] . idx = idx ;
trans - > iters [ idx ] . flags = 0 ;
2019-09-22 19:35:12 -04:00
return & trans - > iters [ idx ] ;
2019-03-25 22:43:26 -04:00
}
2019-09-26 22:21:39 -04:00
static inline void btree_iter_copy ( struct btree_iter * dst ,
struct btree_iter * src )
{
unsigned i , idx = dst - > idx ;
* dst = * src ;
dst - > idx = idx ;
2019-03-07 19:46:10 -05:00
dst - > flags & = ~ BTREE_ITER_KEEP_UNTIL_COMMIT ;
2019-09-26 22:21:39 -04:00
for ( i = 0 ; i < BTREE_MAX_DEPTH ; i + + )
if ( btree_node_locked ( dst , i ) )
six_lock_increment ( & dst - > l [ i ] . b - > c . lock ,
__btree_lock_want ( dst , i ) ) ;
2019-12-31 19:37:10 -05:00
dst - > flags & = ~ BTREE_ITER_KEEP_UNTIL_COMMIT ;
dst - > flags & = ~ BTREE_ITER_SET_POS_AFTER_COMMIT ;
2019-09-26 22:21:39 -04:00
}
2021-02-19 20:44:55 -05:00
struct btree_iter * __bch2_trans_get_iter ( struct btree_trans * trans ,
enum btree_id btree_id , struct bpos pos ,
2021-04-02 21:29:05 -04:00
unsigned locks_want ,
unsigned depth ,
2021-02-19 20:44:55 -05:00
unsigned flags )
2017-03-16 22:18:50 -08:00
{
2019-09-26 22:21:39 -04:00
struct btree_iter * iter , * best = NULL ;
2021-04-29 16:56:17 -04:00
struct bpos real_pos , pos_min = POS_MIN ;
if ( ( flags & BTREE_ITER_TYPE ) ! = BTREE_ITER_NODES & &
btree_node_type_is_extents ( btree_id ) & &
! ( flags & BTREE_ITER_NOT_EXTENTS ) & &
! ( flags & BTREE_ITER_ALL_SNAPSHOTS ) )
flags | = BTREE_ITER_IS_EXTENTS ;
2017-03-16 22:18:50 -08:00
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
if ( ( flags & BTREE_ITER_TYPE ) ! = BTREE_ITER_NODES & &
! btree_type_has_snapshots ( btree_id ) )
flags & = ~ BTREE_ITER_ALL_SNAPSHOTS ;
if ( ! ( flags & BTREE_ITER_ALL_SNAPSHOTS ) )
pos . snapshot = btree_type_has_snapshots ( btree_id )
? U32_MAX : 0 ;
2021-04-29 16:56:17 -04:00
real_pos = pos ;
if ( ( flags & BTREE_ITER_IS_EXTENTS ) & &
bkey_cmp ( pos , POS_MAX ) )
real_pos = bpos_nosnap_successor ( pos ) ;
2019-09-26 22:21:39 -04:00
trans_for_each_iter ( trans , iter ) {
if ( btree_iter_type ( iter ) ! = ( flags & BTREE_ITER_TYPE ) )
continue ;
2019-09-22 19:35:12 -04:00
2019-09-26 22:21:39 -04:00
if ( iter - > btree_id ! = btree_id )
continue ;
2021-04-14 13:29:34 -04:00
if ( best ) {
2021-04-29 16:56:17 -04:00
int cmp = bkey_cmp ( bpos_diff ( best - > real_pos , real_pos ) ,
bpos_diff ( iter - > real_pos , real_pos ) ) ;
2021-04-14 13:29:34 -04:00
if ( cmp < 0 | |
( ( cmp = = 0 & & btree_iter_keep ( trans , iter ) ) ) )
continue ;
}
2019-09-26 22:21:39 -04:00
best = iter ;
}
2021-04-02 21:29:05 -04:00
2021-04-29 16:56:17 -04:00
trace_trans_get_iter ( _RET_IP_ , trans - > ip ,
btree_id ,
& real_pos , locks_want ,
best ? & best - > real_pos : & pos_min ,
best ? best - > locks_want : 0 ,
best ? best - > uptodate : BTREE_ITER_NEED_TRAVERSE ) ;
2019-09-26 22:21:39 -04:00
if ( ! best ) {
2019-09-22 19:35:12 -04:00
iter = btree_trans_iter_alloc ( trans ) ;
2021-02-19 20:44:55 -05:00
bch2_btree_iter_init ( trans , iter , btree_id ) ;
2021-02-20 22:19:34 -05:00
} else if ( btree_iter_keep ( trans , best ) ) {
2019-09-26 22:21:39 -04:00
iter = btree_trans_iter_alloc ( trans ) ;
btree_iter_copy ( iter , best ) ;
} else {
iter = best ;
2017-03-16 22:18:50 -08:00
}
2021-02-19 20:44:55 -05:00
trans - > iters_live | = 1ULL < < iter - > idx ;
trans - > iters_touched | = 1ULL < < iter - > idx ;
2021-02-20 22:19:34 -05:00
iter - > flags = flags ;
2019-09-26 22:21:39 -04:00
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
iter - > snapshot = pos . snapshot ;
2021-04-15 12:36:40 -04:00
/*
* If the iterator has locks_want greater than requested , we explicitly
* do not downgrade it here - on transaction restart because btree node
* split needs to upgrade locks , we might be putting / getting the
* iterator again . Downgrading iterators only happens via an explicit
* bch2_trans_downgrade ( ) .
*/
2021-04-02 21:29:05 -04:00
2021-04-15 12:36:40 -04:00
locks_want = min ( locks_want , BTREE_MAX_DEPTH ) ;
2021-04-02 21:29:05 -04:00
if ( locks_want > iter - > locks_want ) {
iter - > locks_want = locks_want ;
2021-03-31 16:43:50 -04:00
btree_iter_get_locks ( iter , true , false ) ;
}
2019-09-26 22:21:39 -04:00
2021-04-27 11:12:17 -04:00
while ( iter - > level ! = depth ) {
2021-04-02 21:29:05 -04:00
btree_node_unlock ( iter , iter - > level ) ;
iter - > l [ iter - > level ] . b = BTREE_ITER_NO_NODE_INIT ;
2021-04-27 11:12:17 -04:00
iter - > uptodate = BTREE_ITER_NEED_TRAVERSE ;
if ( iter - > level < depth )
iter - > level + + ;
else
iter - > level - - ;
2021-04-02 21:29:05 -04:00
}
iter - > min_depth = depth ;
2021-02-19 20:44:55 -05:00
bch2_btree_iter_set_pos ( iter , pos ) ;
2021-04-29 16:56:17 -04:00
btree_iter_set_search_pos ( iter , real_pos ) ;
2017-03-16 22:18:50 -08:00
return iter ;
}
2019-03-25 15:10:15 -04:00
struct btree_iter * bch2_trans_get_node_iter ( struct btree_trans * trans ,
enum btree_id btree_id ,
struct bpos pos ,
unsigned locks_want ,
unsigned depth ,
unsigned flags )
{
struct btree_iter * iter =
2021-02-19 20:44:55 -05:00
__bch2_trans_get_iter ( trans , btree_id , pos ,
2021-04-02 21:29:05 -04:00
locks_want , depth ,
BTREE_ITER_NODES |
BTREE_ITER_NOT_EXTENTS |
BTREE_ITER_ALL_SNAPSHOTS |
flags ) ;
2019-03-25 15:10:15 -04:00
BUG_ON ( bkey_cmp ( iter - > pos , pos ) ) ;
2021-04-02 21:29:05 -04:00
BUG_ON ( iter - > locks_want ! = min ( locks_want , BTREE_MAX_DEPTH ) ) ;
BUG_ON ( iter - > level ! = depth ) ;
BUG_ON ( iter - > min_depth ! = depth ) ;
2021-03-19 20:29:11 -04:00
iter - > ip_allocated = _RET_IP_ ;
2019-03-25 15:10:15 -04:00
return iter ;
}
2020-04-01 17:14:14 -04:00
struct btree_iter * __bch2_trans_copy_iter ( struct btree_trans * trans ,
2019-03-25 22:43:26 -04:00
struct btree_iter * src )
2017-03-16 22:18:50 -08:00
{
2019-03-27 22:46:52 -04:00
struct btree_iter * iter ;
2017-03-16 22:18:50 -08:00
2019-09-22 19:35:12 -04:00
iter = btree_trans_iter_alloc ( trans ) ;
2019-09-26 22:21:39 -04:00
btree_iter_copy ( iter , src ) ;
2019-03-25 22:43:26 -04:00
2019-09-26 22:21:39 -04:00
trans - > iters_live | = 1ULL < < iter - > idx ;
/*
2020-02-18 14:27:10 -05:00
* We don ' t need to preserve this iter since it ' s cheap to copy it
* again - this will cause trans_iter_put ( ) to free it right away :
2019-09-26 22:21:39 -04:00
*/
2021-03-19 22:54:18 -04:00
set_btree_iter_dontneed ( trans , iter ) ;
2019-03-25 22:43:26 -04:00
2019-09-22 19:35:12 -04:00
return iter ;
2017-03-16 22:18:50 -08:00
}
2021-04-15 12:50:09 -04:00
/*
 * Bump-allocate @size bytes out of the transaction's memory buffer.
 *
 * When the buffer is too small it is grown to the next power of two with
 * krealloc(); if that fails and the new size is within BTREE_TRANS_MEM_MAX,
 * a buffer of BTREE_TRANS_MEM_MAX bytes is taken from the filesystem's
 * btree_trans_mem_pool instead and the old buffer is freed.
 *
 * Returns a pointer into the buffer on success, ERR_PTR(-ENOMEM) if no
 * memory could be obtained, or ERR_PTR(-EINTR) when a previously existing
 * buffer had to be replaced (presumably because pointers handed out earlier
 * may no longer be valid, so the transaction has to restart).
 */
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	size_t wanted_top = trans->mem_top + size;
	void *ret;

	if (wanted_top > trans->mem_bytes) {
		size_t prev_bytes = trans->mem_bytes;
		size_t grown_bytes = roundup_pow_of_two(wanted_top);
		void *grown;

		WARN_ON_ONCE(grown_bytes > BTREE_TRANS_MEM_MAX);

		grown = krealloc(trans->mem, grown_bytes, GFP_NOFS);
		if (!grown) {
			/* Too big for the mempool: nothing left to fall back on */
			if (grown_bytes > BTREE_TRANS_MEM_MAX)
				return ERR_PTR(-ENOMEM);

			grown = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
			grown_bytes = BTREE_TRANS_MEM_MAX;
			kfree(trans->mem);

			if (!grown)
				return ERR_PTR(-ENOMEM);
		}

		trans->mem = grown;
		trans->mem_bytes = grown_bytes;

		if (prev_bytes) {
			trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, grown_bytes);
			return ERR_PTR(-EINTR);
		}
	}

	ret = trans->mem + trans->mem_top;
	trans->mem_top += size;
	return ret;
}
2019-09-26 22:21:39 -04:00
inline void bch2_trans_unlink_iters ( struct btree_trans * trans )
2019-03-25 15:34:48 -04:00
{
2019-09-26 22:21:39 -04:00
u64 iters = trans - > iters_linked &
~ trans - > iters_touched &
~ trans - > iters_live ;
2019-03-25 15:34:48 -04:00
while ( iters ) {
unsigned idx = __ffs64 ( iters ) ;
iters & = ~ ( 1ULL < < idx ) ;
__bch2_trans_iter_free ( trans , idx ) ;
}
}
2019-09-26 22:21:39 -04:00
void bch2_trans_reset ( struct btree_trans * trans , unsigned flags )
2017-03-16 22:18:50 -08:00
{
2019-09-26 22:21:39 -04:00
struct btree_iter * iter ;
2017-03-16 22:18:50 -08:00
2019-09-26 22:21:39 -04:00
trans_for_each_iter ( trans , iter )
2019-12-31 19:37:10 -05:00
iter - > flags & = ~ ( BTREE_ITER_KEEP_UNTIL_COMMIT |
BTREE_ITER_SET_POS_AFTER_COMMIT ) ;
2019-03-07 23:13:39 -05:00
2019-09-26 22:21:39 -04:00
bch2_trans_unlink_iters ( trans ) ;
2019-03-07 23:13:39 -05:00
2019-09-26 22:21:39 -04:00
trans - > iters_touched & = trans - > iters_live ;
2017-03-16 22:18:50 -08:00
2019-03-25 15:34:48 -04:00
trans - > nr_updates = 0 ;
2019-12-30 14:37:25 -05:00
trans - > nr_updates2 = 0 ;
2020-02-26 15:39:46 -05:00
trans - > mem_top = 0 ;
2019-03-28 00:07:24 -04:00
2021-02-03 21:51:56 -05:00
trans - > hooks = NULL ;
2020-05-25 19:29:48 -04:00
trans - > extra_journal_entries = NULL ;
trans - > extra_journal_entry_u64s = 0 ;
2019-12-30 12:43:19 -05:00
if ( trans - > fs_usage_deltas ) {
trans - > fs_usage_deltas - > used = 0 ;
memset ( ( void * ) trans - > fs_usage_deltas +
offsetof ( struct replicas_delta_list , memset_start ) , 0 ,
( void * ) & trans - > fs_usage_deltas - > memset_end -
( void * ) & trans - > fs_usage_deltas - > memset_start ) ;
}
2021-03-27 20:58:57 -04:00
if ( ! ( flags & TRANS_RESET_NOUNLOCK ) )
bch2_trans_cond_resched ( trans ) ;
2021-03-19 20:29:11 -04:00
2021-04-14 13:26:15 -04:00
if ( ! ( flags & TRANS_RESET_NOTRAVERSE ) & &
trans - > iters_linked )
2019-12-30 12:43:19 -05:00
bch2_btree_iter_traverse_all ( trans ) ;
2017-03-16 22:18:50 -08:00
}
2020-11-05 20:02:01 -05:00
static void bch2_trans_alloc_iters ( struct btree_trans * trans , struct bch_fs * c )
{
2020-12-01 23:11:53 -05:00
size_t iters_bytes = sizeof ( struct btree_iter ) * BTREE_ITER_MAX ;
size_t updates_bytes = sizeof ( struct btree_insert_entry ) * BTREE_ITER_MAX ;
2020-11-16 18:20:50 -05:00
void * p = NULL ;
2020-11-05 20:02:01 -05:00
BUG_ON ( trans - > used_mempool ) ;
2020-11-16 18:20:50 -05:00
# ifdef __KERNEL__
p = this_cpu_xchg ( c - > btree_iters_bufs - > iter , NULL ) ;
# endif
if ( ! p )
p = mempool_alloc ( & trans - > c - > btree_iters_pool , GFP_NOFS ) ;
2020-11-05 20:02:01 -05:00
trans - > iters = p ; p + = iters_bytes ;
trans - > updates = p ; p + = updates_bytes ;
trans - > updates2 = p ; p + = updates_bytes ;
}
2019-05-15 10:54:43 -04:00
void bch2_trans_init ( struct btree_trans * trans , struct bch_fs * c ,
unsigned expected_nr_iters ,
size_t expected_mem_bytes )
2021-05-23 17:04:13 -04:00
__acquires ( & c - > btree_trans_barrier )
2017-03-16 22:18:50 -08:00
{
2020-11-02 18:54:33 -05:00
memset ( trans , 0 , sizeof ( * trans ) ) ;
2017-03-16 22:18:50 -08:00
trans - > c = c ;
2019-04-23 00:10:08 -04:00
trans - > ip = _RET_IP_ ;
2019-05-15 10:54:43 -04:00
2020-11-05 20:02:01 -05:00
/*
* reallocating iterators currently completely breaks
* bch2_trans_iter_put ( ) , we always allocate the max :
*/
bch2_trans_alloc_iters ( trans , c ) ;
2019-05-15 10:54:43 -04:00
2020-11-15 20:52:55 -05:00
if ( expected_mem_bytes ) {
expected_mem_bytes = roundup_pow_of_two ( expected_mem_bytes ) ;
trans - > mem = kmalloc ( expected_mem_bytes , GFP_KERNEL ) ;
2021-04-24 00:09:06 -04:00
if ( ! unlikely ( trans - > mem ) ) {
trans - > mem = mempool_alloc ( & c - > btree_trans_mem_pool , GFP_KERNEL ) ;
trans - > mem_bytes = BTREE_TRANS_MEM_MAX ;
} else {
2020-11-15 20:52:55 -05:00
trans - > mem_bytes = expected_mem_bytes ;
2021-04-24 00:09:06 -04:00
}
2020-11-15 20:52:55 -05:00
}
2020-06-02 16:36:11 -04:00
2020-11-15 16:30:22 -05:00
trans - > srcu_idx = srcu_read_lock ( & c - > btree_trans_barrier ) ;
2020-06-02 16:36:11 -04:00
# ifdef CONFIG_BCACHEFS_DEBUG
2020-06-02 19:41:47 -04:00
trans - > pid = current - > pid ;
2020-06-02 16:36:11 -04:00
mutex_lock ( & c - > btree_trans_lock ) ;
list_add ( & trans - > list , & c - > btree_trans_list ) ;
mutex_unlock ( & c - > btree_trans_lock ) ;
# endif
2017-03-16 22:18:50 -08:00
}
int bch2_trans_exit ( struct btree_trans * trans )
2021-05-23 17:04:13 -04:00
__releases ( & c - > btree_trans_barrier )
2017-03-16 22:18:50 -08:00
{
2020-11-05 20:02:01 -05:00
struct bch_fs * c = trans - > c ;
2019-04-04 21:28:16 -04:00
bch2_trans_unlock ( trans ) ;
2017-03-16 22:18:50 -08:00
2020-06-02 16:36:11 -04:00
# ifdef CONFIG_BCACHEFS_DEBUG
2021-03-19 20:29:11 -04:00
if ( trans - > iters_live ) {
struct btree_iter * iter ;
bch_err ( c , " btree iterators leaked! " ) ;
trans_for_each_iter ( trans , iter )
if ( btree_iter_live ( trans , iter ) )
printk ( KERN_ERR " btree %s allocated at %pS \n " ,
bch2_btree_ids [ iter - > btree_id ] ,
( void * ) iter - > ip_allocated ) ;
/* Be noisy about this: */
bch2_fatal_error ( c ) ;
}
2020-06-02 16:36:11 -04:00
mutex_lock ( & trans - > c - > btree_trans_lock ) ;
list_del ( & trans - > list ) ;
mutex_unlock ( & trans - > c - > btree_trans_lock ) ;
# endif
2020-11-15 16:30:22 -05:00
srcu_read_unlock ( & c - > btree_trans_barrier , trans - > srcu_idx ) ;
2019-03-07 19:46:10 -05:00
bch2_journal_preres_put ( & trans - > c - > journal , & trans - > journal_preres ) ;
2021-04-24 00:24:25 -04:00
if ( trans - > fs_usage_deltas ) {
if ( trans - > fs_usage_deltas - > size + sizeof ( trans - > fs_usage_deltas ) = =
REPLICAS_DELTA_LIST_MAX )
mempool_free ( trans - > fs_usage_deltas ,
& trans - > c - > replicas_delta_pool ) ;
else
kfree ( trans - > fs_usage_deltas ) ;
}
2021-04-24 00:09:06 -04:00
if ( trans - > mem_bytes = = BTREE_TRANS_MEM_MAX )
mempool_free ( trans - > mem , & trans - > c - > btree_trans_mem_pool ) ;
else
kfree ( trans - > mem ) ;
2020-11-05 20:02:01 -05:00
2020-11-16 18:20:50 -05:00
# ifdef __KERNEL__
/*
* Userspace doesn ' t have a real percpu implementation :
*/
2020-11-05 20:02:01 -05:00
trans - > iters = this_cpu_xchg ( c - > btree_iters_bufs - > iter , trans - > iters ) ;
2020-11-16 18:20:50 -05:00
# endif
2021-04-24 00:09:06 -04:00
2020-11-05 20:02:01 -05:00
if ( trans - > iters )
2018-08-08 21:22:46 -04:00
mempool_free ( trans - > iters , & trans - > c - > btree_iters_pool ) ;
2020-11-05 20:02:01 -05:00
2017-03-16 22:18:50 -08:00
trans - > mem = ( void * ) 0x1 ;
trans - > iters = ( void * ) 0x1 ;
2019-04-04 21:28:16 -04:00
return trans - > error ? - EIO : 0 ;
2017-03-16 22:18:50 -08:00
}
2019-09-07 14:16:00 -04:00
2020-11-07 12:43:48 -05:00
/*
 * Append a short description of the node @_b to @out: its level, the name
 * of the btree it belongs to, and the position btree_node_pos() returns for
 * it given the iterator type @type.
 */
static void __maybe_unused
bch2_btree_iter_node_to_text ( struct printbuf * out ,
struct btree_bkey_cached_common * _b ,
enum btree_iter_type type )
2020-06-15 19:53:46 -04:00
{
2021-03-31 16:43:50 -04:00
pr_buf ( out , " l=%u %s: " ,
_b - > level , bch2_btree_ids [ _b - > btree_id ] ) ;
2020-06-15 19:53:46 -04:00
bch2_bpos_to_text ( out , btree_node_pos ( _b , type ) ) ;
}
2021-04-02 21:29:05 -04:00
# ifdef CONFIG_BCACHEFS_DEBUG
2021-03-31 16:43:50 -04:00
static bool trans_has_btree_nodes_locked ( struct btree_trans * trans )
{
struct btree_iter * iter ;
trans_for_each_iter ( trans , iter )
if ( btree_iter_type ( iter ) ! = BTREE_ITER_CACHED & &
iter - > nodes_locked )
return true ;
return false ;
}
2021-04-02 21:29:05 -04:00
# endif
2021-03-31 16:43:50 -04:00
2020-06-02 16:36:11 -04:00
void bch2_btree_trans_to_text ( struct printbuf * out , struct bch_fs * c )
{
# ifdef CONFIG_BCACHEFS_DEBUG
struct btree_trans * trans ;
struct btree_iter * iter ;
struct btree * b ;
unsigned l ;
mutex_lock ( & c - > btree_trans_lock ) ;
list_for_each_entry ( trans , & c - > btree_trans_list , list ) {
2021-03-31 16:43:50 -04:00
if ( ! trans_has_btree_nodes_locked ( trans ) )
continue ;
pr_buf ( out , " %i %ps \n " , trans - > pid , ( void * ) trans - > ip ) ;
2020-06-02 16:36:11 -04:00
trans_for_each_iter ( trans , iter ) {
if ( ! iter - > nodes_locked )
continue ;
2021-03-31 16:43:50 -04:00
pr_buf ( out , " iter %u %c %s: " ,
2020-06-12 14:58:07 -04:00
iter - > idx ,
2021-03-31 16:43:50 -04:00
btree_iter_type ( iter ) = = BTREE_ITER_CACHED ? ' c ' : ' b ' ,
2020-06-12 14:58:07 -04:00
bch2_btree_ids [ iter - > btree_id ] ) ;
2020-06-02 16:36:11 -04:00
bch2_bpos_to_text ( out , iter - > pos ) ;
pr_buf ( out , " \n " ) ;
for ( l = 0 ; l < BTREE_MAX_DEPTH ; l + + ) {
if ( btree_node_locked ( iter , l ) ) {
2020-06-15 19:53:46 -04:00
pr_buf ( out , " %s l=%u " ,
btree_node_intent_locked ( iter , l ) ? " i " : " r " , l ) ;
bch2_btree_iter_node_to_text ( out ,
( void * ) iter - > l [ l ] . b ,
btree_iter_type ( iter ) ) ;
2020-06-02 16:36:11 -04:00
pr_buf ( out , " \n " ) ;
}
}
}
b = READ_ONCE ( trans - > locking ) ;
if ( b ) {
2021-03-31 16:43:50 -04:00
iter = & trans - > iters [ trans - > locking_iter_idx ] ;
pr_buf ( out , " locking iter %u %c l=%u %s: " ,
2020-06-12 14:58:07 -04:00
trans - > locking_iter_idx ,
2021-03-31 16:43:50 -04:00
btree_iter_type ( iter ) = = BTREE_ITER_CACHED ? ' c ' : ' b ' ,
2020-06-12 14:58:07 -04:00
trans - > locking_level ,
bch2_btree_ids [ trans - > locking_btree_id ] ) ;
bch2_bpos_to_text ( out , trans - > locking_pos ) ;
2020-06-15 19:53:46 -04:00
pr_buf ( out , " node " ) ;
bch2_btree_iter_node_to_text ( out ,
( void * ) b ,
2021-03-31 16:43:50 -04:00
btree_iter_type ( iter ) ) ;
2020-06-02 16:36:11 -04:00
pr_buf ( out , " \n " ) ;
}
}
mutex_unlock ( & c - > btree_trans_lock ) ;
# endif
}
2019-09-07 14:16:00 -04:00
void bch2_fs_btree_iter_exit ( struct bch_fs * c )
{
2021-04-24 00:09:06 -04:00
mempool_exit ( & c - > btree_trans_mem_pool ) ;
2019-09-07 14:16:00 -04:00
mempool_exit ( & c - > btree_iters_pool ) ;
2020-11-15 16:30:22 -05:00
cleanup_srcu_struct ( & c - > btree_trans_barrier ) ;
2019-09-07 14:16:00 -04:00
}
int bch2_fs_btree_iter_init ( struct bch_fs * c )
{
unsigned nr = BTREE_ITER_MAX ;
2020-06-02 16:36:11 -04:00
INIT_LIST_HEAD ( & c - > btree_trans_list ) ;
mutex_init ( & c - > btree_trans_lock ) ;
2020-11-15 16:30:22 -05:00
return init_srcu_struct ( & c - > btree_trans_barrier ) ? :
mempool_init_kmalloc_pool ( & c - > btree_iters_pool , 1 ,
2019-09-07 14:16:00 -04:00
sizeof ( struct btree_iter ) * nr +
2019-12-30 13:08:26 -05:00
sizeof ( struct btree_insert_entry ) * nr +
2021-04-24 00:09:06 -04:00
sizeof ( struct btree_insert_entry ) * nr ) ? :
mempool_init_kmalloc_pool ( & c - > btree_trans_mem_pool , 1 ,
BTREE_TRANS_MEM_MAX ) ;
2019-09-07 14:16:00 -04:00
}