2017-03-16 22:18:50 -08:00
// SPDX-License-Identifier: GPL-2.0
# include "bcachefs.h"
# include "bkey_methods.h"
2020-12-17 15:08:58 -05:00
# include "bkey_buf.h"
2017-03-16 22:18:50 -08:00
# include "btree_cache.h"
# include "btree_iter.h"
2019-03-07 19:46:10 -05:00
# include "btree_key_cache.h"
2017-03-16 22:18:50 -08:00
# include "btree_locking.h"
2020-03-05 18:44:59 -05:00
# include "btree_update.h"
2017-03-16 22:18:50 -08:00
# include "debug.h"
2021-03-19 20:29:11 -04:00
# include "error.h"
2017-03-16 22:18:50 -08:00
# include "extents.h"
2019-03-07 19:46:10 -05:00
# include "journal.h"
2021-12-25 20:07:00 -05:00
# include "recovery.h"
2021-04-03 20:29:05 -04:00
# include "replicas.h"
2021-03-04 22:29:25 -05:00
# include "subvolume.h"
2017-03-16 22:18:50 -08:00
# include "trace.h"
2022-07-17 20:22:30 -04:00
# include <linux/random.h>
2017-03-16 22:18:50 -08:00
# include <linux/prefetch.h>
2021-08-30 15:18:31 -04:00
/* Forward declarations so these helpers can be used before their definitions. */
static inline void btree_path_list_remove ( struct btree_trans * , struct btree_path * ) ;
static inline void btree_path_list_add ( struct btree_trans * , struct btree_path * ,
struct btree_path * ) ;
2023-01-09 01:11:18 -05:00
/*
 * Return the value recorded in iter->ip_allocated when TRACK_PATH_ALLOCATED
 * is defined; without it the field is not read and 0 is returned.
 */
static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
{
	unsigned long ip = 0;

#ifdef TRACK_PATH_ALLOCATED
	ip = iter->ip_allocated;
#endif
	return ip;
}
2021-08-30 15:18:31 -04:00
/* Forward declaration so btree_path_alloc() can be used before its definition. */
static struct btree_path * btree_path_alloc ( struct btree_trans * , struct btree_path * ) ;
2021-10-24 16:40:05 -04:00
/*
 * Conditionally reschedule: when need_resched() or race_fault() reports true,
 * the transaction is unlocked before calling schedule(), and the result of
 * bch2_trans_relock() is returned.  Otherwise returns 0 without touching any
 * locks.
 *
 * Note: does not revalidate iterator
 */
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
	if (!need_resched() && !race_fault())
		return 0;

	bch2_trans_unlock(trans);
	schedule();
	return bch2_trans_relock(trans);
}
2021-08-30 15:18:31 -04:00
/*
 * Three-way comparison of path @l against the tuple
 * (@r_btree_id, @r_cached, @r_pos, @r_level).
 *
 * Fields are compared in that order; the first non-zero result wins.  The
 * level comparison is negated, so a higher level compares as smaller.
 *
 * Must match lock ordering as defined by __bch2_btree_node_lock.
 */
static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id	r_btree_id,
				   bool			r_cached,
				   struct bpos		r_pos,
				   unsigned		r_level)
{
	int ret;

	ret = cmp_int(l->btree_id, r_btree_id);
	if (ret)
		return ret;

	ret = cmp_int((int) l->cached, (int) r_cached);
	if (ret)
		return ret;

	ret = bpos_cmp(l->pos, r_pos);
	if (ret)
		return ret;

	return -cmp_int(l->level, r_level);
}
static inline int btree_path_cmp ( const struct btree_path * l ,
const struct btree_path * r )
{
return __btree_path_cmp ( l , r - > btree_id , r - > cached , r - > pos , r - > level ) ;
2021-06-12 15:45:45 -04:00
}
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and always U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
static inline struct bpos bkey_successor ( struct btree_iter * iter , struct bpos p )
{
/* Are we iterating over keys in all snapshots? */
if ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) {
p = bpos_successor ( p ) ;
} else {
p = bpos_nosnap_successor ( p ) ;
p . snapshot = iter - > snapshot ;
}
return p ;
}
static inline struct bpos bkey_predecessor ( struct btree_iter * iter , struct bpos p )
{
/* Are we iterating over keys in all snapshots? */
if ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) {
p = bpos_predecessor ( p ) ;
} else {
p = bpos_nosnap_predecessor ( p ) ;
p . snapshot = iter - > snapshot ;
}
return p ;
}
2020-01-06 22:25:09 -05:00
static inline struct bpos btree_iter_search_key ( struct btree_iter * iter )
2018-08-21 16:30:14 -04:00
{
2020-01-06 22:25:09 -05:00
struct bpos pos = iter - > pos ;
2018-08-21 17:38:41 -04:00
2020-01-06 22:25:09 -05:00
if ( ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
2022-11-24 03:12:22 -05:00
! bkey_eq ( pos , POS_MAX ) )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
pos = bkey_successor ( iter , pos ) ;
2020-01-06 22:25:09 -05:00
return pos ;
2018-08-21 16:30:14 -04:00
}
2021-08-30 15:18:31 -04:00
static inline bool btree_path_pos_before_node ( struct btree_path * path ,
2020-03-02 13:38:19 -05:00
struct btree * b )
{
2022-11-24 03:12:22 -05:00
return bpos_lt ( path - > pos , b - > data - > min_key ) ;
2020-03-02 13:38:19 -05:00
}
2021-08-30 15:18:31 -04:00
static inline bool btree_path_pos_after_node ( struct btree_path * path ,
2020-03-02 13:38:19 -05:00
struct btree * b )
{
2022-11-24 03:12:22 -05:00
return bpos_gt ( path - > pos , b - > key . k . p ) ;
2020-03-02 13:38:19 -05:00
}
2021-08-30 15:18:31 -04:00
static inline bool btree_path_pos_in_node ( struct btree_path * path ,
2020-03-02 13:38:19 -05:00
struct btree * b )
{
2021-08-30 15:18:31 -04:00
return path - > btree_id = = b - > c . btree_id & &
! btree_path_pos_before_node ( path , b ) & &
! btree_path_pos_after_node ( path , b ) ;
2020-03-02 13:38:19 -05:00
}
2017-03-16 22:18:50 -08:00
/* Btree iterator: */
# ifdef CONFIG_BCACHEFS_DEBUG
2021-08-30 15:18:31 -04:00
static void bch2_btree_path_verify_cached ( struct btree_trans * trans ,
struct btree_path * path )
2020-06-15 19:53:46 -04:00
{
struct bkey_cached * ck ;
2021-08-30 15:18:31 -04:00
bool locked = btree_node_locked ( path , 0 ) ;
2020-06-15 19:53:46 -04:00
2021-08-30 15:18:31 -04:00
if ( ! bch2_btree_node_relock ( trans , path , 0 ) )
2020-06-15 19:53:46 -04:00
return ;
2021-08-30 15:18:31 -04:00
ck = ( void * ) path - > l [ 0 ] . b ;
BUG_ON ( ck - > key . btree_id ! = path - > btree_id | |
2022-11-24 03:12:22 -05:00
! bkey_eq ( ck - > key . pos , path - > pos ) ) ;
2020-06-15 19:53:46 -04:00
if ( ! locked )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , 0 ) ;
2020-06-15 19:53:46 -04:00
}
2021-08-30 15:18:31 -04:00
static void bch2_btree_path_verify_level ( struct btree_trans * trans ,
struct btree_path * path , unsigned level )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_path_level * l ;
2021-03-20 22:13:30 -04:00
struct btree_node_iter tmp ;
bool locked ;
2020-02-18 16:17:55 -05:00
struct bkey_packed * p , * k ;
2022-02-25 13:18:19 -05:00
struct printbuf buf1 = PRINTBUF ;
struct printbuf buf2 = PRINTBUF ;
struct printbuf buf3 = PRINTBUF ;
2020-02-18 16:17:55 -05:00
const char * msg ;
2017-03-16 22:18:50 -08:00
2020-11-02 18:20:44 -05:00
if ( ! bch2_debug_check_iterators )
2019-03-28 01:51:47 -04:00
return ;
2021-08-30 15:18:31 -04:00
l = & path - > l [ level ] ;
2021-03-20 22:13:30 -04:00
tmp = l - > iter ;
2021-08-30 15:18:31 -04:00
locked = btree_node_locked ( path , level ) ;
2021-03-20 22:13:30 -04:00
2021-08-30 15:18:31 -04:00
if ( path - > cached ) {
2020-06-15 19:53:46 -04:00
if ( ! level )
2021-08-30 15:18:31 -04:00
bch2_btree_path_verify_cached ( trans , path ) ;
2020-06-15 19:53:46 -04:00
return ;
}
2021-08-30 15:18:31 -04:00
if ( ! btree_path_node ( path , level ) )
2020-02-18 16:17:55 -05:00
return ;
2022-09-25 16:42:53 -04:00
if ( ! bch2_btree_node_relock_notrace ( trans , path , level ) )
2016-07-21 19:05:06 -08:00
return ;
2021-08-30 15:18:31 -04:00
BUG_ON ( ! btree_path_pos_in_node ( path , l - > b ) ) ;
2020-02-18 16:17:55 -05:00
bch2_btree_node_iter_verify ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
/*
2021-09-04 21:19:48 -04:00
* For interior nodes , the iterator will have skipped past deleted keys :
2017-03-16 22:18:50 -08:00
*/
2021-09-04 21:19:48 -04:00
p = level
2021-02-19 23:41:40 -05:00
? bch2_btree_node_iter_prev ( & tmp , l - > b )
2020-02-18 16:17:55 -05:00
: bch2_btree_node_iter_prev_all ( & tmp , l - > b ) ;
k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
if ( p & & bkey_iter_pos_cmp ( l - > b , p , & path - > pos ) > = 0 ) {
2020-02-18 16:17:55 -05:00
msg = " before " ;
goto err ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
if ( k & & bkey_iter_pos_cmp ( l - > b , k , & path - > pos ) < 0 ) {
2020-02-18 16:17:55 -05:00
msg = " after " ;
goto err ;
}
2021-08-30 15:54:41 -04:00
2020-02-18 16:17:55 -05:00
if ( ! locked )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , level ) ;
2020-02-18 16:17:55 -05:00
return ;
err :
2022-02-25 13:18:19 -05:00
bch2_bpos_to_text ( & buf1 , path - > pos ) ;
2020-02-18 16:17:55 -05:00
if ( p ) {
struct bkey uk = bkey_unpack_key ( l - > b , p ) ;
2022-10-22 15:59:53 -04:00
2022-02-25 13:18:19 -05:00
bch2_bkey_to_text ( & buf2 , & uk ) ;
} else {
2023-02-03 21:01:40 -05:00
prt_printf ( & buf2 , " (none) " ) ;
2020-02-18 16:17:55 -05:00
}
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
if ( k ) {
struct bkey uk = bkey_unpack_key ( l - > b , k ) ;
2022-10-22 15:59:53 -04:00
2022-02-25 13:18:19 -05:00
bch2_bkey_to_text ( & buf3 , & uk ) ;
} else {
2023-02-03 21:01:40 -05:00
prt_printf ( & buf3 , " (none) " ) ;
2017-03-16 22:18:50 -08:00
}
2020-02-18 16:17:55 -05:00
2021-08-30 15:18:31 -04:00
panic ( " path should be %s key at level %u: \n "
" path pos %s \n "
2020-02-18 16:17:55 -05:00
" prev key %s \n "
" cur key %s \n " ,
2022-02-25 13:18:19 -05:00
msg , level , buf1 . buf , buf2 . buf , buf3 . buf ) ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
static void bch2_btree_path_verify ( struct btree_trans * trans ,
struct btree_path * path )
2017-03-16 22:18:50 -08:00
{
2021-07-14 15:13:27 -04:00
struct bch_fs * c = trans - > c ;
2020-02-18 16:17:55 -05:00
unsigned i ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
EBUG_ON ( path - > btree_id > = BTREE_ID_NR ) ;
for ( i = 0 ; i < ( ! path - > cached ? BTREE_MAX_DEPTH : 1 ) ; i + + ) {
if ( ! path - > l [ i ] . b ) {
2021-10-30 05:28:27 -04:00
BUG_ON ( ! path - > cached & &
c - > btree_roots [ path - > btree_id ] . b - > c . level > i ) ;
2021-08-30 15:18:31 -04:00
break ;
}
bch2_btree_path_verify_level ( trans , path , i ) ;
}
bch2_btree_path_verify_locks ( path ) ;
}
void bch2_trans_verify_paths ( struct btree_trans * trans )
{
struct btree_path * path ;
trans_for_each_path ( trans , path )
bch2_btree_path_verify ( trans , path ) ;
}
static void bch2_btree_iter_verify ( struct btree_iter * iter )
{
struct btree_trans * trans = iter - > trans ;
BUG_ON ( iter - > btree_id > = BTREE_ID_NR ) ;
BUG_ON ( ! ! ( iter - > flags & BTREE_ITER_CACHED ) ! = iter - > path - > cached ) ;
2021-02-11 21:57:32 -05:00
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
BUG_ON ( ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) ;
2021-08-30 15:54:41 -04:00
BUG_ON ( ! ( iter - > flags & __BTREE_ITER_ALL_SNAPSHOTS ) & &
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) & &
! btree_type_has_snapshots ( iter - > btree_id ) ) ;
2022-01-08 21:22:31 -05:00
if ( iter - > update_path )
bch2_btree_path_verify ( trans , iter - > update_path ) ;
2021-08-30 15:18:31 -04:00
bch2_btree_path_verify ( trans , iter - > path ) ;
2020-02-18 16:17:55 -05:00
}
2021-02-11 21:57:32 -05:00
static void bch2_btree_iter_verify_entry_exit ( struct btree_iter * iter )
{
2021-03-15 22:34:00 -04:00
BUG_ON ( ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) & &
! iter - > pos . snapshot ) ;
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
BUG_ON ( ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) & &
iter - > pos . snapshot ! = iter - > snapshot ) ;
2022-11-24 03:12:22 -05:00
BUG_ON ( bkey_lt ( iter - > pos , bkey_start_pos ( & iter - > k ) ) | |
bkey_gt ( iter - > pos , iter - > k . p ) ) ;
2021-02-11 21:57:32 -05:00
}
2021-03-04 22:29:25 -05:00
static int bch2_btree_iter_verify_ret ( struct btree_iter * iter , struct bkey_s_c k )
{
struct btree_trans * trans = iter - > trans ;
struct btree_iter copy ;
struct bkey_s_c prev ;
int ret = 0 ;
if ( ! bch2_debug_check_iterators )
return 0 ;
if ( ! ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) )
return 0 ;
if ( bkey_err ( k ) | | ! k . k )
return 0 ;
BUG_ON ( ! bch2_snapshot_is_ancestor ( trans - > c ,
iter - > snapshot ,
k . k - > p . snapshot ) ) ;
bch2_trans_iter_init ( trans , & copy , iter - > btree_id , iter - > pos ,
2021-12-25 20:13:47 -05:00
BTREE_ITER_NOPRESERVE |
2021-03-04 22:29:25 -05:00
BTREE_ITER_ALL_SNAPSHOTS ) ;
prev = bch2_btree_iter_prev ( & copy ) ;
if ( ! prev . k )
goto out ;
ret = bkey_err ( prev ) ;
if ( ret )
goto out ;
2022-11-24 03:12:22 -05:00
if ( bkey_eq ( prev . k - > p , k . k - > p ) & &
2021-03-04 22:29:25 -05:00
bch2_snapshot_is_ancestor ( trans - > c , iter - > snapshot ,
prev . k - > p . snapshot ) > 0 ) {
2022-02-25 13:18:19 -05:00
struct printbuf buf1 = PRINTBUF , buf2 = PRINTBUF ;
2021-03-04 22:29:25 -05:00
2022-02-25 13:18:19 -05:00
bch2_bkey_to_text ( & buf1 , k . k ) ;
bch2_bkey_to_text ( & buf2 , prev . k ) ;
2021-03-04 22:29:25 -05:00
panic ( " iter snap %u \n "
" k %s \n "
" prev %s \n " ,
iter - > snapshot ,
2022-02-25 13:18:19 -05:00
buf1 . buf , buf2 . buf ) ;
2021-03-04 22:29:25 -05:00
}
out :
bch2_trans_iter_exit ( trans , & copy ) ;
return ret ;
}
2021-11-06 00:03:40 -04:00
/*
 * Assert that @trans holds a lock covering @pos in btree @id.
 *
 * Paths are sorted and walked in order; only paths matching @id and
 * @key_cache that are locked at level 0 and marked should_be_locked are
 * considered.  A non-cached path qualifies when @pos falls within its leaf
 * node's [min_key, key.k.p] range; a cached path qualifies when its position
 * equals @pos.
 *
 * If no path qualifies, the transaction's paths and updates are dumped and
 * the kernel panics.
 */
void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
			    struct bpos pos, bool key_cache)
{
	struct printbuf pos_buf = PRINTBUF;
	struct btree_path *path;
	unsigned idx;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, idx) {
		int cmp = cmp_int(path->btree_id, id);

		if (!cmp)
			cmp = cmp_int(path->cached, key_cache);

		/* Paths are sorted: once past the target there is nothing left. */
		if (cmp > 0)
			break;

		if (cmp < 0 ||
		    !btree_node_locked(path, 0) ||
		    !path->should_be_locked)
			continue;

		if (key_cache
		    ? bkey_eq(pos, path->pos)
		    : (bkey_ge(pos, path->l[0].b->data->min_key) &&
		       bkey_le(pos, path->l[0].b->key.k.p)))
			return;
	}

	bch2_dump_trans_paths_updates(trans);
	bch2_bpos_to_text(&pos_buf, pos);

	panic(" not locked: %s %s%s \n ",
	      bch2_btree_ids[id], pos_buf.buf,
	      key_cache ? " cached " : " ");
}
2016-07-21 19:05:06 -08:00
# else
2021-08-30 15:18:31 -04:00
static inline void bch2_btree_path_verify_level ( struct btree_trans * trans ,
struct btree_path * path , unsigned l ) { }
static inline void bch2_btree_path_verify ( struct btree_trans * trans ,
struct btree_path * path ) { }
2020-03-15 16:15:08 -04:00
static inline void bch2_btree_iter_verify ( struct btree_iter * iter ) { }
2021-02-11 21:57:32 -05:00
static inline void bch2_btree_iter_verify_entry_exit ( struct btree_iter * iter ) { }
2021-03-04 22:29:25 -05:00
static inline int bch2_btree_iter_verify_ret ( struct btree_iter * iter , struct bkey_s_c k ) { return 0 ; }
2016-07-21 19:05:06 -08:00
2017-03-16 22:18:50 -08:00
# endif
2021-08-30 15:18:31 -04:00
/* Btree path: fixups after btree updates */
2019-08-20 17:43:47 -04:00
static void btree_node_iter_set_set_pos ( struct btree_node_iter * iter ,
struct btree * b ,
struct bset_tree * t ,
struct bkey_packed * k )
{
struct btree_node_iter_set * set ;
btree_node_iter_for_each ( iter , set )
if ( set - > end = = t - > end_offset ) {
set - > k = __btree_node_key_to_offset ( b , k ) ;
bch2_btree_node_iter_sort ( iter , b ) ;
return ;
}
bch2_btree_node_iter_push ( iter , b , k , btree_bkey_last ( b , t ) ) ;
}
2021-08-30 15:18:31 -04:00
static void __bch2_btree_path_fix_key_modified ( struct btree_path * path ,
2020-01-06 22:25:09 -05:00
struct btree * b ,
struct bkey_packed * where )
2019-10-02 09:56:39 -04:00
{
2021-08-30 15:18:31 -04:00
struct btree_path_level * l = & path - > l [ b - > c . level ] ;
2019-10-02 09:56:39 -04:00
2020-01-06 22:25:09 -05:00
if ( where ! = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) )
return ;
2021-08-30 15:18:31 -04:00
if ( bkey_iter_pos_cmp ( l - > b , where , & path - > pos ) < 0 )
2020-01-06 22:25:09 -05:00
bch2_btree_node_iter_advance ( & l - > iter , l - > b ) ;
2019-10-02 09:56:39 -04:00
}
2021-08-30 15:18:31 -04:00
void bch2_btree_path_fix_key_modified ( struct btree_trans * trans ,
2019-10-02 09:56:39 -04:00
struct btree * b ,
struct bkey_packed * where )
{
2021-08-30 15:18:31 -04:00
struct btree_path * path ;
2019-10-02 09:56:39 -04:00
2021-08-30 15:18:31 -04:00
trans_for_each_path_with_node ( trans , b , path ) {
__bch2_btree_path_fix_key_modified ( path , b , where ) ;
bch2_btree_path_verify_level ( trans , path , b - > c . level ) ;
2019-10-02 09:56:39 -04:00
}
}
2021-08-30 15:18:31 -04:00
static void __bch2_btree_node_iter_fix ( struct btree_path * path ,
struct btree * b ,
struct btree_node_iter * node_iter ,
struct bset_tree * t ,
struct bkey_packed * where ,
unsigned clobber_u64s ,
unsigned new_u64s )
2017-03-16 22:18:50 -08:00
{
const struct bkey_packed * end = btree_bkey_last ( b , t ) ;
struct btree_node_iter_set * set ;
unsigned offset = __btree_node_key_to_offset ( b , where ) ;
int shift = new_u64s - clobber_u64s ;
2016-07-21 19:05:06 -08:00
unsigned old_end = t - > end_offset - shift ;
2019-09-07 19:17:40 -04:00
unsigned orig_iter_pos = node_iter - > data [ 0 ] . k ;
bool iter_current_key_modified =
orig_iter_pos > = offset & &
orig_iter_pos < = offset + clobber_u64s ;
2017-03-16 22:18:50 -08:00
btree_node_iter_for_each ( node_iter , set )
if ( set - > end = = old_end )
goto found ;
/* didn't find the bset in the iterator - might have to readd it: */
if ( new_u64s & &
2021-08-30 15:18:31 -04:00
bkey_iter_pos_cmp ( b , where , & path - > pos ) > = 0 ) {
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_push ( node_iter , b , where , end ) ;
2019-09-07 19:17:40 -04:00
goto fixup_done ;
} else {
/* Iterator is after key that changed */
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
}
found :
2016-07-21 19:05:06 -08:00
set - > end = t - > end_offset ;
2017-03-16 22:18:50 -08:00
/* Iterator hasn't gotten to the key that changed yet: */
if ( set - > k < offset )
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
if ( new_u64s & &
2021-08-30 15:18:31 -04:00
bkey_iter_pos_cmp ( b , where , & path - > pos ) > = 0 ) {
2017-03-16 22:18:50 -08:00
set - > k = offset ;
} else if ( set - > k < offset + clobber_u64s ) {
set - > k = offset + new_u64s ;
if ( set - > k = = set - > end )
bch2_btree_node_iter_set_drop ( node_iter , set ) ;
} else {
2019-09-07 19:17:40 -04:00
/* Iterator is after key that changed */
2017-03-16 22:18:50 -08:00
set - > k = ( int ) set - > k + shift ;
2019-09-19 16:07:41 -04:00
return ;
2017-03-16 22:18:50 -08:00
}
bch2_btree_node_iter_sort ( node_iter , b ) ;
2019-09-07 19:17:40 -04:00
fixup_done :
if ( node_iter - > data [ 0 ] . k ! = orig_iter_pos )
iter_current_key_modified = true ;
2018-12-06 12:01:29 -05:00
2017-03-16 22:18:50 -08:00
/*
2019-08-20 17:43:47 -04:00
* When a new key is added , and the node iterator now points to that
* key , the iterator might have skipped past deleted keys that should
* come after the key the iterator now points to . We have to rewind to
2019-09-07 19:17:40 -04:00
* before those deleted keys - otherwise
* bch2_btree_node_iter_prev_all ( ) breaks :
2017-03-16 22:18:50 -08:00
*/
2019-08-20 17:43:47 -04:00
if ( ! bch2_btree_node_iter_end ( node_iter ) & &
2019-09-07 19:17:40 -04:00
iter_current_key_modified & &
2021-09-04 21:19:48 -04:00
b - > c . level ) {
2019-08-20 17:43:47 -04:00
struct bset_tree * t ;
struct bkey_packed * k , * k2 , * p ;
k = bch2_btree_node_iter_peek_all ( node_iter , b ) ;
2017-03-16 22:18:50 -08:00
for_each_bset ( b , t ) {
2019-08-20 17:43:47 -04:00
bool set_pos = false ;
if ( node_iter - > data [ 0 ] . end = = t - > end_offset )
2017-03-16 22:18:50 -08:00
continue ;
2019-08-20 17:43:47 -04:00
k2 = bch2_btree_node_iter_bset_pos ( node_iter , b , t ) ;
while ( ( p = bch2_bkey_prev_all ( b , t , k2 ) ) & &
bkey_iter_cmp ( b , k , p ) < 0 ) {
k2 = p ;
set_pos = true ;
2017-03-16 22:18:50 -08:00
}
2019-08-20 17:43:47 -04:00
if ( set_pos )
btree_node_iter_set_set_pos ( node_iter ,
b , t , k2 ) ;
2017-03-16 22:18:50 -08:00
}
}
}
2021-08-24 21:30:06 -04:00
void bch2_btree_node_iter_fix ( struct btree_trans * trans ,
2021-08-30 15:18:31 -04:00
struct btree_path * path ,
2018-08-11 19:12:05 -04:00
struct btree * b ,
struct btree_node_iter * node_iter ,
struct bkey_packed * where ,
unsigned clobber_u64s ,
unsigned new_u64s )
2017-03-16 22:18:50 -08:00
{
2018-08-11 19:12:05 -04:00
struct bset_tree * t = bch2_bkey_to_bset_inlined ( b , where ) ;
2021-08-30 15:18:31 -04:00
struct btree_path * linked ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
if ( node_iter ! = & path - > l [ b - > c . level ] . iter ) {
__bch2_btree_node_iter_fix ( path , b , node_iter , t ,
2019-09-19 16:07:41 -04:00
where , clobber_u64s , new_u64s ) ;
2020-02-18 16:17:55 -05:00
2020-11-02 18:20:44 -05:00
if ( bch2_debug_check_iterators )
2020-02-18 16:17:55 -05:00
bch2_btree_node_iter_verify ( node_iter , b ) ;
2019-09-19 16:07:41 -04:00
}
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
trans_for_each_path_with_node ( trans , b , linked ) {
2017-03-16 22:18:50 -08:00
__bch2_btree_node_iter_fix ( linked , b ,
2019-09-19 16:07:41 -04:00
& linked - > l [ b - > c . level ] . iter , t ,
where , clobber_u64s , new_u64s ) ;
2021-08-30 15:18:31 -04:00
bch2_btree_path_verify_level ( trans , linked , b - > c . level ) ;
2019-09-19 16:07:41 -04:00
}
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
/* Btree path level: pointer to a particular btree node and node iter */
static inline struct bkey_s_c __btree_iter_unpack ( struct bch_fs * c ,
struct btree_path_level * l ,
2017-03-16 22:18:50 -08:00
struct bkey * u ,
struct bkey_packed * k )
{
if ( unlikely ( ! k ) ) {
/*
* signal to bch2_btree_iter_peek_slot ( ) that we ' re currently at
* a hole
*/
2018-11-01 15:10:01 -04:00
u - > type = KEY_TYPE_deleted ;
2017-03-16 22:18:50 -08:00
return bkey_s_c_null ;
}
2022-02-13 22:16:45 -05:00
return bkey_disassemble ( l - > b , k , u ) ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
static inline struct bkey_s_c btree_path_level_peek_all ( struct bch_fs * c ,
struct btree_path_level * l ,
struct bkey * u )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
return __btree_iter_unpack ( c , l , u ,
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ) ;
}
2021-08-30 15:18:31 -04:00
static inline struct bkey_s_c btree_path_level_peek ( struct btree_trans * trans ,
struct btree_path * path ,
struct btree_path_level * l ,
struct bkey * u )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct bkey_s_c k = __btree_iter_unpack ( trans - > c , l , u ,
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ) ;
2021-03-21 19:32:01 -04:00
2021-08-30 15:18:31 -04:00
path - > pos = k . k ? k . k - > p : l - > b - > key . k . p ;
trans - > paths_sorted = false ;
2022-07-21 09:53:28 -04:00
bch2_btree_path_verify_level ( trans , path , l - path - > l ) ;
2021-03-21 19:32:01 -04:00
return k ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
static inline struct bkey_s_c btree_path_level_prev ( struct btree_trans * trans ,
struct btree_path * path ,
struct btree_path_level * l ,
struct bkey * u )
2019-09-07 17:17:21 -04:00
{
2021-08-30 15:18:31 -04:00
struct bkey_s_c k = __btree_iter_unpack ( trans - > c , l , u ,
2019-09-07 17:17:21 -04:00
bch2_btree_node_iter_prev ( & l - > iter , l - > b ) ) ;
2021-03-21 19:32:01 -04:00
2021-08-30 15:18:31 -04:00
path - > pos = k . k ? k . k - > p : l - > b - > data - > min_key ;
trans - > paths_sorted = false ;
2022-07-21 09:53:28 -04:00
bch2_btree_path_verify_level ( trans , path , l - path - > l ) ;
2021-03-21 19:32:01 -04:00
return k ;
2019-09-07 17:17:21 -04:00
}
2021-08-30 15:18:31 -04:00
static inline bool btree_path_advance_to_pos ( struct btree_path * path ,
struct btree_path_level * l ,
2018-08-21 16:30:14 -04:00
int max_advance )
2017-03-16 22:18:50 -08:00
{
2018-08-21 16:30:14 -04:00
struct bkey_packed * k ;
int nr_advanced = 0 ;
while ( ( k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ) & &
2021-08-30 15:18:31 -04:00
bkey_iter_pos_cmp ( l - > b , k , & path - > pos ) < 0 ) {
2018-08-21 16:30:14 -04:00
if ( max_advance > 0 & & nr_advanced > = max_advance )
return false ;
bch2_btree_node_iter_advance ( & l - > iter , l - > b ) ;
nr_advanced + + ;
}
return true ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
static inline void __btree_path_level_init ( struct btree_path * path ,
2021-08-30 14:22:43 -04:00
unsigned level )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_path_level * l = & path - > l [ level ] ;
2018-08-21 16:30:14 -04:00
2021-08-30 15:18:31 -04:00
bch2_btree_node_iter_init ( & l - > iter , l - > b , & path - > pos ) ;
2017-03-16 22:18:50 -08:00
2020-12-11 12:02:48 -05:00
/*
* Iterators to interior nodes should always be pointed at the first non
* whiteout :
*/
if ( level )
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
}
2022-11-13 20:01:42 -05:00
void bch2_btree_path_level_init ( struct btree_trans * trans ,
struct btree_path * path ,
struct btree * b )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
BUG_ON ( path - > cached ) ;
2019-03-07 19:46:10 -05:00
2021-08-30 15:18:31 -04:00
EBUG_ON ( ! btree_path_pos_in_node ( path , b ) ) ;
2020-06-06 12:28:01 -04:00
EBUG_ON ( b - > c . lock . state . seq & 1 ) ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
path - > l [ b - > c . level ] . lock_seq = b - > c . lock . state . seq ;
path - > l [ b - > c . level ] . b = b ;
__btree_path_level_init ( path , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
/* Btree path: fixups after btree node updates: */
2022-11-23 18:46:03 -05:00
static void bch2_trans_revalidate_updates_in_node ( struct btree_trans * trans , struct btree * b )
{
struct bch_fs * c = trans - > c ;
struct btree_insert_entry * i ;
trans_for_each_update ( trans , i )
if ( ! i - > cached & &
i - > level = = b - > c . level & &
i - > btree_id = = b - > c . btree_id & &
bpos_cmp ( i - > k - > k . p , b - > data - > min_key ) > = 0 & &
bpos_cmp ( i - > k - > k . p , b - > data - > max_key ) < = 0 ) {
i - > old_v = bch2_btree_path_peek_slot ( i - > path , & i - > old_k ) . v ;
if ( unlikely ( trans - > journal_replay_not_finished ) ) {
struct bkey_i * j_k =
bch2_journal_keys_peek_slot ( c , i - > btree_id , i - > level ,
i - > k - > k . p ) ;
if ( j_k ) {
i - > old_k = j_k - > k ;
i - > old_v = & j_k - > v ;
}
}
}
}
2017-03-16 22:18:50 -08:00
/*
* A btree node is being replaced - update the iterator to point to the new
* node :
*/
2021-08-30 14:36:03 -04:00
void bch2_trans_node_add ( struct btree_trans * trans , struct btree * b )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_path * path ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
trans_for_each_path ( trans , path )
2022-09-02 22:59:39 -04:00
if ( path - > uptodate = = BTREE_ITER_UPTODATE & &
! path - > cached & &
2021-08-30 15:18:31 -04:00
btree_path_pos_in_node ( path , b ) ) {
2021-09-04 21:23:11 -04:00
enum btree_node_locked_type t =
btree_lock_want ( path , b - > c . level ) ;
2017-03-16 22:18:50 -08:00
2022-09-02 22:59:39 -04:00
if ( t ! = BTREE_NODE_UNLOCKED ) {
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , b - > c . level ) ;
2020-06-06 12:28:01 -04:00
six_lock_increment ( & b - > c . lock , ( enum six_lock_type ) t ) ;
2022-07-14 20:33:09 +12:00
mark_btree_node_locked ( trans , path , b - > c . level , ( enum six_lock_type ) t ) ;
2017-03-16 22:18:50 -08:00
}
2022-09-16 14:42:38 -04:00
bch2_btree_path_level_init ( trans , path , b ) ;
2017-03-16 22:18:50 -08:00
}
2022-11-23 18:46:03 -05:00
bch2_trans_revalidate_updates_in_node ( trans , b ) ;
2017-03-16 22:18:50 -08:00
}
/*
* A btree node has been modified in such a way as to invalidate iterators - fix
* them :
*/
2021-08-30 14:36:03 -04:00
void bch2_trans_node_reinit_iter ( struct btree_trans * trans , struct btree * b )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_path * path ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
trans_for_each_path_with_node ( trans , b , path )
__btree_path_level_init ( path , b - > c . level ) ;
2022-11-23 18:46:03 -05:00
bch2_trans_revalidate_updates_in_node ( trans , b ) ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
/* Btree path: traverse, set_pos: */
static inline int btree_path_lock_root ( struct btree_trans * trans ,
struct btree_path * path ,
2020-10-28 14:17:46 -04:00
unsigned depth_want ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2021-07-25 17:19:52 -04:00
struct bch_fs * c = trans - > c ;
2021-08-30 15:18:31 -04:00
struct btree * b , * * rootp = & c - > btree_roots [ path - > btree_id ] . b ;
2017-03-16 22:18:50 -08:00
enum six_lock_type lock_type ;
unsigned i ;
2022-07-17 23:06:38 -04:00
int ret ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
EBUG_ON ( path - > nodes_locked ) ;
2017-03-16 22:18:50 -08:00
while ( 1 ) {
2020-06-12 22:29:48 -04:00
b = READ_ONCE ( * rootp ) ;
2021-08-30 15:18:31 -04:00
path - > level = READ_ONCE ( b - > c . level ) ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
if ( unlikely ( path - > level < depth_want ) ) {
2017-03-16 22:18:50 -08:00
/*
* the root is at a lower depth than the depth we want :
* got to the end of the btree , or we ' re walking nodes
* greater than some depth and there are no nodes > =
* that depth
*/
2021-08-30 15:18:31 -04:00
path - > level = depth_want ;
for ( i = path - > level ; i < BTREE_MAX_DEPTH ; i + + )
path - > l [ i ] . b = NULL ;
2018-11-21 02:59:07 -05:00
return 1 ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
lock_type = __btree_lock_want ( path , path - > level ) ;
2022-08-22 15:29:53 -04:00
ret = btree_node_lock ( trans , path , & b - > c ,
path - > level , lock_type , trace_ip ) ;
2022-07-17 23:06:38 -04:00
if ( unlikely ( ret ) ) {
if ( bch2_err_matches ( ret , BCH_ERR_lock_fail_root_changed ) )
continue ;
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
return ret ;
BUG ( ) ;
2021-07-25 17:19:52 -04:00
}
2017-03-16 22:18:50 -08:00
2020-06-12 22:29:48 -04:00
if ( likely ( b = = READ_ONCE ( * rootp ) & &
2021-08-30 15:18:31 -04:00
b - > c . level = = path - > level & &
2017-03-16 22:18:50 -08:00
! race_fault ( ) ) ) {
2021-08-30 15:18:31 -04:00
for ( i = 0 ; i < path - > level ; i + + )
2022-08-10 19:08:30 -04:00
path - > l [ i ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_lock_root ) ;
2021-08-30 15:18:31 -04:00
path - > l [ path - > level ] . b = b ;
for ( i = path - > level + 1 ; i < BTREE_MAX_DEPTH ; i + + )
path - > l [ i ] . b = NULL ;
2022-07-14 20:33:09 +12:00
mark_btree_node_locked ( trans , path , path - > level , lock_type ) ;
2022-09-16 14:42:38 -04:00
bch2_btree_path_level_init ( trans , path , b ) ;
2017-03-16 22:18:50 -08:00
return 0 ;
}
2020-06-06 12:28:01 -04:00
six_unlock_type ( & b - > c . lock , lock_type ) ;
2017-03-16 22:18:50 -08:00
}
}
noinline
2021-08-30 15:18:31 -04:00
static int btree_path_prefetch ( struct btree_trans * trans , struct btree_path * path )
2017-03-16 22:18:50 -08:00
{
2021-08-24 21:30:06 -04:00
struct bch_fs * c = trans - > c ;
2021-08-30 15:18:31 -04:00
struct btree_path_level * l = path_l ( path ) ;
2017-03-16 22:18:50 -08:00
struct btree_node_iter node_iter = l - > iter ;
struct bkey_packed * k ;
2020-12-17 15:08:58 -05:00
struct bkey_buf tmp ;
2019-03-25 17:06:42 -04:00
unsigned nr = test_bit ( BCH_FS_STARTED , & c - > flags )
2021-08-30 15:18:31 -04:00
? ( path - > level > 1 ? 0 : 2 )
: ( path - > level > 1 ? 1 : 16 ) ;
bool was_locked = btree_node_locked ( path , path - > level ) ;
2021-07-24 19:50:40 -04:00
int ret = 0 ;
2017-03-16 22:18:50 -08:00
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_init ( & tmp ) ;
2022-10-14 07:20:05 -04:00
while ( nr - - & & ! ret ) {
2021-08-30 15:18:31 -04:00
if ( ! bch2_btree_node_relock ( trans , path , path - > level ) )
2020-12-17 15:08:58 -05:00
break ;
2017-03-16 22:18:50 -08:00
bch2_btree_node_iter_advance ( & node_iter , l - > b ) ;
k = bch2_btree_node_iter_peek ( & node_iter , l - > b ) ;
if ( ! k )
break ;
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_unpack ( & tmp , c , l - > b , k ) ;
2021-08-30 15:18:31 -04:00
ret = bch2_btree_node_prefetch ( c , trans , path , tmp . k , path - > btree_id ,
path - > level - 1 ) ;
2017-03-16 22:18:50 -08:00
}
if ( ! was_locked )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , path - > level ) ;
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_exit ( & tmp , c ) ;
2021-07-24 19:50:40 -04:00
return ret ;
2017-03-16 22:18:50 -08:00
}
2021-12-25 20:07:00 -05:00
static int btree_path_prefetch_j ( struct btree_trans * trans , struct btree_path * path ,
struct btree_and_journal_iter * jiter )
{
struct bch_fs * c = trans - > c ;
struct bkey_s_c k ;
struct bkey_buf tmp ;
unsigned nr = test_bit ( BCH_FS_STARTED , & c - > flags )
? ( path - > level > 1 ? 0 : 2 )
: ( path - > level > 1 ? 1 : 16 ) ;
bool was_locked = btree_node_locked ( path , path - > level ) ;
int ret = 0 ;
bch2_bkey_buf_init ( & tmp ) ;
2022-10-14 07:20:05 -04:00
while ( nr - - & & ! ret ) {
2021-12-25 20:07:00 -05:00
if ( ! bch2_btree_node_relock ( trans , path , path - > level ) )
break ;
bch2_btree_and_journal_iter_advance ( jiter ) ;
k = bch2_btree_and_journal_iter_peek ( jiter ) ;
if ( ! k . k )
break ;
bch2_bkey_buf_reassemble ( & tmp , c , k ) ;
ret = bch2_btree_node_prefetch ( c , trans , path , tmp . k , path - > btree_id ,
path - > level - 1 ) ;
}
if ( ! was_locked )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , path - > level ) ;
2021-12-25 20:07:00 -05:00
bch2_bkey_buf_exit ( & tmp , c ) ;
return ret ;
}
2021-08-30 14:22:43 -04:00
static noinline void btree_node_mem_ptr_set ( struct btree_trans * trans ,
2021-08-30 15:18:31 -04:00
struct btree_path * path ,
2020-02-24 15:25:00 -05:00
unsigned plevel , struct btree * b )
{
2021-08-30 15:18:31 -04:00
struct btree_path_level * l = & path - > l [ plevel ] ;
bool locked = btree_node_locked ( path , plevel ) ;
2020-02-24 15:25:00 -05:00
struct bkey_packed * k ;
struct bch_btree_ptr_v2 * bp ;
2021-08-30 15:18:31 -04:00
if ( ! bch2_btree_node_relock ( trans , path , plevel ) )
2020-02-24 15:25:00 -05:00
return ;
k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
BUG_ON ( k - > type ! = KEY_TYPE_btree_ptr_v2 ) ;
bp = ( void * ) bkeyp_val ( & l - > b - > format , k ) ;
bp - > mem_ptr = ( unsigned long ) b ;
if ( ! locked )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , plevel ) ;
2020-02-24 15:25:00 -05:00
}
2021-12-25 20:07:00 -05:00
static noinline int btree_node_iter_and_journal_peek ( struct btree_trans * trans ,
struct btree_path * path ,
unsigned flags ,
struct bkey_buf * out )
{
struct bch_fs * c = trans - > c ;
struct btree_path_level * l = path_l ( path ) ;
struct btree_and_journal_iter jiter ;
struct bkey_s_c k ;
int ret = 0 ;
__bch2_btree_and_journal_iter_init_node_iter ( & jiter , c , l - > b , l - > iter , path - > pos ) ;
k = bch2_btree_and_journal_iter_peek ( & jiter ) ;
bch2_bkey_buf_reassemble ( out , c , k ) ;
if ( flags & BTREE_ITER_PREFETCH )
ret = btree_path_prefetch_j ( trans , path , & jiter ) ;
bch2_btree_and_journal_iter_exit ( & jiter ) ;
return ret ;
}
2021-08-30 15:18:31 -04:00
static __always_inline int btree_path_down ( struct btree_trans * trans ,
struct btree_path * path ,
unsigned flags ,
2020-10-28 14:17:46 -04:00
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2021-07-24 17:12:51 -04:00
struct bch_fs * c = trans - > c ;
2021-08-30 15:18:31 -04:00
struct btree_path_level * l = path_l ( path ) ;
2017-03-16 22:18:50 -08:00
struct btree * b ;
2021-08-30 15:18:31 -04:00
unsigned level = path - > level - 1 ;
enum six_lock_type lock_type = __btree_lock_want ( path , level ) ;
2020-12-17 15:08:58 -05:00
struct bkey_buf tmp ;
int ret ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
EBUG_ON ( ! btree_node_locked ( path , path - > level ) ) ;
2017-03-16 22:18:50 -08:00
2020-12-17 15:08:58 -05:00
bch2_bkey_buf_init ( & tmp ) ;
2021-12-25 20:07:00 -05:00
if ( unlikely ( trans - > journal_replay_not_finished ) ) {
ret = btree_node_iter_and_journal_peek ( trans , path , flags , & tmp ) ;
if ( ret )
goto err ;
} else {
bch2_bkey_buf_unpack ( & tmp , c , l - > b ,
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ) ;
if ( flags & BTREE_ITER_PREFETCH ) {
ret = btree_path_prefetch ( trans , path ) ;
if ( ret )
goto err ;
}
}
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
b = bch2_btree_node_get ( trans , path , tmp . k , level , lock_type , trace_ip ) ;
2020-12-17 15:08:58 -05:00
ret = PTR_ERR_OR_ZERO ( b ) ;
if ( unlikely ( ret ) )
goto err ;
2017-03-16 22:18:50 -08:00
2021-12-25 20:07:00 -05:00
if ( likely ( ! trans - > journal_replay_not_finished & &
tmp . k - > k . type = = KEY_TYPE_btree_ptr_v2 ) & &
2020-12-17 15:08:58 -05:00
unlikely ( b ! = btree_node_mem_ptr ( tmp . k ) ) )
2021-08-30 15:18:31 -04:00
btree_node_mem_ptr_set ( trans , path , level + 1 , b ) ;
2020-02-24 15:25:00 -05:00
2021-08-30 15:18:31 -04:00
if ( btree_node_read_locked ( path , level + 1 ) )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , level + 1 ) ;
2022-08-15 18:55:20 -04:00
mark_btree_node_locked ( trans , path , level , lock_type ) ;
2021-08-30 15:18:31 -04:00
path - > level = level ;
2022-09-16 14:42:38 -04:00
bch2_btree_path_level_init ( trans , path , b ) ;
2021-04-08 22:26:53 -04:00
2021-08-30 15:18:31 -04:00
bch2_btree_path_verify_locks ( path ) ;
2020-12-17 15:08:58 -05:00
err :
bch2_bkey_buf_exit ( & tmp , c ) ;
return ret ;
2017-03-16 22:18:50 -08:00
}
2022-02-18 00:47:45 -05:00
static int bch2_btree_path_traverse_all ( struct btree_trans * trans )
2017-03-16 22:18:50 -08:00
{
2019-03-27 22:54:42 -04:00
struct bch_fs * c = trans - > c ;
2022-08-22 15:29:53 -04:00
struct btree_path * path ;
2022-02-18 00:47:45 -05:00
unsigned long trace_ip = _RET_IP_ ;
int i , ret = 0 ;
2019-03-27 22:54:42 -04:00
2019-03-07 19:46:10 -05:00
if ( trans - > in_traverse_all )
2022-07-17 23:06:38 -04:00
return - BCH_ERR_transaction_restart_in_traverse_all ;
2019-03-07 19:46:10 -05:00
trans - > in_traverse_all = true ;
retry_all :
2022-07-17 23:06:38 -04:00
trans - > restarted = 0 ;
2021-07-25 17:19:52 -04:00
2021-08-30 15:18:31 -04:00
trans_for_each_path ( trans , path )
path - > should_be_locked = false ;
2019-03-27 22:54:42 -04:00
2021-08-30 15:18:31 -04:00
btree_trans_sort_paths ( trans ) ;
2021-04-14 13:26:15 -04:00
2019-05-15 09:47:40 -04:00
bch2_trans_unlock ( trans ) ;
2020-10-28 14:17:46 -04:00
cond_resched ( ) ;
2017-03-16 22:18:50 -08:00
2022-02-18 00:47:45 -05:00
if ( unlikely ( trans - > memory_allocation_failure ) ) {
2017-03-16 22:18:50 -08:00
struct closure cl ;
closure_init_stack ( & cl ) ;
do {
ret = bch2_btree_cache_cannibalize_lock ( c , & cl ) ;
closure_sync ( & cl ) ;
} while ( ret ) ;
}
/* Now, redo traversals in correct order: */
2021-06-12 15:45:45 -04:00
i = 0 ;
while ( i < trans - > nr_sorted ) {
2021-08-30 15:18:31 -04:00
path = trans - > paths + trans - > sorted [ i ] ;
2019-03-07 19:46:10 -05:00
2022-02-16 03:13:36 -05:00
/*
* Traversing a path can cause another path to be added at about
* the same position :
*/
if ( path - > uptodate ) {
2023-01-08 00:04:30 -05:00
__btree_path_get ( path , false ) ;
2023-02-04 20:40:29 -05:00
ret = bch2_btree_path_traverse_one ( trans , path , 0 , _THIS_IP_ ) ;
2023-01-08 00:04:30 -05:00
__btree_path_put ( path , false ) ;
2022-07-17 23:06:38 -04:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) | |
ret = = - ENOMEM )
2022-02-16 03:13:36 -05:00
goto retry_all ;
2022-03-07 22:05:49 -05:00
if ( ret )
goto err ;
2022-02-16 03:13:36 -05:00
} else {
2021-06-12 15:45:45 -04:00
i + + ;
2022-02-16 03:13:36 -05:00
}
2019-03-27 22:54:42 -04:00
}
2021-06-12 15:45:45 -04:00
/*
2023-01-06 22:58:19 -05:00
* We used to assert that all paths had been traversed here
* ( path - > uptodate < BTREE_ITER_NEED_TRAVERSE ) ; however , since
* path - > Should_be_locked is not set yet , we we might have unlocked and
* then failed to relock a path - that ' s fine .
2021-06-12 15:45:45 -04:00
*/
2022-03-07 22:05:49 -05:00
err :
2017-03-16 22:18:50 -08:00
bch2_btree_cache_cannibalize_unlock ( c ) ;
2019-03-07 19:46:10 -05:00
2022-01-04 00:33:52 -05:00
trans - > in_traverse_all = false ;
2022-08-27 12:48:36 -04:00
trace_and_count ( c , trans_traverse_all , trans , trace_ip ) ;
2017-03-16 22:18:50 -08:00
return ret ;
2019-03-28 00:07:24 -04:00
}
2017-03-16 22:18:50 -08:00
2022-10-17 02:04:31 -04:00
static inline bool btree_path_check_pos_in_node ( struct btree_path * path ,
unsigned l , int check_pos )
2019-09-07 19:19:57 -04:00
{
2021-08-30 15:18:31 -04:00
if ( check_pos < 0 & & btree_path_pos_before_node ( path , path - > l [ l ] . b ) )
2019-09-07 19:19:57 -04:00
return false ;
2021-08-30 15:18:31 -04:00
if ( check_pos > 0 & & btree_path_pos_after_node ( path , path - > l [ l ] . b ) )
2019-09-07 19:19:57 -04:00
return false ;
return true ;
}
2022-10-17 02:04:31 -04:00
static inline bool btree_path_good_node ( struct btree_trans * trans ,
struct btree_path * path ,
unsigned l , int check_pos )
{
return is_btree_node ( path , l ) & &
bch2_btree_node_relock ( trans , path , l ) & &
btree_path_check_pos_in_node ( path , l , check_pos ) ;
}
2022-04-14 15:43:37 -04:00
static void btree_path_set_level_down ( struct btree_trans * trans ,
struct btree_path * path ,
unsigned new_level )
{
unsigned l ;
path - > level = new_level ;
for ( l = path - > level + 1 ; l < BTREE_MAX_DEPTH ; l + + )
if ( btree_lock_want ( path , l ) = = BTREE_NODE_UNLOCKED )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , l ) ;
2022-04-14 15:43:37 -04:00
btree_path_set_dirty ( path , BTREE_ITER_NEED_TRAVERSE ) ;
bch2_btree_path_verify ( trans , path ) ;
}
2022-10-17 02:04:31 -04:00
static noinline unsigned __btree_path_up_until_good_node ( struct btree_trans * trans ,
struct btree_path * path ,
int check_pos )
2017-03-16 22:18:50 -08:00
{
2021-09-07 20:23:30 -04:00
unsigned i , l = path - > level ;
2022-09-27 18:56:57 -04:00
again :
2021-08-30 15:18:31 -04:00
while ( btree_path_node ( path , l ) & &
2022-08-10 19:08:30 -04:00
! btree_path_good_node ( trans , path , l , check_pos ) )
__btree_path_set_level_up ( trans , path , l + + ) ;
2017-03-16 22:18:50 -08:00
2021-09-07 20:23:30 -04:00
/* If we need intent locks, take them too: */
for ( i = l + 1 ;
i < path - > locks_want & & btree_path_node ( path , i ) ;
i + + )
2022-09-27 18:56:57 -04:00
if ( ! bch2_btree_node_relock ( trans , path , i ) ) {
2022-08-10 19:08:30 -04:00
while ( l < = i )
__btree_path_set_level_up ( trans , path , l + + ) ;
2022-09-27 18:56:57 -04:00
goto again ;
}
2021-09-07 20:23:30 -04:00
2017-03-16 22:18:50 -08:00
return l ;
}
2022-10-17 02:04:31 -04:00
static inline unsigned btree_path_up_until_good_node ( struct btree_trans * trans ,
struct btree_path * path ,
int check_pos )
{
return likely ( btree_node_locked ( path , path - > level ) & &
btree_path_check_pos_in_node ( path , path - > level , check_pos ) )
? path - > level
: __btree_path_up_until_good_node ( trans , path , check_pos ) ;
}
2017-03-16 22:18:50 -08:00
/*
* This is the main state machine for walking down the btree - walks down to a
* specified depth
*
* Returns 0 on success , - EIO on error ( error reading in a btree node ) .
*
* On error , caller ( peek_node ( ) / peek_key ( ) ) must return NULL ; the error is
2019-05-10 16:09:17 -04:00
* stashed in the iterator and returned from bch2_trans_exit ( ) .
2017-03-16 22:18:50 -08:00
*/
2023-02-04 20:40:29 -05:00
int bch2_btree_path_traverse_one ( struct btree_trans * trans ,
struct btree_path * path ,
unsigned flags ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2021-09-07 20:23:30 -04:00
unsigned depth_want = path - > level ;
2022-11-15 22:48:03 -05:00
int ret = - ( ( int ) trans - > restarted ) ;
2017-03-16 22:18:50 -08:00
2022-07-17 23:06:38 -04:00
if ( unlikely ( ret ) )
2021-10-24 16:55:17 -04:00
goto out ;
2021-07-22 12:39:11 -04:00
/*
2021-08-30 15:18:31 -04:00
* Ensure we obey path - > should_be_locked : if it ' s set , we can ' t unlock
* and re - traverse the path without a transaction restart :
2021-07-22 12:39:11 -04:00
*/
2021-08-30 15:18:31 -04:00
if ( path - > should_be_locked ) {
2022-07-17 23:06:38 -04:00
ret = bch2_btree_path_relock ( trans , path , trace_ip ) ;
2021-07-22 12:39:11 -04:00
goto out ;
}
2021-08-30 15:18:31 -04:00
if ( path - > cached ) {
ret = bch2_btree_path_traverse_cached ( trans , path , flags ) ;
2021-06-04 15:18:10 -04:00
goto out ;
}
2019-03-07 19:46:10 -05:00
2021-08-30 15:18:31 -04:00
if ( unlikely ( path - > level > = BTREE_MAX_DEPTH ) )
2021-06-04 15:18:10 -04:00
goto out ;
2019-03-07 19:46:10 -05:00
2021-08-30 15:18:31 -04:00
path - > level = btree_path_up_until_good_node ( trans , path , 0 ) ;
2017-03-16 22:18:50 -08:00
2022-09-19 14:14:01 -04:00
EBUG_ON ( btree_path_node ( path , path - > level ) & &
! btree_node_locked ( path , path - > level ) ) ;
2017-03-16 22:18:50 -08:00
/*
2021-08-30 15:18:31 -04:00
* Note : path - > nodes [ path - > level ] may be temporarily NULL here - that
2017-03-16 22:18:50 -08:00
* would indicate to other code that we got to the end of the btree ,
* here it indicates that relocking the root failed - it ' s critical that
2021-08-30 15:18:31 -04:00
* btree_path_lock_root ( ) comes next and that it can ' t fail
2017-03-16 22:18:50 -08:00
*/
2021-08-30 15:18:31 -04:00
while ( path - > level > depth_want ) {
ret = btree_path_node ( path , path - > level )
? btree_path_down ( trans , path , flags , trace_ip )
: btree_path_lock_root ( trans , path , depth_want , trace_ip ) ;
2017-03-16 22:18:50 -08:00
if ( unlikely ( ret ) ) {
2021-06-04 15:18:10 -04:00
if ( ret = = 1 ) {
/*
2021-08-30 15:54:41 -04:00
* No nodes at this level - got to the end of
* the btree :
2021-06-04 15:18:10 -04:00
*/
ret = 0 ;
goto out ;
}
2018-11-21 02:59:07 -05:00
2022-07-14 18:58:23 +12:00
__bch2_btree_path_unlock ( trans , path ) ;
2021-08-30 15:18:31 -04:00
path - > level = depth_want ;
2022-08-10 19:08:30 -04:00
path - > l [ path - > level ] . b = ERR_PTR ( ret ) ;
2021-06-04 15:18:10 -04:00
goto out ;
2017-03-16 22:18:50 -08:00
}
}
2021-08-30 15:18:31 -04:00
path - > uptodate = BTREE_ITER_UPTODATE ;
2021-06-04 15:18:10 -04:00
out :
2023-02-09 15:49:25 -05:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) ! = ! ! trans - > restarted )
panic ( " ret %s (%i) trans->restarted %s (%i) \n " ,
bch2_err_str ( ret ) , ret ,
bch2_err_str ( trans - > restarted ) , trans - > restarted ) ;
2021-08-30 15:18:31 -04:00
bch2_btree_path_verify ( trans , path ) ;
2021-06-04 15:18:10 -04:00
return ret ;
2017-03-16 22:18:50 -08:00
}
2022-11-01 03:37:53 -04:00
static inline void btree_path_copy ( struct btree_trans * trans , struct btree_path * dst ,
2021-08-30 15:18:31 -04:00
struct btree_path * src )
{
unsigned i , offset = offsetof ( struct btree_path , pos ) ;
memcpy ( ( void * ) dst + offset ,
( void * ) src + offset ,
sizeof ( struct btree_path ) - offset ) ;
2022-10-21 17:26:49 -04:00
for ( i = 0 ; i < BTREE_MAX_DEPTH ; i + + ) {
unsigned t = btree_node_locked_type ( dst , i ) ;
if ( t ! = BTREE_NODE_UNLOCKED )
six_lock_increment ( & dst - > l [ i ] . b - > c . lock , t ) ;
}
2021-08-30 15:18:31 -04:00
}
2021-09-07 13:55:33 -04:00
/*
 * Allocate a new path next to @src, copy @src's state into it, and take one
 * reference on the copy (an intent reference if @intent).
 *
 * Returns the new path.
 */
static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
					   bool intent)
{
	struct btree_path *copy;

	copy = btree_path_alloc(trans, src);
	btree_path_copy(trans, copy, src);
	__btree_path_get(copy, intent);

	return copy;
}
2022-11-01 03:37:53 -04:00
__flatten
2022-03-30 13:47:07 -04:00
struct btree_path * __bch2_btree_path_make_mut ( struct btree_trans * trans ,
2023-01-09 01:11:18 -05:00
struct btree_path * path , bool intent ,
unsigned long ip )
2021-09-07 13:55:33 -04:00
{
2021-08-30 15:18:31 -04:00
__btree_path_put ( path , intent ) ;
2021-09-07 13:55:33 -04:00
path = btree_path_clone ( trans , path , intent ) ;
2021-08-30 15:18:31 -04:00
path - > preserve = false ;
return path ;
}
2022-02-06 22:21:44 -05:00
struct btree_path * __must_check
2021-08-30 15:18:31 -04:00
__bch2_btree_path_set_pos ( struct btree_trans * trans ,
2023-01-09 01:11:18 -05:00
struct btree_path * path , struct bpos new_pos ,
bool intent , unsigned long ip , int cmp )
2021-08-30 15:18:31 -04:00
{
2022-12-02 12:45:37 -05:00
unsigned level = path - > level ;
2021-08-30 15:18:31 -04:00
2023-02-01 16:15:51 -05:00
bch2_trans_verify_not_in_restart ( trans ) ;
2021-08-30 15:18:31 -04:00
EBUG_ON ( ! path - > ref ) ;
2023-01-09 01:11:18 -05:00
path = bch2_btree_path_make_mut ( trans , path , intent , ip ) ;
2021-08-30 15:18:31 -04:00
path - > pos = new_pos ;
trans - > paths_sorted = false ;
if ( unlikely ( path - > cached ) ) {
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , 0 ) ;
2022-08-10 19:08:30 -04:00
path - > l [ 0 ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_up ) ;
2021-08-30 15:18:31 -04:00
btree_path_set_dirty ( path , BTREE_ITER_NEED_TRAVERSE ) ;
goto out ;
}
2022-12-02 12:45:37 -05:00
level = btree_path_up_until_good_node ( trans , path , cmp ) ;
2021-08-30 15:18:31 -04:00
2022-12-02 12:45:37 -05:00
if ( btree_path_node ( path , level ) ) {
struct btree_path_level * l = & path - > l [ level ] ;
BUG_ON ( ! btree_node_locked ( path , level ) ) ;
2021-08-30 15:18:31 -04:00
/*
* We might have to skip over many keys , or just a few : try
* advancing the node iterator , and if we have to skip over too
* many keys just reinit it ( or if we ' re rewinding , since that
* is expensive ) .
*/
if ( cmp < 0 | |
2022-12-02 12:45:37 -05:00
! btree_path_advance_to_pos ( path , l , 8 ) )
bch2_btree_node_iter_init ( & l - > iter , l - > b , & path - > pos ) ;
/*
* Iterators to interior nodes should always be pointed at the first non
* whiteout :
*/
if ( unlikely ( level ) )
bch2_btree_node_iter_peek ( & l - > iter , l - > b ) ;
2021-08-30 15:18:31 -04:00
}
2022-12-02 12:45:37 -05:00
if ( unlikely ( level ! = path - > level ) ) {
2021-08-30 15:18:31 -04:00
btree_path_set_dirty ( path , BTREE_ITER_NEED_TRAVERSE ) ;
2022-07-14 18:58:23 +12:00
__bch2_btree_path_unlock ( trans , path ) ;
2021-09-04 21:23:11 -04:00
}
2021-08-30 15:18:31 -04:00
out :
bch2_btree_path_verify ( trans , path ) ;
return path ;
}
/* Btree path: main interface: */
static struct btree_path * have_path_at_pos ( struct btree_trans * trans , struct btree_path * path )
{
2022-08-10 20:05:14 -04:00
struct btree_path * sib ;
2021-08-30 15:18:31 -04:00
2022-08-10 20:05:14 -04:00
sib = prev_btree_path ( trans , path ) ;
if ( sib & & ! btree_path_cmp ( sib , path ) )
return sib ;
2021-08-30 15:18:31 -04:00
2022-08-10 20:05:14 -04:00
sib = next_btree_path ( trans , path ) ;
if ( sib & & ! btree_path_cmp ( sib , path ) )
return sib ;
2021-08-30 15:18:31 -04:00
return NULL ;
}
2021-11-07 10:19:37 -05:00
static struct btree_path * have_node_at_pos ( struct btree_trans * trans , struct btree_path * path )
2021-08-30 15:18:31 -04:00
{
2022-08-10 20:05:14 -04:00
struct btree_path * sib ;
2021-08-30 15:18:31 -04:00
2022-08-10 20:05:14 -04:00
sib = prev_btree_path ( trans , path ) ;
if ( sib & & sib - > level = = path - > level & & path_l ( sib ) - > b = = path_l ( path ) - > b )
return sib ;
2021-08-30 15:18:31 -04:00
2022-08-10 20:05:14 -04:00
sib = next_btree_path ( trans , path ) ;
if ( sib & & sib - > level = = path - > level & & path_l ( sib ) - > b = = path_l ( path ) - > b )
return sib ;
2021-08-30 15:18:31 -04:00
2021-11-07 10:19:37 -05:00
return NULL ;
2021-08-30 15:18:31 -04:00
}
static inline void __bch2_path_free ( struct btree_trans * trans , struct btree_path * path )
2021-03-23 21:22:50 -04:00
{
2022-07-14 18:58:23 +12:00
__bch2_btree_path_unlock ( trans , path ) ;
2021-08-30 15:18:31 -04:00
btree_path_list_remove ( trans , path ) ;
trans - > paths_allocated & = ~ ( 1ULL < < path - > idx ) ;
2021-03-23 21:22:50 -04:00
}
2021-08-30 15:18:31 -04:00
/*
 * Drop a reference on @path (an intent reference if @intent).
 *
 * Nothing more happens unless __btree_path_put() returns true - presumably
 * when the last reference went away (TODO confirm against its definition).
 * In that case the path is freed only if keeping it around is pointless:
 *
 *  - a neighbouring duplicate exists (same position if the path is marked
 *    'preserve', otherwise same node), or
 *  - there is no duplicate, the path is not marked 'preserve', and it does
 *    not point at a btree node at its level.
 *
 * A path that should_be_locked is, outside of a transaction restart, only
 * freed if the duplicate can be relocked. The duplicate inherits the
 * 'preserve' and 'should_be_locked' flags.
 */
void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
{
	struct btree_path *dup;

	EBUG_ON(trans->paths + path->idx != path);
	EBUG_ON(!path->ref);

	if (!__btree_path_put(path, intent))
		return;

	if (path->preserve)
		dup = have_path_at_pos(trans, path);
	else
		dup = have_node_at_pos(trans, path);

	if (!dup && (path->preserve || is_btree_node(path, path->level)))
		return;

	if (path->should_be_locked && !trans->restarted) {
		if (!dup)
			return;
		if (!bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_))
			return;
	}

	if (dup) {
		dup->preserve		|= path->preserve;
		dup->should_be_locked	|= path->should_be_locked;
	}

	__bch2_path_free(trans, path);
}
2022-10-11 06:37:56 -04:00
/*
 * Drop a reference on @path (an intent reference if @intent) and, whenever
 * __btree_path_put() returns true, free the path unconditionally - unlike
 * bch2_path_put() no attempt is made to keep it around.
 */
static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *path,
				 bool intent)
{
	EBUG_ON(trans->paths + path->idx != path);
	EBUG_ON(!path->ref);

	if (__btree_path_put(path, intent))
		__bch2_path_free(trans, path);
}
2023-02-01 16:15:51 -05:00
/*
 * Fatal error: the transaction's restart count is not the expected
 * @restart_count. Panics, reporting both counts and the address that last
 * restarted the transaction.
 */
void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
{
	void *last_ip = (void *) trans->last_restarted_ip;

	/* NOTE(review): spacing inside the literal is kept exactly as found - confirm against upstream */
	panic(" trans->restart_count %u, should be %u, last restarted by %pS \n ",
	      trans->restart_count, restart_count, last_ip);
}
/*
 * Fatal error: an operation was attempted while the transaction is in the
 * restarted state. Panics, reporting the restart reason and the address that
 * last restarted the transaction.
 */
void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
	void *last_ip = (void *) trans->last_restarted_ip;

	/* NOTE(review): spacing inside the literal is kept exactly as found - confirm against upstream */
	panic(" in transaction restart: %s, last restarted by %pS \n ",
	      bch2_err_str(trans->restarted), last_ip);
}
2022-08-11 20:14:54 -04:00
noinline __cold
2022-03-11 18:38:24 -05:00
void bch2_trans_updates_to_text ( struct printbuf * buf , struct btree_trans * trans )
2021-08-30 15:18:31 -04:00
{
struct btree_insert_entry * i ;
2022-03-02 22:18:56 -05:00
2023-02-03 21:01:40 -05:00
prt_printf ( buf , " transaction updates for %s journal seq %llu " ,
2022-03-11 18:38:24 -05:00
trans - > fn , trans - > journal_res . seq ) ;
2023-02-03 21:01:40 -05:00
prt_newline ( buf ) ;
printbuf_indent_add ( buf , 2 ) ;
2022-03-02 22:18:56 -05:00
trans_for_each_update ( trans , i ) {
struct bkey_s_c old = { & i - > old_k , i - > old_v } ;
2023-02-03 21:01:40 -05:00
prt_printf ( buf , " update: btree=%s cached=%u %pS " ,
2022-03-02 22:18:56 -05:00
bch2_btree_ids [ i - > btree_id ] ,
2022-03-11 18:38:24 -05:00
i - > cached ,
( void * ) i - > ip_allocated ) ;
2023-02-03 21:01:40 -05:00
prt_newline ( buf ) ;
2022-03-11 18:38:24 -05:00
2023-02-03 21:01:40 -05:00
prt_printf ( buf , " old " ) ;
2022-03-11 18:38:24 -05:00
bch2_bkey_val_to_text ( buf , trans - > c , old ) ;
2023-02-03 21:01:40 -05:00
prt_newline ( buf ) ;
2022-03-11 18:38:24 -05:00
2023-02-03 21:01:40 -05:00
prt_printf ( buf , " new " ) ;
2022-03-11 18:38:24 -05:00
bch2_bkey_val_to_text ( buf , trans - > c , bkey_i_to_s_c ( i - > k ) ) ;
2023-02-03 21:01:40 -05:00
prt_newline ( buf ) ;
2022-03-02 22:18:56 -05:00
}
2023-02-03 21:01:40 -05:00
printbuf_indent_sub ( buf , 2 ) ;
2022-03-11 18:38:24 -05:00
}
noinline __cold
void bch2_dump_trans_updates ( struct btree_trans * trans )
{
struct printbuf buf = PRINTBUF ;
bch2_trans_updates_to_text ( & buf , trans ) ;
2022-09-25 16:43:55 -04:00
bch2_print_string_as_lines ( KERN_ERR , buf . buf ) ;
2022-03-11 18:38:24 -05:00
printbuf_exit ( & buf ) ;
2022-03-02 22:18:56 -05:00
}
/*
 * Append a one-line description of @path to @out: its index, reference
 * counts (total:intent), one flag character each for 'preserve' and
 * 'should_be_locked', the btree name, the level, the position, and the
 * nodes_locked value; the line is terminated with a newline.
 *
 * When TRACK_PATH_ALLOCATED is defined, the address recorded in
 * path->ip_allocated is printed as well.
 *
 * NOTE(review): the character and string literals below look whitespace-padded
 * by whatever produced this copy of the file (e.g. ' P ' is not a
 * single-character constant) - confirm against upstream before relying on the
 * exact output format.
 */
noinline __cold
2022-08-11 20:14:54 -04:00
void bch2_btree_path_to_text ( struct printbuf * out , struct btree_path * path )
{
prt_printf ( out , " path: idx %2u ref %u:%u %c %c btree=%s l=%u pos " ,
path - > idx , path - > ref , path - > intent_ref ,
path - > preserve ? ' P ' : ' ' ,
path - > should_be_locked ? ' S ' : ' ' ,
bch2_btree_ids [ path - > btree_id ] ,
path - > level ) ;
bch2_bpos_to_text ( out , path - > pos ) ;
prt_printf ( out , " locks %u " , path - > nodes_locked ) ;
2023-02-04 19:39:59 -05:00
# ifdef TRACK_PATH_ALLOCATED
2022-08-11 20:14:54 -04:00
prt_printf ( out , " %pS " , ( void * ) path - > ip_allocated ) ;
# endif
prt_newline ( out ) ;
}
/*
 * Append a description of every path of @trans to @out, one per line, walking
 * the paths in sorted order. Unless @nosort is set, the paths are sorted
 * first.
 */
noinline __cold
void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
				bool nosort)
{
	struct btree_path *path;
	unsigned idx;

	if (!nosort)
		btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, idx) {
		bch2_btree_path_to_text(out, path);
	}
}
2022-02-25 13:18:19 -05:00
2022-08-11 20:14:54 -04:00
noinline __cold
void bch2_trans_paths_to_text ( struct printbuf * out , struct btree_trans * trans )
{
__bch2_trans_paths_to_text ( out , trans , false ) ;
}
2022-02-25 13:18:19 -05:00
2022-08-11 20:14:54 -04:00
/*
 * Format all paths and then all pending updates of @trans into a temporary
 * printbuf and print the result line by line at KERN_ERR level. @nosort is
 * passed through to the path formatter.
 */
noinline __cold
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
{
	struct printbuf text = PRINTBUF;

	__bch2_trans_paths_to_text(&text, trans, nosort);
	bch2_trans_updates_to_text(&text, trans);

	bch2_print_string_as_lines(KERN_ERR, text.buf);
	printbuf_exit(&text);
}
2022-08-11 20:14:54 -04:00
noinline __cold
void bch2_dump_trans_paths_updates ( struct btree_trans * trans )
{
__bch2_dump_trans_paths_updates ( trans , false ) ;
}
noinline __cold
static void bch2_trans_update_max_paths ( struct btree_trans * trans )
{
struct btree_transaction_stats * s = btree_trans_stats ( trans ) ;
struct printbuf buf = PRINTBUF ;
if ( ! s )
return ;
bch2_trans_paths_to_text ( & buf , trans ) ;
if ( ! buf . allocation_failure ) {
mutex_lock ( & s - > lock ) ;
if ( s - > nr_max_paths < hweight64 ( trans - > paths_allocated ) ) {
2022-09-26 16:19:56 -04:00
s - > nr_max_paths = trans - > nr_max_paths =
hweight64 ( trans - > paths_allocated ) ;
2022-08-11 20:14:54 -04:00
swap ( s - > max_paths_text , buf . buf ) ;
}
mutex_unlock ( & s - > lock ) ;
}
printbuf_exit ( & buf ) ;
trans - > nr_max_paths = hweight64 ( trans - > paths_allocated ) ;
}
2022-09-26 16:19:56 -04:00
/*
 * Called by btree_path_alloc() when every path slot in the transaction is
 * already allocated: dump the transaction's paths and pending updates to the
 * log, then panic.
 */
static noinline void btree_path_overflow(struct btree_trans *trans)
{
	bch2_dump_trans_paths_updates(trans);
	panic("trans path overflow\n");
}
static inline struct btree_path * btree_path_alloc ( struct btree_trans * trans ,
struct btree_path * pos )
2021-08-30 15:18:31 -04:00
{
struct btree_path * path ;
unsigned idx ;
if ( unlikely ( trans - > paths_allocated = =
2022-09-26 16:19:56 -04:00
~ ( ( ~ 0ULL < < 1 ) < < ( BTREE_ITER_MAX - 1 ) ) ) )
btree_path_overflow ( trans ) ;
2021-08-30 15:18:31 -04:00
idx = __ffs64 ( ~ trans - > paths_allocated ) ;
2023-02-08 18:04:22 -05:00
/*
* Do this before marking the new path as allocated , since it won ' t be
* initialized yet :
*/
if ( unlikely ( idx > trans - > nr_max_paths ) )
bch2_trans_update_max_paths ( trans ) ;
2021-08-30 15:18:31 -04:00
trans - > paths_allocated | = 1ULL < < idx ;
path = & trans - > paths [ idx ] ;
path - > idx = idx ;
path - > ref = 0 ;
path - > intent_ref = 0 ;
path - > nodes_locked = 0 ;
btree_path_list_add ( trans , pos , path ) ;
2022-08-11 20:14:54 -04:00
trans - > paths_sorted = false ;
2021-08-30 15:18:31 -04:00
return path ;
}
2021-12-21 20:48:26 -05:00
struct btree_path * bch2_path_get ( struct btree_trans * trans ,
2021-08-30 15:18:31 -04:00
enum btree_id btree_id , struct bpos pos ,
unsigned locks_want , unsigned level ,
2023-01-09 01:11:18 -05:00
unsigned flags , unsigned long ip )
2021-08-30 15:18:31 -04:00
{
2021-08-30 15:18:31 -04:00
struct btree_path * path , * path_pos = NULL ;
2021-12-21 20:48:26 -05:00
bool cached = flags & BTREE_ITER_CACHED ;
bool intent = flags & BTREE_ITER_INTENT ;
2021-08-30 15:18:31 -04:00
int i ;
2023-02-01 16:15:51 -05:00
bch2_trans_verify_not_in_restart ( trans ) ;
2022-03-30 13:10:03 -04:00
bch2_trans_verify_locks ( trans ) ;
2022-02-24 19:04:11 -05:00
btree_trans_sort_paths ( trans ) ;
2021-08-30 15:18:31 -04:00
2021-08-30 15:18:31 -04:00
trans_for_each_path_inorder ( trans , path , i ) {
if ( __btree_path_cmp ( path ,
btree_id ,
cached ,
pos ,
level ) > 0 )
break ;
2021-08-30 15:18:31 -04:00
2021-08-30 15:18:31 -04:00
path_pos = path ;
2021-08-30 15:18:31 -04:00
}
2021-08-30 15:18:31 -04:00
if ( path_pos & &
path_pos - > cached = = cached & &
path_pos - > btree_id = = btree_id & &
path_pos - > level = = level ) {
__btree_path_get ( path_pos , intent ) ;
2023-01-09 01:11:18 -05:00
path = bch2_btree_path_set_pos ( trans , path_pos , pos , intent , ip ) ;
2021-08-30 15:18:31 -04:00
} else {
2021-08-30 15:18:31 -04:00
path = btree_path_alloc ( trans , path_pos ) ;
path_pos = NULL ;
2021-08-30 15:18:31 -04:00
__btree_path_get ( path , intent ) ;
path - > pos = pos ;
path - > btree_id = btree_id ;
path - > cached = cached ;
path - > uptodate = BTREE_ITER_NEED_TRAVERSE ;
path - > should_be_locked = false ;
path - > level = level ;
path - > locks_want = locks_want ;
path - > nodes_locked = 0 ;
for ( i = 0 ; i < ARRAY_SIZE ( path - > l ) ; i + + )
2022-08-10 19:08:30 -04:00
path - > l [ i ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_init ) ;
2023-02-04 19:39:59 -05:00
# ifdef TRACK_PATH_ALLOCATED
2023-01-09 01:11:18 -05:00
path - > ip_allocated = ip ;
2021-08-30 15:18:31 -04:00
# endif
trans - > paths_sorted = false ;
}
2021-12-21 20:48:26 -05:00
if ( ! ( flags & BTREE_ITER_NOPRESERVE ) )
path - > preserve = true ;
2021-08-30 15:18:31 -04:00
if ( path - > intent_ref )
locks_want = max ( locks_want , level + 1 ) ;
/*
* If the path has locks_want greater than requested , we don ' t downgrade
* it here - on transaction restart because btree node split needs to
* upgrade locks , we might be putting / getting the iterator again .
* Downgrading iterators only happens via bch2_trans_downgrade ( ) , after
* a successful transaction commit .
*/
locks_want = min ( locks_want , BTREE_MAX_DEPTH ) ;
2022-08-19 15:35:34 -04:00
if ( locks_want > path - > locks_want )
bch2_btree_path_upgrade_noupgrade_sibs ( trans , path , locks_want ) ;
2021-08-30 15:18:31 -04:00
return path ;
}
2022-11-13 20:01:42 -05:00
struct bkey_s_c bch2_btree_path_peek_slot ( struct btree_path * path , struct bkey * u )
2021-08-30 15:18:31 -04:00
{
2022-10-12 07:58:50 -04:00
struct btree_path_level * l = path_l ( path ) ;
struct bkey_packed * _k ;
2021-08-30 15:18:31 -04:00
struct bkey_s_c k ;
2022-10-12 07:58:50 -04:00
if ( unlikely ( ! l - > b ) )
return bkey_s_c_null ;
2022-02-06 23:15:12 -05:00
2022-10-12 07:58:50 -04:00
EBUG_ON ( path - > uptodate ! = BTREE_ITER_UPTODATE ) ;
EBUG_ON ( ! btree_node_locked ( path , path - > level ) ) ;
2021-08-30 15:18:31 -04:00
2022-10-12 07:58:50 -04:00
if ( ! path - > cached ) {
2022-02-06 23:15:12 -05:00
_k = bch2_btree_node_iter_peek_all ( & l - > iter , l - > b ) ;
2021-08-30 15:18:31 -04:00
k = _k ? bkey_disassemble ( l - > b , _k , u ) : bkey_s_c_null ;
2022-11-24 03:12:22 -05:00
EBUG_ON ( k . k & & bkey_deleted ( k . k ) & & bpos_eq ( k . k - > p , path - > pos ) ) ;
2021-08-30 15:18:31 -04:00
2022-11-24 03:12:22 -05:00
if ( ! k . k | | ! bpos_eq ( path - > pos , k . k - > p ) )
2021-08-30 15:18:31 -04:00
goto hole ;
} else {
struct bkey_cached * ck = ( void * ) path - > l [ 0 ] . b ;
2022-02-06 23:15:12 -05:00
EBUG_ON ( ck & &
( path - > btree_id ! = ck - > key . btree_id | |
2022-11-24 03:12:22 -05:00
! bkey_eq ( path - > pos , ck - > key . pos ) ) ) ;
2022-12-20 11:26:57 -05:00
if ( ! ck | | ! ck - > valid )
return bkey_s_c_null ;
2021-08-30 15:18:31 -04:00
2022-02-24 11:02:58 -05:00
* u = ck - > k - > k ;
2021-08-30 15:18:31 -04:00
k = bkey_i_to_s_c ( ck - > k ) ;
}
return k ;
hole :
bkey_init ( u ) ;
u - > p = path - > pos ;
return ( struct bkey_s_c ) { u , NULL } ;
}
/* Btree iterators: */
2021-09-07 15:34:16 -04:00
/* Traverse the iterator's current path without repositioning it first. */
int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
{
	int ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);

	return ret;
}
2021-03-23 21:22:50 -04:00
/*
 * Move the iterator's path to the iterator's search key and traverse it.
 * On success the path is marked should_be_locked and 0 is returned;
 * otherwise the traverse error is returned.
 */
int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	int ret;

	iter->path = bch2_btree_path_set_pos(trans, iter->path,
					btree_iter_search_key(iter),
					iter->flags & BTREE_ITER_INTENT,
					btree_iter_ip_allocated(iter));

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (!ret)
		btree_path_set_should_be_locked(iter->path);

	return ret;
}
2017-03-16 22:18:50 -08:00
/* Iterate across nodes (leaf and interior nodes) */
struct btree * bch2_btree_iter_peek_node ( struct btree_iter * iter )
{
2021-10-07 14:54:50 -04:00
struct btree_trans * trans = iter - > trans ;
2021-08-30 15:54:41 -04:00
struct btree * b = NULL ;
2017-03-16 22:18:50 -08:00
int ret ;
2021-08-30 15:18:31 -04:00
EBUG_ON ( iter - > path - > cached ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
2021-10-07 14:54:50 -04:00
ret = bch2_btree_path_traverse ( trans , iter - > path , iter - > flags ) ;
2017-03-16 22:18:50 -08:00
if ( ret )
2021-10-19 14:20:50 -04:00
goto err ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
b = btree_path_node ( iter - > path , iter - > path - > level ) ;
2017-03-16 22:18:50 -08:00
if ( ! b )
2021-08-30 15:54:41 -04:00
goto out ;
2017-03-16 22:18:50 -08:00
2022-11-24 03:12:22 -05:00
BUG_ON ( bpos_lt ( b - > key . k . p , iter - > pos ) ) ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:54:41 -04:00
bkey_init ( & iter - > k ) ;
2021-08-30 15:18:31 -04:00
iter - > k . p = iter - > pos = b - > key . k . p ;
2021-10-07 14:54:50 -04:00
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , b - > key . k . p ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
2021-08-30 15:54:41 -04:00
out :
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2020-02-18 16:17:55 -05:00
2017-03-16 22:18:50 -08:00
return b ;
2021-10-19 14:20:50 -04:00
err :
b = ERR_PTR ( ret ) ;
goto out ;
2017-03-16 22:18:50 -08:00
}
2020-02-18 16:17:55 -05:00
struct btree * bch2_btree_iter_next_node ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_trans * trans = iter - > trans ;
struct btree_path * path = iter - > path ;
2021-08-30 15:54:41 -04:00
struct btree * b = NULL ;
2017-03-16 22:18:50 -08:00
int ret ;
2023-02-01 16:15:51 -05:00
bch2_trans_verify_not_in_restart ( trans ) ;
2021-08-30 15:18:31 -04:00
EBUG_ON ( iter - > path - > cached ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
2021-10-24 16:55:17 -04:00
/* already at end? */
2021-08-30 15:18:31 -04:00
if ( ! btree_path_node ( path , path - > level ) )
2021-10-24 16:55:17 -04:00
return NULL ;
2017-03-16 22:18:50 -08:00
2021-10-24 16:55:17 -04:00
/* got to end? */
if ( ! btree_path_node ( path , path - > level + 1 ) ) {
2022-07-14 18:58:23 +12:00
btree_path_set_level_up ( trans , path ) ;
2021-10-24 16:55:17 -04:00
return NULL ;
}
2017-03-16 22:18:50 -08:00
2021-10-24 16:55:17 -04:00
if ( ! bch2_btree_node_relock ( trans , path , path - > level + 1 ) ) {
2022-07-14 18:58:23 +12:00
__bch2_btree_path_unlock ( trans , path ) ;
2022-08-10 19:08:30 -04:00
path - > l [ path - > level ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_relock ) ;
path - > l [ path - > level + 1 ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_relock ) ;
2022-03-30 13:10:03 -04:00
btree_path_set_dirty ( path , BTREE_ITER_NEED_TRAVERSE ) ;
2022-08-27 12:48:36 -04:00
trace_and_count ( trans - > c , trans_restart_relock_next_node , trans , _THIS_IP_ , path ) ;
2022-07-17 23:06:38 -04:00
ret = btree_trans_restart ( trans , BCH_ERR_transaction_restart_relock ) ;
2021-10-19 14:20:50 -04:00
goto err ;
2021-10-24 16:55:17 -04:00
}
2017-03-16 22:18:50 -08:00
2021-10-24 16:55:17 -04:00
b = btree_path_node ( path , path - > level + 1 ) ;
2017-03-16 22:18:50 -08:00
2022-11-24 03:12:22 -05:00
if ( bpos_eq ( iter - > pos , b - > key . k . p ) ) {
2022-08-10 19:08:30 -04:00
__btree_path_set_level_up ( trans , path , path - > level + + ) ;
2021-10-24 16:55:17 -04:00
} else {
2017-03-16 22:18:50 -08:00
/*
* Haven ' t gotten to the end of the parent node : go back down to
* the next child node
*/
2021-08-30 15:18:31 -04:00
path = iter - > path =
2022-02-06 22:21:44 -05:00
bch2_btree_path_set_pos ( trans , path , bpos_successor ( iter - > pos ) ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2017-03-16 22:18:50 -08:00
2022-04-14 15:43:37 -04:00
btree_path_set_level_down ( trans , path , iter - > min_depth ) ;
2021-03-21 18:09:02 -04:00
2021-08-30 15:18:31 -04:00
ret = bch2_btree_path_traverse ( trans , path , iter - > flags ) ;
2021-10-19 14:20:50 -04:00
if ( ret )
goto err ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
b = path - > l [ path - > level ] . b ;
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:54:41 -04:00
bkey_init ( & iter - > k ) ;
2021-08-30 15:18:31 -04:00
iter - > k . p = iter - > pos = b - > key . k . p ;
2021-10-07 14:54:50 -04:00
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , b - > key . k . p ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
2021-10-07 14:54:50 -04:00
BUG_ON ( iter - > path - > uptodate ) ;
2021-08-30 15:54:41 -04:00
out :
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2020-02-18 16:17:55 -05:00
2017-03-16 22:18:50 -08:00
return b ;
2021-10-19 14:20:50 -04:00
err :
b = ERR_PTR ( ret ) ;
goto out ;
2017-03-16 22:18:50 -08:00
}
/* Iterate across keys (in leaf nodes only) */
2021-03-21 16:55:25 -04:00
inline bool bch2_btree_iter_advance ( struct btree_iter * iter )
2021-02-07 21:28:58 -05:00
{
2022-04-12 18:04:08 -04:00
if ( likely ( ! ( iter - > flags & BTREE_ITER_ALL_LEVELS ) ) ) {
struct bpos pos = iter - > k . p ;
2022-11-24 03:12:22 -05:00
bool ret = ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS
? bpos_eq ( pos , SPOS_MAX )
: bkey_eq ( pos , SPOS_MAX ) ) ;
2022-04-12 18:04:08 -04:00
if ( ret & & ! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) )
pos = bkey_successor ( iter , pos ) ;
bch2_btree_iter_set_pos ( iter , pos ) ;
return ret ;
} else {
if ( ! btree_path_node ( iter - > path , iter - > path - > level ) )
return true ;
2021-02-07 21:11:49 -05:00
2022-04-12 18:04:08 -04:00
iter - > advanced = true ;
return false ;
}
2021-02-07 21:11:49 -05:00
}
2021-03-21 16:55:25 -04:00
inline bool bch2_btree_iter_rewind ( struct btree_iter * iter )
2021-02-07 21:11:49 -05:00
{
struct bpos pos = bkey_start_pos ( & iter - > k ) ;
2022-11-24 03:12:22 -05:00
bool ret = ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS
? bpos_eq ( pos , POS_MIN )
: bkey_eq ( pos , POS_MIN ) ) ;
2021-02-07 21:11:49 -05:00
2021-02-11 21:57:32 -05:00
if ( ret & & ! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) )
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 18:02:16 -04:00
pos = bkey_predecessor ( iter , pos ) ;
2021-02-07 21:11:49 -05:00
bch2_btree_iter_set_pos ( iter , pos ) ;
2021-02-11 21:57:32 -05:00
return ret ;
2021-02-07 21:28:58 -05:00
}
2021-12-25 20:36:47 -05:00
static noinline
2021-08-30 15:18:31 -04:00
struct bkey_i * __bch2_btree_trans_peek_updates ( struct btree_iter * iter )
2021-03-21 19:43:31 -04:00
{
struct btree_insert_entry * i ;
2021-06-04 00:29:49 -04:00
struct bkey_i * ret = NULL ;
2021-03-21 19:43:31 -04:00
2021-06-02 00:18:34 -04:00
trans_for_each_update ( iter - > trans , i ) {
2021-06-04 00:29:49 -04:00
if ( i - > btree_id < iter - > btree_id )
continue ;
if ( i - > btree_id > iter - > btree_id )
2021-03-21 19:43:31 -04:00
break ;
2022-11-24 03:12:22 -05:00
if ( bpos_lt ( i - > k - > k . p , iter - > path - > pos ) )
2021-06-04 00:29:49 -04:00
continue ;
2022-01-12 01:14:47 -05:00
if ( i - > key_cache_already_flushed )
continue ;
2022-11-24 03:12:22 -05:00
if ( ! ret | | bpos_lt ( i - > k - > k . p , ret - > k . p ) )
2021-06-04 00:29:49 -04:00
ret = i - > k ;
}
2021-03-21 19:43:31 -04:00
2021-06-04 00:29:49 -04:00
return ret ;
}
2021-12-25 20:36:47 -05:00
static inline struct bkey_i * btree_trans_peek_updates ( struct btree_iter * iter )
{
return iter - > flags & BTREE_ITER_WITH_UPDATES
? __bch2_btree_trans_peek_updates ( iter )
: NULL ;
}
2022-05-21 13:10:39 -04:00
struct bkey_i * bch2_btree_journal_peek ( struct btree_trans * trans ,
struct btree_iter * iter ,
struct bpos end_pos )
{
struct bkey_i * k ;
2022-11-24 03:12:22 -05:00
if ( bpos_lt ( iter - > path - > pos , iter - > journal_pos ) )
2022-05-21 13:10:39 -04:00
iter - > journal_idx = 0 ;
k = bch2_journal_keys_peek_upto ( trans - > c , iter - > btree_id ,
iter - > path - > level ,
iter - > path - > pos ,
end_pos ,
& iter - > journal_idx ) ;
iter - > journal_pos = k ? k - > k . p : end_pos ;
return k ;
}
2021-12-25 20:07:00 -05:00
static noinline
struct bkey_s_c btree_trans_peek_slot_journal ( struct btree_trans * trans ,
struct btree_iter * iter )
{
2022-05-21 13:10:39 -04:00
struct bkey_i * k = bch2_btree_journal_peek ( trans , iter , iter - > path - > pos ) ;
2021-12-25 20:07:00 -05:00
2022-04-11 20:28:13 -04:00
if ( k ) {
2021-12-25 20:07:00 -05:00
iter - > k = k - > k ;
return bkey_i_to_s_c ( k ) ;
} else {
return bkey_s_c_null ;
}
}
static noinline
struct bkey_s_c btree_trans_peek_journal ( struct btree_trans * trans ,
struct btree_iter * iter ,
struct bkey_s_c k )
{
struct bkey_i * next_journal =
2022-05-21 13:10:39 -04:00
bch2_btree_journal_peek ( trans , iter ,
2022-10-23 17:37:23 -04:00
k . k ? k . k - > p : path_l ( iter - > path ) - > b - > key . k . p ) ;
2021-12-25 20:07:00 -05:00
2022-04-11 20:28:13 -04:00
if ( next_journal ) {
2021-12-25 20:07:00 -05:00
iter - > k = next_journal - > k ;
k = bkey_i_to_s_c ( next_journal ) ;
}
return k ;
}
2022-02-06 23:15:12 -05:00
/*
* Checks btree key cache for key at iter - > pos and returns it if present , or
* bkey_s_c_null :
*/
static noinline
2022-12-20 16:02:09 -05:00
struct bkey_s_c btree_trans_peek_key_cache ( struct btree_iter * iter , struct bpos pos )
2022-02-06 23:15:12 -05:00
{
struct btree_trans * trans = iter - > trans ;
struct bch_fs * c = trans - > c ;
struct bkey u ;
2022-12-20 10:51:20 -05:00
struct bkey_s_c k ;
2022-02-06 23:15:12 -05:00
int ret ;
2022-11-22 20:15:33 -05:00
if ( ( iter - > flags & BTREE_ITER_KEY_CACHE_FILL ) & &
bpos_eq ( iter - > pos , pos ) )
return bkey_s_c_null ;
2022-02-06 23:15:12 -05:00
if ( ! bch2_btree_key_cache_find ( c , iter - > btree_id , pos ) )
return bkey_s_c_null ;
if ( ! iter - > key_cache_path )
iter - > key_cache_path = bch2_path_get ( trans , iter - > btree_id , pos ,
iter - > flags & BTREE_ITER_INTENT , 0 ,
2022-12-20 11:26:57 -05:00
iter - > flags | BTREE_ITER_CACHED |
2023-01-09 01:11:18 -05:00
BTREE_ITER_CACHED_NOFILL ,
_THIS_IP_ ) ;
2022-02-06 23:15:12 -05:00
iter - > key_cache_path = bch2_btree_path_set_pos ( trans , iter - > key_cache_path , pos ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2022-02-06 23:15:12 -05:00
2022-12-20 16:02:09 -05:00
ret = bch2_btree_path_traverse ( trans , iter - > key_cache_path ,
iter - > flags | BTREE_ITER_CACHED ) ? :
bch2_btree_path_relock ( trans , iter - > path , _THIS_IP_ ) ;
2022-02-06 23:15:12 -05:00
if ( unlikely ( ret ) )
return bkey_s_c_err ( ret ) ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > key_cache_path ) ;
2022-02-06 23:15:12 -05:00
2022-12-20 10:51:20 -05:00
k = bch2_btree_path_peek_slot ( iter - > key_cache_path , & u ) ;
if ( k . k & & ! bkey_err ( k ) ) {
iter - > k = u ;
k . k = & iter - > k ;
}
return k ;
2022-02-06 23:15:12 -05:00
}
2022-01-09 01:07:29 -05:00
static struct bkey_s_c __bch2_btree_iter_peek ( struct btree_iter * iter , struct bpos search_key )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_trans * trans = iter - > trans ;
2021-04-29 21:44:05 -04:00
struct bkey_i * next_update ;
2022-02-06 23:15:12 -05:00
struct bkey_s_c k , k2 ;
2023-09-10 23:35:25 -04:00
int ret ;
2017-03-16 22:18:50 -08:00
2022-10-09 22:25:19 -04:00
EBUG_ON ( iter - > path - > cached ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
2017-03-16 22:18:50 -08:00
while ( 1 ) {
2022-10-09 22:25:19 -04:00
struct btree_path_level * l ;
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , search_key ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2021-08-07 18:19:33 -04:00
2021-08-30 15:18:31 -04:00
ret = bch2_btree_path_traverse ( trans , iter - > path , iter - > flags ) ;
2021-08-24 16:54:36 -04:00
if ( unlikely ( ret ) ) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos ( iter , iter - > pos ) ;
k = bkey_s_c_err ( ret ) ;
goto out ;
}
2017-03-16 22:18:50 -08:00
2022-10-09 22:25:19 -04:00
l = path_l ( iter - > path ) ;
if ( unlikely ( ! l - > b ) ) {
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos ( iter , SPOS_MAX ) ;
k = bkey_s_c_null ;
goto out ;
}
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
2022-02-06 23:15:12 -05:00
2022-10-09 22:25:19 -04:00
k = btree_path_level_peek_all ( trans - > c , l , & iter - > k ) ;
2021-08-24 16:54:36 -04:00
2022-02-06 23:15:12 -05:00
if ( unlikely ( iter - > flags & BTREE_ITER_WITH_KEY_CACHE ) & &
k . k & &
( k2 = btree_trans_peek_key_cache ( iter , k . k - > p ) ) . k ) {
2022-08-16 03:08:15 -04:00
k = k2 ;
ret = bkey_err ( k ) ;
2022-02-06 23:15:12 -05:00
if ( ret ) {
bch2_btree_iter_set_pos ( iter , iter - > pos ) ;
goto out ;
}
}
2021-12-25 20:07:00 -05:00
if ( unlikely ( iter - > flags & BTREE_ITER_WITH_JOURNAL ) )
k = btree_trans_peek_journal ( trans , iter , k ) ;
next_update = btree_trans_peek_updates ( iter ) ;
2021-08-24 16:54:36 -04:00
2021-03-21 19:43:31 -04:00
if ( next_update & &
2022-11-24 03:12:22 -05:00
bpos_le ( next_update - > k . p ,
k . k ? k . k - > p : l - > b - > key . k . p ) ) {
2021-06-04 00:29:49 -04:00
iter - > k = next_update - > k ;
2021-03-21 19:43:31 -04:00
k = bkey_i_to_s_c ( next_update ) ;
2021-06-04 00:29:49 -04:00
}
2021-03-21 19:43:31 -04:00
2021-12-25 20:07:00 -05:00
if ( k . k & & bkey_deleted ( k . k ) ) {
/*
* If we ' ve got a whiteout , and it ' s after the search
* key , advance the search key to the whiteout instead
* of just after the whiteout - it might be a btree
* whiteout , with a real key at the same position , since
* in the btree deleted keys sort before non deleted .
*/
2022-11-24 03:12:22 -05:00
search_key = ! bpos_eq ( search_key , k . k - > p )
2021-12-25 20:07:00 -05:00
? k . k - > p
: bpos_successor ( k . k - > p ) ;
continue ;
}
2021-03-21 19:43:31 -04:00
if ( likely ( k . k ) ) {
2021-03-04 22:29:25 -05:00
break ;
2022-11-24 03:12:22 -05:00
} else if ( likely ( ! bpos_eq ( l - > b - > key . k . p , SPOS_MAX ) ) ) {
2021-08-24 16:54:36 -04:00
/* Advance to next leaf node: */
2022-10-09 22:25:19 -04:00
search_key = bpos_successor ( l - > b - > key . k . p ) ;
2021-08-24 16:54:36 -04:00
} else {
/* End of btree: */
2021-08-07 18:19:33 -04:00
bch2_btree_iter_set_pos ( iter , SPOS_MAX ) ;
k = bkey_s_c_null ;
goto out ;
}
2017-03-16 22:18:50 -08:00
}
2022-01-09 01:07:29 -05:00
out :
bch2_btree_iter_verify ( iter ) ;
return k ;
}
/**
* bch2_btree_iter_peek : returns first key greater than or equal to iterator ' s
* current position
*/
2022-03-11 12:31:52 -05:00
struct bkey_s_c bch2_btree_iter_peek_upto ( struct btree_iter * iter , struct bpos end )
2022-01-09 01:07:29 -05:00
{
struct btree_trans * trans = iter - > trans ;
struct bpos search_key = btree_iter_search_key ( iter ) ;
struct bkey_s_c k ;
2022-03-11 12:31:52 -05:00
struct bpos iter_pos ;
2022-01-09 01:07:29 -05:00
int ret ;
2022-04-12 18:04:08 -04:00
EBUG_ON ( iter - > flags & BTREE_ITER_ALL_LEVELS ) ;
2022-10-11 04:32:41 -04:00
EBUG_ON ( ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) & & bkey_eq ( end , POS_MAX ) ) ;
2022-04-12 18:04:08 -04:00
2022-01-08 21:22:31 -05:00
if ( iter - > update_path ) {
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , iter - > update_path ,
iter - > flags & BTREE_ITER_INTENT ) ;
2022-01-08 21:22:31 -05:00
iter - > update_path = NULL ;
}
2022-01-09 01:07:29 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
while ( 1 ) {
k = __bch2_btree_iter_peek ( iter , search_key ) ;
2022-10-11 04:32:41 -04:00
if ( unlikely ( ! k . k ) )
goto end ;
if ( unlikely ( bkey_err ( k ) ) )
2022-08-10 18:55:53 -04:00
goto out_no_locked ;
2022-01-09 01:07:29 -05:00
2022-03-11 12:31:52 -05:00
/*
* iter - > pos should be mononotically increasing , and always be
* equal to the key we just returned - except extents can
* straddle iter - > pos :
*/
if ( ! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) )
iter_pos = k . k - > p ;
else
2022-11-25 15:01:36 -05:00
iter_pos = bkey_max ( iter - > pos , bkey_start_pos ( k . k ) ) ;
2022-03-11 12:31:52 -05:00
2022-10-11 04:32:41 -04:00
if ( unlikely ( ! ( iter - > flags & BTREE_ITER_IS_EXTENTS )
? bkey_gt ( iter_pos , end )
: bkey_ge ( iter_pos , end ) ) )
goto end ;
2022-03-11 12:31:52 -05:00
2022-01-08 21:22:31 -05:00
if ( iter - > update_path & &
2022-11-24 03:12:22 -05:00
! bkey_eq ( iter - > update_path - > pos , k . k - > p ) ) {
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , iter - > update_path ,
iter - > flags & BTREE_ITER_INTENT ) ;
2022-01-08 21:22:31 -05:00
iter - > update_path = NULL ;
}
if ( ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) & &
( iter - > flags & BTREE_ITER_INTENT ) & &
! ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
! iter - > update_path ) {
struct bpos pos = k . k - > p ;
if ( pos . snapshot < iter - > snapshot ) {
search_key = bpos_successor ( k . k - > p ) ;
continue ;
}
pos . snapshot = iter - > snapshot ;
/*
* advance , same as on exit for iter - > path , but only up
* to snapshot
*/
__btree_path_get ( iter - > path , iter - > flags & BTREE_ITER_INTENT ) ;
iter - > update_path = iter - > path ;
2022-02-06 22:21:44 -05:00
iter - > update_path = bch2_btree_path_set_pos ( trans ,
2022-01-08 21:22:31 -05:00
iter - > update_path , pos ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
_THIS_IP_ ) ;
2022-12-09 16:22:36 -05:00
ret = bch2_btree_path_traverse ( trans , iter - > update_path , iter - > flags ) ;
if ( unlikely ( ret ) ) {
k = bkey_s_c_err ( ret ) ;
goto out_no_locked ;
}
2022-01-08 21:22:31 -05:00
}
2022-01-09 01:07:29 -05:00
/*
* We can never have a key in a leaf node at POS_MAX , so
* we don ' t have to check these successor ( ) calls :
*/
if ( ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) & &
! bch2_snapshot_is_ancestor ( trans - > c ,
iter - > snapshot ,
k . k - > p . snapshot ) ) {
search_key = bpos_successor ( k . k - > p ) ;
continue ;
}
if ( bkey_whiteout ( k . k ) & &
! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) {
search_key = bkey_successor ( iter , k . k - > p ) ;
continue ;
}
break ;
}
2022-03-11 12:31:52 -05:00
iter - > pos = iter_pos ;
2022-01-08 21:22:31 -05:00
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , k . k - > p ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
out_no_locked :
2022-01-08 21:22:31 -05:00
if ( iter - > update_path ) {
2022-07-17 23:06:38 -04:00
ret = bch2_btree_path_relock ( trans , iter - > update_path , _THIS_IP_ ) ;
2022-08-10 18:55:53 -04:00
if ( unlikely ( ret ) )
2022-07-17 23:06:38 -04:00
k = bkey_s_c_err ( ret ) ;
2022-08-10 18:55:53 -04:00
else
btree_path_set_should_be_locked ( iter - > update_path ) ;
2022-01-08 21:22:31 -05:00
}
2022-01-09 01:07:29 -05:00
if ( ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) )
2021-03-04 22:29:25 -05:00
iter - > pos . snapshot = iter - > snapshot ;
2022-01-09 01:07:29 -05:00
ret = bch2_btree_iter_verify_ret ( iter , k ) ;
if ( unlikely ( ret ) ) {
bch2_btree_iter_set_pos ( iter , iter - > pos ) ;
k = bkey_s_c_err ( ret ) ;
}
2021-08-30 15:18:31 -04:00
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
2021-03-04 22:29:25 -05:00
2017-03-16 22:18:50 -08:00
return k ;
2022-10-11 04:32:41 -04:00
end :
bch2_btree_iter_set_pos ( iter , end ) ;
k = bkey_s_c_null ;
goto out_no_locked ;
2017-03-16 22:18:50 -08:00
}
2022-04-12 18:04:08 -04:00
/**
* bch2_btree_iter_peek_all_levels : returns the first key greater than or equal
* to iterator ' s current position , returning keys from every level of the btree .
* For keys at different levels of the btree that compare equal , the key from
* the lower level ( leaf ) is returned first .
*/
struct bkey_s_c bch2_btree_iter_peek_all_levels ( struct btree_iter * iter )
{
struct btree_trans * trans = iter - > trans ;
struct bkey_s_c k ;
int ret ;
EBUG_ON ( iter - > path - > cached ) ;
bch2_btree_iter_verify ( iter ) ;
BUG_ON ( iter - > path - > level < iter - > min_depth ) ;
BUG_ON ( ! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) ;
EBUG_ON ( ! ( iter - > flags & BTREE_ITER_ALL_LEVELS ) ) ;
while ( 1 ) {
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , iter - > pos ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2022-04-12 18:04:08 -04:00
ret = bch2_btree_path_traverse ( trans , iter - > path , iter - > flags ) ;
if ( unlikely ( ret ) ) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos ( iter , iter - > pos ) ;
k = bkey_s_c_err ( ret ) ;
2022-08-10 18:55:53 -04:00
goto out_no_locked ;
2022-04-12 18:04:08 -04:00
}
/* Already at end? */
if ( ! btree_path_node ( iter - > path , iter - > path - > level ) ) {
k = bkey_s_c_null ;
2022-08-10 18:55:53 -04:00
goto out_no_locked ;
2022-04-12 18:04:08 -04:00
}
k = btree_path_level_peek_all ( trans - > c ,
& iter - > path - > l [ iter - > path - > level ] , & iter - > k ) ;
/* Check if we should go up to the parent node: */
if ( ! k . k | |
( iter - > advanced & &
2022-11-24 03:12:22 -05:00
bpos_eq ( path_l ( iter - > path ) - > b - > key . k . p , iter - > pos ) ) ) {
2022-04-12 18:04:08 -04:00
iter - > pos = path_l ( iter - > path ) - > b - > key . k . p ;
2022-07-14 18:58:23 +12:00
btree_path_set_level_up ( trans , iter - > path ) ;
2022-04-12 18:04:08 -04:00
iter - > advanced = false ;
continue ;
}
/*
* Check if we should go back down to a leaf :
* If we ' re not in a leaf node , we only return the current key
* if it exactly matches iter - > pos - otherwise we first have to
* go back to the leaf :
*/
if ( iter - > path - > level ! = iter - > min_depth & &
( iter - > advanced | |
! k . k | |
2022-11-24 03:12:22 -05:00
! bpos_eq ( iter - > pos , k . k - > p ) ) ) {
2022-04-12 18:04:08 -04:00
btree_path_set_level_down ( trans , iter - > path , iter - > min_depth ) ;
iter - > pos = bpos_successor ( iter - > pos ) ;
iter - > advanced = false ;
continue ;
}
/* Check if we should go to the next key: */
if ( iter - > path - > level = = iter - > min_depth & &
iter - > advanced & &
k . k & &
2022-11-24 03:12:22 -05:00
bpos_eq ( iter - > pos , k . k - > p ) ) {
2022-04-12 18:04:08 -04:00
iter - > pos = bpos_successor ( iter - > pos ) ;
iter - > advanced = false ;
continue ;
}
if ( iter - > advanced & &
iter - > path - > level = = iter - > min_depth & &
2022-11-24 03:12:22 -05:00
! bpos_eq ( k . k - > p , iter - > pos ) )
2022-04-12 18:04:08 -04:00
iter - > advanced = false ;
BUG_ON ( iter - > advanced ) ;
BUG_ON ( ! k . k ) ;
break ;
}
iter - > pos = k . k - > p ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
out_no_locked :
2022-04-12 18:04:08 -04:00
bch2_btree_iter_verify ( iter ) ;
return k ;
}
2019-09-07 19:19:57 -04:00
/**
* bch2_btree_iter_next : returns first key greater than iterator ' s current
* position
*/
2017-03-16 22:18:50 -08:00
struct bkey_s_c bch2_btree_iter_next ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_advance ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2019-09-07 19:19:57 -04:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek ( iter ) ;
2017-03-16 22:18:50 -08:00
}
2019-09-07 17:17:21 -04:00
/**
* bch2_btree_iter_peek_prev : returns first key less than or equal to
* iterator ' s current position
*/
struct bkey_s_c bch2_btree_iter_peek_prev ( struct btree_iter * iter )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_trans * trans = iter - > trans ;
2021-08-07 18:19:33 -04:00
struct bpos search_key = iter - > pos ;
2021-03-04 22:29:25 -05:00
struct btree_path * saved_path = NULL ;
2017-03-16 22:18:50 -08:00
struct bkey_s_c k ;
2021-03-04 22:29:25 -05:00
struct bkey saved_k ;
const struct bch_val * saved_v ;
2017-03-16 22:18:50 -08:00
int ret ;
2021-08-30 15:18:31 -04:00
EBUG_ON ( iter - > path - > cached | | iter - > path - > level ) ;
2021-06-04 00:29:49 -04:00
EBUG_ON ( iter - > flags & BTREE_ITER_WITH_UPDATES ) ;
2021-12-25 20:07:00 -05:00
if ( iter - > flags & BTREE_ITER_WITH_JOURNAL )
return bkey_s_c_err ( - EIO ) ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
bch2_btree_iter_verify_entry_exit ( iter ) ;
2021-03-04 22:29:25 -05:00
if ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS )
search_key . snapshot = U32_MAX ;
2017-03-16 22:18:50 -08:00
while ( 1 ) {
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , search_key ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2021-08-07 18:19:33 -04:00
2021-08-30 15:18:31 -04:00
ret = bch2_btree_path_traverse ( trans , iter - > path , iter - > flags ) ;
2021-02-11 21:57:32 -05:00
if ( unlikely ( ret ) ) {
2021-08-24 16:54:36 -04:00
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos ( iter , iter - > pos ) ;
2021-02-11 21:57:32 -05:00
k = bkey_s_c_err ( ret ) ;
2022-08-10 18:55:53 -04:00
goto out_no_locked ;
2021-02-11 21:57:32 -05:00
}
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
k = btree_path_level_peek ( trans , iter - > path ,
& iter - > path - > l [ 0 ] , & iter - > k ) ;
2021-02-07 21:11:49 -05:00
if ( ! k . k | |
( ( iter - > flags & BTREE_ITER_IS_EXTENTS )
2022-11-24 03:12:22 -05:00
? bpos_ge ( bkey_start_pos ( k . k ) , search_key )
: bpos_gt ( k . k - > p , search_key ) ) )
2021-08-30 15:18:31 -04:00
k = btree_path_level_prev ( trans , iter - > path ,
& iter - > path - > l [ 0 ] , & iter - > k ) ;
2019-09-07 17:17:21 -04:00
2021-08-24 16:54:36 -04:00
if ( likely ( k . k ) ) {
2021-03-04 22:29:25 -05:00
if ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS ) {
if ( k . k - > p . snapshot = = iter - > snapshot )
goto got_key ;
/*
* If we have a saved candidate , and we ' re no
* longer at the same _key_ ( not pos ) , return
* that candidate
*/
2022-11-24 03:12:22 -05:00
if ( saved_path & & ! bkey_eq ( k . k - > p , saved_k . p ) ) {
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , iter - > path ,
2021-03-04 22:29:25 -05:00
iter - > flags & BTREE_ITER_INTENT ) ;
iter - > path = saved_path ;
saved_path = NULL ;
iter - > k = saved_k ;
k . v = saved_v ;
goto got_key ;
}
if ( bch2_snapshot_is_ancestor ( iter - > trans - > c ,
iter - > snapshot ,
k . k - > p . snapshot ) ) {
if ( saved_path )
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , saved_path ,
2021-03-04 22:29:25 -05:00
iter - > flags & BTREE_ITER_INTENT ) ;
saved_path = btree_path_clone ( trans , iter - > path ,
iter - > flags & BTREE_ITER_INTENT ) ;
saved_k = * k . k ;
saved_v = k . v ;
}
search_key = bpos_predecessor ( k . k - > p ) ;
continue ;
}
got_key :
if ( bkey_whiteout ( k . k ) & &
! ( iter - > flags & BTREE_ITER_ALL_SNAPSHOTS ) ) {
search_key = bkey_predecessor ( iter , k . k - > p ) ;
if ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS )
search_key . snapshot = U32_MAX ;
continue ;
}
2017-03-16 22:18:50 -08:00
break ;
2022-11-24 03:12:22 -05:00
} else if ( likely ( ! bpos_eq ( iter - > path - > l [ 0 ] . b - > data - > min_key , POS_MIN ) ) ) {
2021-08-24 16:54:36 -04:00
/* Advance to previous leaf node: */
2021-08-30 15:18:31 -04:00
search_key = bpos_predecessor ( iter - > path - > l [ 0 ] . b - > data - > min_key ) ;
2021-08-24 16:54:36 -04:00
} else {
/* Start of btree: */
2021-08-07 18:19:33 -04:00
bch2_btree_iter_set_pos ( iter , POS_MIN ) ;
2021-02-11 21:57:32 -05:00
k = bkey_s_c_null ;
2022-08-10 18:55:53 -04:00
goto out_no_locked ;
2021-02-11 21:57:32 -05:00
}
2019-09-07 17:17:21 -04:00
}
2017-03-16 22:18:50 -08:00
2022-11-24 03:12:22 -05:00
EBUG_ON ( bkey_gt ( bkey_start_pos ( k . k ) , iter - > pos ) ) ;
2021-02-07 21:11:49 -05:00
/* Extents can straddle iter->pos: */
2022-11-24 03:12:22 -05:00
if ( bkey_lt ( k . k - > p , iter - > pos ) )
2021-02-07 21:11:49 -05:00
iter - > pos = k . k - > p ;
2021-03-04 22:29:25 -05:00
if ( iter - > flags & BTREE_ITER_FILTER_SNAPSHOTS )
iter - > pos . snapshot = iter - > snapshot ;
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
out_no_locked :
2021-03-04 22:29:25 -05:00
if ( saved_path )
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , saved_path , iter - > flags & BTREE_ITER_INTENT ) ;
2021-08-30 15:18:31 -04:00
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2021-08-30 15:18:31 -04:00
2017-03-16 22:18:50 -08:00
return k ;
}
2019-09-07 17:17:21 -04:00
/**
* bch2_btree_iter_prev : returns first key less than iterator ' s current
* position
*/
struct bkey_s_c bch2_btree_iter_prev ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_rewind ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2019-09-07 17:17:21 -04:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek_prev ( iter ) ;
2019-09-07 17:17:21 -04:00
}
2020-03-13 21:41:22 -04:00
struct bkey_s_c bch2_btree_iter_peek_slot ( struct btree_iter * iter )
2016-07-21 19:05:06 -08:00
{
2021-08-24 21:30:06 -04:00
struct btree_trans * trans = iter - > trans ;
2021-06-10 20:15:50 -04:00
struct bpos search_key ;
2016-07-21 19:05:06 -08:00
struct bkey_s_c k ;
2020-03-13 21:41:22 -04:00
int ret ;
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify ( iter ) ;
bch2_btree_iter_verify_entry_exit ( iter ) ;
2022-04-12 18:04:08 -04:00
EBUG_ON ( iter - > flags & BTREE_ITER_ALL_LEVELS ) ;
2022-04-14 15:45:00 -04:00
EBUG_ON ( iter - > path - > level & & ( iter - > flags & BTREE_ITER_WITH_KEY_CACHE ) ) ;
2021-02-11 21:57:32 -05:00
2022-12-30 19:15:53 -05:00
/* extents can't span inode numbers: */
if ( ( iter - > flags & BTREE_ITER_IS_EXTENTS ) & &
2021-06-10 20:15:50 -04:00
unlikely ( iter - > pos . offset = = KEY_OFFSET_MAX ) ) {
2022-12-30 19:15:53 -05:00
if ( iter - > pos . inode = = KEY_INODE_MAX )
return bkey_s_c_null ;
2020-03-13 21:41:22 -04:00
2022-12-30 19:15:53 -05:00
bch2_btree_iter_set_pos ( iter , bpos_nosnap_successor ( iter - > pos ) ) ;
}
2021-02-10 16:13:57 -05:00
2021-06-10 20:15:50 -04:00
search_key = btree_iter_search_key ( iter ) ;
2022-02-06 22:21:44 -05:00
iter - > path = bch2_btree_path_set_pos ( trans , iter - > path , search_key ,
2023-01-09 01:11:18 -05:00
iter - > flags & BTREE_ITER_INTENT ,
btree_iter_ip_allocated ( iter ) ) ;
2021-06-10 20:15:50 -04:00
2021-08-30 15:18:31 -04:00
ret = bch2_btree_path_traverse ( trans , iter - > path , iter - > flags ) ;
2022-08-17 17:49:12 -04:00
if ( unlikely ( ret ) ) {
k = bkey_s_c_err ( ret ) ;
goto out_no_locked ;
}
2017-03-16 22:18:50 -08:00
2021-03-04 22:29:25 -05:00
if ( ( iter - > flags & BTREE_ITER_CACHED ) | |
! ( iter - > flags & ( BTREE_ITER_IS_EXTENTS | BTREE_ITER_FILTER_SNAPSHOTS ) ) ) {
2021-06-10 20:15:50 -04:00
struct bkey_i * next_update ;
2017-03-16 22:18:50 -08:00
2021-12-25 20:07:00 -05:00
if ( ( next_update = btree_trans_peek_updates ( iter ) ) & &
2022-11-24 03:12:22 -05:00
bpos_eq ( next_update - > k . p , iter - > pos ) ) {
2022-12-30 19:15:53 -05:00
iter - > k = next_update - > k ;
k = bkey_i_to_s_c ( next_update ) ;
2021-12-25 20:07:00 -05:00
goto out ;
2022-12-30 19:15:53 -05:00
}
2021-12-25 20:07:00 -05:00
if ( unlikely ( iter - > flags & BTREE_ITER_WITH_JOURNAL ) & &
( k = btree_trans_peek_slot_journal ( trans , iter ) ) . k )
goto out ;
2022-02-06 23:15:12 -05:00
if ( unlikely ( iter - > flags & BTREE_ITER_WITH_KEY_CACHE ) & &
2022-12-20 16:02:09 -05:00
( k = btree_trans_peek_key_cache ( iter , iter - > pos ) ) . k ) {
2022-08-16 03:08:15 -04:00
if ( ! bkey_err ( k ) )
2022-02-06 23:15:12 -05:00
iter - > k = * k . k ;
2022-08-16 03:08:15 -04:00
/* We're not returning a key from iter->path: */
goto out_no_locked ;
2022-02-06 23:15:12 -05:00
}
2021-12-25 20:07:00 -05:00
k = bch2_btree_path_peek_slot ( iter - > path , & iter - > k ) ;
2022-10-12 07:58:50 -04:00
if ( unlikely ( ! k . k ) )
goto out_no_locked ;
2022-12-30 19:15:53 -05:00
} else {
struct bpos next ;
2022-10-11 04:32:41 -04:00
struct bpos end = iter - > pos ;
if ( iter - > flags & BTREE_ITER_IS_EXTENTS )
end . offset = U64_MAX ;
2022-12-30 19:15:53 -05:00
2022-04-14 15:45:00 -04:00
EBUG_ON ( iter - > path - > level ) ;
2022-12-30 19:15:53 -05:00
if ( iter - > flags & BTREE_ITER_INTENT ) {
2021-08-30 15:18:31 -04:00
struct btree_iter iter2 ;
2022-12-30 19:15:53 -05:00
2021-08-30 15:18:31 -04:00
bch2_trans_copy_iter ( & iter2 , iter ) ;
2022-03-11 12:31:52 -05:00
k = bch2_btree_iter_peek_upto ( & iter2 , end ) ;
2022-12-30 19:15:53 -05:00
2021-08-30 15:18:31 -04:00
if ( k . k & & ! bkey_err ( k ) ) {
iter - > k = iter2 . k ;
k . k = & iter - > k ;
}
bch2_trans_iter_exit ( trans , & iter2 ) ;
2022-12-30 19:15:53 -05:00
} else {
struct bpos pos = iter - > pos ;
2022-10-11 04:32:41 -04:00
k = bch2_btree_iter_peek_upto ( iter , end ) ;
2022-08-17 17:49:12 -04:00
if ( unlikely ( bkey_err ( k ) ) )
bch2_btree_iter_set_pos ( iter , pos ) ;
else
iter - > pos = pos ;
2022-12-30 19:15:53 -05:00
}
2022-12-30 19:15:53 -05:00
if ( unlikely ( bkey_err ( k ) ) )
2022-10-12 07:58:50 -04:00
goto out_no_locked ;
2022-12-30 19:15:53 -05:00
next = k . k ? bkey_start_pos ( k . k ) : POS_MAX ;
2022-11-24 03:12:22 -05:00
if ( bkey_lt ( iter - > pos , next ) ) {
2022-12-30 19:15:53 -05:00
bkey_init ( & iter - > k ) ;
iter - > k . p = iter - > pos ;
2021-03-04 22:29:25 -05:00
if ( iter - > flags & BTREE_ITER_IS_EXTENTS ) {
bch2_key_resize ( & iter - > k ,
min_t ( u64 , KEY_SIZE_MAX ,
( next . inode = = iter - > pos . inode
? next . offset
: KEY_OFFSET_MAX ) -
iter - > pos . offset ) ) ;
EBUG_ON ( ! iter - > k . size ) ;
}
2022-12-30 19:15:53 -05:00
k = ( struct bkey_s_c ) { & iter - > k , NULL } ;
}
2016-07-21 19:05:06 -08:00
}
2021-12-25 20:07:00 -05:00
out :
2022-08-10 18:55:53 -04:00
btree_path_set_should_be_locked ( iter - > path ) ;
out_no_locked :
2021-02-11 21:57:32 -05:00
bch2_btree_iter_verify_entry_exit ( iter ) ;
bch2_btree_iter_verify ( iter ) ;
2021-03-04 22:29:25 -05:00
ret = bch2_btree_iter_verify_ret ( iter , k ) ;
if ( unlikely ( ret ) )
return bkey_s_c_err ( ret ) ;
2021-06-04 17:17:45 -04:00
2019-08-17 15:17:09 -04:00
return k ;
2017-03-16 22:18:50 -08:00
}
struct bkey_s_c bch2_btree_iter_next_slot ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_advance ( iter ) )
2020-02-18 16:17:55 -05:00
return bkey_s_c_null ;
2017-03-16 22:18:50 -08:00
2020-02-18 16:17:55 -05:00
return bch2_btree_iter_peek_slot ( iter ) ;
2017-03-16 22:18:50 -08:00
}
2021-03-02 22:45:28 -05:00
struct bkey_s_c bch2_btree_iter_prev_slot ( struct btree_iter * iter )
{
2021-03-21 16:55:25 -04:00
if ( ! bch2_btree_iter_rewind ( iter ) )
2021-03-02 22:45:28 -05:00
return bkey_s_c_null ;
return bch2_btree_iter_peek_slot ( iter ) ;
}
2017-03-16 22:18:50 -08:00
/* new transactional stuff: */
2021-06-12 15:45:45 -04:00
#ifdef CONFIG_BCACHEFS_DEBUG
/*
 * Check that trans->sorted[] and each path's sorted_idx refer to each other:
 * one sorted entry per allocated path, and the mapping agrees in both
 * directions.
 */
static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned slot;

	BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));

	/* path -> sorted[] */
	trans_for_each_path(trans, path) {
		BUG_ON(path->sorted_idx >= trans->nr_sorted);
		BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
	}

	/* sorted[] -> path */
	for (slot = 0; slot < trans->nr_sorted; slot++) {
		unsigned path_idx = trans->sorted[slot];

		EBUG_ON(!(trans->paths_allocated & (1ULL << path_idx)));
		BUG_ON(trans->paths[path_idx].sorted_idx != slot);
	}
}

/*
 * When bch2_debug_check_iterators is enabled, walk the paths in sorted order
 * and panic (after dumping the transaction's paths and updates) if any
 * neighbouring pair compares out of order.
 */
static void btree_trans_verify_sorted(struct btree_trans *trans)
{
	struct btree_path *path, *prev = NULL;
	unsigned i;

	if (bch2_debug_check_iterators) {
		trans_for_each_path_inorder(trans, path, i) {
			if (prev && btree_path_cmp(prev, path) > 0) {
				__bch2_dump_trans_paths_updates(trans, true);
				panic("trans paths out of order!\n");
			}
			prev = path;
		}
	}
}
#else
/* Non-debug builds: the checks compile away. */
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
#endif
2021-06-12 15:45:45 -04:00
2021-09-03 17:18:57 -04:00
void __bch2_btree_trans_sort_paths ( struct btree_trans * trans )
2021-06-12 15:45:45 -04:00
{
int i , l = 0 , r = trans - > nr_sorted , inc = 1 ;
bool swapped ;
2021-09-03 17:18:57 -04:00
btree_trans_verify_sorted_refs ( trans ) ;
if ( trans - > paths_sorted )
goto out ;
2021-06-12 15:45:45 -04:00
/*
* Cocktail shaker sort : this is efficient because iterators will be
2022-08-11 20:14:54 -04:00
* mostly sorted .
2021-06-12 15:45:45 -04:00
*/
do {
swapped = false ;
for ( i = inc > 0 ? l : r - 2 ;
i + 1 < r & & i > = l ;
i + = inc ) {
2021-08-30 15:18:31 -04:00
if ( btree_path_cmp ( trans - > paths + trans - > sorted [ i ] ,
trans - > paths + trans - > sorted [ i + 1 ] ) > 0 ) {
2021-06-12 15:45:45 -04:00
swap ( trans - > sorted [ i ] , trans - > sorted [ i + 1 ] ) ;
2021-08-30 15:18:31 -04:00
trans - > paths [ trans - > sorted [ i ] ] . sorted_idx = i ;
trans - > paths [ trans - > sorted [ i + 1 ] ] . sorted_idx = i + 1 ;
2021-06-12 15:45:45 -04:00
swapped = true ;
}
}
if ( inc > 0 )
- - r ;
else
l + + ;
inc = - inc ;
} while ( swapped ) ;
2021-08-30 15:18:31 -04:00
trans - > paths_sorted = true ;
2021-09-03 17:18:57 -04:00
out :
2021-06-12 15:45:45 -04:00
btree_trans_verify_sorted ( trans ) ;
}
2021-08-30 15:18:31 -04:00
static inline void btree_path_list_remove ( struct btree_trans * trans ,
struct btree_path * path )
2021-06-12 15:45:45 -04:00
{
unsigned i ;
2021-08-30 15:18:31 -04:00
EBUG_ON ( path - > sorted_idx > = trans - > nr_sorted ) ;
2021-06-12 15:45:45 -04:00
# ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
trans - > nr_sorted - - ;
2021-08-30 15:18:31 -04:00
memmove_u64s_down_small ( trans - > sorted + path - > sorted_idx ,
trans - > sorted + path - > sorted_idx + 1 ,
DIV_ROUND_UP ( trans - > nr_sorted - path - > sorted_idx , 8 ) ) ;
2021-06-12 15:45:45 -04:00
# else
2021-08-30 15:18:31 -04:00
array_remove_item ( trans - > sorted , trans - > nr_sorted , path - > sorted_idx ) ;
2021-06-12 15:45:45 -04:00
# endif
2021-08-30 15:18:31 -04:00
for ( i = path - > sorted_idx ; i < trans - > nr_sorted ; i + + )
trans - > paths [ trans - > sorted [ i ] ] . sorted_idx = i ;
2021-06-12 15:45:45 -04:00
2021-08-30 15:18:31 -04:00
path - > sorted_idx = U8_MAX ;
2021-06-12 15:45:45 -04:00
}
2021-08-30 15:18:31 -04:00
static inline void btree_path_list_add ( struct btree_trans * trans ,
struct btree_path * pos ,
struct btree_path * path )
2021-06-12 15:45:45 -04:00
{
unsigned i ;
2021-09-03 17:18:57 -04:00
path - > sorted_idx = pos ? pos - > sorted_idx + 1 : trans - > nr_sorted ;
2021-06-12 15:45:45 -04:00
# ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
2021-08-30 15:18:31 -04:00
memmove_u64s_up_small ( trans - > sorted + path - > sorted_idx + 1 ,
trans - > sorted + path - > sorted_idx ,
DIV_ROUND_UP ( trans - > nr_sorted - path - > sorted_idx , 8 ) ) ;
2021-06-12 15:45:45 -04:00
trans - > nr_sorted + + ;
2021-08-30 15:18:31 -04:00
trans - > sorted [ path - > sorted_idx ] = path - > idx ;
2021-06-12 15:45:45 -04:00
# else
2021-08-30 15:18:31 -04:00
array_insert_item ( trans - > sorted , trans - > nr_sorted , path - > sorted_idx , path - > idx ) ;
2021-06-12 15:45:45 -04:00
# endif
2021-08-30 15:18:31 -04:00
for ( i = path - > sorted_idx ; i < trans - > nr_sorted ; i + + )
trans - > paths [ trans - > sorted [ i ] ] . sorted_idx = i ;
2019-09-26 22:21:39 -04:00
2021-06-12 15:45:45 -04:00
btree_trans_verify_sorted_refs ( trans ) ;
2019-03-25 22:43:26 -04:00
}
2021-08-30 15:18:31 -04:00
void bch2_trans_iter_exit ( struct btree_trans * trans , struct btree_iter * iter )
2019-09-26 22:21:39 -04:00
{
2021-08-30 15:18:31 -04:00
if ( iter - > path )
bch2_path_put ( trans , iter - > path ,
iter - > flags & BTREE_ITER_INTENT ) ;
2022-01-08 21:22:31 -05:00
if ( iter - > update_path )
2022-10-11 06:37:56 -04:00
bch2_path_put_nokeep ( trans , iter - > update_path ,
2022-01-08 21:22:31 -05:00
iter - > flags & BTREE_ITER_INTENT ) ;
2022-02-06 23:15:12 -05:00
if ( iter - > key_cache_path )
bch2_path_put ( trans , iter - > key_cache_path ,
iter - > flags & BTREE_ITER_INTENT ) ;
2021-08-30 15:18:31 -04:00
iter - > path = NULL ;
2022-01-08 21:22:31 -05:00
iter - > update_path = NULL ;
2022-02-06 23:15:12 -05:00
iter - > key_cache_path = NULL ;
2019-09-26 22:21:39 -04:00
}
2022-11-25 00:40:27 -05:00
static inline void bch2_trans_iter_init_inlined ( struct btree_trans * trans ,
2021-08-30 15:18:31 -04:00
struct btree_iter * iter ,
unsigned btree_id , struct bpos pos ,
unsigned flags )
2019-03-25 15:10:15 -04:00
{
2022-11-25 00:40:27 -05:00
bch2_trans_iter_init_common ( trans , iter , btree_id , pos , 0 , 0 ,
2023-01-09 01:11:18 -05:00
bch2_btree_iter_flags ( trans , btree_id , flags ) ,
_RET_IP_ ) ;
2022-11-25 00:40:27 -05:00
}
2022-04-14 15:45:00 -04:00
2022-11-25 00:40:27 -05:00
void bch2_trans_iter_init_outlined ( struct btree_trans * trans ,
struct btree_iter * iter ,
enum btree_id btree_id , struct bpos pos ,
unsigned flags )
{
bch2_trans_iter_init_common ( trans , iter , btree_id , pos , 0 , 0 ,
2023-01-09 01:11:18 -05:00
bch2_btree_iter_flags ( trans , btree_id , flags ) ,
_RET_IP_ ) ;
2019-03-25 15:10:15 -04:00
}
2021-08-30 15:18:31 -04:00
void bch2_trans_node_iter_init ( struct btree_trans * trans ,
struct btree_iter * iter ,
enum btree_id btree_id ,
struct bpos pos ,
unsigned locks_want ,
unsigned depth ,
unsigned flags )
2017-03-16 22:18:50 -08:00
{
2022-11-25 00:40:27 -05:00
flags | = BTREE_ITER_NOT_EXTENTS ;
flags | = __BTREE_ITER_ALL_SNAPSHOTS ;
flags | = BTREE_ITER_ALL_SNAPSHOTS ;
bch2_trans_iter_init_common ( trans , iter , btree_id , pos , locks_want , depth ,
2023-01-09 01:11:18 -05:00
__bch2_btree_iter_flags ( trans , btree_id , flags ) ,
_RET_IP_ ) ;
2022-11-25 00:40:27 -05:00
iter - > min_depth = depth ;
2021-08-30 15:18:31 -04:00
BUG_ON ( iter - > path - > locks_want < min ( locks_want , BTREE_MAX_DEPTH ) ) ;
BUG_ON ( iter - > path - > level ! = depth ) ;
BUG_ON ( iter - > min_depth ! = depth ) ;
}
2019-03-25 22:43:26 -04:00
2021-08-30 15:18:31 -04:00
void bch2_trans_copy_iter ( struct btree_iter * dst , struct btree_iter * src )
{
* dst = * src ;
if ( src - > path )
__btree_path_get ( src - > path , src - > flags & BTREE_ITER_INTENT ) ;
2022-01-08 21:22:31 -05:00
if ( src - > update_path )
__btree_path_get ( src - > update_path , src - > flags & BTREE_ITER_INTENT ) ;
2022-02-06 23:15:12 -05:00
dst - > key_cache_path = NULL ;
2017-03-16 22:18:50 -08:00
}
2022-09-26 16:15:17 -04:00
/*
 * Grow the transaction's memory buffer so that @size more bytes fit above
 * trans->mem_top, then hand out those bytes zeroed.
 *
 * The buffer is resized with krealloc() to the next power of two; if that
 * fails and the size is within BTREE_TRANS_MEM_MAX, a buffer from the
 * btree_trans_mem_pool mempool is used instead.
 *
 * Returns the new allocation, ERR_PTR(-ENOMEM) if no memory could be had, or
 * the ERR_PTR() of a mem_realloced transaction restart when a buffer already
 * existed before the call.
 */
void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	unsigned top = trans->mem_top + size;
	size_t prev_bytes = trans->mem_bytes;
	size_t want_bytes = roundup_pow_of_two(top);
	void *buf;
	void *ret;

	trans->mem_max = max(trans->mem_max, top);

	WARN_ON_ONCE(want_bytes > BTREE_TRANS_MEM_MAX);

	buf = krealloc(trans->mem, want_bytes, GFP_NOFS);
	if (!buf && want_bytes <= BTREE_TRANS_MEM_MAX) {
		/* Fall back to the mempool; the old buffer is not carried over */
		buf = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
		want_bytes = BTREE_TRANS_MEM_MAX;
		kfree(trans->mem);
	}

	if (!buf)
		return ERR_PTR(-ENOMEM);

	trans->mem		= buf;
	trans->mem_bytes	= want_bytes;

	if (prev_bytes) {
		/* The buffer was replaced: restart the transaction */
		trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, want_bytes);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
	}

	ret = trans->mem + trans->mem_top;
	trans->mem_top += size;
	memset(ret, 0, size);
	return ret;
}
2022-12-15 21:44:32 -05:00
static noinline void bch2_trans_reset_srcu_lock ( struct btree_trans * trans )
{
struct bch_fs * c = trans - > c ;
struct btree_path * path ;
trans_for_each_path ( trans , path )
if ( path - > cached & & ! btree_node_locked ( path , 0 ) )
path - > l [ 0 ] . b = ERR_PTR ( - BCH_ERR_no_btree_node_srcu_reset ) ;
srcu_read_unlock ( & c - > btree_trans_barrier , trans - > srcu_idx ) ;
trans - > srcu_idx = srcu_read_lock ( & c - > btree_trans_barrier ) ;
trans - > srcu_lock_time = jiffies ;
}
2021-07-07 22:31:36 -04:00
/**
2021-07-24 23:57:28 -04:00
* bch2_trans_begin ( ) - reset a transaction after a interrupted attempt
2021-07-07 22:31:36 -04:00
* @ trans : transaction to reset
*
2022-07-17 23:06:38 -04:00
* While iterating over nodes or updating nodes a attempt to lock a btree node
* may return BCH_ERR_transaction_restart when the trylock fails . When this
* occurs bch2_trans_begin ( ) should be called and the transaction retried .
2021-07-07 22:31:36 -04:00
*/
2022-07-17 19:35:38 -04:00
u32 bch2_trans_begin ( struct btree_trans * trans )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_path * path ;
2019-03-07 23:13:39 -05:00
2022-05-29 11:38:48 -04:00
bch2_trans_reset_updates ( trans ) ;
2017-03-16 22:18:50 -08:00
2022-08-12 12:45:01 -04:00
trans - > restart_count + + ;
2020-02-26 15:39:46 -05:00
trans - > mem_top = 0 ;
2019-03-28 00:07:24 -04:00
2021-08-30 15:18:31 -04:00
trans_for_each_path ( trans , path ) {
2022-02-24 13:27:31 -05:00
path - > should_be_locked = false ;
2022-03-05 15:21:07 -05:00
/*
* If the transaction wasn ' t restarted , we ' re presuming to be
* doing something new : dont keep iterators excpt the ones that
* are in use - except for the subvolumes btree :
*/
if ( ! trans - > restarted & & path - > btree_id ! = BTREE_ID_subvolumes )
path - > preserve = false ;
2021-08-30 15:18:31 -04:00
/*
* XXX : we probably shouldn ' t be doing this if the transaction
* was restarted , but currently we still overflow transaction
* iterators if we do that
*/
if ( ! path - > ref & & ! path - > preserve )
__bch2_path_free ( trans , path ) ;
else
2022-02-24 13:27:31 -05:00
path - > preserve = false ;
2021-08-30 15:18:31 -04:00
}
2022-07-13 06:03:21 -04:00
if ( ! trans - > restarted & &
( need_resched ( ) | |
2022-10-15 01:03:14 -04:00
local_clock ( ) - trans - > last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS ) ) {
2022-07-13 06:03:21 -04:00
bch2_trans_unlock ( trans ) ;
cond_resched ( ) ;
bch2_trans_relock ( trans ) ;
}
2021-03-19 20:29:11 -04:00
2023-01-07 05:46:52 -05:00
if ( unlikely ( time_after ( jiffies , trans - > srcu_lock_time + msecs_to_jiffies ( 10 ) ) ) )
2022-12-15 21:44:32 -05:00
bch2_trans_reset_srcu_lock ( trans ) ;
2022-07-17 19:35:38 -04:00
trans - > last_restarted_ip = _RET_IP_ ;
2021-07-24 23:57:28 -04:00
if ( trans - > restarted )
2021-08-30 15:18:31 -04:00
bch2_btree_path_traverse_all ( trans ) ;
2021-07-25 17:19:52 -04:00
2022-10-15 01:03:14 -04:00
trans - > last_begin_time = local_clock ( ) ;
2022-07-17 19:35:38 -04:00
return trans - > restart_count ;
}
2021-08-30 15:18:31 -04:00
static void bch2_trans_alloc_paths ( struct btree_trans * trans , struct bch_fs * c )
2020-11-05 20:02:01 -05:00
{
2021-08-30 15:18:31 -04:00
size_t paths_bytes = sizeof ( struct btree_path ) * BTREE_ITER_MAX ;
2020-12-01 23:11:53 -05:00
size_t updates_bytes = sizeof ( struct btree_insert_entry ) * BTREE_ITER_MAX ;
2020-11-16 18:20:50 -05:00
void * p = NULL ;
2020-11-05 20:02:01 -05:00
BUG_ON ( trans - > used_mempool ) ;
2020-11-16 18:20:50 -05:00
# ifdef __KERNEL__
2022-10-19 18:31:33 -04:00
p = this_cpu_xchg ( c - > btree_paths_bufs - > path , NULL ) ;
2020-11-16 18:20:50 -05:00
# endif
if ( ! p )
2021-08-30 15:18:31 -04:00
p = mempool_alloc ( & trans - > c - > btree_paths_pool , GFP_NOFS ) ;
2020-11-05 20:02:01 -05:00
2021-08-30 15:18:31 -04:00
trans - > paths = p ; p + = paths_bytes ;
2020-11-05 20:02:01 -05:00
trans - > updates = p ; p + = updates_bytes ;
}
2022-10-17 07:03:11 -04:00
/* Table of transaction function names, indexed by the value returned below. */
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];

/*
 * Look up @fn in bch2_btree_transaction_fns, claiming the first unused slot
 * if it is not there yet. Entries are matched by pointer, not by string
 * contents.
 *
 * Returns the slot index, or ARRAY_SIZE(bch2_btree_transaction_fns) when the
 * table is full and @fn was not found; a warning is logged once in that case.
 */
unsigned bch2_trans_get_fn_idx(const char *fn)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
		if (!bch2_btree_transaction_fns[i] ||
		    bch2_btree_transaction_fns[i] == fn) {
			bch2_btree_transaction_fns[i] = fn;
			return i;
		}

	/* Newline-terminated so a later pr_cont() can't append to this record */
	pr_warn_once("BCH_TRANSACTIONS_NR not big enough!\n");
	return i;
}
2022-10-17 07:03:11 -04:00
void __bch2_trans_init ( struct btree_trans * trans , struct bch_fs * c , unsigned fn_idx )
2021-05-23 17:04:13 -04:00
__acquires ( & c - > btree_trans_barrier )
2017-03-16 22:18:50 -08:00
{
2022-08-11 20:14:54 -04:00
struct btree_transaction_stats * s ;
2020-11-02 18:54:33 -05:00
memset ( trans , 0 , sizeof ( * trans ) ) ;
2017-03-16 22:18:50 -08:00
trans - > c = c ;
2022-10-17 07:03:11 -04:00
trans - > fn = fn_idx < ARRAY_SIZE ( bch2_btree_transaction_fns )
? bch2_btree_transaction_fns [ fn_idx ] : NULL ;
2022-10-15 01:03:14 -04:00
trans - > last_begin_time = local_clock ( ) ;
2022-10-17 07:03:11 -04:00
trans - > fn_idx = fn_idx ;
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occurred. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totaling 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 13:23:47 -04:00
trans - > locking_wait . task = current ;
2021-12-25 20:07:00 -05:00
trans - > journal_replay_not_finished =
! test_bit ( JOURNAL_REPLAY_DONE , & c - > journal . flags ) ;
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occurred. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totaling 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 13:23:47 -04:00
closure_init_stack ( & trans - > ref ) ;
2019-05-15 10:54:43 -04:00
2021-08-30 15:18:31 -04:00
bch2_trans_alloc_paths ( trans , c ) ;
2019-05-15 10:54:43 -04:00
2022-08-22 21:49:55 -04:00
s = btree_trans_stats ( trans ) ;
2022-10-22 16:19:27 -04:00
if ( s & & s - > max_mem ) {
2022-08-22 21:49:55 -04:00
unsigned expected_mem_bytes = roundup_pow_of_two ( s - > max_mem ) ;
2020-11-15 20:52:55 -05:00
trans - > mem = kmalloc ( expected_mem_bytes , GFP_KERNEL ) ;
2021-04-24 00:09:06 -04:00
if ( ! unlikely ( trans - > mem ) ) {
trans - > mem = mempool_alloc ( & c - > btree_trans_mem_pool , GFP_KERNEL ) ;
trans - > mem_bytes = BTREE_TRANS_MEM_MAX ;
} else {
2020-11-15 20:52:55 -05:00
trans - > mem_bytes = expected_mem_bytes ;
2021-04-24 00:09:06 -04:00
}
2022-08-22 21:49:55 -04:00
}
2022-10-22 16:19:27 -04:00
if ( s )
trans - > nr_max_paths = s - > nr_max_paths ;
2022-08-11 20:14:54 -04:00
2020-11-15 16:30:22 -05:00
trans - > srcu_idx = srcu_read_lock ( & c - > btree_trans_barrier ) ;
2022-12-15 21:44:32 -05:00
trans - > srcu_lock_time = jiffies ;
2020-11-15 16:30:22 -05:00
2021-12-16 20:36:26 -05:00
if ( IS_ENABLED ( CONFIG_BCACHEFS_DEBUG_TRANSACTIONS ) ) {
2022-06-17 20:12:02 -04:00
struct btree_trans * pos ;
2021-12-16 20:36:26 -05:00
mutex_lock ( & c - > btree_trans_lock ) ;
2022-06-17 20:12:02 -04:00
list_for_each_entry ( pos , & c - > btree_trans_list , list ) {
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occurred. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totalling 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 13:23:47 -04:00
if ( trans - > locking_wait . task - > pid < pos - > locking_wait . task - > pid ) {
2022-06-17 20:12:02 -04:00
list_add_tail ( & trans - > list , & pos - > list ) ;
goto list_add_done ;
}
}
list_add_tail ( & trans - > list , & c - > btree_trans_list ) ;
list_add_done :
2021-12-16 20:36:26 -05:00
mutex_unlock ( & c - > btree_trans_lock ) ;
}
2017-03-16 22:18:50 -08:00
}
2021-08-30 15:18:31 -04:00
/*
 * check_btree_paths_leaked - debug check that no btree path is still referenced.
 *
 * Only does anything when CONFIG_BCACHEFS_DEBUG is defined; otherwise the body
 * is empty. If any path of @trans has a nonzero ref, logs the name of the
 * function that owns the transaction (trans->fn), prints the btree name and
 * the recorded allocation address (ip_allocated) of every still-referenced
 * path, and then calls bch2_fatal_error() on the filesystem.
 */
static void check_btree_paths_leaked ( struct btree_trans * trans )
{
# ifdef CONFIG_BCACHEFS_DEBUG
struct bch_fs * c = trans - > c ;
struct btree_path * path ;
/* First pass: return quietly unless some path still holds a reference. */
trans_for_each_path ( trans , path )
if ( path - > ref )
goto leaked ;
return ;
leaked :
2022-01-04 00:33:52 -05:00
bch_err ( c , " btree paths leaked from %s! " , trans - > fn ) ;
2021-08-30 15:18:31 -04:00
/* Second pass: report each path that still holds a reference. */
trans_for_each_path ( trans , path )
if ( path - > ref )
printk ( KERN_ERR " btree %s %pS \n " ,
bch2_btree_ids [ path - > btree_id ] ,
( void * ) path - > ip_allocated ) ;
/* Be noisy about this: */
bch2_fatal_error ( c ) ;
# endif
}
2021-10-19 15:08:00 -04:00
/*
 * bch2_trans_exit - tear down a btree transaction and release what it holds.
 *
 * Steps, in the order the body performs them:
 *  - bch2_trans_unlock(), then closure_sync() on trans->ref
 *    (presumably waits for any other holder of that ref - confirm);
 *  - if a stats entry exists for this transaction, raise its max_mem to at
 *    least trans->mem_max;
 *  - put the path held by every pending update and reset nr_updates to 0;
 *  - run check_btree_paths_leaked();
 *  - when CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is enabled, unlink the
 *    transaction from c->btree_trans_list under c->btree_trans_lock;
 *  - srcu_read_unlock() on c->btree_trans_barrier using trans->srcu_idx
 *    (this is what the __releases annotation refers to);
 *  - drop the journal pre-reservation and kfree extra_journal_entries.data;
 *  - free fs_usage_deltas, to replicas_delta_pool or with kfree depending on
 *    its recorded size;
 *  - free trans->mem: mem_bytes == BTREE_TRANS_MEM_MAX is treated as the
 *    marker that the buffer came from btree_trans_mem_pool, anything else is
 *    kfreed;
 *  - in kernel builds, exchange trans->paths with the per-cpu
 *    btree_paths_bufs slot, then return whichever non-NULL buffer comes back
 *    to btree_paths_pool;
 *  - overwrite trans->mem and trans->paths with (void *) 0x1, presumably so
 *    that any use after exit faults instead of touching freed memory.
 *
 * NOTE(review): the fs_usage_deltas size test adds
 * sizeof(trans->fs_usage_deltas), which is the size of the pointer and not of
 * the pointed-to structure. Verify against the code that allocates
 * fs_usage_deltas and sets its size field that this matches the intended
 * header size.
 */
void bch2_trans_exit ( struct btree_trans * trans )
2021-05-23 17:04:13 -04:00
__releases ( & c - > btree_trans_barrier )
2017-03-16 22:18:50 -08:00
{
2021-08-30 15:18:31 -04:00
struct btree_insert_entry * i ;
2020-11-05 20:02:01 -05:00
struct bch_fs * c = trans - > c ;
2022-08-22 21:49:55 -04:00
struct btree_transaction_stats * s = btree_trans_stats ( trans ) ;
2020-11-05 20:02:01 -05:00
2019-04-04 21:28:16 -04:00
bch2_trans_unlock ( trans ) ;
2017-03-16 22:18:50 -08:00
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occured. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totally 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 13:23:47 -04:00
closure_sync ( & trans - > ref ) ;
2022-08-22 21:49:55 -04:00
if ( s )
s - > max_mem = max ( s - > max_mem , trans - > mem_max ) ;
2021-08-30 15:18:31 -04:00
trans_for_each_update ( trans , i )
__btree_path_put ( i - > path , true ) ;
trans - > nr_updates = 0 ;
2021-03-20 15:12:05 -04:00
2021-08-30 15:18:31 -04:00
check_btree_paths_leaked ( trans ) ;
2021-03-19 20:29:11 -04:00
2021-12-16 20:36:26 -05:00
if ( IS_ENABLED ( CONFIG_BCACHEFS_DEBUG_TRANSACTIONS ) ) {
mutex_lock ( & c - > btree_trans_lock ) ;
list_del ( & trans - > list ) ;
mutex_unlock ( & c - > btree_trans_lock ) ;
}
2020-06-02 16:36:11 -04:00
2020-11-15 16:30:22 -05:00
srcu_read_unlock ( & c - > btree_trans_barrier , trans - > srcu_idx ) ;
2021-08-30 15:18:31 -04:00
bch2_journal_preres_put ( & c - > journal , & trans - > journal_preres ) ;
2019-03-07 19:46:10 -05:00
2022-03-29 16:29:10 -04:00
kfree ( trans - > extra_journal_entries . data ) ;
2021-04-24 00:24:25 -04:00
if ( trans - > fs_usage_deltas ) {
if ( trans - > fs_usage_deltas - > size + sizeof ( trans - > fs_usage_deltas ) = =
REPLICAS_DELTA_LIST_MAX )
mempool_free ( trans - > fs_usage_deltas ,
2021-08-30 15:18:31 -04:00
& c - > replicas_delta_pool ) ;
2021-04-24 00:24:25 -04:00
else
kfree ( trans - > fs_usage_deltas ) ;
}
2021-04-24 00:09:06 -04:00
if ( trans - > mem_bytes = = BTREE_TRANS_MEM_MAX )
2021-08-30 15:18:31 -04:00
mempool_free ( trans - > mem , & c - > btree_trans_mem_pool ) ;
2021-04-24 00:09:06 -04:00
else
kfree ( trans - > mem ) ;
2020-11-05 20:02:01 -05:00
2020-11-16 18:20:50 -05:00
# ifdef __KERNEL__
/*
* Userspace doesn ' t have a real percpu implementation :
*/
2021-08-30 15:18:31 -04:00
trans - > paths = this_cpu_xchg ( c - > btree_paths_bufs - > path , trans - > paths ) ;
2020-11-16 18:20:50 -05:00
# endif
2021-04-24 00:09:06 -04:00
2021-08-30 15:18:31 -04:00
if ( trans - > paths )
mempool_free ( trans - > paths , & c - > btree_paths_pool ) ;
2020-11-05 20:02:01 -05:00
2017-03-16 22:18:50 -08:00
trans - > mem = ( void * ) 0x1 ;
2021-08-30 15:18:31 -04:00
trans - > paths = ( void * ) 0x1 ;
2017-03-16 22:18:50 -08:00
}
2019-09-07 14:16:00 -04:00
2020-11-07 12:43:48 -05:00
/*
 * bch2_btree_bkey_cached_common_to_text - describe one lockable btree object.
 *
 * Appends to @out, without a trailing newline: a tab, the address of @b, the
 * letter c or b depending on b->cached, its level, the name of its btree and
 * its position; then another tab, the three counts returned by
 * six_lock_counts() and the pid of the current lock owner (0 when the lock
 * has no owner).
 *
 * NOTE(review): the three counts are presumably the read, intent and write
 * hold counts in that order - confirm against six_lock_counts().
 */
static void __maybe_unused
2022-09-01 22:56:27 -04:00
bch2_btree_bkey_cached_common_to_text ( struct printbuf * out ,
struct btree_bkey_cached_common * b )
2020-06-15 19:53:46 -04:00
{
2022-08-23 01:20:24 -04:00
struct six_lock_count c = six_lock_counts ( & b - > lock ) ;
struct task_struct * owner ;
pid_t pid ;
/*
 * The owner pointer is sampled once and only dereferenced inside the RCU
 * read-side section; presumably this keeps the task_struct from being
 * freed while its pid is read - confirm.
 */
rcu_read_lock ( ) ;
owner = READ_ONCE ( b - > lock . owner ) ;
2022-10-19 18:31:33 -04:00
pid = owner ? owner - > pid : 0 ;
2022-08-23 01:20:24 -04:00
rcu_read_unlock ( ) ;
2022-09-01 22:56:27 -04:00
prt_tab ( out ) ;
prt_printf ( out , " %px %c l=%u %s: " , b , b - > cached ? ' c ' : ' b ' ,
b - > level , bch2_btree_ids [ b - > btree_id ] ) ;
2022-08-31 18:53:42 -04:00
bch2_bpos_to_text ( out , btree_node_pos ( b ) ) ;
2022-08-23 01:20:24 -04:00
2022-09-01 22:56:27 -04:00
prt_tab ( out ) ;
prt_printf ( out , " locks %u:%u:%u held by pid %u " ,
2022-08-23 01:20:24 -04:00
c . n [ 0 ] , c . n [ 1 ] , c . n [ 2 ] , pid ) ;
2020-06-15 19:53:46 -04:00
}
2022-06-17 20:12:02 -04:00
/*
 * bch2_btree_trans_to_text - print a summary of a btree transaction to @out.
 *
 * Output, in order:
 *  - one line with the pid of trans->locking_wait.task and trans->fn;
 *  - for every path with nodes_locked set: one line with the path index, the
 *    letter c or b depending on path->cached, level, btree name and position,
 *    followed by one line per level (0 up to BTREE_MAX_DEPTH - 1) that is
 *    locked and whose node pointer is neither NULL nor an error pointer,
 *    giving the held lock type and a description of the node;
 *  - if trans->locking is non-NULL: how long the transaction has been
 *    blocked (local_clock() minus locking_wait.start_time, divided by 1000
 *    and labelled as microseconds), the lock type it wants, and a
 *    description of the object it is blocked on.
 *
 * lock_types maps lock type values 0, 1 and 2 to the letters r, i and w.
 * Two tab stops (16 and 32) are pushed onto @out only if it has none yet.
 *
 * NOTE(review): node pointers and trans->locking are read with READ_ONCE,
 * presumably because this can run against a transaction owned by another
 * thread - confirm with callers.
 */
void bch2_btree_trans_to_text ( struct printbuf * out , struct btree_trans * trans )
2021-03-31 16:43:50 -04:00
{
2021-08-30 15:18:31 -04:00
struct btree_path * path ;
2022-08-05 11:36:13 -04:00
struct btree_bkey_cached_common * b ;
2022-02-15 22:28:37 -05:00
static char lock_types [ ] = { ' r ' , ' i ' , ' w ' } ;
2020-06-02 16:36:11 -04:00
unsigned l ;
2022-09-01 22:56:27 -04:00
if ( ! out - > nr_tabstops ) {
printbuf_tabstop_push ( out , 16 ) ;
printbuf_tabstop_push ( out , 32 ) ;
}
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy.
The btree iterator API provides an interface where the user doesn't need
to concern themselves with lock ordering - different btree iterators can
be traversed in any order. Without special care, this will lead to
deadlocks.
Our previous strategy was to define a lock ordering internally, and
whenever we attempt to take a lock and trylock() fails, we'd check if
the current btree transaction is holding any locks that cause a lock
ordering violation. If so, we'd issue a transaction restart, and then
bch2_trans_begin() would re-traverse all previously used iterators, but
in the correct order.
That approach had some issues, though.
- Sometimes we'd issue transaction restarts unnecessarily, when no
deadlock would have actually occured. Lock ordering restarts have
become our primary cause of transaction restarts, on some workloads
totally 20% of actual transaction commits.
- To avoid deadlock or livelock, we'd often have to take intent locks
when we only wanted a read lock: with the lock ordering approach, it
is actually illegal to hold _any_ read lock while blocking on an intent
lock, and this has been causing us unnecessary lock contention.
- It was getting fragile - the various lock ordering rules are not
trivial, and we'd been seeing occasional livelock issues related to
this machinery.
So, since bcachefs is already a relational database masquerading as a
filesystem, we're stealing the next traditional database technique and
switching to a cycle detector for avoiding deadlocks.
When we block taking a btree lock, after adding ourself to the waitlist
but before sleeping, we do a DFS of btree transactions waiting on other
btree transactions, starting with the current transaction and walking
our held locks, and transactions blocking on our held locks.
If we find a cycle, we emit a transaction restart. Occasionally (e.g.
the btree split path) we can not allow the lock() operation to fail, so
if necessary we'll tell another transaction that it has to fail.
Result: trans_restart_would_deadlock events are reduced by a factor of
10 to 100, and we'll be able to delete a whole bunch of grotty, fragile
code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
2022-08-22 13:23:47 -04:00
prt_printf ( out , " %i %s \n " , trans - > locking_wait . task - > pid , trans - > fn ) ;
2020-06-02 16:36:11 -04:00
2022-06-17 20:12:02 -04:00
trans_for_each_path ( trans , path ) {
if ( ! path - > nodes_locked )
continue ;
2020-06-02 16:36:11 -04:00
2022-06-17 20:12:02 -04:00
prt_printf ( out , " path %u %c l=%u %s: " ,
path - > idx ,
path - > cached ? ' c ' : ' b ' ,
path - > level ,
bch2_btree_ids [ path - > btree_id ] ) ;
bch2_bpos_to_text ( out , path - > pos ) ;
2022-09-01 22:56:27 -04:00
prt_newline ( out ) ;
2022-06-17 20:12:02 -04:00
for ( l = 0 ; l < BTREE_MAX_DEPTH ; l + + ) {
2022-08-05 11:36:13 -04:00
if ( btree_node_locked ( path , l ) & &
2022-08-10 19:08:30 -04:00
! IS_ERR_OR_NULL ( b = ( void * ) READ_ONCE ( path - > l [ l ] . b ) ) ) {
2022-08-21 18:17:51 -04:00
prt_printf ( out , " %c l=%u " ,
lock_types [ btree_node_locked_type ( path , l ) ] , l ) ;
2022-09-01 22:56:27 -04:00
bch2_btree_bkey_cached_common_to_text ( out , b ) ;
prt_newline ( out ) ;
2020-06-02 16:36:11 -04:00
}
}
2022-06-17 20:12:02 -04:00
}
2020-06-02 16:36:11 -04:00
2022-06-17 20:12:02 -04:00
b = READ_ONCE ( trans - > locking ) ;
if ( b ) {
2023-01-04 21:34:41 -05:00
prt_printf ( out , " blocked for %lluus on " ,
div_u64 ( local_clock ( ) - trans - > locking_wait . start_time ,
1000 ) ) ;
2022-09-01 22:56:27 -04:00
prt_newline ( out ) ;
prt_printf ( out , " %c " , lock_types [ trans - > locking_wait . lock_want ] ) ;
bch2_btree_bkey_cached_common_to_text ( out , b ) ;
prt_newline ( out ) ;
2020-06-02 16:36:11 -04:00
}
}
2019-09-07 14:16:00 -04:00
/*
 * bch2_fs_btree_iter_exit - release the per-filesystem btree iterator state.
 *
 * For every entry of c->btree_transaction_stats, frees max_paths_text and
 * tears down the lock_hold_times time stats. The SRCU barrier is cleaned up
 * only when btree_trans_barrier_initialized is set, so this is callable even
 * if bch2_fs_btree_iter_init() failed before initializing it. Finally both
 * mempools (transaction memory and paths) are destroyed.
 */
void bch2_fs_btree_iter_exit ( struct bch_fs * c )
{
2022-08-11 19:36:24 -04:00
struct btree_transaction_stats * s ;
/* Walk the whole fixed-size stats array by pointer. */
for ( s = c - > btree_transaction_stats ;
s < c - > btree_transaction_stats + ARRAY_SIZE ( c - > btree_transaction_stats ) ;
2022-08-11 20:14:54 -04:00
s + + ) {
kfree ( s - > max_paths_text ) ;
2022-08-11 19:36:24 -04:00
bch2_time_stats_exit ( & s - > lock_hold_times ) ;
2022-08-11 20:14:54 -04:00
}
2022-08-11 19:36:24 -04:00
2021-12-20 18:18:35 -05:00
/* Skip SRCU cleanup if init never got as far as init_srcu_struct(). */
if ( c - > btree_trans_barrier_initialized )
cleanup_srcu_struct ( & c - > btree_trans_barrier ) ;
2021-04-24 00:09:06 -04:00
mempool_exit ( & c - > btree_trans_mem_pool ) ;
2021-08-30 15:18:31 -04:00
mempool_exit ( & c - > btree_paths_pool ) ;
2019-09-07 14:16:00 -04:00
}
/*
 * bch2_fs_btree_iter_init - set up the per-filesystem btree iterator state.
 *
 * Initializes the time stats and mutex of every c->btree_transaction_stats
 * entry, the transaction list and its lock, then creates the paths mempool,
 * the transaction memory mempool and the SRCU barrier, in that order.
 *
 * Returns 0 on success, otherwise the first nonzero value returned by one of
 * the three initializers; later initializers are not attempted after a
 * failure. Nothing is undone here on failure - presumably the caller is
 * expected to call bch2_fs_btree_iter_exit(); confirm with callers.
 */
int bch2_fs_btree_iter_init ( struct bch_fs * c )
{
2022-08-11 19:36:24 -04:00
struct btree_transaction_stats * s ;
2019-09-07 14:16:00 -04:00
unsigned nr = BTREE_ITER_MAX ;
2021-12-20 18:18:35 -05:00
int ret ;
2019-09-07 14:16:00 -04:00
2022-08-11 19:36:24 -04:00
for ( s = c - > btree_transaction_stats ;
s < c - > btree_transaction_stats + ARRAY_SIZE ( c - > btree_transaction_stats ) ;
2022-08-11 20:14:54 -04:00
s + + ) {
2022-08-11 19:36:24 -04:00
bch2_time_stats_init ( & s - > lock_hold_times ) ;
2022-08-11 20:14:54 -04:00
mutex_init ( & s - > lock ) ;
}
2022-08-11 19:36:24 -04:00
2020-06-02 16:36:11 -04:00
INIT_LIST_HEAD ( & c - > btree_trans_list ) ;
mutex_init ( & c - > btree_trans_lock ) ;
2021-12-20 18:18:35 -05:00
/*
 * Each paths pool element is sized for nr btree_path structs plus nr
 * btree_insert_entry structs. The chained ?: stops at the first
 * initializer that returns nonzero.
 */
ret = mempool_init_kmalloc_pool ( & c - > btree_paths_pool , 1 ,
2021-08-30 15:18:31 -04:00
sizeof ( struct btree_path ) * nr +
2021-04-24 00:09:06 -04:00
sizeof ( struct btree_insert_entry ) * nr ) ? :
mempool_init_kmalloc_pool ( & c - > btree_trans_mem_pool , 1 ,
2021-12-20 18:18:35 -05:00
BTREE_TRANS_MEM_MAX ) ? :
init_srcu_struct ( & c - > btree_trans_barrier ) ;
/* ret == 0 means every step ran, so the barrier really was initialized. */
if ( ! ret )
c - > btree_trans_barrier_initialized = true ;
return ret ;
2019-09-07 14:16:00 -04:00
}