2017-03-16 22:18:50 -08:00
// SPDX-License-Identifier: GPL-2.0
# include "bcachefs.h"
2020-12-17 15:08:58 -05:00
# include "bkey_buf.h"
2017-03-16 22:18:50 -08:00
# include "btree_cache.h"
# include "btree_io.h"
# include "btree_iter.h"
# include "btree_locking.h"
# include "debug.h"
2022-07-17 23:06:38 -04:00
# include "errcode.h"
2021-01-26 20:59:00 -05:00
# include "error.h"
2017-03-16 22:18:50 -08:00
# include "trace.h"
# include <linux/prefetch.h>
2019-06-11 21:03:23 -04:00
# include <linux/sched/mm.h>
2017-03-16 22:18:50 -08:00
2022-02-26 11:10:20 -05:00
/*
 * String names of the btree node flags: x() stringifies each entry that
 * BTREE_FLAGS() expands to, and the table is terminated by a NULL entry.
 */
const char * const bch2_btree_node_flags [ ] = {
# define x(f) #f,
BTREE_FLAGS ( )
# undef x
NULL
} ;
2017-03-16 22:18:50 -08:00
void bch2_recalc_btree_reserve ( struct bch_fs * c )
{
unsigned i , reserve = 16 ;
if ( ! c - > btree_roots [ 0 ] . b )
reserve + = 8 ;
for ( i = 0 ; i < BTREE_ID_NR ; i + + )
if ( c - > btree_roots [ i ] . b )
reserve + = min_t ( unsigned , 1 ,
2020-06-06 12:28:01 -04:00
c - > btree_roots [ i ] . b - > c . level ) * 8 ;
2017-03-16 22:18:50 -08:00
c - > btree_cache . reserve = reserve ;
}
/*
 * Number of cached nodes that may be freed without eating into the reserve:
 * used - reserve, clamped so that it never goes below zero.
 */
static inline unsigned btree_cache_can_free(struct btree_cache *bc)
{
	return max_t(int, 0, bc->used - bc->reserve);
}
2022-03-04 19:16:04 -05:00
static void btree_node_to_freedlist ( struct btree_cache * bc , struct btree * b )
{
if ( b - > c . lock . readers )
list_move ( & b - > list , & bc - > freed_pcpu ) ;
else
list_move ( & b - > list , & bc - > freed_nonpcpu ) ;
}
2021-04-24 00:38:16 -04:00
static void btree_node_data_free ( struct bch_fs * c , struct btree * b )
2017-03-16 22:18:50 -08:00
{
2021-04-24 00:38:16 -04:00
struct btree_cache * bc = & c - > btree_cache ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( btree_node_write_in_flight ( b ) ) ;
kvpfree ( b - > data , btree_bytes ( c ) ) ;
b - > data = NULL ;
2021-04-24 00:38:16 -04:00
# ifdef __KERNEL__
2020-07-25 15:07:37 -04:00
kvfree ( b - > aux_data ) ;
2021-04-24 00:38:16 -04:00
# else
munmap ( b - > aux_data , btree_aux_data_bytes ( b ) ) ;
# endif
2020-07-25 15:07:37 -04:00
b - > aux_data = NULL ;
2017-03-16 22:18:50 -08:00
bc - > used - - ;
2022-03-04 19:16:04 -05:00
btree_node_to_freedlist ( bc , b ) ;
2017-03-16 22:18:50 -08:00
}
static int bch2_btree_cache_cmp_fn ( struct rhashtable_compare_arg * arg ,
const void * obj )
{
const struct btree * b = obj ;
const u64 * v = arg - > key ;
2020-02-18 17:15:32 -05:00
return b - > hash_val = = * v ? 0 : 1 ;
2017-03-16 22:18:50 -08:00
}
static const struct rhashtable_params bch_btree_cache_params = {
. head_offset = offsetof ( struct btree , hash ) ,
2020-02-18 17:15:32 -05:00
. key_offset = offsetof ( struct btree , hash_val ) ,
. key_len = sizeof ( u64 ) ,
2017-03-16 22:18:50 -08:00
. obj_cmpfn = bch2_btree_cache_cmp_fn ,
} ;
2020-07-25 15:07:37 -04:00
/*
 * Allocate the data and aux_data buffers for @b.
 *
 * @b must not already have either buffer (BUG otherwise). Returns 0 on
 * success, or -ENOMEM with both pointers left NULL.
 */
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
	BUG_ON(b->data || b->aux_data);

	b->data = kvpmalloc(btree_bytes(c), gfp);
	if (!b->data)
		return -ENOMEM;
#ifdef __KERNEL__
	b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
#else
	/*
	 * Anonymous mappings take no file descriptor: pass -1 rather than 0,
	 * as portable use of MAP_ANONYMOUS requires.
	 */
	b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
			   PROT_READ|PROT_WRITE|PROT_EXEC,
			   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	/* Normalize mmap()'s failure value so the check below covers both: */
	if (b->aux_data == MAP_FAILED)
		b->aux_data = NULL;
#endif
	if (!b->aux_data) {
		/* Undo the first allocation so @b is left with no buffers */
		kvpfree(b->data, btree_bytes(c));
		b->data = NULL;
		return -ENOMEM;
	}

	return 0;
}
2020-07-25 15:07:37 -04:00
static struct btree * __btree_node_mem_alloc ( struct bch_fs * c )
2020-06-09 17:49:24 -04:00
{
2020-07-25 15:07:37 -04:00
struct btree * b = kzalloc ( sizeof ( struct btree ) , GFP_KERNEL ) ;
2017-03-16 22:18:50 -08:00
if ( ! b )
return NULL ;
2018-11-01 15:10:01 -04:00
bkey_btree_ptr_init ( & b - > key ) ;
2020-06-06 12:28:01 -04:00
six_lock_init ( & b - > c . lock ) ;
lockdep_set_novalidate_class ( & b - > c . lock ) ;
2017-03-16 22:18:50 -08:00
INIT_LIST_HEAD ( & b - > list ) ;
INIT_LIST_HEAD ( & b - > write_blocked ) ;
2020-07-25 15:07:37 -04:00
b - > byte_order = ilog2 ( btree_bytes ( c ) ) ;
return b ;
}
2021-04-20 20:21:12 -04:00
struct btree * __bch2_btree_node_mem_alloc ( struct bch_fs * c )
2020-07-25 15:07:37 -04:00
{
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b = __btree_node_mem_alloc ( c ) ;
if ( ! b )
return NULL ;
2017-03-16 22:18:50 -08:00
2020-07-25 15:07:37 -04:00
if ( btree_node_data_alloc ( c , b , GFP_KERNEL ) ) {
kfree ( b ) ;
return NULL ;
}
bc - > used + + ;
list_add ( & b - > list , & bc - > freeable ) ;
return b ;
2017-03-16 22:18:50 -08:00
}
/* Btree in memory cache - hash table */
void bch2_btree_node_hash_remove ( struct btree_cache * bc , struct btree * b )
{
2021-09-01 00:50:18 -04:00
int ret = rhashtable_remove_fast ( & bc - > table , & b - > hash , bch_btree_cache_params ) ;
BUG_ON ( ret ) ;
2017-03-16 22:18:50 -08:00
/* Cause future lookups for this node to fail: */
2020-02-18 17:15:32 -05:00
b - > hash_val = 0 ;
2020-07-25 15:37:14 -04:00
six_lock_wakeup_all ( & b - > c . lock ) ;
2017-03-16 22:18:50 -08:00
}
int __bch2_btree_node_hash_insert ( struct btree_cache * bc , struct btree * b )
{
2020-02-18 17:15:32 -05:00
BUG_ON ( b - > hash_val ) ;
b - > hash_val = btree_ptr_hash_val ( & b - > key ) ;
2017-03-16 22:18:50 -08:00
return rhashtable_lookup_insert_fast ( & bc - > table , & b - > hash ,
bch_btree_cache_params ) ;
}
int bch2_btree_node_hash_insert ( struct btree_cache * bc , struct btree * b ,
unsigned level , enum btree_id id )
{
int ret ;
2020-06-06 12:28:01 -04:00
b - > c . level = level ;
b - > c . btree_id = id ;
2017-03-16 22:18:50 -08:00
mutex_lock ( & bc - > lock ) ;
ret = __bch2_btree_node_hash_insert ( bc , b ) ;
if ( ! ret )
list_add ( & b - > list , & bc - > live ) ;
mutex_unlock ( & bc - > lock ) ;
return ret ;
}
__flatten
static inline struct btree * btree_cache_find ( struct btree_cache * bc ,
const struct bkey_i * k )
{
2020-02-18 17:15:32 -05:00
u64 v = btree_ptr_hash_val ( k ) ;
return rhashtable_lookup_fast ( & bc - > table , & v , bch_btree_cache_params ) ;
2017-03-16 22:18:50 -08:00
}
/*
 * Try to take a btree node for reuse.
 *
 * Returns 0 with both the intent and write locks on @b held, or -ENOMEM with
 * no locks held. The node locks are only ever trylocked.
 *
 * With @flush false, a node that is dirty or has IO in flight is skipped.
 * With @flush true we wait for in-flight IO, write out a dirty node, and then
 * start over from the top.
 *
 * Caller must hold bc->lock.
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
	struct btree_cache *bc = &c->btree_cache;
	const unsigned io_in_flight =
		(1U << BTREE_NODE_read_in_flight)|
		(1U << BTREE_NODE_write_in_flight);

	lockdep_assert_held(&bc->lock);
wait_on_io:
	if (b->flags & ((1U << BTREE_NODE_dirty)|io_in_flight)) {
		if (!flush)
			return -ENOMEM;

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	if (!six_trylock_intent(&b->c.lock))
		return -ENOMEM;

	if (!six_trylock_write(&b->c.lock))
		goto out_unlock_intent;

	/* recheck under lock */
	if (b->flags & io_in_flight) {
		if (!flush)
			goto out_unlock;

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	if (btree_node_noevict(b) ||
	    btree_node_write_blocked(b) ||
	    btree_node_will_make_reachable(b))
		goto out_unlock;

	if (btree_node_dirty(b)) {
		if (!flush)
			goto out_unlock;
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
		 * - unless btree verify mode is enabled, since it runs out of
		 * the post write cleanup:
		 */
		if (bch2_verify_btree_ondisk)
			bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
		else
			__bch2_btree_node_write(c, b, 0);

		/* Drop our locks and go back to wait for that write */
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	/* Success; only nodes that are still hashed are traced */
	if (b->hash_val)
		trace_and_count(c, btree_cache_reap, c, b);
	return 0;

out_unlock:
	six_unlock_write(&b->c.lock);
out_unlock_intent:
	six_unlock_intent(&b->c.lock);
	return -ENOMEM;
}
static int btree_node_reclaim ( struct bch_fs * c , struct btree * b )
{
return __btree_node_reclaim ( c , b , false ) ;
}
static int btree_node_write_and_reclaim ( struct bch_fs * c , struct btree * b )
{
return __btree_node_reclaim ( c , b , true ) ;
}
static unsigned long bch2_btree_cache_scan ( struct shrinker * shrink ,
struct shrink_control * sc )
{
struct bch_fs * c = container_of ( shrink , struct bch_fs ,
btree_cache . shrink ) ;
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b , * t ;
unsigned long nr = sc - > nr_to_scan ;
2022-04-03 20:36:32 -04:00
unsigned long can_free = 0 ;
2017-03-16 22:18:50 -08:00
unsigned long touched = 0 ;
unsigned long freed = 0 ;
2020-10-15 21:48:58 -04:00
unsigned i , flags ;
2021-12-27 20:45:07 -05:00
unsigned long ret = SHRINK_STOP ;
2017-03-16 22:18:50 -08:00
2020-11-02 18:20:44 -05:00
if ( bch2_btree_shrinker_disabled )
2017-03-16 22:18:50 -08:00
return SHRINK_STOP ;
/* Return -1 if we can't do anything right now */
2020-06-05 09:01:23 -04:00
if ( sc - > gfp_mask & __GFP_FS )
2017-03-16 22:18:50 -08:00
mutex_lock ( & bc - > lock ) ;
else if ( ! mutex_trylock ( & bc - > lock ) )
2021-12-27 20:45:07 -05:00
goto out_norestore ;
2017-03-16 22:18:50 -08:00
2020-10-15 21:48:58 -04:00
flags = memalloc_nofs_save ( ) ;
2017-03-16 22:18:50 -08:00
/*
* It ' s _really_ critical that we don ' t free too many btree nodes - we
* have to always leave ourselves a reserve . The reserve is how we
* guarantee that allocating memory for a new btree node can always
* succeed , so that inserting keys into the btree can always succeed and
* IO can always make forward progress :
*/
can_free = btree_cache_can_free ( bc ) ;
nr = min_t ( unsigned long , nr , can_free ) ;
i = 0 ;
list_for_each_entry_safe ( b , t , & bc - > freeable , list ) {
2021-12-27 22:11:54 -05:00
/*
* Leave a few nodes on the freeable list , so that a btree split
* won ' t have to hit the system allocator :
*/
if ( + + i < = 3 )
continue ;
2017-03-16 22:18:50 -08:00
touched + + ;
2021-11-11 15:50:22 -05:00
if ( touched > = nr )
2017-03-16 22:18:50 -08:00
break ;
2021-12-27 22:11:54 -05:00
if ( ! btree_node_reclaim ( c , b ) ) {
2017-03-16 22:18:50 -08:00
btree_node_data_free ( c , b ) ;
2020-06-06 12:28:01 -04:00
six_unlock_write ( & b - > c . lock ) ;
six_unlock_intent ( & b - > c . lock ) ;
2017-03-16 22:18:50 -08:00
freed + + ;
}
}
restart :
list_for_each_entry_safe ( b , t , & bc - > live , list ) {
2022-03-03 11:04:01 -05:00
/* tweak this */
if ( btree_node_accessed ( b ) ) {
clear_btree_node_accessed ( b ) ;
goto touched ;
2017-03-16 22:18:50 -08:00
}
2022-03-03 11:04:01 -05:00
if ( ! btree_node_reclaim ( c , b ) ) {
2017-03-16 22:18:50 -08:00
/* can't call bch2_btree_node_hash_remove under lock */
freed + + ;
if ( & t - > list ! = & bc - > live )
list_move_tail ( & bc - > live , & t - > list ) ;
btree_node_data_free ( c , b ) ;
mutex_unlock ( & bc - > lock ) ;
bch2_btree_node_hash_remove ( bc , b ) ;
2020-06-06 12:28:01 -04:00
six_unlock_write ( & b - > c . lock ) ;
six_unlock_intent ( & b - > c . lock ) ;
2017-03-16 22:18:50 -08:00
if ( freed > = nr )
goto out ;
2020-06-15 20:18:02 -04:00
if ( sc - > gfp_mask & __GFP_FS )
2017-03-16 22:18:50 -08:00
mutex_lock ( & bc - > lock ) ;
else if ( ! mutex_trylock ( & bc - > lock ) )
goto out ;
goto restart ;
2022-03-03 11:04:01 -05:00
} else {
continue ;
}
touched :
touched + + ;
if ( touched > = nr ) {
/* Save position */
if ( & t - > list ! = & bc - > live )
list_move_tail ( & bc - > live , & t - > list ) ;
break ;
}
2017-03-16 22:18:50 -08:00
}
mutex_unlock ( & bc - > lock ) ;
out :
2022-04-03 20:36:32 -04:00
ret = freed ;
2020-11-11 18:59:41 -05:00
memalloc_nofs_restore ( flags ) ;
2021-12-27 20:45:07 -05:00
out_norestore :
2022-08-27 12:48:36 -04:00
trace_and_count ( c , btree_cache_scan , sc - > nr_to_scan , can_free , ret ) ;
2021-12-27 20:45:07 -05:00
return ret ;
2017-03-16 22:18:50 -08:00
}
static unsigned long bch2_btree_cache_count ( struct shrinker * shrink ,
struct shrink_control * sc )
{
struct bch_fs * c = container_of ( shrink , struct bch_fs ,
btree_cache . shrink ) ;
struct btree_cache * bc = & c - > btree_cache ;
2020-11-02 18:20:44 -05:00
if ( bch2_btree_shrinker_disabled )
2017-03-16 22:18:50 -08:00
return 0 ;
2022-04-03 20:36:32 -04:00
return btree_cache_can_free ( bc ) ;
2017-03-16 22:18:50 -08:00
}
void bch2_fs_btree_cache_exit ( struct bch_fs * c )
{
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b ;
2020-10-11 16:33:49 -04:00
unsigned i , flags ;
2017-03-16 22:18:50 -08:00
if ( bc - > shrink . list . next )
unregister_shrinker ( & bc - > shrink ) ;
2020-10-11 16:33:49 -04:00
/* vfree() can allocate memory: */
flags = memalloc_nofs_save ( ) ;
2017-03-16 22:18:50 -08:00
mutex_lock ( & bc - > lock ) ;
if ( c - > verify_data )
list_move ( & c - > verify_data - > list , & bc - > live ) ;
kvpfree ( c - > verify_ondisk , btree_bytes ( c ) ) ;
for ( i = 0 ; i < BTREE_ID_NR ; i + + )
if ( c - > btree_roots [ i ] . b )
list_add ( & c - > btree_roots [ i ] . b - > list , & bc - > live ) ;
list_splice ( & bc - > freeable , & bc - > live ) ;
while ( ! list_empty ( & bc - > live ) ) {
b = list_first_entry ( & bc - > live , struct btree , list ) ;
BUG_ON ( btree_node_read_in_flight ( b ) | |
btree_node_write_in_flight ( b ) ) ;
if ( btree_node_dirty ( b ) )
bch2_btree_complete_write ( c , b , btree_current_write ( b ) ) ;
2022-02-26 11:10:20 -05:00
clear_btree_node_dirty_acct ( c , b ) ;
2017-03-16 22:18:50 -08:00
btree_node_data_free ( c , b ) ;
}
2020-11-09 13:01:52 -05:00
BUG_ON ( atomic_read ( & c - > btree_cache . dirty ) ) ;
2022-03-04 19:16:04 -05:00
list_splice ( & bc - > freed_pcpu , & bc - > freed_nonpcpu ) ;
while ( ! list_empty ( & bc - > freed_nonpcpu ) ) {
b = list_first_entry ( & bc - > freed_nonpcpu , struct btree , list ) ;
2017-03-16 22:18:50 -08:00
list_del ( & b - > list ) ;
2021-03-23 23:52:27 -04:00
six_lock_pcpu_free ( & b - > c . lock ) ;
2017-03-16 22:18:50 -08:00
kfree ( b ) ;
}
mutex_unlock ( & bc - > lock ) ;
2020-10-11 16:33:49 -04:00
memalloc_nofs_restore ( flags ) ;
2017-03-16 22:18:50 -08:00
if ( bc - > table_init_done )
rhashtable_destroy ( & bc - > table ) ;
}
int bch2_fs_btree_cache_init ( struct bch_fs * c )
{
struct btree_cache * bc = & c - > btree_cache ;
unsigned i ;
int ret = 0 ;
pr_verbose_init ( c - > opts , " " ) ;
ret = rhashtable_init ( & bc - > table , & bch_btree_cache_params ) ;
if ( ret )
goto out ;
bc - > table_init_done = true ;
bch2_recalc_btree_reserve ( c ) ;
for ( i = 0 ; i < bc - > reserve ; i + + )
2021-04-20 20:21:12 -04:00
if ( ! __bch2_btree_node_mem_alloc ( c ) ) {
2017-03-16 22:18:50 -08:00
ret = - ENOMEM ;
goto out ;
}
list_splice_init ( & bc - > live , & bc - > freeable ) ;
mutex_init ( & c - > verify_lock ) ;
bc - > shrink . count_objects = bch2_btree_cache_count ;
bc - > shrink . scan_objects = bch2_btree_cache_scan ;
bc - > shrink . seeks = 4 ;
2020-11-15 16:31:58 -05:00
ret = register_shrinker ( & bc - > shrink , " %s/btree_cache " , c - > name ) ;
2017-03-16 22:18:50 -08:00
out :
pr_verbose_init ( c - > opts , " ret %i " , ret ) ;
return ret ;
}
void bch2_fs_btree_cache_init_early ( struct btree_cache * bc )
{
mutex_init ( & bc - > lock ) ;
INIT_LIST_HEAD ( & bc - > live ) ;
INIT_LIST_HEAD ( & bc - > freeable ) ;
2022-03-04 19:16:04 -05:00
INIT_LIST_HEAD ( & bc - > freed_pcpu ) ;
INIT_LIST_HEAD ( & bc - > freed_nonpcpu ) ;
2017-03-16 22:18:50 -08:00
}
/*
* We can only have one thread cannibalizing other cached btree nodes at a time ,
* or we ' ll deadlock . We use an open coded mutex to ensure that , which a
* cannibalize_bucket ( ) will take . This means every time we unlock the root of
* the btree , we need to release this lock if we have it held .
*/
void bch2_btree_cache_cannibalize_unlock ( struct bch_fs * c )
{
struct btree_cache * bc = & c - > btree_cache ;
if ( bc - > alloc_lock = = current ) {
2022-08-27 12:48:36 -04:00
trace_and_count ( c , btree_cache_cannibalize_unlock , c ) ;
2017-03-16 22:18:50 -08:00
bc - > alloc_lock = NULL ;
closure_wake_up ( & bc - > alloc_wait ) ;
}
}
int bch2_btree_cache_cannibalize_lock ( struct bch_fs * c , struct closure * cl )
{
struct btree_cache * bc = & c - > btree_cache ;
struct task_struct * old ;
old = cmpxchg ( & bc - > alloc_lock , NULL , current ) ;
if ( old = = NULL | | old = = current )
goto success ;
if ( ! cl ) {
2022-08-27 12:48:36 -04:00
trace_and_count ( c , btree_cache_cannibalize_lock_fail , c ) ;
2017-03-16 22:18:50 -08:00
return - ENOMEM ;
}
closure_wait ( & bc - > alloc_wait , cl ) ;
/* Try again, after adding ourselves to waitlist */
old = cmpxchg ( & bc - > alloc_lock , NULL , current ) ;
if ( old = = NULL | | old = = current ) {
/* We raced */
closure_wake_up ( & bc - > alloc_wait ) ;
goto success ;
}
2022-08-27 12:48:36 -04:00
trace_and_count ( c , btree_cache_cannibalize_lock_fail , c ) ;
2017-03-16 22:18:50 -08:00
return - EAGAIN ;
success :
2022-08-27 12:48:36 -04:00
trace_and_count ( c , btree_cache_cannibalize_lock , c ) ;
2017-03-16 22:18:50 -08:00
return 0 ;
}
static struct btree * btree_node_cannibalize ( struct bch_fs * c )
{
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b ;
list_for_each_entry_reverse ( b , & bc - > live , list )
if ( ! btree_node_reclaim ( c , b ) )
return b ;
while ( 1 ) {
list_for_each_entry_reverse ( b , & bc - > live , list )
if ( ! btree_node_write_and_reclaim ( c , b ) )
return b ;
/*
* Rare case : all nodes were intent - locked .
* Just busy - wait .
*/
WARN_ONCE ( 1 , " btree cache cannibalize failed \n " ) ;
cond_resched ( ) ;
}
}
2022-03-04 19:16:04 -05:00
/*
 * Get a btree node with data buffers attached, ready to be filled in.
 *
 * The struct btree comes from the matching freed list (chosen by
 * @pcpu_read_locks) or is newly allocated; its buffers are stolen from a node
 * on the freeable list or newly allocated. If allocation fails and the
 * current task holds the cannibalize lock, a live node is cannibalized
 * instead.
 *
 * Returns the node with intent and write locks held and its state reset, or
 * ERR_PTR(-ENOMEM).
 */
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
{
	struct btree_cache *bc = &c->btree_cache;
	struct list_head *freed;
	struct btree *b, *b2;
	u64 start_time = local_clock();
	unsigned flags;

	freed = pcpu_read_locks
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;

	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	/*
	 * We never free struct btree itself, just the memory that holds the on
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list) {
		if (!btree_node_reclaim(c, b)) {
			list_del_init(&b->list);
			goto got_node;
		}
	}

	b = __btree_node_mem_alloc(c);
	if (!b)
		goto err_locked;

	if (pcpu_read_locks)
		six_lock_pcpu_alloc(&b->c.lock);

	/* Brand new node, nobody else can see it: the trylocks must succeed */
	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));
got_node:

	/*
	 * btree_free() doesn't free memory; it sticks the node on the end of
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list) {
		if (!btree_node_reclaim(c, b2)) {
			/* Take b2's buffers; b2 becomes a bufferless freed node */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
			goto got_mem;
		}
	}

	/* Allocate the buffers without holding the cache lock */
	mutex_unlock(&bc->lock);

	if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
		goto err;

	mutex_lock(&bc->lock);
	bc->used++;
got_mem:
	mutex_unlock(&bc->lock);

	BUG_ON(btree_node_hashed(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_write_in_flight(b));
out:
	b->flags		= 0;
	b->written		= 0;
	b->nsets		= 0;
	b->sib_u64s[0]		= 0;
	b->sib_u64s[1]		= 0;
	b->whiteout_u64s	= 0;
	bch2_btree_keys_init(b);
	set_btree_node_accessed(b);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
			       start_time);

	memalloc_nofs_restore(flags);
	return b;
err:
	mutex_lock(&bc->lock);
err_locked:
	/* Try to cannibalize another cached btree node: */
	if (bc->alloc_lock == current) {
		b2 = btree_node_cannibalize(c);
		bch2_btree_node_hash_remove(bc, b2);

		if (b) {
			/* We already have a struct btree: just take b2's buffers */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
		} else {
			/* No struct btree either: use the cannibalized node itself */
			b = b2;
			list_del_init(&b->list);
		}

		mutex_unlock(&bc->lock);

		trace_and_count(c, btree_cache_cannibalize, c);
		goto out;
	}

	/*
	 * NOTE(review): when we get here via "err" with a non-NULL b, b is
	 * still locked and is on no list, and nothing in this function
	 * releases it - looks like it is leaked; confirm.
	 */
	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);
	return ERR_PTR(-ENOMEM);
}
/*
 * Slowpath, don't want it inlined into btree_iter_traverse()
 *
 * Allocate a cache entry for the node that @k points to, hash it and start
 * reading it in.
 *
 * Returns:
 *  - the node, locked with @lock_type, after a synchronous read;
 *  - NULL if we raced with another fill (hash insert failed), or if @sync is
 *    false (the read was only kicked off);
 *  - an ERR_PTR() on allocation failure or transaction restart.
 *
 * @trans is checked for NULL in several places below.
 */
static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
				struct btree_trans *trans,
				struct btree_path *path,
				const struct bkey_i *k,
				enum btree_id btree_id,
				unsigned level,
				enum six_lock_type lock_type,
				bool sync)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	u32 seq;

	BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
	/*
	 * Parent node must be locked, else we could read in a btree node that's
	 * been freed:
	 */
	if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
		trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
	}

	b = bch2_btree_node_mem_alloc(c, level != 0);

	if (trans && b == ERR_PTR(-ENOMEM)) {
		trans->memory_allocation_failure = true;
		trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
	}

	if (IS_ERR(b))
		return b;

	bkey_copy(&b->key, k);
	if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
		/* raced with another fill: */

		/* mark as unhashed... */
		b->hash_val = 0;

		mutex_lock(&bc->lock);
		list_add(&b->list, &bc->freeable);
		mutex_unlock(&bc->lock);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		return NULL;
	}

	set_btree_node_read_in_flight(b);

	/* Remember the lock sequence number so we can relock after the read */
	six_unlock_write(&b->c.lock);
	seq = b->c.lock.state.seq;
	six_unlock_intent(&b->c.lock);

	/* Unlock before doing IO: */
	if (trans && sync)
		bch2_trans_unlock(trans);

	bch2_btree_node_read(c, b, sync);

	if (!sync)
		return NULL;

	if (trans) {
		int ret = bch2_trans_relock(trans);

		if (!ret)
			ret = bch2_btree_path_relock_intent(trans, path);
		if (ret) {
			BUG_ON(!trans->restarted);
			return ERR_PTR(ret);
		}
	}

	if (!six_relock_type(&b->c.lock, lock_type, seq)) {
		if (trans)
			trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
		/*
		 * NOTE(review): trans is passed on unconditionally here even
		 * though the trace above is guarded by a NULL check - confirm
		 * btree_trans_restart() is safe for a NULL trans.
		 */
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
	}

	return b;
}
2020-06-12 22:29:48 -04:00
static int lock_node_check_fn ( struct six_lock * lock , void * p )
{
struct btree * b = container_of ( lock , struct btree , c . lock ) ;
const struct bkey_i * k = p ;
2022-07-17 23:06:38 -04:00
if ( b - > hash_val ! = btree_ptr_hash_val ( k ) )
return BCH_ERR_lock_fail_node_reused ;
return 0 ;
2020-06-12 22:29:48 -04:00
}
2021-04-23 16:05:49 -04:00
static noinline void btree_bad_header ( struct bch_fs * c , struct btree * b )
{
2022-04-07 17:28:09 -04:00
struct printbuf buf = PRINTBUF ;
2021-04-23 16:05:49 -04:00
if ( ! test_bit ( BCH_FS_INITIAL_GC_DONE , & c - > flags ) )
return ;
2023-02-03 21:01:40 -05:00
prt_printf ( & buf ,
2022-04-07 17:28:09 -04:00
" btree node header doesn't match ptr \n "
" btree %s level %u \n "
" ptr: " ,
bch2_btree_ids [ b - > c . btree_id ] , b - > c . level ) ;
bch2_bkey_val_to_text ( & buf , c , bkey_i_to_s_c ( & b - > key ) ) ;
2023-02-03 21:01:40 -05:00
prt_printf ( & buf , " \n header: btree %s level %llu \n "
2022-04-07 17:28:09 -04:00
" min " ,
bch2_btree_ids [ BTREE_NODE_ID ( b - > data ) ] ,
BTREE_NODE_LEVEL ( b - > data ) ) ;
bch2_bpos_to_text ( & buf , b - > data - > min_key ) ;
2023-02-03 21:01:40 -05:00
prt_printf ( & buf , " \n max " ) ;
2022-04-07 17:28:09 -04:00
bch2_bpos_to_text ( & buf , b - > data - > max_key ) ;
bch2_fs_inconsistent ( c , " %s " , buf . buf ) ;
printbuf_exit ( & buf ) ;
2021-04-23 16:05:49 -04:00
}
static inline void btree_check_header ( struct bch_fs * c , struct btree * b )
{
if ( b - > c . btree_id ! = BTREE_NODE_ID ( b - > data ) | |
b - > c . level ! = BTREE_NODE_LEVEL ( b - > data ) | |
bpos_cmp ( b - > data - > max_key , b - > key . k . p ) | |
( b - > key . k . type = = KEY_TYPE_btree_ptr_v2 & &
bpos_cmp ( b - > data - > min_key ,
bkey_i_to_btree_ptr_v2 ( & b - > key ) - > v . min_key ) ) )
btree_bad_header ( c , b ) ;
}
2017-03-16 22:18:50 -08:00
/**
* bch_btree_node_get - find a btree node in the cache and lock it , reading it
* in from disk if necessary .
*
* If IO is necessary and running under generic_make_request , returns - EAGAIN .
*
* The btree node will have either a read or a write lock held , depending on
* the @ write parameter .
*/
2021-08-30 15:18:31 -04:00
struct btree * bch2_btree_node_get ( struct btree_trans * trans , struct btree_path * path ,
2017-03-16 22:18:50 -08:00
const struct bkey_i * k , unsigned level ,
2020-10-28 14:17:46 -04:00
enum six_lock_type lock_type ,
unsigned long trace_ip )
2017-03-16 22:18:50 -08:00
{
2021-07-24 17:12:51 -04:00
struct bch_fs * c = trans - > c ;
2017-03-16 22:18:50 -08:00
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b ;
struct bset_tree * t ;
2022-07-17 23:06:38 -04:00
int ret ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( level > = BTREE_MAX_DEPTH ) ;
2020-02-24 15:25:00 -05:00
2021-12-19 19:02:50 -05:00
b = btree_node_mem_ptr ( k ) ;
/*
* Check b - > hash_val _before_ calling btree_node_lock ( ) - this might not
* be the node we want anymore , and trying to lock the wrong node could
* cause an unneccessary transaction restart :
*/
if ( likely ( c - > opts . btree_node_mem_ptr_optimization & &
b & &
b - > hash_val = = btree_ptr_hash_val ( k ) ) )
2021-07-26 15:52:41 -04:00
goto lock_node ;
2017-03-16 22:18:50 -08:00
retry :
b = btree_cache_find ( bc , k ) ;
if ( unlikely ( ! b ) ) {
/*
* We must have the parent locked to call bch2_btree_node_fill ( ) ,
* else we could read in a btree node from disk that ' s been
* freed :
*/
2021-08-30 15:18:31 -04:00
b = bch2_btree_node_fill ( c , trans , path , k , path - > btree_id ,
2020-03-15 23:29:43 -04:00
level , lock_type , true ) ;
2017-03-16 22:18:50 -08:00
/* We raced and found the btree node in the cache */
if ( ! b )
goto retry ;
if ( IS_ERR ( b ) )
return b ;
} else {
2020-02-24 15:25:00 -05:00
lock_node :
2017-03-16 22:18:50 -08:00
/*
* There ' s a potential deadlock with splits and insertions into
* interior nodes we have to avoid :
*
* The other thread might be holding an intent lock on the node
* we want , and they want to update its parent node so they ' re
* going to upgrade their intent lock on the parent node to a
* write lock .
*
* But if we ' re holding a read lock on the parent , and we ' re
* trying to get the intent lock they ' re holding , we deadlock .
*
* So to avoid this we drop the read locks on parent nodes when
* we ' re starting to take intent locks - and handle the race .
*
* The race is that they might be about to free the node we
* want , and dropping our read lock on the parent node lets them
* update the parent marking the node we want as freed , and then
* free it :
*
* To guard against this , btree nodes are evicted from the cache
2020-02-18 17:15:32 -05:00
* when they ' re freed - and b - > hash_val is zeroed out , which we
2017-03-16 22:18:50 -08:00
* check for after we lock the node .
*
* Then , bch2_btree_node_relock ( ) on the parent will fail - because
* the parent was modified , when the pointer to the node we want
* was removed - and we ' ll bail out :
*/
2021-08-30 15:18:31 -04:00
if ( btree_node_read_locked ( path , level + 1 ) )
2022-07-14 18:58:23 +12:00
btree_node_unlock ( trans , path , level + 1 ) ;
2017-03-16 22:18:50 -08:00
2022-08-22 13:21:10 -04:00
ret = btree_node_lock ( trans , path , & b - > c , k - > k . p , level , lock_type ,
2022-07-17 23:06:38 -04:00
lock_node_check_fn , ( void * ) k , trace_ip ) ;
if ( unlikely ( ret ) ) {
if ( bch2_err_matches ( ret , BCH_ERR_lock_fail_node_reused ) )
2020-06-12 22:29:48 -04:00
goto retry ;
2022-07-17 23:06:38 -04:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
return ERR_PTR ( ret ) ;
BUG ( ) ;
2020-06-12 22:29:48 -04:00
}
2017-03-16 22:18:50 -08:00
2020-02-18 17:15:32 -05:00
if ( unlikely ( b - > hash_val ! = btree_ptr_hash_val ( k ) | |
2020-06-06 12:28:01 -04:00
b - > c . level ! = level | |
2017-03-16 22:18:50 -08:00
race_fault ( ) ) ) {
2020-06-06 12:28:01 -04:00
six_unlock_type ( & b - > c . lock , lock_type ) ;
2021-08-30 15:18:31 -04:00
if ( bch2_btree_node_relock ( trans , path , level + 1 ) )
2017-03-16 22:18:50 -08:00
goto retry ;
2022-08-27 12:48:36 -04:00
trace_and_count ( c , trans_restart_btree_node_reused , trans , trace_ip , path ) ;
2022-07-17 23:06:38 -04:00
return ERR_PTR ( btree_trans_restart ( trans , BCH_ERR_transaction_restart_lock_node_reused ) ) ;
2017-03-16 22:18:50 -08:00
}
}
2021-04-08 22:26:53 -04:00
if ( unlikely ( btree_node_read_in_flight ( b ) ) ) {
2021-07-10 23:03:15 -04:00
u32 seq = b - > c . lock . state . seq ;
2021-04-08 22:26:53 -04:00
six_unlock_type ( & b - > c . lock , lock_type ) ;
2021-07-24 17:12:51 -04:00
bch2_trans_unlock ( trans ) ;
2021-04-08 22:26:53 -04:00
2021-07-10 23:03:15 -04:00
bch2_btree_node_wait_on_read ( b ) ;
2021-04-08 22:26:53 -04:00
/*
2021-08-30 15:18:31 -04:00
* should_be_locked is not set on this path yet , so we need to
* relock it specifically :
2021-04-08 22:26:53 -04:00
*/
2022-07-17 23:06:38 -04:00
if ( trans ) {
int ret = bch2_trans_relock ( trans ) ? :
bch2_btree_path_relock_intent ( trans , path ) ;
if ( ret ) {
BUG_ON ( ! trans - > restarted ) ;
return ERR_PTR ( ret ) ;
}
2021-07-25 17:19:52 -04:00
}
2021-07-10 23:03:15 -04:00
if ( ! six_relock_type ( & b - > c . lock , lock_type , seq ) )
goto retry ;
2021-04-08 22:26:53 -04:00
}
2017-03-16 22:18:50 -08:00
prefetch ( b - > aux_data ) ;
for_each_bset ( b , t ) {
void * p = ( u64 * ) b - > aux_data + t - > aux_data_offset ;
prefetch ( p + L1_CACHE_BYTES * 0 ) ;
prefetch ( p + L1_CACHE_BYTES * 1 ) ;
prefetch ( p + L1_CACHE_BYTES * 2 ) ;
}
/* avoid atomic set bit if it's not needed: */
2020-02-26 17:25:13 -05:00
if ( ! btree_node_accessed ( b ) )
2017-03-16 22:18:50 -08:00
set_btree_node_accessed ( b ) ;
if ( unlikely ( btree_node_read_error ( b ) ) ) {
2020-06-06 12:28:01 -04:00
six_unlock_type ( & b - > c . lock , lock_type ) ;
2017-03-16 22:18:50 -08:00
return ERR_PTR ( - EIO ) ;
}
2021-08-30 15:18:31 -04:00
EBUG_ON ( b - > c . btree_id ! = path - > btree_id ) ;
2021-01-26 20:59:00 -05:00
EBUG_ON ( BTREE_NODE_LEVEL ( b - > data ) ! = level ) ;
2021-04-23 16:05:49 -04:00
btree_check_header ( c , b ) ;
2017-03-16 22:18:50 -08:00
return b ;
}
2022-08-21 14:29:43 -04:00
/*
 * bch2_btree_node_get_noiter - get a btree node without a btree_path
 *
 * @trans:	transaction used when taking the node lock
 * @k:		key pointing at the node we want
 * @btree_id:	btree the node is expected to belong to
 * @level:	expected level of the node; must be < BTREE_MAX_DEPTH
 * @nofill:	if true, don't call bch2_btree_node_fill() on a cache miss
 *
 * Looks the node up (via the in-memory pointer when
 * btree_node_mem_ptr_optimization is enabled, otherwise via the btree node
 * cache), takes a SIX_LOCK_read lock on it, and verifies that it still is
 * the node @k points to; on mismatch the lookup is retried.
 *
 * Returns:
 *   - the node, read locked, on success
 *   - NULL if the node was not in the cache and @nofill was set
 *   - ERR_PTR(-EIO) if the node is flagged with a read error
 *   - ERR_PTR() of a transaction restart error if locking the node failed
 *     with BCH_ERR_transaction_restart
 *   - the ERR_PTR() from bch2_btree_node_fill() if filling failed and the
 *     cannibalize lock could not be taken
 *
 * Every exit goes through the out label so that the cannibalize lock, which
 * may have been taken on a previous retry iteration, is always dropped.
 */
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
					 const struct bkey_i *k,
					 enum btree_id btree_id,
					 unsigned level,
					 bool nofill)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	struct bset_tree *t;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	if (c->opts.btree_node_mem_ptr_optimization) {
		b = btree_node_mem_ptr(k);
		if (b)
			goto lock_node;
	}
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		/* Caller only wants the node if it is already cached: */
		if (nofill)
			goto out;

		b = bch2_btree_node_fill(c, NULL, NULL, k, btree_id,
					 level, SIX_LOCK_read, true);

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		/*
		 * Fill failed: if we can take the cannibalize lock (call
		 * returned 0), try again; otherwise report the fill error.
		 */
		if (IS_ERR(b) &&
		    !bch2_btree_cache_cannibalize_lock(c, NULL))
			goto retry;

		if (IS_ERR(b))
			goto out;
	} else {
lock_node:
		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
		if (unlikely(ret)) {
			if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
				goto retry;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
				/*
				 * Don't return directly: we may be holding the
				 * cannibalize lock from an earlier iteration.
				 */
				b = ERR_PTR(ret);
				goto out;
			}
			BUG();
		}

		/* Node was reused for something else while we were locking it: */
		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.btree_id != btree_id ||
			     b->c.level != level)) {
			six_unlock_read(&b->c.lock);
			goto retry;
		}
	}

	/* XXX: waiting on IO with btree locks held: */
	__bch2_btree_node_wait_on_read(b);

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_read(&b->c.lock);
		b = ERR_PTR(-EIO);
		goto out;
	}

	EBUG_ON(b->c.btree_id != btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);
out:
	bch2_btree_cache_cannibalize_unlock(c);
	return b;
}
2021-08-30 14:22:43 -04:00
int bch2_btree_node_prefetch ( struct bch_fs * c ,
struct btree_trans * trans ,
2021-08-30 15:18:31 -04:00
struct btree_path * path ,
2021-07-24 19:50:40 -04:00
const struct bkey_i * k ,
enum btree_id btree_id , unsigned level )
2017-03-16 22:18:50 -08:00
{
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b ;
2021-08-30 15:18:31 -04:00
BUG_ON ( trans & & ! btree_node_locked ( path , level + 1 ) ) ;
2017-03-16 22:18:50 -08:00
BUG_ON ( level > = BTREE_MAX_DEPTH ) ;
b = btree_cache_find ( bc , k ) ;
if ( b )
2021-07-24 19:50:40 -04:00
return 0 ;
2017-03-16 22:18:50 -08:00
2021-08-30 15:18:31 -04:00
b = bch2_btree_node_fill ( c , trans , path , k , btree_id ,
2021-08-30 14:22:43 -04:00
level , SIX_LOCK_read , false ) ;
2021-07-24 19:50:40 -04:00
return PTR_ERR_OR_ZERO ( b ) ;
2017-03-16 22:18:50 -08:00
}
2022-08-21 14:29:43 -04:00
void bch2_btree_node_evict ( struct btree_trans * trans , const struct bkey_i * k )
2021-04-25 16:24:03 -04:00
{
2022-08-21 14:29:43 -04:00
struct bch_fs * c = trans - > c ;
2021-04-25 16:24:03 -04:00
struct btree_cache * bc = & c - > btree_cache ;
struct btree * b ;
b = btree_cache_find ( bc , k ) ;
if ( ! b )
return ;
2021-07-10 23:03:15 -04:00
wait_on_io :
/* not allowed to wait on io with btree locks held: */
/* XXX we're called from btree_gc which will be holding other btree
* nodes locked
* */
__bch2_btree_node_wait_on_read ( b ) ;
__bch2_btree_node_wait_on_write ( b ) ;
2021-04-25 16:24:03 -04:00
2022-08-21 14:29:43 -04:00
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_intent ) ;
btree_node_lock_nopath_nofail ( trans , & b - > c , SIX_LOCK_write ) ;
2021-04-25 16:24:03 -04:00
2021-07-10 23:03:15 -04:00
if ( btree_node_dirty ( b ) ) {
2022-02-26 21:46:41 -05:00
__bch2_btree_node_write ( c , b , 0 ) ;
2021-07-10 23:03:15 -04:00
six_unlock_write ( & b - > c . lock ) ;
six_unlock_intent ( & b - > c . lock ) ;
goto wait_on_io ;
}
2021-04-25 16:24:03 -04:00
BUG_ON ( btree_node_dirty ( b ) ) ;
mutex_lock ( & bc - > lock ) ;
btree_node_data_free ( c , b ) ;
bch2_btree_node_hash_remove ( bc , b ) ;
mutex_unlock ( & bc - > lock ) ;
six_unlock_write ( & b - > c . lock ) ;
six_unlock_intent ( & b - > c . lock ) ;
}
2018-11-09 01:24:07 -05:00
void bch2_btree_node_to_text ( struct printbuf * out , struct bch_fs * c ,
struct btree * b )
2017-03-16 22:18:50 -08:00
{
const struct bkey_format * f = & b - > format ;
struct bset_stats stats ;
memset ( & stats , 0 , sizeof ( stats ) ) ;
bch2_btree_keys_stats ( b , & stats ) ;
2023-02-03 21:01:40 -05:00
prt_printf ( out , " l %u " , b - > c . level ) ;
2021-03-04 15:20:22 -05:00
bch2_bpos_to_text ( out , b - > data - > min_key ) ;
2023-02-03 21:01:40 -05:00
prt_printf ( out , " - " ) ;
2021-03-04 15:20:22 -05:00
bch2_bpos_to_text ( out , b - > data - > max_key ) ;
2023-02-03 21:01:40 -05:00
prt_printf ( out , " : \n "
2021-03-04 15:20:22 -05:00
" ptrs: " ) ;
2018-11-01 15:10:01 -04:00
bch2_val_to_text ( out , c , bkey_i_to_s_c ( & b - > key ) ) ;
2021-03-04 15:20:22 -05:00
2023-02-03 21:01:40 -05:00
prt_printf ( out , " \n "
2018-11-09 01:24:07 -05:00
" format: u64s %u fields %u %u %u %u %u \n "
" unpack fn len: %u \n "
" bytes used %zu/%zu (%zu%% full) \n "
2021-03-29 01:13:31 -04:00
" sib u64s: %u, %u (merge threshold %u) \n "
2018-11-09 01:24:07 -05:00
" nr packed keys %u \n "
" nr unpacked keys %u \n "
" floats %zu \n "
2019-10-23 14:56:20 -04:00
" failed unpacked %zu \n " ,
2018-11-09 01:24:07 -05:00
f - > key_u64s ,
f - > bits_per_field [ 0 ] ,
f - > bits_per_field [ 1 ] ,
f - > bits_per_field [ 2 ] ,
f - > bits_per_field [ 3 ] ,
f - > bits_per_field [ 4 ] ,
b - > unpack_fn_len ,
b - > nr . live_u64s * sizeof ( u64 ) ,
btree_bytes ( c ) - sizeof ( struct btree_node ) ,
b - > nr . live_u64s * 100 / btree_max_u64s ( c ) ,
b - > sib_u64s [ 0 ] ,
b - > sib_u64s [ 1 ] ,
2021-03-29 01:13:31 -04:00
c - > btree_foreground_merge_threshold ,
2018-11-09 01:24:07 -05:00
b - > nr . packed_keys ,
b - > nr . unpacked_keys ,
stats . floats ,
2019-10-23 14:56:20 -04:00
stats . failed ) ;
2017-03-16 22:18:50 -08:00
}
2020-11-19 20:13:30 -05:00
void bch2_btree_cache_to_text ( struct printbuf * out , struct bch_fs * c )
{
2023-02-03 21:01:40 -05:00
prt_printf ( out , " nr nodes: \t \t %u \n " , c - > btree_cache . used ) ;
prt_printf ( out , " nr dirty: \t \t %u \n " , atomic_read ( & c - > btree_cache . dirty ) ) ;
prt_printf ( out , " cannibalize lock: \t %p \n " , c - > btree_cache . alloc_lock ) ;
2020-11-19 20:13:30 -05:00
}