#include "bcachefs.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "trace.h"

#include <linux/sched/mm.h>

static struct kmem_cache *bch2_key_cache;
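
/*
 * Cached keys are kept in a resizable hash table, indexed by
 * { btree_id, pos }; the comparison function and rhashtable parameters below
 * describe that layout.
 */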
static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
				       const void *obj)
{
	const struct bkey_cached *ck = obj;
	const struct bkey_cached_key *key = arg->key;

	return cmp_int(ck->key.btree_id, key->btree_id) ?:
		bpos_cmp(ck->key.pos, key->pos);
}

static const struct rhashtable_params bch2_btree_key_cache_params = {
	.head_offset	= offsetof(struct bkey_cached, hash),
	.key_offset	= offsetof(struct bkey_cached, key),
	.key_len	= sizeof(struct bkey_cached_key),
	.obj_cmpfn	= bch2_btree_key_cache_cmp_fn,
};

__flatten
inline struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
{
	struct bkey_cached_key key = {
		.btree_id	= btree_id,
		.pos		= pos,
	};

	return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
				      bch2_btree_key_cache_params);
}

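/*
 * Try to take both the intent and write locks on a cached key without
 * blocking, but only if it isn't dirty - used when evicting an entry.
 */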
static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
{
	if (!six_trylock_intent(&ck->c.lock))
		return false;

	if (!six_trylock_write(&ck->c.lock)) {
		six_unlock_intent(&ck->c.lock);
		return false;
	}

	if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
		six_unlock_write(&ck->c.lock);
		six_unlock_intent(&ck->c.lock);
		return false;
	}

	return true;
}

static void bkey_cached_evict(struct btree_key_cache *c,
			      struct bkey_cached *ck)
{
	BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
				      bch2_btree_key_cache_params));
	memset(&ck->key, ~0, sizeof(ck->key));

	atomic_long_dec(&c->nr_keys);
}

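/*
 * Move an entry that's been evicted from the hash table onto the freed list.
 * Freed entries are only returned to the slab allocator once an SRCU grace
 * period has elapsed (see bch2_btree_key_cache_scan()), so we record the
 * barrier sequence number here.
 */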
static void bkey_cached_free(struct btree_key_cache *bc,
			     struct bkey_cached *ck)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);

	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));

	ck->btree_trans_barrier_seq =
		start_poll_synchronize_srcu(&c->btree_trans_barrier);

	list_move_tail(&ck->list, &bc->freed);
	atomic_long_inc(&bc->nr_freed);

	kfree(ck->k);
	ck->k		= NULL;
	ck->u64s	= 0;

	six_unlock_write(&ck->c.lock);
	six_unlock_intent(&ck->c.lock);
}

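/*
 * Put an entry back on this CPU's percpu freelist; if the percpu freelist is
 * full, spill half of it (plus this entry) onto the shared freed list under
 * bc->lock.
 */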
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
					 struct bkey_cached *ck)
{
	struct btree_key_cache_freelist *f;
	bool freed = false;

	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));

	preempt_disable();
	f = this_cpu_ptr(bc->pcpu_freed);

	if (f->nr < ARRAY_SIZE(f->objs)) {
		f->objs[f->nr++] = ck;
		freed = true;
	}
	preempt_enable();

	if (!freed) {
		mutex_lock(&bc->lock);
		preempt_disable();
		f = this_cpu_ptr(bc->pcpu_freed);

		while (f->nr > ARRAY_SIZE(f->objs) / 2) {
			struct bkey_cached *ck2 = f->objs[--f->nr];

			list_move_tail(&ck2->list, &bc->freed);
		}
		preempt_enable();

		list_move_tail(&ck->list, &bc->freed);
		mutex_unlock(&bc->lock);
	}
}

static void bkey_cached_free_fast(struct btree_key_cache *bc,
				  struct bkey_cached *ck)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);

	ck->btree_trans_barrier_seq =
		start_poll_synchronize_srcu(&c->btree_trans_barrier);

	list_del_init(&ck->list);
	atomic_long_inc(&bc->nr_freed);

	kfree(ck->k);
	ck->k		= NULL;
	ck->u64s	= 0;

	bkey_cached_move_to_freelist(bc, ck);

	six_unlock_write(&ck->c.lock);
	six_unlock_intent(&ck->c.lock);
}

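/*
 * Allocate a cached key: first try this CPU's percpu freelist, refilling it
 * from the shared freed list if necessary, then fall back to the slab
 * allocator. On success the entry is returned with both intent and write
 * locks held; returns NULL if allocation failed, or an error pointer if
 * relocking a freelist entry failed.
 */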
static struct bkey_cached *
bkey_cached_alloc(struct btree_trans *trans,
		  struct btree_key_cache *c)
{
	struct bkey_cached *ck = NULL;
	struct btree_key_cache_freelist *f;

	preempt_disable();
	f = this_cpu_ptr(c->pcpu_freed);
	if (f->nr)
		ck = f->objs[--f->nr];
	preempt_enable();

	if (!ck) {
		mutex_lock(&c->lock);
		preempt_disable();
		f = this_cpu_ptr(c->pcpu_freed);

		while (!list_empty(&c->freed) &&
		       f->nr < ARRAY_SIZE(f->objs) / 2) {
			ck = list_last_entry(&c->freed, struct bkey_cached, list);
			list_del_init(&ck->list);
			f->objs[f->nr++] = ck;
		}

		ck = f->nr ? f->objs[--f->nr] : NULL;
		preempt_enable();
		mutex_unlock(&c->lock);
	}

	if (ck) {
		int ret;

		ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
		if (unlikely(ret)) {
			bkey_cached_move_to_freelist(c, ck);
			return ERR_PTR(ret);
		}

		ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write);
		if (unlikely(ret)) {
			six_unlock_intent(&ck->c.lock);
			bkey_cached_move_to_freelist(c, ck);
			return ERR_PTR(ret);
		}

		return ck;
	}

	ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
	if (likely(ck)) {
		INIT_LIST_HEAD(&ck->list);
		__six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
		lockdep_set_novalidate_class(&ck->c.lock);
		BUG_ON(!six_trylock_intent(&ck->c.lock));
		BUG_ON(!six_trylock_write(&ck->c.lock));
		return ck;
	}

	return NULL;
}

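/*
 * When allocating a new entry fails, scan the hash table for a clean entry we
 * can steal: evict and return the first one we manage to lock without
 * blocking.
 */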
static struct bkey_cached *
bkey_cached_reuse(struct btree_key_cache *c)
{
	struct bucket_table *tbl;
	struct rhash_head *pos;
	struct bkey_cached *ck;
	unsigned i;

	rcu_read_lock();
	tbl = rht_dereference_rcu(c->table.tbl, &c->table);
	for (i = 0; i < tbl->size; i++)
		rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
			if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
			    bkey_cached_lock_for_evict(ck)) {
				bkey_cached_evict(c, ck);
				rcu_read_unlock();
				return ck;
			}
		}
	rcu_read_unlock();

	return NULL;
}

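/*
 * Allocate and hash a new cached key for @btree_id, @pos. Returns the new
 * entry with the intent lock held, NULL if we raced with another thread
 * inserting the same key, or an error pointer on failure.
 */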
static struct bkey_cached *
btree_key_cache_create(struct btree_trans *trans,
		       enum btree_id btree_id,
		       struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_key_cache *bc = &c->btree_key_cache;
	struct bkey_cached *ck;
	bool was_new = true;

	ck = bkey_cached_alloc(trans, bc);
	if (unlikely(IS_ERR(ck)))
		return ck;

	if (unlikely(!ck)) {
		ck = bkey_cached_reuse(bc);
		if (unlikely(!ck)) {
			bch_err(c, "error allocating memory for key cache item, btree %s",
				bch2_btree_ids[btree_id]);
			return ERR_PTR(-ENOMEM);
		}

		was_new = false;
	} else {
		if (btree_id == BTREE_ID_subvolumes)
			six_lock_pcpu_alloc(&ck->c.lock);
	}

	ck->c.level		= 0;
	ck->c.btree_id		= btree_id;
	ck->key.btree_id	= btree_id;
	ck->key.pos		= pos;
	ck->valid		= false;
	ck->flags		= 1U << BKEY_CACHED_ACCESSED;

	if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
					  &ck->hash,
					  bch2_btree_key_cache_params))) {
		/* We raced with another fill: */

		if (likely(was_new)) {
			six_unlock_write(&ck->c.lock);
			six_unlock_intent(&ck->c.lock);
			kfree(ck);
		} else {
			bkey_cached_free_fast(bc, ck);
		}

		return NULL;
	}

	atomic_long_inc(&bc->nr_keys);

	six_unlock_write(&ck->c.lock);

	return ck;
}

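/*
 * Read the current value of the key from the btree and copy it into the
 * cached entry, reallocating the entry's key buffer if it's too small.
 */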
static int btree_key_cache_fill(struct btree_trans *trans,
				struct btree_path *ck_path,
				struct bkey_cached *ck)
{
	struct btree_path *path;
	struct bkey_s_c k;
	unsigned new_u64s = 0;
	struct bkey_i *new_k = NULL;
	struct bkey u;
	int ret;

	path = bch2_path_get(trans, ck->key.btree_id, ck->key.pos, 0, 0, 0);
	ret = bch2_btree_path_traverse(trans, path, 0);
	if (ret)
		goto err;

	k = bch2_btree_path_peek_slot(path, &u);

	if (!bch2_btree_node_relock(trans, ck_path, 0)) {
		trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
		goto err;
	}

	/*
	 * bch2_varint_decode can read past the end of the buffer by at
	 * most 7 bytes (it won't be used):
	 */
	new_u64s = k.k->u64s + 1;

	/*
	 * Allocate some extra space so that the transaction commit path is less
	 * likely to have to reallocate, since that requires a transaction
	 * restart:
	 */
	new_u64s = min(256U, (new_u64s * 3) / 2);

	if (new_u64s > ck->u64s) {
		new_u64s = roundup_pow_of_two(new_u64s);
		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
		if (!new_k) {
			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
				bch2_btree_ids[ck->key.btree_id], new_u64s);
			ret = -ENOMEM;
			goto err;
		}
	}

	/*
	 * XXX: not allowed to be holding read locks when we take a write lock,
	 * currently
	 */
	bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b);
	if (new_k) {
		kfree(ck->k);
		ck->u64s = new_u64s;
		ck->k = new_k;
	}

	bkey_reassemble(ck->k, k);
	ck->valid = true;
	bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);

	/* We're not likely to need this iterator again: */
	path->preserve = false;
err:
	bch2_path_put(trans, path, 0);
	return ret;
}

static int bkey_cached_check_fn(struct six_lock *lock, void *p)
{
	struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
	const struct btree_path *path = p;

	if (ck->key.btree_id != path->btree_id &&
	    bpos_cmp(ck->key.pos, path->pos))
		return BCH_ERR_lock_fail_node_reused;
	return 0;
}

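/*
 * Traverse a btree path that points into the key cache: look up or create the
 * cached entry for path->pos, lock it, and fill it from the btree if it isn't
 * valid yet.
 */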
__flatten
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
				    unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_cached *ck;
	int ret = 0;

	BUG_ON(path->level);

	path->l[1].b = NULL;

	if (bch2_btree_node_relock(trans, path, 0)) {
		ck = (void *) path->l[0].b;
		goto fill;
	}
retry:
	ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
	if (!ck) {
		ck = btree_key_cache_create(trans, path->btree_id, path->pos);
		ret = PTR_ERR_OR_ZERO(ck);
		if (ret)
			goto err;
		if (!ck)
			goto retry;

		mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
		path->locks_want = 1;
	} else {
		enum six_lock_type lock_want = __btree_lock_want(path, 0);

		ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
				      lock_want,
				      bkey_cached_check_fn, path, _THIS_IP_);
		if (ret) {
			if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
				goto retry;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				goto err;
			BUG();
		}

		if (ck->key.btree_id != path->btree_id ||
		    bpos_cmp(ck->key.pos, path->pos)) {
			six_unlock_type(&ck->c.lock, lock_want);
			goto retry;
		}

		mark_btree_node_locked(trans, path, 0, lock_want);
	}

	path->l[0].lock_seq	= ck->c.lock.state.seq;
	path->l[0].b		= (void *) ck;
fill:
	if (!ck->valid) {
		/*
		 * Using the underscore version because we haven't set
		 * path->uptodate yet:
		 */
		if (!path->locks_want &&
		    !__bch2_btree_path_upgrade(trans, path, 1)) {
			trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
			goto err;
		}

		ret = btree_key_cache_fill(trans, path, ck);
		if (ret)
			goto err;
	}

	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);

	path->uptodate = BTREE_ITER_UPTODATE;
	BUG_ON(!ck->valid);
	BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));

	return ret;
err:
	if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		btree_node_unlock(trans, path, 0);
		path->l[0].b = ERR_PTR(ret);
	}
	return ret;
}

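/*
 * Flush a dirty cached key back to the btree: commit the cached value to the
 * underlying btree (bypassing the key cache), then drop the journal pin and
 * clear the dirty bit. If @evict is set, the entry is also evicted from the
 * hash table and freed.
 */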
static int btree_key_cache_flush_pos(struct btree_trans *trans,
				     struct bkey_cached_key key,
				     u64 journal_seq,
				     unsigned commit_flags,
				     bool evict)
{
	struct bch_fs *c = trans->c;
	struct journal *j = &c->journal;
	struct btree_iter c_iter, b_iter;
	struct bkey_cached *ck = NULL;
	int ret;

	bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
			     BTREE_ITER_SLOTS|
			     BTREE_ITER_INTENT|
			     BTREE_ITER_ALL_SNAPSHOTS);
	bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;

	ret = bch2_btree_iter_traverse(&c_iter);
	if (ret)
		goto out;

	ck = (void *) c_iter.path->l[0].b;
	if (!ck)
		goto out;

	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
		if (evict)
			goto evict;
		goto out;
	}

	BUG_ON(!ck->valid);

	if (journal_seq && ck->journal.seq != journal_seq)
		goto out;

	/*
	 * Since journal reclaim depends on us making progress here, and the
	 * allocator/copygc depend on journal reclaim making progress, we need
	 * to be using alloc reserves:
	 */
	ret   = bch2_btree_iter_traverse(&b_iter) ?:
		bch2_trans_update(trans, &b_iter, ck->k,
				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
				  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
				  BTREE_TRIGGER_NORUN) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BTREE_INSERT_NOCHECK_RW|
				  BTREE_INSERT_NOFAIL|
				  BTREE_INSERT_USE_RESERVE|
				  (ck->journal.seq == journal_last_seq(j)
				   ? JOURNAL_WATERMARK_reserved
				   : 0)|
				  commit_flags);

	bch2_fs_fatal_err_on(ret &&
			     !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
			     !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
			     !bch2_journal_error(j), c,
			     "error flushing key cache: %s", bch2_err_str(ret));
	if (ret)
		goto out;

	bch2_journal_pin_drop(j, &ck->journal);
	bch2_journal_preres_put(j, &ck->res);

	BUG_ON(!btree_node_locked(c_iter.path, 0));

	if (!evict) {
		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
			atomic_long_dec(&c->btree_key_cache.nr_dirty);
		}
	} else {
evict:
		BUG_ON(!btree_node_intent_locked(c_iter.path, 0));

		/*
		 * XXX: holding a lock that is not marked in btree_trans, not
		 * ideal:
		 */
		six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
		bch2_trans_unlock(trans);

		/* Will not fail because we are holding no other locks: */
		btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);

		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
			atomic_long_dec(&c->btree_key_cache.nr_dirty);
		}

		bkey_cached_evict(&c->btree_key_cache, ck);

		bkey_cached_free_fast(&c->btree_key_cache, ck);
	}
out:
	bch2_trans_iter_exit(trans, &b_iter);
	bch2_trans_iter_exit(trans, &c_iter);
	return ret;
}

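/*
 * Journal pin flush callback: journal reclaim calls this when it needs the
 * dirty cached key pinning this journal sequence number to be written back.
 */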
int bch2_btree_key_cache_journal_flush(struct journal *j,
				       struct journal_entry_pin *pin, u64 seq)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct bkey_cached *ck =
		container_of(pin, struct bkey_cached, journal);
	struct bkey_cached_key key;
	struct btree_trans trans;
	int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
	key = ck->key;

	if (ck->journal.seq != seq ||
	    !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
		six_unlock_read(&ck->c.lock);
		goto unlock;
	}

	if (ck->seq != seq) {
		bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal,
					bch2_btree_key_cache_journal_flush);
		six_unlock_read(&ck->c.lock);
		goto unlock;
	}
	six_unlock_read(&ck->c.lock);

	ret = commit_do(&trans, NULL, NULL, 0,
		btree_key_cache_flush_pos(&trans, key, seq,
				BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
	srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);

	bch2_trans_exit(&trans);
	return ret;
}

/*
 * Flush and evict a key from the key cache:
 */
int bch2_btree_key_cache_flush(struct btree_trans *trans,
			       enum btree_id id, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct bkey_cached_key key = { id, pos };

	/* Fastpath - assume it won't be found: */
	if (!bch2_btree_key_cache_find(c, id, pos))
		return 0;

	return btree_key_cache_flush_pos(trans, key, 0, 0, true);
}

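/*
 * Update a cached key from the transaction commit path: copy in the new value,
 * mark the entry dirty, and add or update a journal pin so journal reclaim
 * will eventually write it back to the btree.
 */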
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
				  struct btree_path *path,
				  struct bkey_i *insert)
{
	struct bch_fs *c = trans->c;
	struct bkey_cached *ck = (void *) path->l[0].b;
	bool kick_reclaim = false;

	BUG_ON(insert->u64s > ck->u64s);

	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		int difference;

		BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);

		difference = jset_u64s(insert->u64s) - ck->res.u64s;
		if (difference > 0) {
			trans->journal_preres.u64s	-= difference;
			ck->res.u64s			+= difference;
		}
	}

	bkey_copy(ck->k, insert);
	ck->valid = true;

	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
		atomic_long_inc(&c->btree_key_cache.nr_dirty);

		if (bch2_nr_btree_keys_need_flush(c))
			kick_reclaim = true;
	}

	bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
			     &ck->journal, bch2_btree_key_cache_journal_flush);
	ck->seq = trans->journal_res.seq;

	if (kick_reclaim)
		journal_reclaim_kick(&c->journal);
	return true;
}

void bch2_btree_key_cache_drop(struct btree_trans *trans,
			       struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	struct bkey_cached *ck = (void *) path->l[0].b;

	BUG_ON(!ck->valid);

	/*
	 * We just did an update to the btree, bypassing the key cache: the key
	 * cache key is now stale and must be dropped, even if dirty:
	 */
	if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
		clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
		atomic_long_dec(&c->btree_key_cache.nr_dirty);
		bch2_journal_pin_drop(&c->journal, &ck->journal);
	}

	ck->valid = false;
}

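/*
 * Shrinker scan callback: first reap entries on the freed list whose SRCU
 * grace period has elapsed, then walk the hash table evicting clean entries
 * that haven't been accessed since the previous scan.
 */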
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
					       struct shrink_control *sc)
{
	struct bch_fs *c = container_of(shrink, struct bch_fs,
					btree_key_cache.shrink);
	struct btree_key_cache *bc = &c->btree_key_cache;
	struct bucket_table *tbl;
	struct bkey_cached *ck, *t;
	size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
	unsigned start, flags;
	int srcu_idx;

	/* Return -1 if we can't do anything right now */
	if (sc->gfp_mask & __GFP_FS)
		mutex_lock(&bc->lock);
	else if (!mutex_trylock(&bc->lock))
		return -1;

	srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
	flags = memalloc_nofs_save();

	/*
	 * Newest freed entries are at the end of the list - once we hit one
	 * that's too new to be freed, we can bail out:
	 */
	list_for_each_entry_safe(ck, t, &bc->freed, list) {
		if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
						 ck->btree_trans_barrier_seq))
			break;

		list_del(&ck->list);
		six_lock_pcpu_free(&ck->c.lock);
		kmem_cache_free(bch2_key_cache, ck);
		atomic_long_dec(&bc->nr_freed);
		scanned++;
		freed++;
	}

	if (scanned >= nr)
		goto out;

	rcu_read_lock();
	tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
	if (bc->shrink_iter >= tbl->size)
		bc->shrink_iter = 0;
	start = bc->shrink_iter;

	do {
		struct rhash_head *pos, *next;

		pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));

		while (!rht_is_a_nulls(pos)) {
			next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
			ck = container_of(pos, struct bkey_cached, hash);

			if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
				goto next;

			if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
				clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
			else if (bkey_cached_lock_for_evict(ck)) {
				bkey_cached_evict(bc, ck);
				bkey_cached_free(bc, ck);
			}

			scanned++;
			if (scanned >= nr)
				break;
next:
			pos = next;
		}

		bc->shrink_iter++;
		if (bc->shrink_iter >= tbl->size)
			bc->shrink_iter = 0;
	} while (scanned < nr && bc->shrink_iter != start);

	rcu_read_unlock();
out:
	memalloc_nofs_restore(flags);
	srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
	mutex_unlock(&bc->lock);

	return freed;
}

static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
						struct shrink_control *sc)
{
	struct bch_fs *c = container_of(shrink, struct bch_fs,
					btree_key_cache.shrink);
	struct btree_key_cache *bc = &c->btree_key_cache;
	long nr = atomic_long_read(&bc->nr_keys) -
		  atomic_long_read(&bc->nr_dirty);

	return max(0L, nr);
}

void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
	struct bucket_table *tbl;
	struct bkey_cached *ck, *n;
	struct rhash_head *pos;
	unsigned i;
	int cpu;

	if (bc->shrink.list.next)
		unregister_shrinker(&bc->shrink);

	mutex_lock(&bc->lock);

	rcu_read_lock();
	tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
	if (tbl)
		for (i = 0; i < tbl->size; i++)
			rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
				bkey_cached_evict(bc, ck);
				list_add(&ck->list, &bc->freed);
			}
	rcu_read_unlock();

	for_each_possible_cpu(cpu) {
		struct btree_key_cache_freelist *f =
			per_cpu_ptr(bc->pcpu_freed, cpu);

		for (i = 0; i < f->nr; i++) {
			ck = f->objs[i];
			list_add(&ck->list, &bc->freed);
		}
	}

	list_for_each_entry_safe(ck, n, &bc->freed, list) {
		cond_resched();

		bch2_journal_pin_drop(&c->journal, &ck->journal);
		bch2_journal_preres_put(&c->journal, &ck->res);

		list_del(&ck->list);
		kfree(ck->k);
		kmem_cache_free(bch2_key_cache, ck);
	}

	BUG_ON(atomic_long_read(&bc->nr_dirty) &&
	       !bch2_journal_error(&c->journal) &&
	       test_bit(BCH_FS_WAS_RW, &c->flags));
	BUG_ON(atomic_long_read(&bc->nr_keys));

	mutex_unlock(&bc->lock);

	if (bc->table_init_done)
		rhashtable_destroy(&bc->table);

	free_percpu(bc->pcpu_freed);
}

void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{
	mutex_init(&c->lock);
	INIT_LIST_HEAD(&c->freed);
}

int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
	int ret;

	bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
	if (!bc->pcpu_freed)
		return -ENOMEM;

	ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params);
	if (ret)
		return ret;

	bc->table_init_done = true;

	bc->shrink.seeks		= 1;
	bc->shrink.count_objects	= bch2_btree_key_cache_count;
	bc->shrink.scan_objects		= bch2_btree_key_cache_scan;
	return register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name);
}

void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{
	prt_printf(out, "nr_freed:\t%zu\n",	atomic_long_read(&c->nr_freed));
	prt_printf(out, "nr_keys:\t%lu\n",	atomic_long_read(&c->nr_keys));
	prt_printf(out, "nr_dirty:\t%lu\n",	atomic_long_read(&c->nr_dirty));
}

void bch2_btree_key_cache_exit(void)
{
	if (bch2_key_cache)
		kmem_cache_destroy(bch2_key_cache);
}

int __init bch2_btree_key_cache_init(void)
{
	bch2_key_cache = KMEM_CACHE(bkey_cached, 0);
	if (!bch2_key_cache)
		return -ENOMEM;

	return 0;
}