// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "recovery.h"
#include "trace.h"
#include "varint.h"

#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <linux/sort.h>

const char * const bch2_allocator_states[] = {
#define x(n)	#n,
	ALLOC_THREAD_STATES()
#undef x
	NULL
};

/* Persistent alloc info: */

static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
	BCH_ALLOC_FIELDS_V1()
#undef x
};

struct bkey_alloc_unpacked {
	u64		journal_seq;
	u64		bucket;
	u8		dev;
	u8		gen;
	u8		oldest_gen;
	u8		data_type;
	bool		need_discard:1;
	bool		need_inc_gen:1;
#define x(_name, _bits)	u##_bits _name;
	BCH_ALLOC_FIELDS_V2()
#undef x
};
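
/*
 * v1 alloc keys encode each optional field in a fixed number of bytes; a
 * field is present only if its bit is set in bch_alloc->fields. The helpers
 * below read and write those packed fields:
 */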

static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
				     const void **p, unsigned field)
{
	unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
	u64 v;

	if (!(a->fields & (1 << field)))
		return 0;

	switch (bytes) {
	case 1:
		v = *((const u8 *) *p);
		break;
	case 2:
		v = le16_to_cpup(*p);
		break;
	case 4:
		v = le32_to_cpup(*p);
		break;
	case 8:
		v = le64_to_cpup(*p);
		break;
	default:
		BUG();
	}

	*p += bytes;
	return v;
}

static inline void alloc_field_v1_put(struct bkey_i_alloc *a, void **p,
				      unsigned field, u64 v)
{
	unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];

	if (!v)
		return;

	a->v.fields |= 1 << field;

	switch (bytes) {
	case 1:
		*((u8 *) *p) = v;
		break;
	case 2:
		*((__le16 *) *p) = cpu_to_le16(v);
		break;
	case 4:
		*((__le32 *) *p) = cpu_to_le32(v);
		break;
	case 8:
		*((__le64 *) *p) = cpu_to_le64(v);
		break;
	default:
		BUG();
	}

	*p += bytes;
}

static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
				 struct bkey_s_c k)
{
	const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
	const void *d = in->data;
	unsigned idx = 0;

	out->gen = in->gen;

#define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
	BCH_ALLOC_FIELDS_V1()
#undef x
}
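
/*
 * v2 and v3 alloc keys store their fields as varints, in the order given by
 * BCH_ALLOC_FIELDS_V2(); fields past the end of the key default to 0:
 */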

static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
				struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
	const u8 *in = a.v->data;
	const u8 *end = bkey_val_end(a);
	unsigned fieldnr = 0;
	int ret;
	u64 v;

	out->gen	= a.v->gen;
	out->oldest_gen	= a.v->oldest_gen;
	out->data_type	= a.v->data_type;

#define x(_name, _bits)							\
	if (fieldnr < a.v->nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
	} else {							\
		v = 0;							\
	}								\
	out->_name = v;							\
	if (v != out->_name)						\
		return -1;						\
	fieldnr++;

	BCH_ALLOC_FIELDS_V2()
#undef x
	return 0;
}

static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
				struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
	const u8 *in = a.v->data;
	const u8 *end = bkey_val_end(a);
	unsigned fieldnr = 0;
	int ret;
	u64 v;

	out->gen	= a.v->gen;
	out->oldest_gen	= a.v->oldest_gen;
	out->data_type	= a.v->data_type;
	out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
	out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
	out->journal_seq = le64_to_cpu(a.v->journal_seq);

#define x(_name, _bits)							\
	if (fieldnr < a.v->nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
	} else {							\
		v = 0;							\
	}								\
	out->_name = v;							\
	if (v != out->_name)						\
		return -1;						\
	fieldnr++;

	BCH_ALLOC_FIELDS_V2()
#undef x
	return 0;
}

static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
	struct bkey_alloc_unpacked ret = {
		.dev	= k.k->p.inode,
		.bucket	= k.k->p.offset,
		.gen	= 0,
	};

	switch (k.k->type) {
	case KEY_TYPE_alloc:
		bch2_alloc_unpack_v1(&ret, k);
		break;
	case KEY_TYPE_alloc_v2:
		bch2_alloc_unpack_v2(&ret, k);
		break;
	case KEY_TYPE_alloc_v3:
		bch2_alloc_unpack_v3(&ret, k);
		break;
	}

	return ret;
}
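
/*
 * Convert an alloc key of any older version to the current in-memory
 * bch_alloc_v4 representation:
 */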

void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
	if (k.k->type == KEY_TYPE_alloc_v4) {
		*out = *bkey_s_c_to_alloc_v4(k).v;
	} else {
		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);

		*out = (struct bch_alloc_v4) {
			.journal_seq		= u.journal_seq,
			.flags			= u.need_discard,
			.gen			= u.gen,
			.oldest_gen		= u.oldest_gen,
			.data_type		= u.data_type,
			.stripe_redundancy	= u.stripe_redundancy,
			.dirty_sectors		= u.dirty_sectors,
			.cached_sectors		= u.cached_sectors,
			.io_time[READ]		= u.read_time,
			.io_time[WRITE]		= u.write_time,
			.stripe			= u.stripe,
		};
	}
}

struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *ret;

	if (k.k->type == KEY_TYPE_alloc_v4) {
		ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
		if (!IS_ERR(ret))
			bkey_reassemble(&ret->k_i, k);
	} else {
		ret = bch2_trans_kmalloc(trans, sizeof(*ret));
		if (!IS_ERR(ret)) {
			bkey_alloc_v4_init(&ret->k_i);
			ret->k.p = k.k->p;
			bch2_alloc_to_v4(k, &ret->v);
		}
	}
	return ret;
}
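
/*
 * Look up the alloc key for @pos with an intent-locked, cached iterator and
 * return a mutable v4 copy suitable for updating; on error the iterator is
 * exited and an ERR_PTR is returned:
 */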

struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
			      struct bpos pos)
{
	struct bkey_s_c k;
	struct bkey_i_alloc_v4 *a;
	int ret;

	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
			     BTREE_ITER_WITH_UPDATES|
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ERR_PTR(ret);
	}

	a = bch2_alloc_to_v4_mut(trans, k);
	if (IS_ERR(a))
		bch2_trans_iter_exit(trans, iter);
	return a;
}

static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
{
	unsigned i, bytes = offsetof(struct bch_alloc, data);

	for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
		if (a->fields & (1 << i))
			bytes += BCH_ALLOC_V1_FIELD_BYTES[i];

	return DIV_ROUND_UP(bytes, sizeof(u64));
}

const char *bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);

	if (k.k->p.inode >= c->sb.nr_devices ||
	    !c->devs[k.k->p.inode])
		return "invalid device";

	/* allow for unknown fields */
	if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v))
		return "incorrect value size";

	return NULL;
}

const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_alloc_unpacked u;

	if (k.k->p.inode >= c->sb.nr_devices ||
	    !c->devs[k.k->p.inode])
		return "invalid device";

	if (bch2_alloc_unpack_v2(&u, k))
		return "unpack error";

	return NULL;
}

const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_alloc_unpacked u;
	struct bch_dev *ca;

	if (k.k->p.inode >= c->sb.nr_devices ||
	    !c->devs[k.k->p.inode])
		return "invalid device";

	ca = bch_dev_bkey_exists(c, k.k->p.inode);

	if (k.k->p.offset < ca->mi.first_bucket ||
	    k.k->p.offset >= ca->mi.nbuckets)
		return "invalid bucket";

	if (bch2_alloc_unpack_v3(&u, k))
		return "unpack error";

	return NULL;
}

const char *bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_dev *ca;

	if (k.k->p.inode >= c->sb.nr_devices ||
	    !c->devs[k.k->p.inode])
		return "invalid device";

	ca = bch_dev_bkey_exists(c, k.k->p.inode);

	if (k.k->p.offset < ca->mi.first_bucket ||
	    k.k->p.offset >= ca->mi.nbuckets)
		return "invalid bucket";

	return NULL;
}

void bch2_alloc_v4_swab(struct bkey_s k)
{
	struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;

	a->journal_seq		= swab64(a->journal_seq);
	a->flags		= swab32(a->flags);
	a->dirty_sectors	= swab32(a->dirty_sectors);
	a->cached_sectors	= swab32(a->cached_sectors);
	a->io_time[0]		= swab64(a->io_time[0]);
	a->io_time[1]		= swab64(a->io_time[1]);
	a->stripe		= swab32(a->stripe);
	a->nr_external_backpointers = swab32(a->nr_external_backpointers);
}

void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_alloc_v4 a;

	bch2_alloc_to_v4(k, &a);

	pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu",
	       a.gen, a.oldest_gen, bch2_data_types[a.data_type],
	       a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a));
	pr_buf(out, " dirty_sectors %u",	a.dirty_sectors);
	pr_buf(out, " cached_sectors %u",	a.cached_sectors);
	pr_buf(out, " stripe %u",		a.stripe);
	pr_buf(out, " stripe_redundancy %u",	a.stripe_redundancy);
	pr_buf(out, " read_time %llu",		a.io_time[READ]);
	pr_buf(out, " write_time %llu",		a.io_time[WRITE]);
}
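
/*
 * Initialize the in-memory bucket array (either the primary copy or gc's
 * copy, depending on @gc) from the persistent alloc keys:
 */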

int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_alloc_v4 a;
	struct bch_dev *ca;
	struct bucket *g;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		ca = bch_dev_bkey_exists(c, k.k->p.inode);
		g = __bucket(ca, k.k->p.offset, gc);

		bch2_alloc_to_v4(k, &a);

		if (!gc)
			*bucket_gen(ca, k.k->p.offset) = a.gen;

		g->_mark.gen		= a.gen;
		g->io_time[READ]	= a.io_time[READ];
		g->io_time[WRITE]	= a.io_time[WRITE];
		g->oldest_gen		= !gc ? a.oldest_gen : a.gen;
		g->gen_valid		= 1;

		if (!gc ||
		    (metadata_only &&
		     (a.data_type == BCH_DATA_user ||
		      a.data_type == BCH_DATA_cached ||
		      a.data_type == BCH_DATA_parity))) {
			g->_mark.data_type	= a.data_type;
			g->_mark.dirty_sectors	= a.dirty_sectors;
			g->_mark.cached_sectors	= a.cached_sectors;
			g->_mark.stripe		= a.stripe != 0;
			g->stripe		= a.stripe;
			g->stripe_redundancy	= a.stripe_redundancy;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	bch2_trans_exit(&trans);

	if (ret)
		bch_err(c, "error reading alloc info: %i", ret);

	return ret;
}

/* Bucket IO clocks: */
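
/*
 * Update the persistent last read/write time for a bucket; the read time
 * feeds the LRU-style bucket invalidation heuristics:
 */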

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret = 0;

	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = atomic64_read(&c->io_clock[rw].now);
	if (a->v.io_time[rw] == now)
		goto out;

	a->v.io_time[rw] = now;

	ret   = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* Background allocator thread: */

/*
 * Scans for buckets to be invalidated, invalidates them, rewrites prios/gens
 * (marking them as invalidated on disk), then optionally issues discard
 * commands to the newly free buckets, then puts them on the various freelists.
 */

static bool bch2_can_invalidate_bucket(struct bch_dev *ca, size_t b,
				       struct bucket_mark m)
{
	u8 gc_gen;

	if (!is_available_bucket(m))
		return false;

	if (m.owned_by_allocator)
		return false;

	if (ca->buckets_nouse &&
	    test_bit(b, ca->buckets_nouse))
		return false;

	if (ca->new_fs_bucket_idx) {
		/*
		 * Device or filesystem is still being initialized, and we
		 * haven't fully marked superblocks & journal:
		 */
		if (is_superblock_bucket(ca, b))
			return false;

		if (b < ca->new_fs_bucket_idx)
			return false;
	}

	gc_gen = bucket_gc_gen(bucket(ca, b));

	ca->inc_gen_needs_gc		+= gc_gen >= BUCKET_GC_GEN_MAX / 2;
	ca->inc_gen_really_needs_gc	+= gc_gen >= BUCKET_GC_GEN_MAX;

	return gc_gen < BUCKET_GC_GEN_MAX;
}

/*
 * Determines what order we're going to reuse buckets, smallest bucket_key()
 * first.
 */

static unsigned bucket_sort_key(struct bucket *g, struct bucket_mark m,
				u64 now, u64 last_seq_ondisk)
{
	unsigned used = m.cached_sectors;

	if (used) {
		/*
		 * Prefer to keep buckets that have been read more recently, and
		 * buckets that have more data in them:
		 */
		u64 last_read = max_t(s64, 0, now - g->io_time[READ]);
		u32 last_read_scaled = min_t(u64, U32_MAX, div_u64(last_read, used));

		return -last_read_scaled;
	} else {
		/*
		 * Prefer to use buckets with smaller gc_gen so that we don't
		 * have to walk the btree and recalculate oldest_gen - but shift
		 * off the low bits so that buckets will still have equal sort
		 * keys when there's only a small difference, so that we can
		 * keep sequential buckets together:
		 */
		return bucket_gc_gen(g) >> 4;
	}
}

static inline int bucket_alloc_cmp(alloc_heap *h,
				   struct alloc_heap_entry l,
				   struct alloc_heap_entry r)
{
	return  cmp_int(l.key, r.key) ?:
		cmp_int(r.nr, l.nr) ?:
		cmp_int(l.bucket, r.bucket);
}

static inline int bucket_idx_cmp(const void *_l, const void *_r)
{
	const struct alloc_heap_entry *l = _l, *r = _r;

	return cmp_int(l->bucket, r->bucket);
}

static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
{
	struct bucket_array *buckets;
	struct alloc_heap_entry e = { 0 };
	u64 now, last_seq_ondisk;
	size_t b, i, nr = 0;

	down_read(&ca->bucket_lock);

	buckets = bucket_array(ca);
	ca->alloc_heap.used = 0;
	now = atomic64_read(&c->io_clock[READ].now);
	last_seq_ondisk = c->journal.flushed_seq_ondisk;

	/*
	 * Find buckets with lowest read priority, by building a maxheap sorted
	 * by read priority and repeatedly replacing the maximum element until
	 * all buckets have been visited.
	 */
	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) {
		struct bucket *g = &buckets->b[b];
		struct bucket_mark m = READ_ONCE(g->mark);
		unsigned key = bucket_sort_key(g, m, now, last_seq_ondisk);

		cond_resched();

		if (!bch2_can_invalidate_bucket(ca, b, m))
			continue;

		if (!m.data_type &&
		    bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
						     last_seq_ondisk,
						     ca->dev_idx, b)) {
			ca->buckets_waiting_on_journal++;
			continue;
		}

		if (e.nr && e.bucket + e.nr == b && e.key == key) {
			e.nr++;
		} else {
			if (e.nr)
				heap_add_or_replace(&ca->alloc_heap, e,
						    -bucket_alloc_cmp, NULL);

			e = (struct alloc_heap_entry) {
				.bucket = b,
				.nr	= 1,
				.key	= key,
			};
		}
	}

	if (e.nr)
		heap_add_or_replace(&ca->alloc_heap, e,
				    -bucket_alloc_cmp, NULL);

	for (i = 0; i < ca->alloc_heap.used; i++)
		nr += ca->alloc_heap.data[i].nr;

	while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
		nr -= ca->alloc_heap.data[0].nr;
		heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL);
	}

	up_read(&ca->bucket_lock);
}
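
/*
 * Rebuild the allocator's heap of reclaimable buckets and return the total
 * number of buckets found:
 */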

static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
{
	size_t i, nr = 0;

	ca->inc_gen_needs_gc		= 0;
	ca->inc_gen_really_needs_gc	= 0;
	ca->buckets_waiting_on_journal	= 0;

	find_reclaimable_buckets_lru(c, ca);

	heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL);

	for (i = 0; i < ca->alloc_heap.used; i++)
		nr += ca->alloc_heap.data[i].nr;

	return nr;
}
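
/*
 * Rewrite the alloc key for bucket @b, bumping the gen and clearing the
 * sector counts - this is what marks the bucket as invalidated on disk:
 */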

static int bucket_invalidate_btree(struct btree_trans *trans,
				   struct bch_dev *ca, u64 b,
				   struct bkey_i_alloc_v4 *a)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     POS(ca->dev_idx, b),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	bkey_alloc_v4_init(&a->k_i);
	a->k.p = iter.pos;
	bch2_alloc_to_v4(k, &a->v);
	a->v.gen++;
	a->v.data_type		= 0;
	a->v.dirty_sectors	= 0;
	a->v.cached_sectors	= 0;
	a->v.io_time[READ]	= atomic64_read(&c->io_clock[READ].now);
	a->v.io_time[WRITE]	= atomic64_read(&c->io_clock[WRITE].now);

	ret = bch2_trans_update(trans, &iter, &a->k_i,
				BTREE_TRIGGER_BUCKET_INVALIDATE|
				BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
				      u64 *journal_seq, unsigned flags)
{
	struct bkey_i_alloc_v4 a;
	size_t b;
	u64 commit_seq = 0;
	int ret = 0;

	/*
	 * If the read-only path is trying to shut down, we can't be generating
	 * new btree updates:
	 */
	if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags))
		return 1;

	BUG_ON(!ca->alloc_heap.used ||
	       !ca->alloc_heap.data[0].nr);
	b = ca->alloc_heap.data[0].bucket;

	/* first, put on free_inc and mark as owned by allocator: */
	percpu_down_read(&c->mark_lock);

	bch2_mark_alloc_bucket(c, ca, b, true);

	spin_lock(&c->freelist_lock);
	verify_not_on_freelist(c, ca, b);
	BUG_ON(!fifo_push(&ca->free_inc, b));
	spin_unlock(&c->freelist_lock);

	percpu_up_read(&c->mark_lock);

	ret = bch2_trans_do(c, NULL, &commit_seq,
			    BTREE_INSERT_NOCHECK_RW|
			    BTREE_INSERT_NOFAIL|
			    flags,
			    bucket_invalidate_btree(&trans, ca, b, &a));

	if (!ret) {
		/* remove from alloc_heap: */
		struct alloc_heap_entry e, *top = ca->alloc_heap.data;

		top->bucket++;
		top->nr--;

		if (!top->nr)
			heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);

		/*
		 * If we're invalidating cached data then we need to wait on the
		 * journal commit:
		 */
		if (a.v.data_type)
			*journal_seq = max(*journal_seq, commit_seq);

		/*
		 * We already waited on a.v.journal_seq when we filtered out
		 * buckets that need a journal commit:
		 */
		BUG_ON(*journal_seq > a.v.journal_seq);
	} else {
		size_t b2;

		/* remove from free_inc: */
		percpu_down_read(&c->mark_lock);
		spin_lock(&c->freelist_lock);

		bch2_mark_alloc_bucket(c, ca, b, false);

		BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
		BUG_ON(b != b2);

		spin_unlock(&c->freelist_lock);
		percpu_up_read(&c->mark_lock);
	}

	return ret < 0 ? ret : 0;
}

/*
 * Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc:
 */
static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
{
	u64 journal_seq = 0;
	int ret = 0;

	/* Only use nowait if we've already invalidated at least one bucket: */
	while (!ret &&
	       !fifo_full(&ca->free_inc) &&
	       ca->alloc_heap.used) {
		if (kthread_should_stop()) {
			ret = 1;
			break;
		}

		ret = bch2_invalidate_one_bucket(c, ca, &journal_seq,
				(!fifo_empty(&ca->free_inc)
				 ? BTREE_INSERT_NOWAIT : 0));
		/*
		 * We only want to batch up invalidates when they're going to
		 * require flushing the journal:
		 */
		if (!journal_seq)
			break;
	}

	/* If we used NOWAIT, don't return the error: */
	if (!fifo_empty(&ca->free_inc))
		ret = 0;
	if (ret < 0)
		bch_err(ca, "error invalidating buckets: %i", ret);
	if (ret)
		return ret;

	if (journal_seq)
		ret = bch2_journal_flush_seq(&c->journal, journal_seq);
	if (ret) {
		bch_err(ca, "journal error: %i", ret);
		return ret;
	}

	return 0;
}

static void alloc_thread_set_state(struct bch_dev *ca, unsigned new_state)
{
	if (ca->allocator_state != new_state) {
		ca->allocator_state = new_state;
		closure_wake_up(&ca->fs->freelist_wait);
	}
}
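
/*
 * Move one bucket from free_inc onto one of the per-reserve freelists;
 * returns nonzero if a freelist had room:
 */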

static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
{
	unsigned i;
	int ret = 0;

	spin_lock(&c->freelist_lock);
	for (i = 0; i < RESERVE_NR; i++) {
		/*
		 * Don't strand buckets on the copygc freelist until
		 * after recovery is finished:
		 */
		if (i == RESERVE_movinggc &&
		    !test_bit(BCH_FS_STARTED, &c->flags))
			continue;

		if (fifo_push(&ca->free[i], b)) {
			fifo_pop(&ca->free_inc, b);
			ret = 1;
			break;
		}
	}
	spin_unlock(&c->freelist_lock);

	ca->allocator_state = ret
		? ALLOCATOR_running
		: ALLOCATOR_blocked_full;
	closure_wake_up(&c->freelist_wait);
	return ret;
}
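
/*
 * Issue a discard for the bucket, if discards are enabled and the device
 * supports them:
 */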

static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
{
	if (!c->opts.nochanges &&
	    ca->mi.discard &&
	    bdev_max_discard_sectors(ca->disk_sb.bdev))
		blkdev_issue_discard(ca->disk_sb.bdev, bucket_to_sector(ca, b),
				     ca->mi.bucket_size, GFP_NOFS);
}

static bool allocator_thread_running(struct bch_dev *ca)
{
	unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
		test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
		? ALLOCATOR_running
		: ALLOCATOR_stopped;
	alloc_thread_set_state(ca, state);
	return state == ALLOCATOR_running;
}

static int buckets_available(struct bch_dev *ca, unsigned long gc_count)
{
	s64 available = dev_buckets_reclaimable(ca) -
		(gc_count == ca->fs->gc_count ? ca->inc_gen_really_needs_gc : 0);
	bool ret = available > 0;

	alloc_thread_set_state(ca, ret
			       ? ALLOCATOR_running
			       : ALLOCATOR_blocked);
	return ret;
}

/**
 * bch_allocator_thread - move buckets from free_inc to reserves
 *
 * The free_inc FIFO is populated by find_reclaimable_buckets(), and
 * the reserves are depleted by bucket allocation. When we run out
 * of free_inc, try to invalidate some buckets and write out
 * prios and gens.
 */
static int bch2_allocator_thread(void *arg)
{
	struct bch_dev *ca = arg;
	struct bch_fs *c = ca->fs;
	unsigned long gc_count = c->gc_count;
	size_t nr;
	int ret;

	set_freezable();

	while (1) {
		ret = kthread_wait_freezable(allocator_thread_running(ca));
		if (ret)
			goto stop;

		while (!ca->alloc_heap.used) {
			cond_resched();

			ret = kthread_wait_freezable(buckets_available(ca, gc_count));
			if (ret)
				goto stop;

			gc_count = c->gc_count;
			nr = find_reclaimable_buckets(c, ca);

			if (!nr && ca->buckets_waiting_on_journal) {
				ret = bch2_journal_flush(&c->journal);
				if (ret)
					goto stop;
			} else if (nr < (ca->mi.nbuckets >> 6) &&
				   ca->buckets_waiting_on_journal >= nr / 2) {
				bch2_journal_flush_async(&c->journal, NULL);
			}

			if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) ||
			     ca->inc_gen_really_needs_gc) &&
			    c->gc_thread) {
				atomic_inc(&c->kick_gc);
				wake_up_process(c->gc_thread);
			}

			trace_alloc_scan(ca, nr, ca->inc_gen_needs_gc,
					 ca->inc_gen_really_needs_gc);
		}

		ret = bch2_invalidate_buckets(c, ca);
		if (ret)
			goto stop;

		while (!fifo_empty(&ca->free_inc)) {
			u64 b = fifo_peek(&ca->free_inc);

			discard_one_bucket(c, ca, b);

			ret = kthread_wait_freezable(push_invalidated_bucket(c, ca, b));
			if (ret)
				goto stop;
		}
	}
stop:
	alloc_thread_set_state(ca, ALLOCATOR_stopped);
	return 0;
}

/* Startup/shutdown (ro/rw): */

void bch2_recalc_capacity(struct bch_fs *c)
{
	struct bch_dev *ca;
	u64 capacity = 0, reserved_sectors = 0, gc_reserve;
	unsigned bucket_size_max = 0;
	unsigned long ra_pages = 0;
	unsigned i, j;

	lockdep_assert_held(&c->state_lock);

	for_each_online_member(ca, c, i) {
		struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;

		ra_pages += bdi->ra_pages;
	}

	bch2_set_ra_pages(c, ra_pages);

	for_each_rw_member(ca, c, i) {
		u64 dev_reserve = 0;

		/*
		 * We need to reserve buckets (from the number
		 * of currently available buckets) against
		 * foreground writes so that mainly copygc can
		 * make forward progress.
		 *
		 * We need enough to refill the various reserves
		 * from scratch - copygc will use its entire
		 * reserve all at once, then run again when
		 * its reserve is refilled (from the formerly
		 * available buckets).
		 *
		 * This reserve is just used when considering if
		 * allocations for foreground writes must wait -
		 * not -ENOSPC calculations.
		 */
		for (j = 0; j < RESERVE_none; j++)
			dev_reserve += ca->free[j].size;

		dev_reserve += 1;	/* btree write point */
		dev_reserve += 1;	/* copygc write point */
		dev_reserve += 1;	/* rebalance write point */

		dev_reserve *= ca->mi.bucket_size;

		capacity += bucket_to_sector(ca, ca->mi.nbuckets -
					     ca->mi.first_bucket);

		reserved_sectors += dev_reserve * 2;

		bucket_size_max = max_t(unsigned, bucket_size_max,
					ca->mi.bucket_size);
	}

	gc_reserve = c->opts.gc_reserve_bytes
		? c->opts.gc_reserve_bytes >> 9
		: div64_u64(capacity * c->opts.gc_reserve_percent, 100);

	reserved_sectors = max(gc_reserve, reserved_sectors);

	reserved_sectors = min(reserved_sectors, capacity);

	c->capacity = capacity - reserved_sectors;

	c->bucket_size_max = bucket_size_max;

	/* Wake up in case someone was waiting for buckets: */
	closure_wake_up(&c->freelist_wait);
}

static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
	struct open_bucket *ob;
	bool ret = false;

	for (ob = c->open_buckets;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
	     ob++) {
		spin_lock(&ob->lock);
		if (ob->valid && !ob->on_partial_list &&
		    ob->dev == ca->dev_idx)
			ret = true;
		spin_unlock(&ob->lock);
	}

	return ret;
}

/* device goes ro: */
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned i;

	BUG_ON(ca->alloc_thread);

	/* First, remove device from allocation groups: */
	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		clear_bit(ca->dev_idx, c->rw_devs[i].d);

	/*
	 * Capacity is calculated based off of devices in allocation groups:
	 */
	bch2_recalc_capacity(c);

	/* Next, close write points that point to this device... */
	for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
		bch2_writepoint_stop(c, ca, &c->write_points[i]);

	bch2_writepoint_stop(c, ca, &c->copygc_write_point);
	bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
	bch2_writepoint_stop(c, ca, &c->btree_write_point);

	mutex_lock(&c->btree_reserve_cache_lock);
	while (c->btree_reserve_cache_nr) {
		struct btree_alloc *a =
			&c->btree_reserve_cache[--c->btree_reserve_cache_nr];

		bch2_open_buckets_put(c, &a->ob);
	}
	mutex_unlock(&c->btree_reserve_cache_lock);

	while (1) {
		struct open_bucket *ob;

		spin_lock(&c->freelist_lock);
		if (!ca->open_buckets_partial_nr) {
			spin_unlock(&c->freelist_lock);
			break;
		}
		ob = c->open_buckets +
			ca->open_buckets_partial[--ca->open_buckets_partial_nr];
		ob->on_partial_list = false;
		spin_unlock(&c->freelist_lock);

		bch2_open_bucket_put(c, ob);
	}

	bch2_ec_stop_dev(c, ca);

	/*
	 * Wake up threads that were blocked on allocation, so they can notice
	 * the device can no longer be removed and the capacity has changed:
	 */
	closure_wake_up(&c->freelist_wait);

	/*
	 * journal_res_get() can block waiting for free space in the journal -
	 * it needs to notice there may not be devices to allocate from anymore:
	 */
	wake_up(&c->journal.wait);

	/* Now wait for any in flight writes: */
	closure_wait_event(&c->open_buckets_wait,
			   !bch2_dev_has_open_write_point(c, ca));
}

/* device goes rw: */
void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		if (ca->mi.data_allowed & (1 << i))
			set_bit(ca->dev_idx, c->rw_devs[i].d);
}

void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
{
	if (ca->alloc_thread)
		closure_wait_event(&c->freelist_wait,
				   ca->allocator_state != ALLOCATOR_running);
}

/* stop allocator thread: */
void bch2_dev_allocator_stop(struct bch_dev *ca)
{
	struct task_struct *p;

	p = rcu_dereference_protected(ca->alloc_thread, 1);
	ca->alloc_thread = NULL;

	/*
	 * We need an rcu barrier between setting ca->alloc_thread = NULL and
	 * the thread shutting down to avoid bch2_wake_allocator() racing:
	 *
	 * XXX: it would be better to have the rcu barrier be asynchronous
	 * instead of blocking us here
	 */
	synchronize_rcu();

	if (p) {
		kthread_stop(p);
		put_task_struct(p);
	}
}

/* start allocator thread: */
int bch2_dev_allocator_start(struct bch_dev *ca)
{
	struct task_struct *p;

	/*
	 * allocator thread already started?
	 */
	if (ca->alloc_thread)
		return 0;

	p = kthread_create(bch2_allocator_thread, ca,
			   "bch-alloc/%s", ca->name);
	if (IS_ERR(p)) {
		bch_err(ca->fs, "error creating allocator thread: %li",
			PTR_ERR(p));
		return PTR_ERR(p);
	}

	get_task_struct(p);
	rcu_assign_pointer(ca->alloc_thread, p);
	wake_up_process(p);
	return 0;
}

void bch2_fs_allocator_background_init(struct bch_fs *c)
{
	spin_lock_init(&c->freelist_lock);
}