// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
#include "trace.h"
#include "varint.h"

#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <linux/sort.h>

static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
/* Persistent alloc info: */

static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#define x(name, bits)	[BCH_ALLOC_FIELD_V1_##name] = bits / 8,
	BCH_ALLOC_FIELDS_V1()
#undef x
};

struct bkey_alloc_unpacked {
	u64		journal_seq;
	u8		gen;
	u8		oldest_gen;
	u8		data_type;
	bool		need_discard:1;
	bool		need_inc_gen:1;
#define x(_name, _bits)	u##_bits _name;
	BCH_ALLOC_FIELDS_V2()
#undef x
};
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
				     const void **p, unsigned field)
{
	unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
	u64 v;

	if (!(a->fields & (1 << field)))
		return 0;

	switch (bytes) {
	case 1:
		v = *((const u8 *) *p);
		break;
	case 2:
		v = le16_to_cpup(*p);
		break;
	case 4:
		v = le32_to_cpup(*p);
		break;
	case 8:
		v = le64_to_cpup(*p);
		break;
	default:
		BUG();
	}

	*p += bytes;
	return v;
}
static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
				 struct bkey_s_c k)
{
	const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
	const void *d = in->data;
	unsigned idx = 0;

	out->gen = in->gen;

#define x(_name, _bits)	out->_name = alloc_field_v1_get(in, &d, idx++);
	BCH_ALLOC_FIELDS_V1()
#undef x
}
static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
				struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
	const u8 *in = a.v->data;
	const u8 *end = bkey_val_end(a);
	unsigned fieldnr = 0;
	int ret;
	u64 v;

	out->gen	= a.v->gen;
	out->oldest_gen	= a.v->oldest_gen;
	out->data_type	= a.v->data_type;

#define x(_name, _bits)							\
	if (fieldnr < a.v->nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
	} else {							\
		v = 0;							\
	}								\
	out->_name = v;							\
	if (v != out->_name)						\
		return -1;						\
	fieldnr++;

	BCH_ALLOC_FIELDS_V2()
#undef x
	return 0;
}
static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
				struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
	const u8 *in = a.v->data;
	const u8 *end = bkey_val_end(a);
	unsigned fieldnr = 0;
	int ret;
	u64 v;

	out->gen	= a.v->gen;
	out->oldest_gen	= a.v->oldest_gen;
	out->data_type	= a.v->data_type;
	out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
	out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
	out->journal_seq = le64_to_cpu(a.v->journal_seq);

#define x(_name, _bits)							\
	if (fieldnr < a.v->nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
	} else {							\
		v = 0;							\
	}								\
	out->_name = v;							\
	if (v != out->_name)						\
		return -1;						\
	fieldnr++;

	BCH_ALLOC_FIELDS_V2()
#undef x
	return 0;
}
static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
	struct bkey_alloc_unpacked ret = { .gen = 0 };

	switch (k.k->type) {
	case KEY_TYPE_alloc:
		bch2_alloc_unpack_v1(&ret, k);
		break;
	case KEY_TYPE_alloc_v2:
		bch2_alloc_unpack_v2(&ret, k);
		break;
	case KEY_TYPE_alloc_v3:
		bch2_alloc_unpack_v3(&ret, k);
		break;
	}

	return ret;
}
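
/*
 * Note: KEY_TYPE_alloc (v1), alloc_v2 and alloc_v3 are older on-disk formats;
 * in this file they are only ever unpacked so that keys written by old
 * filesystems can be converted to the current bch_alloc_v4 representation.
 */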
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
{
	unsigned i, bytes = offsetof(struct bch_alloc, data);

	for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
		if (a->fields & (1 << i))
			bytes += BCH_ALLOC_V1_FIELD_BYTES[i];

	return DIV_ROUND_UP(bytes, sizeof(u64));
}
int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
{
	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
	int ret = 0;

	/* allow for unknown fields */
	bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err,
			 alloc_v1_val_size_bad,
			 "incorrect value size (%zu < %u)",
			 bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
fsck_err:
	return ret;
}
int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err,
			 alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}
int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err,
			 alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}
int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags, struct printbuf *err)
{
	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
	int ret = 0;

	bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err,
			 alloc_v4_val_size_bad,
			 "bad val size (%u > %zu)",
			 alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));

	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
			 alloc_v4_backpointers_start_bad,
			 "invalid backpointers_start");

	bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err,
			 alloc_key_data_type_bad,
			 "invalid data type (got %u should be %u)",
			 a.v->data_type, alloc_data_type(*a.v, a.v->data_type));

	for (unsigned i = 0; i < 2; i++)
		bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
				 c, err,
				 alloc_key_io_time_bad,
				 "invalid io_time[%s]: %llu, max %llu",
				 i == READ ? "read" : "write",
				 a.v->io_time[i], LRU_TIME_MAX);

	unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
		offsetof(struct bch_alloc_v4, stripe_sectors)
		? a.v->stripe_sectors
		: 0;

	switch (a.v->data_type) {
	case BCH_DATA_free:
	case BCH_DATA_need_gc_gens:
	case BCH_DATA_need_discard:
		bkey_fsck_err_on(stripe_sectors ||
				 a.v->dirty_sectors ||
				 a.v->cached_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_empty_but_have_data,
				 "empty data type free but have data %u.%u.%u %u",
				 stripe_sectors,
				 a.v->dirty_sectors,
				 a.v->cached_sectors,
				 a.v->stripe);
		break;
	case BCH_DATA_sb:
	case BCH_DATA_journal:
	case BCH_DATA_btree:
	case BCH_DATA_user:
	case BCH_DATA_parity:
		bkey_fsck_err_on(!a.v->dirty_sectors &&
				 !stripe_sectors,
				 c, err, alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
				 bch2_data_type_str(a.v->data_type));
		break;
	case BCH_DATA_cached:
		bkey_fsck_err_on(!a.v->cached_sectors ||
				 a.v->dirty_sectors ||
				 stripe_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_cached_inconsistency,
				 "data type inconsistency");

		bkey_fsck_err_on(!a.v->io_time[READ] &&
				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
				 c, err, alloc_key_cached_but_read_time_zero,
				 "cached bucket with read_time == 0");
		break;
	case BCH_DATA_stripe:
		break;
	}
fsck_err:
	return ret;
}
void bch2_alloc_v4_swab(struct bkey_s k)
{
	struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
	struct bch_backpointer *bp, *bps;

	a->journal_seq		= swab64(a->journal_seq);
	a->flags		= swab32(a->flags);
	a->dirty_sectors	= swab32(a->dirty_sectors);
	a->cached_sectors	= swab32(a->cached_sectors);
	a->io_time[0]		= swab64(a->io_time[0]);
	a->io_time[1]		= swab64(a->io_time[1]);
	a->stripe		= swab32(a->stripe);
	a->nr_external_backpointers = swab32(a->nr_external_backpointers);
	a->fragmentation_lru	= swab64(a->fragmentation_lru);
	a->stripe_sectors	= swab32(a->stripe_sectors);

	bps = alloc_v4_backpointers(a);
	for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
		bp->bucket_offset	= swab40(bp->bucket_offset);
		bp->bucket_len		= swab32(bp->bucket_len);
		bch2_bpos_swab(&bp->pos);
	}
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_alloc_v4 _a;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);

	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
	bch2_prt_data_type(out, a->data_type);
	prt_newline(out);
	prt_printf(out, "journal_seq %llu\n",		a->journal_seq);
	prt_printf(out, "need_discard %llu\n",		BCH_ALLOC_V4_NEED_DISCARD(a));
	prt_printf(out, "need_inc_gen %llu\n",		BCH_ALLOC_V4_NEED_INC_GEN(a));
	prt_printf(out, "dirty_sectors %u\n",		a->dirty_sectors);
	prt_printf(out, "stripe_sectors %u\n",		a->stripe_sectors);
	prt_printf(out, "cached_sectors %u\n",		a->cached_sectors);
	prt_printf(out, "stripe %u\n",			a->stripe);
	prt_printf(out, "stripe_redundancy %u\n",	a->stripe_redundancy);
	prt_printf(out, "io_time[READ] %llu\n",		a->io_time[READ]);
	prt_printf(out, "io_time[WRITE] %llu\n",	a->io_time[WRITE]);
	prt_printf(out, "fragmentation %llu\n",		a->fragmentation_lru);
	prt_printf(out, "bp_start %llu\n",		BCH_ALLOC_V4_BACKPOINTERS_START(a));
	printbuf_indent_sub(out, 2);
}
void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
	if (k.k->type == KEY_TYPE_alloc_v4) {
		void *src, *dst;

		*out = *bkey_s_c_to_alloc_v4(k).v;

		src = alloc_v4_backpointers(out);
		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
		dst = alloc_v4_backpointers(out);

		if (src < dst)
			memset(src, 0, dst - src);

		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
	} else {
		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);

		*out = (struct bch_alloc_v4) {
			.journal_seq		= u.journal_seq,
			.flags			= u.need_discard,
			.gen			= u.gen,
			.oldest_gen		= u.oldest_gen,
			.data_type		= u.data_type,
			.stripe_redundancy	= u.stripe_redundancy,
			.dirty_sectors		= u.dirty_sectors,
			.cached_sectors		= u.cached_sectors,
			.io_time[READ]		= u.read_time,
			.io_time[WRITE]		= u.write_time,
			.stripe			= u.stripe,
		};

		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
	}
}
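
/*
 * __bch2_alloc_to_v4() gives callers a uniform in-memory view of an alloc
 * key: a v4 key is copied (zero-padding any fields that a smaller on-disk v4
 * layout didn't have), while v1/v2/v3 keys are unpacked and translated field
 * by field.
 */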
static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *ret;

	ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4)));
	if (IS_ERR(ret))
		return ret;

	if (k.k->type == KEY_TYPE_alloc_v4) {
		void *src, *dst;

		bkey_reassemble(&ret->k_i, k);

		src = alloc_v4_backpointers(&ret->v);
		SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
		dst = alloc_v4_backpointers(&ret->v);

		if (src < dst)
			memset(src, 0, dst - src);

		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
		set_alloc_v4_u64s(ret);
	} else {
		bkey_alloc_v4_init(&ret->k_i);
		ret->k.p = k.k->p;
		bch2_alloc_to_v4(k, &ret->v);
	}
	return ret;
}
static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v4 a;

	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
	    ((a = bkey_s_c_to_alloc_v4(k), true) &&
	     BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0))
		return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4);

	return __bch2_alloc_to_v4_mut(trans, k);
}

struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	return bch2_alloc_to_v4_mut_inlined(trans, k);
}
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter,
				       struct bpos pos)
{
	struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
					       BTREE_ITER_with_updates|
					       BTREE_ITER_cached|
					       BTREE_ITER_intent);
	int ret = bkey_err(k);
	if (unlikely(ret))
		return ERR_PTR(ret);

	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (unlikely(ret))
		goto err;
	return a;
err:
	bch2_trans_iter_exit(trans, iter);
	return ERR_PTR(ret);
}

__flatten
struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
	int ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ERR_PTR(ret);

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	bch2_trans_iter_exit(trans, &iter);
	return unlikely(ret) ? ERR_PTR(ret) : a;
}
static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
{
	*offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;

	pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS;
	return pos;
}

static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset)
{
	pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS;
	pos.offset += offset;
	return pos;
}

static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
{
	return k.k->type == KEY_TYPE_bucket_gens
		? bkey_s_c_to_bucket_gens(k).v->gens[offset]
		: 0;
}
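
/*
 * Example of the mapping above (assuming KEY_TYPE_BUCKET_GENS_BITS == 8,
 * i.e. 256 gens packed per bucket_gens key): the alloc key for bucket 2:1000
 * maps to bucket_gens pos 2:3 at offset 1000 & 255 == 232, and
 * bucket_gens_pos_to_alloc() inverts that back to 2:1000.
 */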
int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
{
	int ret = 0;

	bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err,
			 bucket_gens_val_size_bad,
			 "bad val size (%zu != %zu)",
			 bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
fsck_err:
	return ret;
}
void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k);
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) {
		if (i)
			prt_char(out, ' ');
		prt_printf(out, "%u", g.v->gens[i]);
	}
}
int bch2_bucket_gens_init(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bkey_i_bucket_gens g;
	bool have_bucket_gens_key = false;
	int ret;

	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
				 BTREE_ITER_prefetch, k, ({
		/*
		 * Not a fsck error because this is checked/repaired by
		 * bch2_check_alloc_key() which runs later:
		 */
		if (!bch2_dev_bucket_exists(c, k.k->p))
			continue;

		struct bch_alloc_v4 a;
		u8 gen = bch2_alloc_to_v4(k, &a)->gen;
		unsigned offset;
		struct bpos pos = alloc_gens_pos(iter.pos, &offset);
		int ret2 = 0;

		if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
			ret2 =  bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			if (ret2)
				goto iter_err;
			have_bucket_gens_key = false;
		}

		if (!have_bucket_gens_key) {
			bkey_bucket_gens_init(&g.k_i);
			g.k.p = pos;
			have_bucket_gens_key = true;
		}

		g.v.gens[offset] = gen;
iter_err:
		ret2;
	}));

	if (have_bucket_gens_key && !ret)
		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));

	bch2_trans_put(trans);

	bch_err_fn(c, ret);
	return ret;
}
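
/*
 * bch2_bucket_gens_init() builds the bucket_gens btree from scratch: it walks
 * the alloc btree, packs each existing bucket's generation into the
 * corresponding bucket_gens key, and commits a key once the scan moves past
 * the range that key covers.
 */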
int bch2_alloc_read(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bch_dev *ca = NULL;
	int ret;

	down_read(&c->gc_lock);

	if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
		ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
					 BTREE_ITER_prefetch, k, ({
			u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
			u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;

			if (k.k->type != KEY_TYPE_bucket_gens)
				continue;

			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			/*
			 * Not a fsck error because this is checked/repaired by
			 * bch2_check_alloc_key() which runs later:
			 */
			if (!ca) {
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}

			const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;

			for (u64 b = max_t(u64, ca->mi.first_bucket, start);
			     b < min_t(u64, ca->mi.nbuckets, end);
			     b++)
				*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
			0;
		}));
	} else {
		ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
					 BTREE_ITER_prefetch, k, ({
			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			/*
			 * Not a fsck error because this is checked/repaired by
			 * bch2_check_alloc_key() which runs later:
			 */
			if (!ca) {
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}

			struct bch_alloc_v4 a;
			*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
			0;
		}));
	}

	bch2_dev_put(ca);
	bch2_trans_put(trans);
	up_read(&c->gc_lock);

	bch_err_fn(c, ret);
	return ret;
}
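
/*
 * At startup the in-memory bucket gen array is populated from whichever
 * source is available: the compact bucket_gens btree when the superblock
 * says the upgrade to it completed, otherwise a full scan of the alloc
 * btree.
 */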
/* Free space/discard btree: */

static int bch2_bucket_do_index(struct btree_trans *trans,
				struct bch_dev *ca,
				struct bkey_s_c alloc_k,
				const struct bch_alloc_v4 *a,
				bool set)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c old;
	struct bkey_i *k;
	enum btree_id btree;
	enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted;
	enum bch_bkey_type new_type =  set ? KEY_TYPE_set : KEY_TYPE_deleted;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (a->data_type != BCH_DATA_free &&
	    a->data_type != BCH_DATA_need_discard)
		return 0;

	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.type = new_type;

	switch (a->data_type) {
	case BCH_DATA_free:
		btree = BTREE_ID_freespace;
		k->k.p = alloc_freespace_pos(alloc_k.k->p, *a);
		bch2_key_resize(&k->k, 1);
		break;
	case BCH_DATA_need_discard:
		btree = BTREE_ID_need_discard;
		k->k.p = alloc_k.k->p;
		break;
	default:
		return 0;
	}

	old = bch2_bkey_get_iter(trans, &iter, btree,
				 bkey_start_pos(&k->k),
				 BTREE_ITER_intent);
	ret = bkey_err(old);
	if (ret)
		return ret;

	if (ca->mi.freespace_initialized &&
	    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info &&
	    bch2_trans_inconsistent_on(old.k->type != old_type, trans,
			"incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n"
			"  for %s",
			set ? "setting" : "clearing",
			bch2_btree_id_str(btree),
			iter.pos.inode,
			iter.pos.offset,
			bch2_bkey_types[old.k->type],
			bch2_bkey_types[old_type],
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		ret = -EIO;
		goto err;
	}

	ret = bch2_trans_update(trans, &iter, k, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
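
/*
 * The freespace and need_discard btrees are secondary indexes on the alloc
 * btree: bch2_bucket_do_index() adds or removes the index entry for a bucket
 * whose alloc key says it is free or needs a discard, and flags an
 * inconsistency if the entry it expected to replace isn't there.
 */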
static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
					   struct bpos bucket, u8 gen)
{
	struct btree_iter iter;
	unsigned offset;
	struct bpos pos = alloc_gens_pos(bucket, &offset);
	struct bkey_i_bucket_gens *g;
	struct bkey_s_c k;
	int ret;

	g = bch2_trans_kmalloc(trans, sizeof(*g));
	ret = PTR_ERR_OR_ZERO(g);
	if (ret)
		return ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos,
			       BTREE_ITER_intent|
			       BTREE_ITER_with_updates);
	ret = bkey_err(k);
	if (ret)
		return ret;

	if (k.k->type != KEY_TYPE_bucket_gens) {
		bkey_bucket_gens_init(&g->k_i);
		g->k.p = iter.pos;
	} else {
		bkey_reassemble(&g->k_i, k);
	}

	g->v.gens[offset] = gen;

	ret = bch2_trans_update(trans, &iter, &g->k_i, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
int bch2_trigger_alloc(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s new,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
	if (!ca)
		return -EIO;

	struct bch_alloc_v4 old_a_convert;
	const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);

	if (flags & BTREE_TRIGGER_transactional) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;

		alloc_data_type_set(new_a, new_a->data_type);

		if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
			new_a->io_time[READ] = bch2_current_io_time(c, READ);
			new_a->io_time[WRITE] = bch2_current_io_time(c, WRITE);
			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
			SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
		}

		if (data_type_is_empty(new_a->data_type) &&
		    BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
			new_a->gen++;
			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
			alloc_data_type_set(new_a, new_a->data_type);
		}

		if (old_a->data_type != new_a->data_type ||
		    (new_a->data_type == BCH_DATA_free &&
		     alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
			ret =   bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
				bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
			if (ret)
				goto err;
		}

		if (new_a->data_type == BCH_DATA_cached &&
		    !new_a->io_time[READ])
			new_a->io_time[READ] = bch2_current_io_time(c, READ);

		u64 old_lru = alloc_lru_idx_read(*old_a);
		u64 new_lru = alloc_lru_idx_read(*new_a);
		if (old_lru != new_lru) {
			ret = bch2_lru_change(trans, new.k->p.inode,
					      bucket_to_u64(new.k->p),
					      old_lru, new_lru);
			if (ret)
				goto err;
		}

		new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca);
		if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
			ret = bch2_lru_change(trans,
					      BCH_LRU_FRAGMENTATION_START,
					      bucket_to_u64(new.k->p),
					      old_a->fragmentation_lru, new_a->fragmentation_lru);
			if (ret)
				goto err;
		}

		if (old_a->gen != new_a->gen) {
			ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
			if (ret)
				goto err;
		}

		/*
		 * need to know if we're getting called from the invalidate path or
		 * not:
		 */

		if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
		    old_a->cached_sectors) {
			ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
							      -((s64) old_a->cached_sectors));
			if (ret)
				goto err;
		}
	}

	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
		u64 journal_seq = trans->journal_res.seq;
		u64 bucket_journal_seq = new_a->journal_seq;

		if ((flags & BTREE_TRIGGER_insert) &&
		    data_type_is_empty(old_a->data_type) !=
		    data_type_is_empty(new_a->data_type) &&
		    new.k->type == KEY_TYPE_alloc_v4) {
			struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v;

			/*
			 * If the btree updates referring to a bucket weren't flushed
			 * before the bucket became empty again, then we don't have
			 * to wait on a journal flush before we can reuse the bucket:
			 */
			v->journal_seq = bucket_journal_seq =
				data_type_is_empty(new_a->data_type) &&
				(journal_seq == v->journal_seq ||
				 bch2_journal_noflush_seq(&c->journal, v->journal_seq))
				? 0 : journal_seq;
		}

		if (!data_type_is_empty(old_a->data_type) &&
		    data_type_is_empty(new_a->data_type) &&
		    bucket_journal_seq) {
			ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
					c->journal.flushed_seq_ondisk,
					new.k->p.inode, new.k->p.offset,
					bucket_journal_seq);
			if (ret) {
				bch2_fs_fatal_error(c,
					"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
				goto err;
			}
		}

		percpu_down_read(&c->mark_lock);
		if (new_a->gen != old_a->gen) {
			u8 *gen = bucket_gen(ca, new.k->p.offset);
			if (unlikely(!gen)) {
				percpu_up_read(&c->mark_lock);
				goto invalid_bucket;
			}
			*gen = new_a->gen;
		}

		bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
		percpu_up_read(&c->mark_lock);

#define eval_state(_a, expr)	({ const struct bch_alloc_v4 *a = _a; expr; })
#define statechange(expr)	!eval_state(old_a, expr) && eval_state(new_a, expr)
#define bucket_flushed(a)	(!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)

		if (statechange(a->data_type == BCH_DATA_free) &&
		    bucket_flushed(new_a))
			closure_wake_up(&c->freelist_wait);

		if (statechange(a->data_type == BCH_DATA_need_discard) &&
		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
		    bucket_flushed(new_a))
			bch2_discard_one_bucket_fast(ca, new.k->p.offset);

		if (statechange(a->data_type == BCH_DATA_cached) &&
		    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
		    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
			bch2_dev_do_invalidates(ca);

		if (statechange(a->data_type == BCH_DATA_need_gc_gens))
			bch2_gc_gens_async(c);
	}

	if ((flags & BTREE_TRIGGER_gc) &&
	    (flags & BTREE_TRIGGER_bucket_invalidate)) {
		struct bch_alloc_v4 new_a_convert;
		const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);

		percpu_down_read(&c->mark_lock);
		struct bucket *g = gc_bucket(ca, new.k->p.offset);
		if (unlikely(!g)) {
			percpu_up_read(&c->mark_lock);
			goto invalid_bucket;
		}
		g->gen_valid = 1;

		bucket_lock(g);
		g->gen_valid		= 1;
		g->gen			= new_a->gen;
		g->data_type		= new_a->data_type;
		g->stripe		= new_a->stripe;
		g->stripe_redundancy	= new_a->stripe_redundancy;
		g->dirty_sectors	= new_a->dirty_sectors;
		g->cached_sectors	= new_a->cached_sectors;
		bucket_unlock(g);
		percpu_up_read(&c->mark_lock);
	}
err:
	printbuf_exit(&buf);
	bch2_dev_put(ca);
	return ret;
invalid_bucket:
	bch2_fs_inconsistent(c, "reference to invalid bucket\n  %s",
			     (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
	ret = -EIO;
	goto err;
}
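
/*
 * Summary of the trigger above: the transactional phase keeps the secondary
 * indexes (freespace, need_discard, bucket_gens, LRUs) consistent with the
 * new alloc key; the atomic phase updates in-memory state and kicks off
 * discard/invalidate/gc_gens work as buckets change state; the gc phase
 * mirrors the key into the gc bucket array.
 */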
/*
 * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
 * extents style btrees, but works on non-extents btrees:
 */
static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
{
	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);

	if (bkey_err(k))
		return k;

	if (k.k->type) {
		return k;
	} else {
		struct btree_iter iter2;
		struct bpos next;

		bch2_trans_copy_iter(&iter2, iter);

		struct btree_path *path = btree_iter_path(iter->trans, iter);
		if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
			end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));

		end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));

		/*
		 * btree node min/max is a closed interval, upto takes a half
		 * open interval:
		 */
		k = bch2_btree_iter_peek_upto(&iter2, end);
		next = iter2.pos;
		bch2_trans_iter_exit(iter->trans, &iter2);

		BUG_ON(next.offset >= iter->pos.offset + U32_MAX);

		if (bkey_err(k))
			return k;

		bkey_init(hole);
		hole->p = iter->pos;

		bch2_key_resize(hole, next.offset - iter->pos.offset);
		return (struct bkey_s_c) { hole, NULL };
	}
}
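
/*
 * The synthesized hole key above is what lets the fsck passes below walk the
 * alloc btree and its indexes in lockstep: a real key describes one bucket,
 * while a hole key covers the run of missing buckets up to the next real key
 * (capped so its size still fits in a bkey).
 */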
static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket)
{
	if (*ca) {
		if (bucket->offset < (*ca)->mi.first_bucket)
			bucket->offset = (*ca)->mi.first_bucket;

		if (bucket->offset < (*ca)->mi.nbuckets)
			return true;

		bch2_dev_put(*ca);
		*ca = NULL;
		bucket->inode++;
		bucket->offset = 0;
	}

	rcu_read_lock();
	*ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
	if (*ca) {
		*bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
		bch2_dev_get(*ca);
	}
	rcu_read_unlock();

	return *ca != NULL;
}
static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter,
					struct bch_dev **ca, struct bkey *hole)
{
	struct bch_fs *c = iter->trans->c;
	struct bkey_s_c k;
again:
	k = bch2_get_key_or_hole(iter, POS_MAX, hole);
	if (bkey_err(k))
		return k;

	*ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode);

	if (!k.k->type) {
		struct bpos hole_start = bkey_start_pos(k.k);

		if (!*ca || !bucket_valid(*ca, hole_start.offset)) {
			if (!next_bucket(c, ca, &hole_start))
				return bkey_s_c_null;

			bch2_btree_iter_set_pos(iter, hole_start);
			goto again;
		}

		if (k.k->p.offset > (*ca)->mi.nbuckets)
			bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset);
	}

	return k;
}
static noinline_for_stack
int bch2_check_alloc_key(struct btree_trans *trans,
			 struct bkey_s_c alloc_k,
			 struct btree_iter *alloc_iter,
			 struct btree_iter *discard_iter,
			 struct btree_iter *freespace_iter,
			 struct btree_iter *bucket_gens_iter)
{
	struct bch_fs *c = trans->c;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	unsigned discard_key_type, freespace_key_type;
	unsigned gens_offset;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
	if (fsck_err_on(!ca,
			c, alloc_key_to_missing_dev_bucket,
			"alloc key for invalid device:bucket %llu:%llu",
			alloc_k.k->p.inode, alloc_k.k->p.offset))
		ret = bch2_btree_delete_at(trans, alloc_iter, 0);
	if (!ca)
		return ret;

	if (!ca->mi.freespace_initialized)
		goto out;

	a = bch2_alloc_to_v4(alloc_k, &a_convert);

	discard_key_type = a->data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0;
	bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
	k = bch2_btree_iter_peek_slot(discard_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (fsck_err_on(k.k->type != discard_key_type,
			c, need_discard_key_wrong,
			"incorrect key in need_discard btree (got %s should be %s)\n"
			"  %s",
			bch2_bkey_types[k.k->type],
			bch2_bkey_types[discard_key_type],
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i *update =
			bch2_trans_kmalloc(trans, sizeof(*update));

		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_init(&update->k);
		update->k.type	= discard_key_type;
		update->k.p	= discard_iter->pos;

		ret = bch2_trans_update(trans, discard_iter, update, 0);
		if (ret)
			goto err;
	}

	freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0;
	bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
	k = bch2_btree_iter_peek_slot(freespace_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (fsck_err_on(k.k->type != freespace_key_type,
			c, freespace_key_wrong,
			"incorrect key in freespace btree (got %s should be %s)\n"
			"  %s",
			bch2_bkey_types[k.k->type],
			bch2_bkey_types[freespace_key_type],
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i *update =
			bch2_trans_kmalloc(trans, sizeof(*update));

		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_init(&update->k);
		update->k.type	= freespace_key_type;
		update->k.p	= freespace_iter->pos;
		bch2_key_resize(&update->k, 1);

		ret = bch2_trans_update(trans, freespace_iter, update, 0);
		if (ret)
			goto err;
	}

	bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
	k = bch2_btree_iter_peek_slot(bucket_gens_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
			c, bucket_gens_key_wrong,
			"incorrect gen in bucket_gens btree (got %u should be %u)\n"
			"  %s",
			alloc_gen(k, gens_offset), a->gen,
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i_bucket_gens *g =
			bch2_trans_kmalloc(trans, sizeof(*g));

		ret = PTR_ERR_OR_ZERO(g);
		if (ret)
			goto err;

		if (k.k->type == KEY_TYPE_bucket_gens) {
			bkey_reassemble(&g->k_i, k);
		} else {
			bkey_bucket_gens_init(&g->k_i);
			g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset);
		}

		g->v.gens[gens_offset] = a->gen;

		ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
		if (ret)
			goto err;
	}
out:
err:
fsck_err:
	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
}
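
/*
 * bch2_check_alloc_key() repairs the secondary indexes for one bucket: the
 * need_discard and freespace btrees must have a KEY_TYPE_set entry exactly
 * when the alloc key's data_type says so, and the bucket_gens btree must
 * record the same generation as the alloc key.
 */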
static noinline_for_stack
int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
				    struct bch_dev *ca,
				    struct bpos start,
				    struct bpos *end,
				    struct btree_iter *freespace_iter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (!ca->mi.freespace_initialized)
		return 0;

	bch2_btree_iter_set_pos(freespace_iter, start);

	k = bch2_btree_iter_peek_slot(freespace_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	*end = bkey_min(k.k->p, *end);

	if (fsck_err_on(k.k->type != KEY_TYPE_set,
			c, freespace_hole_missing,
			"hole in alloc btree missing in freespace btree\n"
			"  device %llu buckets %llu-%llu",
			freespace_iter->pos.inode,
			freespace_iter->pos.offset,
			end->offset)) {
		struct bkey_i *update =
			bch2_trans_kmalloc(trans, sizeof(*update));

		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_init(&update->k);
		update->k.type	= KEY_TYPE_set;
		update->k.p	= freespace_iter->pos;
		bch2_key_resize(&update->k,
				min_t(u64, U32_MAX, end->offset -
				      freespace_iter->pos.offset));

		ret = bch2_trans_update(trans, freespace_iter, update, 0);
		if (ret)
			goto err;
	}
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}
static noinline_for_stack
int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
				      struct bpos start,
				      struct bpos *end,
				      struct btree_iter *bucket_gens_iter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	unsigned i, gens_offset, gens_end_offset;
	int ret;

	bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));

	k = bch2_btree_iter_peek_slot(bucket_gens_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
		     alloc_gens_pos(*end,  &gens_end_offset)))
		gens_end_offset = KEY_TYPE_BUCKET_GENS_NR;

	if (k.k->type == KEY_TYPE_bucket_gens) {
		struct bkey_i_bucket_gens g;
		bool need_update = false;

		bkey_reassemble(&g.k_i, k);

		for (i = gens_offset; i < gens_end_offset; i++) {
			if (fsck_err_on(g.v.gens[i], c,
					bucket_gens_hole_wrong,
					"hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
					bucket_gens_pos_to_alloc(k.k->p, i).inode,
					bucket_gens_pos_to_alloc(k.k->p, i).offset,
					g.v.gens[i])) {
				g.v.gens[i] = 0;
				need_update = true;
			}
		}

		if (need_update) {
			struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));

			ret = PTR_ERR_OR_ZERO(u);
			if (ret)
				goto err;

			memcpy(u, &g, sizeof(g));

			ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
			if (ret)
				goto err;
		}
	}

	*end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}
static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans,
							struct btree_iter *iter)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter;
	struct bkey_s_c alloc_k;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	u64 genbits;
	struct bpos pos;
	enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
		? BCH_DATA_need_discard
		: BCH_DATA_free;
	struct printbuf buf = PRINTBUF;
	int ret;

	pos = iter->pos;
	pos.offset &= ~(~0ULL << 56);
	genbits = iter->pos.offset & (~0ULL << 56);

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		return ret;

	if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
			need_discard_freespace_key_to_invalid_dev_bucket,
			"entry in %s btree for nonexistant dev:bucket %llu:%llu",
			bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
		goto delete;

	a = bch2_alloc_to_v4(alloc_k, &a_convert);

	if (fsck_err_on(a->data_type != state ||
			(state == BCH_DATA_free &&
			 genbits != alloc_freespace_genbits(*a)), c,
			need_discard_freespace_key_bad,
			"%s\n  incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
			bch2_btree_id_str(iter->btree_id),
			iter->pos.inode,
			iter->pos.offset,
			a->data_type == state,
			genbits >> 56, alloc_freespace_genbits(*a) >> 56))
		goto delete;
out:
fsck_err:
	bch2_set_btree_iter_dontneed(&alloc_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
delete:
	ret =   bch2_btree_delete_extent_at(trans, iter,
			iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
			BCH_TRANS_COMMIT_no_enospc);
	goto out;
}
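
/*
 * Freespace keys embed generation bits in the top 8 bits of the key's offset
 * (the ~0ULL << 56 mask above); bch2_check_discard_freespace_key() strips
 * them off to look up the bucket's alloc key and deletes index entries that
 * the alloc key no longer justifies.
 */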
/*
 * We've already checked that generation numbers in the bucket_gens btree are
 * valid for buckets that exist; this just checks for keys for nonexistent
 * buckets.
 */
static noinline_for_stack
int bch2_check_bucket_gens_key(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_bucket_gens g;
	u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
	u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
	u64 b;
	bool need_update = false;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
	bkey_reassemble(&g.k_i, k);

	struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
	if (!ca) {
		if (fsck_err(c, bucket_gens_to_invalid_dev,
			     "bucket_gens key for invalid device:\n  %s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_btree_delete_at(trans, iter, 0);
		goto out;
	}

	if (fsck_err_on(end <= ca->mi.first_bucket ||
			start >= ca->mi.nbuckets, c,
			bucket_gens_to_invalid_buckets,
			"bucket_gens key for invalid buckets:\n  %s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, iter, 0);
		goto out;
	}

	for (b = start; b < ca->mi.first_bucket; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
		}

	for (b = ca->mi.nbuckets; b < end; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
		}

	if (need_update) {
		struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));

		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto out;

		memcpy(u, &g, sizeof(g));
		ret = bch2_trans_update(trans, iter, u, 0);
	}
out:
fsck_err:
	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
}
int bch2_check_alloc_info(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
	struct bch_dev *ca = NULL;
	struct bkey hole;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
			     BTREE_ITER_prefetch);

	while (1) {
		struct bpos next;

		bch2_trans_begin(trans);

		k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole);
		ret = bkey_err(k);
		if (ret)
			goto bkey_err;
		if (!k.k)
			break;

		if (k.k->type) {
			next = bpos_nosnap_successor(k.k->p);

			ret = bch2_check_alloc_key(trans,
						   k, &iter,
						   &discard_iter,
						   &freespace_iter,
						   &bucket_gens_iter);
			if (ret)
				goto bkey_err;
		} else {
			next = k.k->p;

			ret = bch2_check_alloc_hole_freespace(trans, ca,
						    bkey_start_pos(k.k),
						    &next,
						    &freespace_iter) ?:
			      bch2_check_alloc_hole_bucket_gens(trans,
						    bkey_start_pos(k.k),
						    &next,
						    &bucket_gens_iter);
			if (ret)
				goto bkey_err;
		}

		ret = bch2_trans_commit(trans, NULL, NULL,
					BCH_TRANS_COMMIT_no_enospc);
		if (ret)
			goto bkey_err;

		bch2_btree_iter_set_pos(&iter, next);
bkey_err:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &bucket_gens_iter);
	bch2_trans_iter_exit(trans, &freespace_iter);
	bch2_trans_iter_exit(trans, &discard_iter);
	bch2_trans_iter_exit(trans, &iter);
	bch2_dev_put(ca);
	ca = NULL;

	if (ret < 0)
		goto err;

	ret = for_each_btree_key(trans, iter,
				 BTREE_ID_need_discard, POS_MIN,
				 BTREE_ITER_prefetch, k,
			bch2_check_discard_freespace_key(trans, &iter));
	if (ret)
		goto err;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
			     BTREE_ITER_prefetch);
	while (1) {
		bch2_trans_begin(trans);
		k = bch2_btree_iter_peek(&iter);
		if (!k.k)
			break;

		ret = bkey_err(k) ?:
			bch2_check_discard_freespace_key(trans, &iter);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
			ret = 0;
			continue;
		}
		if (ret) {
			struct printbuf buf = PRINTBUF;
			bch2_bkey_val_to_text(&buf, c, k);

			bch_err(c, "while checking %s", buf.buf);
			printbuf_exit(&buf);
			break;
		}

		bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
	}
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		goto err;

	ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_bucket_gens, POS_MIN,
			BTREE_ITER_prefetch, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		bch2_check_bucket_gens_key(trans, &iter, k));
err:
	bch2_trans_put(trans);
	bch_err_fn(c, ret);
	return ret;
}

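/*
 * Buckets containing only cached data are tracked in a per-device LRU keyed by
 * their read IO clock, and buckets with a nonzero fragmentation_lru index are
 * tracked in the fragmentation LRU; the pass below checks, for each alloc key,
 * that the corresponding LRU entries exist and repairs them if not.
 */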
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
				       struct btree_iter *alloc_iter,
				       struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret;

	alloc_k = bch2_btree_iter_peek(alloc_iter);
	if (!alloc_k.k)
		return 0;

	ret = bkey_err(alloc_k);
	if (ret)
		return ret;

	a = bch2_alloc_to_v4(alloc_k, &a_convert);

	if (a->fragmentation_lru) {
		ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
					 a->fragmentation_lru,
					 alloc_k, last_flushed);
		if (ret)
			return ret;
	}

	if (a->data_type != BCH_DATA_cached)
		return 0;

	if (fsck_err_on(!a->io_time[READ], c,
			alloc_key_cached_but_read_time_zero,
			"cached bucket with read_time 0\n"
			"  %s",
		(printbuf_reset(&buf),
		 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i_alloc_v4 *a_mut =
			bch2_alloc_to_v4_mut(trans, alloc_k);
		ret = PTR_ERR_OR_ZERO(a_mut);
		if (ret)
			goto err;

		a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
		ret = bch2_trans_update(trans, alloc_iter,
					&a_mut->k_i, BTREE_TRIGGER_norun);
		if (ret)
			goto err;

		a = &a_mut->v;
	}

	ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
				 alloc_k, last_flushed);
	if (ret)
		goto err;
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
	struct bkey_buf last_flushed;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
				POS_MIN, BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));

	bch2_bkey_buf_exit(&last_flushed, c);
	bch_err_fn(c, ret);
	return ret;
}

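/*
 * Buckets currently being (or queued to be) discarded are tracked per device
 * in ca->discard_buckets_in_flight, protected by
 * ca->discard_buckets_in_flight_lock, so the normal and "fast" discard paths
 * never operate on the same bucket concurrently: discard_in_flight_add()
 * returns -BCH_ERR_EEXIST_discard_in_flight_add if the bucket is already
 * queued, and in_progress distinguishes buckets actively being discarded from
 * buckets merely queued for the fast path.
 */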
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
	int ret;

	mutex_lock(&ca->discard_buckets_in_flight_lock);
	darray_for_each(ca->discard_buckets_in_flight, i)
		if (i->bucket == bucket) {
			ret = -BCH_ERR_EEXIST_discard_in_flight_add;
			goto out;
		}

	ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
			   .in_progress = in_progress,
			   .bucket	= bucket,
	}));
out:
	mutex_unlock(&ca->discard_buckets_in_flight_lock);
	return ret;
}

static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
	mutex_lock(&ca->discard_buckets_in_flight_lock);
	darray_for_each(ca->discard_buckets_in_flight, i)
		if (i->bucket == bucket) {
			BUG_ON(!i->in_progress);
			darray_remove_item(&ca->discard_buckets_in_flight, i);
			goto found;
		}
	BUG();
found:
	mutex_unlock(&ca->discard_buckets_in_flight_lock);
}

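/*
 * Per-pass counters for the discard worker: "seen" counts every need_discard
 * key visited, "open" counts buckets skipped because they are still open,
 * the need_journal_commit counters count buckets skipped because their
 * journal entry has not yet been flushed, and "discarded" counts buckets
 * actually discarded and rewritten.
 */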
struct discard_buckets_state {
	u64		seen;
	u64		open;
	u64		need_journal_commit;
	u64		discarded;
	u64		need_journal_commit_this_dev;
};

static int bch2_discard_one_bucket(struct btree_trans *trans,
				   struct bch_dev *ca,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   struct discard_buckets_state *s)
{
	struct bch_fs *c = trans->c;
	struct bpos pos = need_discard_iter->pos;
	struct btree_iter iter = { NULL };
	struct bkey_s_c k;
	struct bkey_i_alloc_v4 *a;
	struct printbuf buf = PRINTBUF;
	bool discard_locked = false;
	int ret = 0;

	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
		s->open++;
		goto out;
	}

	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
			c->journal.flushed_seq_ondisk,
			pos.inode, pos.offset)) {
		s->need_journal_commit++;
		s->need_journal_commit_this_dev++;
		goto out;
	}

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
			       need_discard_iter->pos,
			       BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		goto out;

	a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto out;

	if (bch2_bucket_sectors_total(a->v)) {
		if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
					       trans, "attempting to discard bucket with dirty data\n%s",
					       (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = -EIO;
		goto out;
	}

	if (a->v.data_type != BCH_DATA_need_discard) {
		if (data_type_is_empty(a->v.data_type) &&
		    BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
			a->v.gen++;
			SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
			goto write;
		}

		if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
					       trans, "bucket incorrectly set in need_discard btree\n"
					       "%s",
					       (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = -EIO;
		goto out;
	}

	if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
		if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
					       trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
					       a->v.journal_seq,
					       c->journal.flushed_seq_ondisk,
					       (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = -EIO;
		goto out;
	}

	if (discard_in_flight_add(ca, iter.pos.offset, true))
		goto out;

	discard_locked = true;

	if (!bkey_eq(*discard_pos_done, iter.pos) &&
	    ca->mi.discard && !c->opts.nochanges) {
		/*
		 * This works without any other locks because this is the only
		 * thread that removes items from the need_discard tree
		 */
		bch2_trans_unlock_long(trans);
		blkdev_issue_discard(ca->disk_sb.bdev,
				     k.k->p.offset * ca->mi.bucket_size,
				     ca->mi.bucket_size,
				     GFP_KERNEL);
		*discard_pos_done = iter.pos;

		ret = bch2_trans_relock_notrace(trans);
		if (ret)
			goto out;
	}

	SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
write:
	alloc_data_type_set(&a->v, a->v.data_type);

	ret =   bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_WATERMARK_btree|
				  BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;

	count_event(c, bucket_discard);
	s->discarded++;
out:
	if (discard_locked)
		discard_in_flight_remove(ca, iter.pos.offset);
	s->seen++;
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

static void bch2_do_discards_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
	struct bch_fs *c = ca->fs;
	struct discard_buckets_state s = {};
	struct bpos discard_pos_done = POS_MAX;
	int ret;

	/*
	 * We're doing the commit in bch2_discard_one_bucket instead of using
	 * for_each_btree_key_commit() so that we can increment counters after
	 * successful commit:
	 */
	ret = bch2_trans_run(c,
		for_each_btree_key_upto(trans, iter,
				   BTREE_ID_need_discard,
				   POS(ca->dev_idx, 0),
				   POS(ca->dev_idx, U64_MAX), 0, k,
			bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));

	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
			      bch2_err_str(ret));

	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
	percpu_ref_put(&ca->io_ref);
}

void bch2_dev_do_discards(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;

	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
		return;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
		goto put_ioref;

	if (queue_work(c->write_ref_wq, &ca->discard_work))
		return;

	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
put_ioref:
	percpu_ref_put(&ca->io_ref);
}

void bch2_do_discards(struct bch_fs *c)
{
	for_each_member_device(c, ca)
		bch2_dev_do_discards(ca);
}

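/*
 * The "fast" discard path below is used when a single bucket should be
 * discarded right away rather than waiting for the next pass of the main
 * discard worker: the bucket is queued in ca->discard_buckets_in_flight via
 * bch2_discard_one_bucket_fast(), the worker issues the block layer discard,
 * then clears the bucket's need_discard flag with
 * bch2_clear_bucket_needs_discard().
 */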
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
{
	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_intent);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
	int ret = bkey_err(k);
	if (ret)
		goto err;

	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto err;

	BUG_ON(a->v.dirty_sectors);
	SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
	alloc_data_type_set(&a->v, a->v.data_type);

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static void bch2_do_discards_fast_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
	struct bch_fs *c = ca->fs;

	while (1) {
		bool got_bucket = false;
		u64 bucket;

		mutex_lock(&ca->discard_buckets_in_flight_lock);
		darray_for_each(ca->discard_buckets_in_flight, i) {
			if (i->in_progress)
				continue;

			got_bucket = true;
			bucket = i->bucket;
			i->in_progress = true;
			break;
		}
		mutex_unlock(&ca->discard_buckets_in_flight_lock);

		if (!got_bucket)
			break;

		if (ca->mi.discard && !c->opts.nochanges)
			blkdev_issue_discard(ca->disk_sb.bdev,
					     bucket_to_sector(ca, bucket),
					     ca->mi.bucket_size,
					     GFP_KERNEL);

		int ret = bch2_trans_do(c, NULL, NULL,
			BCH_WATERMARK_btree|
			BCH_TRANS_COMMIT_no_enospc,
			bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
		bch_err_fn(c, ret);

		discard_in_flight_remove(ca, bucket);

		if (ret)
			break;
	}

	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
	percpu_ref_put(&ca->io_ref);
}

static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
	struct bch_fs *c = ca->fs;

	if (discard_in_flight_add(ca, bucket, false))
		return;

	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
		return;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
		goto put_ioref;

	if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
		return;

	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
put_ioref:
	percpu_ref_put(&ca->io_ref);
}

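/*
 * Invalidating a cached bucket bumps its generation number and zeroes its
 * sector counts and IO clocks, which makes any remaining cached pointers into
 * the bucket stale; invalidate_one_bucket() below does this for buckets picked
 * off the device's cached-data LRU by bch2_do_invalidates_work().
 */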
static int invalidate_one_bucket(struct btree_trans *trans,
				 struct btree_iter *lru_iter,
				 struct bkey_s_c lru_k,
				 s64 *nr_to_invalidate)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_alloc_v4 *a = NULL;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
	unsigned cached_sectors;
	int ret = 0;

	if (*nr_to_invalidate <= 0)
		return 1;

	if (!bch2_dev_bucket_exists(c, bucket)) {
		prt_str(&buf, "lru entry points to invalid bucket");
		goto err;
	}

	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
		return 0;

	a = bch2_trans_start_alloc_update(trans, bucket);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto out;

	/* We expect harmless races here due to the btree write buffer: */
	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
		goto out;

	BUG_ON(a->v.data_type != BCH_DATA_cached);
	BUG_ON(a->v.dirty_sectors);

	if (!a->v.cached_sectors)
		bch_err(c, "invalidating empty bucket, confused");

	cached_sectors = a->v.cached_sectors;

	SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
	a->v.gen++;
	a->v.data_type		= 0;
	a->v.dirty_sectors	= 0;
	a->v.stripe_sectors	= 0;
	a->v.cached_sectors	= 0;
	a->v.io_time[READ]	= bch2_current_io_time(c, READ);
	a->v.io_time[WRITE]	= bch2_current_io_time(c, WRITE);

	ret = bch2_trans_commit(trans, NULL, NULL,
				BCH_WATERMARK_btree|
				BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;

	trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
	--*nr_to_invalidate;
out:
	printbuf_exit(&buf);
	return ret;
err:
	prt_str(&buf, "\n  lru key: ");
	bch2_bkey_val_to_text(&buf, c, lru_k);

	prt_str(&buf, "\n  lru entry: ");
	bch2_lru_pos_to_text(&buf, lru_iter->pos);

	prt_str(&buf, "\n  alloc key: ");
	if (!a)
		bch2_bpos_to_text(&buf, bucket);
	else
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));

	bch_err(c, "%s", buf.buf);
	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
		bch2_inconsistent_error(c);
		ret = -EINVAL;
	}

	goto out;
}

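/*
 * Scan this device's LRU keys from the caller's starting position; if we hit
 * the end of the device's keyspace without finding anything and haven't
 * wrapped yet, restart the scan from the beginning of the device's LRU.
 */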
static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
				    struct bch_dev *ca, bool *wrapped)
{
	struct bkey_s_c k;
again:
	k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
	if (!k.k && !*wrapped) {
		bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
		*wrapped = true;
		goto again;
	}

	return k;
}

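/*
 * The invalidate worker starts its LRU scan at an offset derived from the
 * current read clock rather than at zero; combined with the wrap-around in
 * next_lru_key() above, the scan can still reach every LRU entry for the
 * device if nr_to_invalidate allows.
 */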
static void bch2_do_invalidates_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
	struct bch_fs *c = ca->fs;
	struct btree_trans *trans = bch2_trans_get(c);
	int ret = 0;

	ret = bch2_btree_write_buffer_tryflush(trans);
	if (ret)
		goto err;

	s64 nr_to_invalidate =
		should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
	struct btree_iter iter;
	bool wrapped = false;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
			     lru_pos(ca->dev_idx, 0,
				     ((bch2_current_io_time(c, READ) + U32_MAX) &
				      LRU_TIME_MAX)), 0);

	while (true) {
		bch2_trans_begin(trans);

		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
		if (!k.k)
			break;

		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
		if (ret)
			break;

		bch2_btree_iter_advance(&iter);
	}
	bch2_trans_iter_exit(trans, &iter);
err:
	bch2_trans_put(trans);
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
	percpu_ref_put(&ca->io_ref);
}

void bch2_dev_do_invalidates(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;

	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
		return;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
		goto put_ioref;

	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
		return;

	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
put_ioref:
	percpu_ref_put(&ca->io_ref);
}

void bch2_do_invalidates(struct bch_fs *c)
{
	for_each_member_device(c, ca)
		bch2_dev_do_invalidates(ca);
}

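/*
 * bch2_dev_freespace_init() walks a range of a device's alloc keys and
 * (re)creates the corresponding freespace/need_discard index entries; it is
 * run from bch2_fs_freespace_init() below for any device whose superblock
 * member entry does not yet have FREESPACE_INITIALIZED set (e.g. after a
 * device was added but initialization didn't complete).
 */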
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey hole;
	struct bpos end = POS(ca->dev_idx, bucket_end);
	struct bch_member *m;
	unsigned long last_updated = jiffies;
	int ret;

	BUG_ON(bucket_start > bucket_end);
	BUG_ON(bucket_end > ca->mi.nbuckets);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
		BTREE_ITER_prefetch);
	/*
	 * Scan the alloc btree for every bucket on @ca, and add buckets to the
	 * freespace/need_discard/need_gc_gens btrees as needed:
	 */
	while (1) {
		if (last_updated + HZ * 10 < jiffies) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			last_updated = jiffies;
		}

		bch2_trans_begin(trans);

		if (bkey_ge(iter.pos, end)) {
			ret = 0;
			break;
		}

		k = bch2_get_key_or_hole(&iter, end, &hole);
		ret = bkey_err(k);
		if (ret)
			goto bkey_err;

		if (k.k->type) {
			/*
			 * We process live keys in the alloc btree one at a
			 * time:
			 */
			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			ret =   bch2_bucket_do_index(trans, ca, k, a, true) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			bch2_btree_iter_advance(&iter);
		} else {
			struct bkey_i *freespace;

			freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
			ret = PTR_ERR_OR_ZERO(freespace);
			if (ret)
				goto bkey_err;

			bkey_init(&freespace->k);
			freespace->k.type	= KEY_TYPE_set;
			freespace->k.p		= k.k->p;
			freespace->k.size	= k.k->size;

			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			bch2_btree_iter_set_pos(&iter, k.k->p);
		}
bkey_err:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	if (ret < 0) {
		bch_err_msg(ca, ret, "initializing free space");
		return ret;
	}

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_fs_freespace_init(struct bch_fs *c)
{
	int ret = 0;
	bool doing_init = false;

	/*
	 * We can crash during the device add path, so we need to check this on
	 * every mount:
	 */
	for_each_member_device(c, ca) {
		if (ca->mi.freespace_initialized)
			continue;

		if (!doing_init) {
			bch_info(c, "initializing freespace");
			doing_init = true;
		}

		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
		if (ret) {
			bch2_dev_put(ca);
			bch_err_fn(c, ret);
			return ret;
		}
	}

	if (doing_init) {
		mutex_lock(&c->sb_lock);
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
		bch_verbose(c, "done initializing freespace");
	}

	return 0;
}

/* Bucket IO clocks: */

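/*
 * A bucket's read/write "IO clock" timestamps are what the cached-data LRU is
 * keyed by (see the invalidate path above); bch2_bucket_io_time_reset() stamps
 * one of them with the current time, skipping the update if it is already
 * current.
 */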
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret = 0;

	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = bch2_current_io_time(c, rw);
	if (a->v.io_time[rw] == now)
		goto out;

	a->v.io_time[rw] = now;

	ret =   bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* Startup/shutdown (ro/rw): */

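/*
 * Rough sketch of the reserve arithmetic in bch2_recalc_capacity(), with
 * hypothetical numbers: a single rw device with 2^20 buckets of 1024 sectors
 * contributes nbuckets >> 6 = 16384 copygc-reserve buckets; ignoring the btree
 * reserve and the three write points, dev_reserve is roughly 16384 * 1024
 * sectors, and reserved_sectors roughly twice that (about 3% of capacity).
 * With gc_reserve_percent = 8 and gc_reserve_bytes unset, gc_reserve is 8% of
 * capacity, so the larger gc_reserve wins; the result, clamped to capacity, is
 * what gets subtracted from c->capacity.
 */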
void bch2_recalc_capacity(struct bch_fs *c)
{
	u64 capacity = 0, reserved_sectors = 0, gc_reserve;
	unsigned bucket_size_max = 0;
	unsigned long ra_pages = 0;

	lockdep_assert_held(&c->state_lock);

	for_each_online_member(c, ca) {
		struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;

		ra_pages += bdi->ra_pages;
	}

	bch2_set_ra_pages(c, ra_pages);

	for_each_rw_member(c, ca) {
		u64 dev_reserve = 0;

		/*
		 * We need to reserve buckets (from the number
		 * of currently available buckets) against
		 * foreground writes so that mainly copygc can
		 * make forward progress.
		 *
		 * We need enough to refill the various reserves
		 * from scratch - copygc will use its entire
		 * reserve all at once, then run against when
		 * its reserve is refilled (from the formerly
		 * available buckets).
		 *
		 * This reserve is just used when considering if
		 * allocations for foreground writes must wait -
		 * not -ENOSPC calculations.
		 */

		dev_reserve += ca->nr_btree_reserve * 2;
		dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */

		dev_reserve += 1;	/* btree write point */
		dev_reserve += 1;	/* copygc write point */
		dev_reserve += 1;	/* rebalance write point */

		dev_reserve *= ca->mi.bucket_size;

		capacity += bucket_to_sector(ca, ca->mi.nbuckets -
					     ca->mi.first_bucket);

		reserved_sectors += dev_reserve * 2;

		bucket_size_max = max_t(unsigned, bucket_size_max,
					ca->mi.bucket_size);
	}

	gc_reserve = c->opts.gc_reserve_bytes
		? c->opts.gc_reserve_bytes >> 9
		: div64_u64(capacity * c->opts.gc_reserve_percent, 100);

	reserved_sectors = max(gc_reserve, reserved_sectors);

	reserved_sectors = min(reserved_sectors, capacity);

	c->reserved = reserved_sectors;
	c->capacity = capacity - reserved_sectors;

	c->bucket_size_max = bucket_size_max;

	/* Wake up in case someone was waiting for buckets */
	closure_wake_up(&c->freelist_wait);
}

u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
	u64 ret = U64_MAX;

	for_each_rw_member(c, ca)
		ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
	return ret;
}

static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
	struct open_bucket *ob;
	bool ret = false;

	for (ob = c->open_buckets;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
	     ob++) {
		spin_lock(&ob->lock);
		if (ob->valid && !ob->on_partial_list &&
		    ob->dev == ca->dev_idx)
			ret = true;
		spin_unlock(&ob->lock);
	}

	return ret;
}

/* device goes ro: */
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned i;

	/* First, remove device from allocation groups: */

	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		clear_bit(ca->dev_idx, c->rw_devs[i].d);

	/*
	 * Capacity is calculated based off of devices in allocation groups:
	 */
	bch2_recalc_capacity(c);

	bch2_open_buckets_stop(c, ca, false);

	/*
	 * Wake up threads that were blocked on allocation, so they can notice
	 * the device can no longer be removed and the capacity has changed:
	 */
	closure_wake_up(&c->freelist_wait);

	/*
	 * journal_res_get() can block waiting for free space in the journal -
	 * it needs to notice there may not be devices to allocate from anymore:
	 */
	wake_up(&c->journal.wait);

	/* Now wait for any in flight writes: */

	closure_wait_event(&c->open_buckets_wait,
			   !bch2_dev_has_open_write_point(c, ca));
}

/* device goes rw: */
void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		if (ca->mi.data_allowed & (1 << i))
			set_bit(ca->dev_idx, c->rw_devs[i].d);
}

void bch2_dev_allocator_background_exit(struct bch_dev *ca)
{
	darray_exit(&ca->discard_buckets_in_flight);
}

void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
	mutex_init(&ca->discard_buckets_in_flight_lock);
	INIT_WORK(&ca->discard_work, bch2_do_discards_work);
	INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
	INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}

void bch2_fs_allocator_background_init(struct bch_fs *c)
{
	spin_lock_init(&c->freelist_lock);
}