2018-10-06 07:46:55 +03:00
// SPDX-License-Identifier: GPL-2.0
2017-03-17 09:18:50 +03:00
# include "bcachefs.h"
2018-10-06 07:46:55 +03:00
# include "alloc_background.h"
# include "alloc_foreground.h"
2022-03-18 03:51:27 +03:00
# include "backpointers.h"
2017-03-17 09:18:50 +03:00
# include "btree_cache.h"
# include "btree_io.h"
2019-10-05 19:54:53 +03:00
# include "btree_key_cache.h"
2017-03-17 09:18:50 +03:00
# include "btree_update.h"
# include "btree_update_interior.h"
# include "btree_gc.h"
2023-02-07 02:51:42 +03:00
# include "btree_write_buffer.h"
2017-03-17 09:18:50 +03:00
# include "buckets.h"
2022-01-05 06:32:09 +03:00
# include "buckets_waiting_for_journal.h"
2017-03-17 09:18:50 +03:00
# include "clock.h"
# include "debug.h"
2018-11-01 22:13:19 +03:00
# include "ec.h"
2017-03-17 09:18:50 +03:00
# include "error.h"
2021-12-12 01:13:09 +03:00
# include "lru.h"
2019-04-12 05:39:39 +03:00
# include "recovery.h"
2017-03-17 09:18:50 +03:00
# include "trace.h"
2021-01-23 02:01:07 +03:00
# include "varint.h"
2017-03-17 09:18:50 +03:00
# include <linux/kthread.h>
# include <linux/math64.h>
# include <linux/random.h>
# include <linux/rculist.h>
# include <linux/rcupdate.h>
# include <linux/sched/task.h>
# include <linux/sort.h>
2022-01-01 04:03:29 +03:00
/* Persistent alloc info: */
2021-01-23 02:01:07 +03:00
/*
 * Width in bytes of every v1 alloc field, indexed by BCH_ALLOC_FIELD_V1_<name>.
 * Generated from the BCH_ALLOC_FIELDS_V1() x-macro, which supplies widths in
 * bits.
 */
static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#define x(name, bits)	[BCH_ALLOC_FIELD_V1_##name] = bits / 8,
	BCH_ALLOC_FIELDS_V1()
#undef x
};
2022-01-01 04:03:29 +03:00
/*
 * Decoded form filled in by the v1/v2/v3 unpack helpers in this file.
 *
 * The fixed members come first; the remaining members are generated, one per
 * entry, from the BCH_ALLOC_FIELDS_V2() x-macro using the declared bit width.
 */
struct bkey_alloc_unpacked {
	u64		journal_seq;
	u8		gen;
	u8		oldest_gen;
	u8		data_type;
	bool		need_discard:1;
	bool		need_inc_gen:1;
#define x(_name, _bits)	u##_bits _name;
	BCH_ALLOC_FIELDS_V2()
#undef x
};
2017-03-17 09:18:50 +03:00
2021-01-23 02:01:07 +03:00
static inline u64 alloc_field_v1_get ( const struct bch_alloc * a ,
const void * * p , unsigned field )
2018-07-22 06:36:11 +03:00
{
2021-01-23 02:01:07 +03:00
unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES [ field ] ;
2018-07-22 06:36:11 +03:00
u64 v ;
if ( ! ( a - > fields & ( 1 < < field ) ) )
return 0 ;
switch ( bytes ) {
case 1 :
v = * ( ( const u8 * ) * p ) ;
break ;
case 2 :
v = le16_to_cpup ( * p ) ;
break ;
case 4 :
v = le32_to_cpup ( * p ) ;
break ;
case 8 :
v = le64_to_cpup ( * p ) ;
break ;
default :
BUG ( ) ;
}
* p + = bytes ;
return v ;
}
2021-01-23 02:01:07 +03:00
static void bch2_alloc_unpack_v1 ( struct bkey_alloc_unpacked * out ,
struct bkey_s_c k )
2019-02-13 22:46:32 +03:00
{
2021-01-23 02:01:07 +03:00
const struct bch_alloc * in = bkey_s_c_to_alloc ( k ) . v ;
const void * d = in - > data ;
unsigned idx = 0 ;
2019-04-18 01:14:46 +03:00
2021-01-23 02:01:07 +03:00
out - > gen = in - > gen ;
# define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
BCH_ALLOC_FIELDS_V1 ( )
# undef x
}
static int bch2_alloc_unpack_v2 ( struct bkey_alloc_unpacked * out ,
struct bkey_s_c k )
2019-02-13 22:46:32 +03:00
{
2021-01-23 02:01:07 +03:00
struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2 ( k ) ;
const u8 * in = a . v - > data ;
const u8 * end = bkey_val_end ( a ) ;
unsigned fieldnr = 0 ;
int ret ;
u64 v ;
out - > gen = a . v - > gen ;
out - > oldest_gen = a . v - > oldest_gen ;
out - > data_type = a . v - > data_type ;
# define x(_name, _bits) \
if ( fieldnr < a . v - > nr_fields ) { \
2021-07-13 23:03:51 +03:00
ret = bch2_varint_decode_fast ( in , end , & v ) ; \
2021-01-23 02:01:07 +03:00
if ( ret < 0 ) \
return ret ; \
in + = ret ; \
} else { \
v = 0 ; \
} \
out - > _name = v ; \
if ( v ! = out - > _name ) \
return - 1 ; \
fieldnr + + ;
BCH_ALLOC_FIELDS_V2 ( )
# undef x
return 0 ;
}
2021-10-30 04:14:23 +03:00
static int bch2_alloc_unpack_v3 ( struct bkey_alloc_unpacked * out ,
struct bkey_s_c k )
{
struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3 ( k ) ;
const u8 * in = a . v - > data ;
const u8 * end = bkey_val_end ( a ) ;
unsigned fieldnr = 0 ;
int ret ;
u64 v ;
out - > gen = a . v - > gen ;
out - > oldest_gen = a . v - > oldest_gen ;
out - > data_type = a . v - > data_type ;
2022-01-01 04:03:29 +03:00
out - > need_discard = BCH_ALLOC_V3_NEED_DISCARD ( a . v ) ;
out - > need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN ( a . v ) ;
2021-10-30 04:14:23 +03:00
out - > journal_seq = le64_to_cpu ( a . v - > journal_seq ) ;
# define x(_name, _bits) \
if ( fieldnr < a . v - > nr_fields ) { \
ret = bch2_varint_decode_fast ( in , end , & v ) ; \
if ( ret < 0 ) \
return ret ; \
in + = ret ; \
} else { \
v = 0 ; \
} \
out - > _name = v ; \
if ( v ! = out - > _name ) \
return - 1 ; \
fieldnr + + ;
BCH_ALLOC_FIELDS_V2 ( )
# undef x
return 0 ;
}
2022-01-01 04:03:29 +03:00
static struct bkey_alloc_unpacked bch2_alloc_unpack ( struct bkey_s_c k )
2021-01-23 02:01:07 +03:00
{
2023-01-31 04:58:43 +03:00
struct bkey_alloc_unpacked ret = { . gen = 0 } ;
2019-02-13 22:46:32 +03:00
2021-10-30 04:14:23 +03:00
switch ( k . k - > type ) {
case KEY_TYPE_alloc :
2021-01-23 02:01:07 +03:00
bch2_alloc_unpack_v1 ( & ret , k ) ;
2021-10-30 04:14:23 +03:00
break ;
case KEY_TYPE_alloc_v2 :
bch2_alloc_unpack_v2 ( & ret , k ) ;
break ;
case KEY_TYPE_alloc_v3 :
bch2_alloc_unpack_v3 ( & ret , k ) ;
break ;
}
2021-01-23 02:01:07 +03:00
return ret ;
}
2021-04-17 04:53:23 +03:00
static unsigned bch_alloc_v1_val_u64s ( const struct bch_alloc * a )
2017-03-17 09:18:50 +03:00
{
2018-07-22 06:36:11 +03:00
unsigned i , bytes = offsetof ( struct bch_alloc , data ) ;
2017-03-17 09:18:50 +03:00
2021-01-23 02:01:07 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( BCH_ALLOC_V1_FIELD_BYTES ) ; i + + )
2018-07-22 06:36:11 +03:00
if ( a - > fields & ( 1 < < i ) )
2021-01-23 02:01:07 +03:00
bytes + = BCH_ALLOC_V1_FIELD_BYTES [ i ] ;
2017-03-17 09:18:50 +03:00
return DIV_ROUND_UP ( bytes , sizeof ( u64 ) ) ;
}
2023-10-25 03:44:36 +03:00
int bch2_alloc_v1_invalid ( struct bch_fs * c , struct bkey_s_c k ,
2023-07-07 04:16:10 +03:00
enum bkey_invalid_flags flags ,
struct printbuf * err )
2017-03-17 09:18:50 +03:00
{
2018-11-01 22:10:01 +03:00
struct bkey_s_c_alloc a = bkey_s_c_to_alloc ( k ) ;
2023-10-25 03:44:36 +03:00
int ret = 0 ;
2018-11-01 22:10:01 +03:00
/* allow for unknown fields */
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( bkey_val_u64s ( a . k ) < bch_alloc_v1_val_u64s ( a . v ) , c , err ,
alloc_v1_val_size_bad ,
" incorrect value size (%zu < %u) " ,
bkey_val_u64s ( a . k ) , bch_alloc_v1_val_u64s ( a . v ) ) ;
fsck_err :
return ret ;
2017-03-17 09:18:50 +03:00
}
2023-10-25 03:44:36 +03:00
int bch2_alloc_v2_invalid ( struct bch_fs * c , struct bkey_s_c k ,
2023-07-07 04:16:10 +03:00
enum bkey_invalid_flags flags ,
struct printbuf * err )
2017-03-17 09:18:50 +03:00
{
2021-01-23 02:01:07 +03:00
struct bkey_alloc_unpacked u ;
2023-10-25 03:44:36 +03:00
int ret = 0 ;
2021-01-23 02:01:07 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( bch2_alloc_unpack_v2 ( & u , k ) , c , err ,
alloc_v2_unpack_error ,
" unpack error " ) ;
fsck_err :
return ret ;
2021-01-23 02:01:07 +03:00
}
2023-10-25 03:44:36 +03:00
int bch2_alloc_v3_invalid ( struct bch_fs * c , struct bkey_s_c k ,
2023-07-07 04:16:10 +03:00
enum bkey_invalid_flags flags ,
struct printbuf * err )
2021-10-30 04:14:23 +03:00
{
struct bkey_alloc_unpacked u ;
2023-10-25 03:44:36 +03:00
int ret = 0 ;
2022-01-01 04:03:29 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( bch2_alloc_unpack_v3 ( & u , k ) , c , err ,
alloc_v2_unpack_error ,
" unpack error " ) ;
fsck_err :
return ret ;
2021-10-30 04:14:23 +03:00
}
2023-10-25 03:44:36 +03:00
int bch2_alloc_v4_invalid ( struct bch_fs * c , struct bkey_s_c k ,
2023-07-07 04:16:10 +03:00
enum bkey_invalid_flags flags , struct printbuf * err )
2021-01-23 02:01:07 +03:00
{
2022-04-07 00:22:47 +03:00
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4 ( k ) ;
2023-10-25 03:44:36 +03:00
int ret = 0 ;
2022-04-07 00:22:47 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( alloc_v4_u64s ( a . v ) > bkey_val_u64s ( k . k ) , c , err ,
alloc_v4_val_size_bad ,
" bad val size (%u > %zu) " ,
alloc_v4_u64s ( a . v ) , bkey_val_u64s ( k . k ) ) ;
2022-01-01 04:03:29 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( ! BCH_ALLOC_V4_BACKPOINTERS_START ( a . v ) & &
BCH_ALLOC_V4_NR_BACKPOINTERS ( a . v ) , c , err ,
alloc_v4_backpointers_start_bad ,
" invalid backpointers_start " ) ;
2022-03-18 03:51:27 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( alloc_data_type ( * a . v , a . v - > data_type ) ! = a . v - > data_type , c , err ,
alloc_key_data_type_bad ,
" invalid data type (got %u should be %u) " ,
a . v - > data_type , alloc_data_type ( * a . v , a . v - > data_type ) ) ;
2022-03-18 03:51:27 +03:00
2023-08-23 01:48:09 +03:00
switch ( a . v - > data_type ) {
case BCH_DATA_free :
case BCH_DATA_need_gc_gens :
case BCH_DATA_need_discard :
2023-11-24 02:05:18 +03:00
bkey_fsck_err_on ( bch2_bucket_sectors ( * a . v ) | | a . v - > stripe ,
c , err , alloc_key_empty_but_have_data ,
2023-10-25 03:44:36 +03:00
" empty data type free but have data " ) ;
2023-08-23 01:48:09 +03:00
break ;
case BCH_DATA_sb :
case BCH_DATA_journal :
case BCH_DATA_btree :
case BCH_DATA_user :
case BCH_DATA_parity :
2023-11-24 02:05:18 +03:00
bkey_fsck_err_on ( ! bch2_bucket_sectors_dirty ( * a . v ) ,
c , err , alloc_key_dirty_sectors_0 ,
2023-10-25 03:44:36 +03:00
" data_type %s but dirty_sectors==0 " ,
bch2_data_types [ a . v - > data_type ] ) ;
2023-08-23 01:48:09 +03:00
break ;
case BCH_DATA_cached :
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( ! a . v - > cached_sectors | |
2023-11-24 02:05:18 +03:00
bch2_bucket_sectors_dirty ( * a . v ) | |
a . v - > stripe ,
c , err , alloc_key_cached_inconsistency ,
2023-10-25 03:44:36 +03:00
" data type inconsistency " ) ;
bkey_fsck_err_on ( ! a . v - > io_time [ READ ] & &
c - > curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs ,
2023-11-24 02:05:18 +03:00
c , err , alloc_key_cached_but_read_time_zero ,
2023-10-25 03:44:36 +03:00
" cached bucket with read_time == 0 " ) ;
2023-08-23 01:48:09 +03:00
break ;
case BCH_DATA_stripe :
break ;
2022-04-07 00:22:47 +03:00
}
2023-10-25 03:44:36 +03:00
fsck_err :
return ret ;
2022-01-01 04:03:29 +03:00
}
void bch2_alloc_v4_swab ( struct bkey_s k )
{
struct bch_alloc_v4 * a = bkey_s_to_alloc_v4 ( k ) . v ;
2022-03-18 03:51:27 +03:00
struct bch_backpointer * bp , * bps ;
2022-01-01 04:03:29 +03:00
a - > journal_seq = swab64 ( a - > journal_seq ) ;
a - > flags = swab32 ( a - > flags ) ;
a - > dirty_sectors = swab32 ( a - > dirty_sectors ) ;
a - > cached_sectors = swab32 ( a - > cached_sectors ) ;
a - > io_time [ 0 ] = swab64 ( a - > io_time [ 0 ] ) ;
a - > io_time [ 1 ] = swab64 ( a - > io_time [ 1 ] ) ;
a - > stripe = swab32 ( a - > stripe ) ;
a - > nr_external_backpointers = swab32 ( a - > nr_external_backpointers ) ;
2023-11-03 16:09:37 +03:00
a - > fragmentation_lru = swab64 ( a - > fragmentation_lru ) ;
2022-03-18 03:51:27 +03:00
bps = alloc_v4_backpointers ( a ) ;
for ( bp = bps ; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS ( a ) ; bp + + ) {
bp - > bucket_offset = swab40 ( bp - > bucket_offset ) ;
bp - > bucket_len = swab32 ( bp - > bucket_len ) ;
bch2_bpos_swab ( & bp - > pos ) ;
}
2022-01-01 04:03:29 +03:00
}
void bch2_alloc_to_text ( struct printbuf * out , struct bch_fs * c , struct bkey_s_c k )
{
2023-01-31 04:58:43 +03:00
struct bch_alloc_v4 _a ;
const struct bch_alloc_v4 * a = bch2_alloc_to_v4 ( k , & _a ) ;
2022-03-18 03:51:27 +03:00
unsigned i ;
2023-01-31 04:58:43 +03:00
prt_newline ( out ) ;
printbuf_indent_add ( out , 2 ) ;
prt_printf ( out , " gen %u oldest_gen %u data_type %s " ,
2022-12-19 23:55:38 +03:00
a - > gen , a - > oldest_gen ,
a - > data_type < BCH_DATA_NR
? bch2_data_types [ a - > data_type ]
: " (invalid data type) " ) ;
2023-01-31 04:58:43 +03:00
prt_newline ( out ) ;
prt_printf ( out , " journal_seq %llu " , a - > journal_seq ) ;
prt_newline ( out ) ;
prt_printf ( out , " need_discard %llu " , BCH_ALLOC_V4_NEED_DISCARD ( a ) ) ;
prt_newline ( out ) ;
prt_printf ( out , " need_inc_gen %llu " , BCH_ALLOC_V4_NEED_INC_GEN ( a ) ) ;
prt_newline ( out ) ;
prt_printf ( out , " dirty_sectors %u " , a - > dirty_sectors ) ;
prt_newline ( out ) ;
prt_printf ( out , " cached_sectors %u " , a - > cached_sectors ) ;
prt_newline ( out ) ;
prt_printf ( out , " stripe %u " , a - > stripe ) ;
prt_newline ( out ) ;
prt_printf ( out , " stripe_redundancy %u " , a - > stripe_redundancy ) ;
prt_newline ( out ) ;
prt_printf ( out , " io_time[READ] %llu " , a - > io_time [ READ ] ) ;
prt_newline ( out ) ;
prt_printf ( out , " io_time[WRITE] %llu " , a - > io_time [ WRITE ] ) ;
prt_newline ( out ) ;
2022-12-05 18:24:19 +03:00
prt_printf ( out , " fragmentation %llu " , a - > fragmentation_lru ) ;
prt_newline ( out ) ;
2022-03-18 03:51:27 +03:00
prt_printf ( out , " bp_start %llu " , BCH_ALLOC_V4_BACKPOINTERS_START ( a ) ) ;
prt_newline ( out ) ;
2023-01-31 04:58:43 +03:00
2022-03-18 03:51:27 +03:00
if ( BCH_ALLOC_V4_NR_BACKPOINTERS ( a ) ) {
struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4 ( k ) ;
const struct bch_backpointer * bps = alloc_v4_backpointers_c ( a_raw . v ) ;
2023-01-31 04:58:43 +03:00
2022-03-18 03:51:27 +03:00
prt_printf ( out , " backpointers: %llu " , BCH_ALLOC_V4_NR_BACKPOINTERS ( a_raw . v ) ) ;
printbuf_indent_add ( out , 2 ) ;
for ( i = 0 ; i < BCH_ALLOC_V4_NR_BACKPOINTERS ( a_raw . v ) ; i + + ) {
prt_newline ( out ) ;
bch2_backpointer_to_text ( out , & bps [ i ] ) ;
}
printbuf_indent_sub ( out , 2 ) ;
}
printbuf_indent_sub ( out , 2 ) ;
2023-01-31 04:58:43 +03:00
}
void __bch2_alloc_to_v4 ( struct bkey_s_c k , struct bch_alloc_v4 * out )
{
if ( k . k - > type = = KEY_TYPE_alloc_v4 ) {
void * src , * dst ;
* out = * bkey_s_c_to_alloc_v4 ( k ) . v ;
src = alloc_v4_backpointers ( out ) ;
SET_BCH_ALLOC_V4_BACKPOINTERS_START ( out , BCH_ALLOC_V4_U64s ) ;
dst = alloc_v4_backpointers ( out ) ;
if ( src < dst )
memset ( src , 0 , dst - src ) ;
2023-03-31 23:24:45 +03:00
SET_BCH_ALLOC_V4_NR_BACKPOINTERS ( out , 0 ) ;
2023-01-31 04:58:43 +03:00
} else {
struct bkey_alloc_unpacked u = bch2_alloc_unpack ( k ) ;
2022-01-01 04:03:29 +03:00
2023-01-31 04:58:43 +03:00
* out = ( struct bch_alloc_v4 ) {
. journal_seq = u . journal_seq ,
. flags = u . need_discard ,
. gen = u . gen ,
. oldest_gen = u . oldest_gen ,
. data_type = u . data_type ,
. stripe_redundancy = u . stripe_redundancy ,
. dirty_sectors = u . dirty_sectors ,
. cached_sectors = u . cached_sectors ,
. io_time [ READ ] = u . read_time ,
. io_time [ WRITE ] = u . write_time ,
. stripe = u . stripe ,
} ;
SET_BCH_ALLOC_V4_BACKPOINTERS_START ( out , BCH_ALLOC_V4_U64s ) ;
}
}
static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut ( struct btree_trans * trans , struct bkey_s_c k )
{
struct bkey_i_alloc_v4 * ret ;
2023-03-31 23:24:45 +03:00
2023-04-16 14:10:46 +03:00
ret = bch2_trans_kmalloc ( trans , max ( bkey_bytes ( k . k ) , sizeof ( struct bkey_i_alloc_v4 ) ) ) ;
2023-03-31 23:24:45 +03:00
if ( IS_ERR ( ret ) )
return ret ;
2023-01-31 04:58:43 +03:00
if ( k . k - > type = = KEY_TYPE_alloc_v4 ) {
void * src , * dst ;
bkey_reassemble ( & ret - > k_i , k ) ;
src = alloc_v4_backpointers ( & ret - > v ) ;
SET_BCH_ALLOC_V4_BACKPOINTERS_START ( & ret - > v , BCH_ALLOC_V4_U64s ) ;
dst = alloc_v4_backpointers ( & ret - > v ) ;
if ( src < dst )
memset ( src , 0 , dst - src ) ;
2023-03-31 23:24:45 +03:00
SET_BCH_ALLOC_V4_NR_BACKPOINTERS ( & ret - > v , 0 ) ;
2023-01-31 04:58:43 +03:00
set_alloc_v4_u64s ( ret ) ;
} else {
2022-03-18 03:51:27 +03:00
bkey_alloc_v4_init ( & ret - > k_i ) ;
ret - > k . p = k . k - > p ;
bch2_alloc_to_v4 ( k , & ret - > v ) ;
2023-01-31 04:58:43 +03:00
}
return ret ;
}
static inline struct bkey_i_alloc_v4 * bch2_alloc_to_v4_mut_inlined ( struct btree_trans * trans , struct bkey_s_c k )
{
2023-03-31 23:24:45 +03:00
struct bkey_s_c_alloc_v4 a ;
2023-01-31 04:58:43 +03:00
if ( likely ( k . k - > type = = KEY_TYPE_alloc_v4 ) & &
2023-03-31 23:24:45 +03:00
( ( a = bkey_s_c_to_alloc_v4 ( k ) , true ) & &
2023-04-28 06:48:33 +03:00
BCH_ALLOC_V4_NR_BACKPOINTERS ( a . v ) = = 0 ) )
2023-05-01 02:21:06 +03:00
return bch2_bkey_make_mut_noupdate_typed ( trans , k , alloc_v4 ) ;
2023-01-31 04:58:43 +03:00
return __bch2_alloc_to_v4_mut ( trans , k ) ;
}
/*
 * Out-of-line entry point for bch2_alloc_to_v4_mut_inlined(): returns a
 * mutable v4 copy of @k, or an ERR_PTR.
 */
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);

	return a;
}
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update ( struct btree_trans * trans , struct btree_iter * iter ,
struct bpos pos )
{
struct bkey_s_c k ;
struct bkey_i_alloc_v4 * a ;
int ret ;
2023-04-30 02:33:09 +03:00
k = bch2_bkey_get_iter ( trans , iter , BTREE_ID_alloc , pos ,
2023-01-31 04:58:43 +03:00
BTREE_ITER_WITH_UPDATES |
BTREE_ITER_CACHED |
BTREE_ITER_INTENT ) ;
ret = bkey_err ( k ) ;
if ( unlikely ( ret ) )
2023-04-30 02:33:09 +03:00
return ERR_PTR ( ret ) ;
2023-01-31 04:58:43 +03:00
a = bch2_alloc_to_v4_mut_inlined ( trans , k ) ;
ret = PTR_ERR_OR_ZERO ( a ) ;
if ( unlikely ( ret ) )
goto err ;
return a ;
err :
bch2_trans_iter_exit ( trans , iter ) ;
return ERR_PTR ( ret ) ;
2017-03-17 09:18:50 +03:00
}
2022-11-26 07:14:30 +03:00
static struct bpos alloc_gens_pos ( struct bpos pos , unsigned * offset )
{
* offset = pos . offset & KEY_TYPE_BUCKET_GENS_MASK ;
pos . offset > > = KEY_TYPE_BUCKET_GENS_BITS ;
return pos ;
}
static struct bpos bucket_gens_pos_to_alloc ( struct bpos pos , unsigned offset )
{
pos . offset < < = KEY_TYPE_BUCKET_GENS_BITS ;
pos . offset + = offset ;
return pos ;
}
static unsigned alloc_gen ( struct bkey_s_c k , unsigned offset )
{
return k . k - > type = = KEY_TYPE_bucket_gens
? bkey_s_c_to_bucket_gens ( k ) . v - > gens [ offset ]
: 0 ;
}
2023-10-25 03:44:36 +03:00
int bch2_bucket_gens_invalid ( struct bch_fs * c , struct bkey_s_c k ,
2023-07-07 04:16:10 +03:00
enum bkey_invalid_flags flags ,
struct printbuf * err )
2022-11-26 07:14:30 +03:00
{
2023-10-25 03:44:36 +03:00
int ret = 0 ;
2022-11-26 07:14:30 +03:00
2023-10-25 03:44:36 +03:00
bkey_fsck_err_on ( bkey_val_bytes ( k . k ) ! = sizeof ( struct bch_bucket_gens ) , c , err ,
bucket_gens_val_size_bad ,
" bad val size (%zu != %zu) " ,
bkey_val_bytes ( k . k ) , sizeof ( struct bch_bucket_gens ) ) ;
fsck_err :
return ret ;
2022-11-26 07:14:30 +03:00
}
void bch2_bucket_gens_to_text ( struct printbuf * out , struct bch_fs * c , struct bkey_s_c k )
{
struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens ( k ) ;
unsigned i ;
for ( i = 0 ; i < ARRAY_SIZE ( g . v - > gens ) ; i + + ) {
if ( i )
prt_char ( out , ' ' ) ;
prt_printf ( out , " %u " , g . v - > gens [ i ] ) ;
}
}
int bch2_bucket_gens_init ( struct bch_fs * c )
{
2023-09-13 00:16:02 +03:00
struct btree_trans * trans = bch2_trans_get ( c ) ;
2022-11-26 07:14:30 +03:00
struct btree_iter iter ;
struct bkey_s_c k ;
struct bch_alloc_v4 a ;
struct bkey_i_bucket_gens g ;
bool have_bucket_gens_key = false ;
unsigned offset ;
struct bpos pos ;
u8 gen ;
int ret ;
2023-12-08 07:28:26 +03:00
ret = for_each_btree_key2 ( trans , iter , BTREE_ID_alloc , POS_MIN ,
BTREE_ITER_PREFETCH , k , ( {
2022-11-26 07:14:30 +03:00
/*
* Not a fsck error because this is checked / repaired by
* bch2_check_alloc_key ( ) which runs later :
*/
if ( ! bch2_dev_bucket_exists ( c , k . k - > p ) )
continue ;
gen = bch2_alloc_to_v4 ( k , & a ) - > gen ;
pos = alloc_gens_pos ( iter . pos , & offset ) ;
if ( have_bucket_gens_key & & bkey_cmp ( iter . pos , pos ) ) {
2023-09-13 00:16:02 +03:00
ret = commit_do ( trans , NULL , NULL ,
2023-11-29 00:36:54 +03:00
BCH_TRANS_COMMIT_no_enospc ,
2023-09-13 00:16:02 +03:00
bch2_btree_insert_trans ( trans , BTREE_ID_bucket_gens , & g . k_i , 0 ) ) ;
2022-11-26 07:14:30 +03:00
if ( ret )
break ;
have_bucket_gens_key = false ;
}
if ( ! have_bucket_gens_key ) {
bkey_bucket_gens_init ( & g . k_i ) ;
g . k . p = pos ;
have_bucket_gens_key = true ;
}
g . v . gens [ offset ] = gen ;
2023-12-08 07:28:26 +03:00
0 ;
} ) ) ;
2022-11-26 07:14:30 +03:00
if ( have_bucket_gens_key & & ! ret )
2023-09-13 00:16:02 +03:00
ret = commit_do ( trans , NULL , NULL ,
2023-11-29 00:36:54 +03:00
BCH_TRANS_COMMIT_no_enospc ,
2023-09-13 00:16:02 +03:00
bch2_btree_insert_trans ( trans , BTREE_ID_bucket_gens , & g . k_i , 0 ) ) ;
2022-11-26 07:14:30 +03:00
2023-09-13 00:16:02 +03:00
bch2_trans_put ( trans ) ;
2022-11-26 07:14:30 +03:00
2023-12-08 07:28:26 +03:00
bch_err_fn ( c , ret ) ;
2022-11-26 07:14:30 +03:00
return ret ;
}
2023-07-09 05:27:03 +03:00
int bch2_alloc_read ( struct bch_fs * c )
2022-11-26 07:14:30 +03:00
{
2023-09-13 00:16:02 +03:00
struct btree_trans * trans = bch2_trans_get ( c ) ;
2022-11-26 07:14:30 +03:00
struct btree_iter iter ;
struct bkey_s_c k ;
struct bch_dev * ca ;
int ret ;
2023-07-09 05:27:03 +03:00
down_read ( & c - > gc_lock ) ;
2022-11-26 07:14:30 +03:00
2023-07-09 05:27:03 +03:00
if ( c - > sb . version_upgrade_complete > = bcachefs_metadata_version_bucket_gens ) {
const struct bch_bucket_gens * g ;
u64 b ;
2022-11-26 07:14:30 +03:00
2023-12-08 07:28:26 +03:00
ret = for_each_btree_key2 ( trans , iter , BTREE_ID_bucket_gens , POS_MIN ,
BTREE_ITER_PREFETCH , k , ( {
2023-07-09 05:27:03 +03:00
u64 start = bucket_gens_pos_to_alloc ( k . k - > p , 0 ) . offset ;
u64 end = bucket_gens_pos_to_alloc ( bpos_nosnap_successor ( k . k - > p ) , 0 ) . offset ;
2022-11-26 07:14:30 +03:00
2023-07-09 05:27:03 +03:00
if ( k . k - > type ! = KEY_TYPE_bucket_gens )
continue ;
2022-11-26 07:14:30 +03:00
2023-07-09 05:27:03 +03:00
g = bkey_s_c_to_bucket_gens ( k ) . v ;
/*
* Not a fsck error because this is checked / repaired by
* bch2_check_alloc_key ( ) which runs later :
*/
if ( ! bch2_dev_exists2 ( c , k . k - > p . inode ) )
continue ;
ca = bch_dev_bkey_exists ( c , k . k - > p . inode ) ;
2022-11-26 07:14:30 +03:00
2023-07-09 05:27:03 +03:00
for ( b = max_t ( u64 , ca - > mi . first_bucket , start ) ;
b < min_t ( u64 , ca - > mi . nbuckets , end ) ;
b + + )
* bucket_gen ( ca , b ) = g - > gens [ b & KEY_TYPE_BUCKET_GENS_MASK ] ;
2023-12-08 07:28:26 +03:00
0 ;
} ) ) ;
2023-07-09 05:27:03 +03:00
} else {
struct bch_alloc_v4 a ;
2023-12-08 07:28:26 +03:00
ret = for_each_btree_key2 ( trans , iter , BTREE_ID_alloc , POS_MIN ,
BTREE_ITER_PREFETCH , k , ( {
2023-07-09 05:27:03 +03:00
/*
* Not a fsck error because this is checked / repaired by
* bch2_check_alloc_key ( ) which runs later :
*/
if ( ! bch2_dev_bucket_exists ( c , k . k - > p ) )
continue ;
2022-11-26 07:14:30 +03:00
2023-07-09 05:27:03 +03:00
ca = bch_dev_bkey_exists ( c , k . k - > p . inode ) ;
* bucket_gen ( ca , k . k - > p . offset ) = bch2_alloc_to_v4 ( k , & a ) - > gen ;
2023-12-08 07:28:26 +03:00
0 ;
} ) ) ;
2022-11-26 07:14:30 +03:00
}
2023-09-13 00:16:02 +03:00
bch2_trans_put ( trans ) ;
2023-07-09 05:27:03 +03:00
up_read ( & c - > gc_lock ) ;
2022-11-26 07:14:30 +03:00
2023-12-08 07:28:26 +03:00
bch_err_fn ( c , ret ) ;
2022-11-26 07:14:30 +03:00
return ret ;
}
2021-12-12 01:13:09 +03:00
/* Free space/discard btree: */
static int bch2_bucket_do_index ( struct btree_trans * trans ,
struct bkey_s_c alloc_k ,
2022-04-01 08:29:59 +03:00
const struct bch_alloc_v4 * a ,
2021-12-12 01:13:09 +03:00
bool set )
{
struct bch_fs * c = trans - > c ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , alloc_k . k - > p . inode ) ;
struct btree_iter iter ;
struct bkey_s_c old ;
struct bkey_i * k ;
enum btree_id btree ;
enum bch_bkey_type old_type = ! set ? KEY_TYPE_set : KEY_TYPE_deleted ;
enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted ;
struct printbuf buf = PRINTBUF ;
int ret ;
2022-04-01 08:29:59 +03:00
if ( a - > data_type ! = BCH_DATA_free & &
a - > data_type ! = BCH_DATA_need_discard )
2021-12-12 01:13:09 +03:00
return 0 ;
2022-11-24 06:13:19 +03:00
k = bch2_trans_kmalloc_nomemzero ( trans , sizeof ( * k ) ) ;
2021-12-12 01:13:09 +03:00
if ( IS_ERR ( k ) )
return PTR_ERR ( k ) ;
bkey_init ( & k - > k ) ;
k - > k . type = new_type ;
2022-04-01 08:29:59 +03:00
switch ( a - > data_type ) {
case BCH_DATA_free :
2021-12-12 01:13:09 +03:00
btree = BTREE_ID_freespace ;
2022-04-01 08:29:59 +03:00
k - > k . p = alloc_freespace_pos ( alloc_k . k - > p , * a ) ;
2021-12-12 01:13:09 +03:00
bch2_key_resize ( & k - > k , 1 ) ;
break ;
2022-04-01 08:29:59 +03:00
case BCH_DATA_need_discard :
2021-12-12 01:13:09 +03:00
btree = BTREE_ID_need_discard ;
k - > k . p = alloc_k . k - > p ;
break ;
default :
return 0 ;
}
2023-04-30 02:33:09 +03:00
old = bch2_bkey_get_iter ( trans , & iter , btree ,
2021-12-12 01:13:09 +03:00
bkey_start_pos ( & k - > k ) ,
BTREE_ITER_INTENT ) ;
ret = bkey_err ( old ) ;
if ( ret )
2023-04-30 02:33:09 +03:00
return ret ;
2021-12-12 01:13:09 +03:00
if ( ca - > mi . freespace_initialized & &
2023-07-07 09:42:28 +03:00
c - > curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info & &
2022-04-11 01:12:04 +03:00
bch2_trans_inconsistent_on ( old . k - > type ! = old_type , trans ,
2023-06-18 20:25:35 +03:00
" incorrect key when %s %s:%llu:%llu:0 (got %s should be %s) \n "
2021-12-12 01:13:09 +03:00
" for %s " ,
set ? " setting " : " clearing " ,
2023-10-20 05:49:08 +03:00
bch2_btree_id_str ( btree ) ,
2023-06-18 20:25:35 +03:00
iter . pos . inode ,
iter . pos . offset ,
2021-12-12 01:13:09 +03:00
bch2_bkey_types [ old . k - > type ] ,
bch2_bkey_types [ old_type ] ,
( bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) {
ret = - EIO ;
goto err ;
}
ret = bch2_trans_update ( trans , & iter , k , 0 ) ;
err :
bch2_trans_iter_exit ( trans , & iter ) ;
printbuf_exit ( & buf ) ;
return ret ;
}
2022-11-26 07:14:30 +03:00
/*
 * Record a new generation number for @bucket in the bucket_gens btree.
 *
 * The covering bucket_gens key is looked up; if one exists it is copied,
 * otherwise a fresh key is initialised at the iterator position. The slot
 * for @bucket is then set to @gen and the key is queued as an update.
 *
 * Returns 0 or a negative error code.
 */
static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
					   struct bpos bucket, u8 gen)
{
	struct btree_iter iter;
	unsigned slot;
	struct bpos gens_pos = alloc_gens_pos(bucket, &slot);
	struct bkey_i_bucket_gens *new_gens;
	struct bkey_s_c existing;
	int ret;

	new_gens = bch2_trans_kmalloc(trans, sizeof(*new_gens));
	ret = PTR_ERR_OR_ZERO(new_gens);
	if (ret)
		return ret;

	existing = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, gens_pos,
				      BTREE_ITER_INTENT|
				      BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(existing);
	if (ret)
		return ret;

	if (existing.k->type == KEY_TYPE_bucket_gens) {
		bkey_reassemble(&new_gens->k_i, existing);
	} else {
		bkey_bucket_gens_init(&new_gens->k_i);
		new_gens->k.p = iter.pos;
	}

	new_gens->v.gens[slot] = gen;

	ret = bch2_trans_update(trans, &iter, &new_gens->k_i, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2021-12-12 01:13:09 +03:00
int bch2_trans_mark_alloc ( struct btree_trans * trans ,
2022-04-01 04:44:55 +03:00
enum btree_id btree_id , unsigned level ,
2021-12-12 01:13:09 +03:00
struct bkey_s_c old , struct bkey_i * new ,
unsigned flags )
{
struct bch_fs * c = trans - > c ;
2023-01-31 04:58:43 +03:00
struct bch_alloc_v4 old_a_convert , * new_a ;
const struct bch_alloc_v4 * old_a ;
2021-12-12 01:13:09 +03:00
u64 old_lru , new_lru ;
int ret = 0 ;
/*
* Deletion only happens in the device removal path , with
* BTREE_TRIGGER_NORUN :
*/
BUG_ON ( new - > k . type ! = KEY_TYPE_alloc_v4 ) ;
2023-01-31 04:58:43 +03:00
old_a = bch2_alloc_to_v4 ( old , & old_a_convert ) ;
2021-12-12 01:13:09 +03:00
new_a = & bkey_i_to_alloc_v4 ( new ) - > v ;
2022-04-01 08:29:59 +03:00
new_a - > data_type = alloc_data_type ( * new_a , new_a - > data_type ) ;
2023-11-24 02:05:18 +03:00
if ( bch2_bucket_sectors ( * new_a ) > bch2_bucket_sectors ( * old_a ) ) {
2021-12-12 01:13:09 +03:00
new_a - > io_time [ READ ] = max_t ( u64 , 1 , atomic64_read ( & c - > io_clock [ READ ] . now ) ) ;
new_a - > io_time [ WRITE ] = max_t ( u64 , 1 , atomic64_read ( & c - > io_clock [ WRITE ] . now ) ) ;
SET_BCH_ALLOC_V4_NEED_INC_GEN ( new_a , true ) ;
2022-02-10 12:32:19 +03:00
SET_BCH_ALLOC_V4_NEED_DISCARD ( new_a , true ) ;
2021-12-12 01:13:09 +03:00
}
2022-04-01 08:29:59 +03:00
if ( data_type_is_empty ( new_a - > data_type ) & &
BCH_ALLOC_V4_NEED_INC_GEN ( new_a ) & &
2021-12-12 01:13:09 +03:00
! bch2_bucket_is_open_safe ( c , new - > k . p . inode , new - > k . p . offset ) ) {
new_a - > gen + + ;
SET_BCH_ALLOC_V4_NEED_INC_GEN ( new_a , false ) ;
}
2023-01-31 04:58:43 +03:00
if ( old_a - > data_type ! = new_a - > data_type | |
2022-04-01 08:29:59 +03:00
( new_a - > data_type = = BCH_DATA_free & &
2023-01-31 04:58:43 +03:00
alloc_freespace_genbits ( * old_a ) ! = alloc_freespace_genbits ( * new_a ) ) ) {
ret = bch2_bucket_do_index ( trans , old , old_a , false ) ? :
2022-04-01 08:29:59 +03:00
bch2_bucket_do_index ( trans , bkey_i_to_s_c ( new ) , new_a , true ) ;
2021-12-12 01:13:09 +03:00
if ( ret )
return ret ;
}
2022-04-11 02:59:26 +03:00
if ( new_a - > data_type = = BCH_DATA_cached & &
! new_a - > io_time [ READ ] )
new_a - > io_time [ READ ] = max_t ( u64 , 1 , atomic64_read ( & c - > io_clock [ READ ] . now ) ) ;
2022-12-05 18:24:19 +03:00
old_lru = alloc_lru_idx_read ( * old_a ) ;
new_lru = alloc_lru_idx_read ( * new_a ) ;
2021-12-12 01:13:09 +03:00
if ( old_lru ! = new_lru ) {
2022-12-06 00:49:13 +03:00
ret = bch2_lru_change ( trans , new - > k . p . inode ,
bucket_to_u64 ( new - > k . p ) ,
old_lru , new_lru ) ;
2021-12-12 01:13:09 +03:00
if ( ret )
return ret ;
}
2022-12-05 18:24:19 +03:00
new_a - > fragmentation_lru = alloc_lru_idx_fragmentation ( * new_a ,
bch_dev_bkey_exists ( c , new - > k . p . inode ) ) ;
if ( old_a - > fragmentation_lru ! = new_a - > fragmentation_lru ) {
ret = bch2_lru_change ( trans ,
BCH_LRU_FRAGMENTATION_START ,
bucket_to_u64 ( new - > k . p ) ,
old_a - > fragmentation_lru , new_a - > fragmentation_lru ) ;
if ( ret )
return ret ;
}
2022-11-26 07:14:30 +03:00
if ( old_a - > gen ! = new_a - > gen ) {
ret = bch2_bucket_gen_update ( trans , new - > k . p , new_a - > gen ) ;
if ( ret )
return ret ;
}
2023-11-12 01:40:45 +03:00
/*
* need to know if we ' re getting called from the invalidate path or
* not :
*/
if ( ( flags & BTREE_TRIGGER_BUCKET_INVALIDATE ) & &
old_a - > cached_sectors ) {
ret = bch2_update_cached_sectors_list ( trans , new - > k . p . inode ,
- ( ( s64 ) old_a - > cached_sectors ) ) ;
if ( ret )
return ret ;
}
2021-12-12 01:13:09 +03:00
return 0 ;
}
2022-11-30 21:25:17 +03:00
/*
 * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for
 * extents style btrees, but works on non-extents btrees:
 */
2023-07-07 05:47:42 +03:00
static struct bkey_s_c bch2_get_key_or_hole ( struct btree_iter * iter , struct bpos end , struct bkey * hole )
2022-11-30 21:25:17 +03:00
{
struct bkey_s_c k = bch2_btree_iter_peek_slot ( iter ) ;
if ( bkey_err ( k ) )
return k ;
if ( k . k - > type ) {
return k ;
} else {
struct btree_iter iter2 ;
struct bpos next ;
bch2_trans_copy_iter ( & iter2 , iter ) ;
2023-03-29 02:37:25 +03:00
if ( ! bpos_eq ( iter - > path - > l [ 0 ] . b - > key . k . p , SPOS_MAX ) )
end = bkey_min ( end , bpos_nosnap_successor ( iter - > path - > l [ 0 ] . b - > key . k . p ) ) ;
end = bkey_min ( end , POS ( iter - > pos . inode , iter - > pos . offset + U32_MAX - 1 ) ) ;
/*
* btree node min / max is a closed interval , upto takes a half
* open interval :
*/
k = bch2_btree_iter_peek_upto ( & iter2 , end ) ;
2022-11-30 21:25:17 +03:00
next = iter2 . pos ;
bch2_trans_iter_exit ( iter - > trans , & iter2 ) ;
BUG_ON ( next . offset > = iter - > pos . offset + U32_MAX ) ;
if ( bkey_err ( k ) )
return k ;
bkey_init ( hole ) ;
hole - > p = iter - > pos ;
bch2_key_resize ( hole , next . offset - iter - > pos . offset ) ;
return ( struct bkey_s_c ) { hole , NULL } ;
}
}
static bool next_bucket ( struct bch_fs * c , struct bpos * bucket )
{
struct bch_dev * ca ;
unsigned iter ;
if ( bch2_dev_bucket_exists ( c , * bucket ) )
return true ;
if ( bch2_dev_exists2 ( c , bucket - > inode ) ) {
ca = bch_dev_bkey_exists ( c , bucket - > inode ) ;
if ( bucket - > offset < ca - > mi . first_bucket ) {
bucket - > offset = ca - > mi . first_bucket ;
return true ;
}
bucket - > inode + + ;
bucket - > offset = 0 ;
}
rcu_read_lock ( ) ;
iter = bucket - > inode ;
ca = __bch2_next_dev ( c , & iter , NULL ) ;
if ( ca )
2023-03-11 23:52:37 +03:00
* bucket = POS ( ca - > dev_idx , ca - > mi . first_bucket ) ;
2022-11-30 21:25:17 +03:00
rcu_read_unlock ( ) ;
return ca ! = NULL ;
}
2023-07-07 05:47:42 +03:00
static struct bkey_s_c bch2_get_key_or_real_bucket_hole ( struct btree_iter * iter , struct bkey * hole )
2022-11-30 21:25:17 +03:00
{
struct bch_fs * c = iter - > trans - > c ;
struct bkey_s_c k ;
again :
k = bch2_get_key_or_hole ( iter , POS_MAX , hole ) ;
if ( bkey_err ( k ) )
return k ;
if ( ! k . k - > type ) {
struct bpos bucket = bkey_start_pos ( k . k ) ;
if ( ! bch2_dev_bucket_exists ( c , bucket ) ) {
if ( ! next_bucket ( c , & bucket ) )
return bkey_s_c_null ;
bch2_btree_iter_set_pos ( iter , bucket ) ;
goto again ;
}
if ( ! bch2_dev_bucket_exists ( c , k . k - > p ) ) {
struct bch_dev * ca = bch_dev_bkey_exists ( c , bucket . inode ) ;
bch2_key_resize ( hole , ca - > mi . nbuckets - bucket . offset ) ;
}
}
return k ;
}
2023-06-27 05:26:04 +03:00
static noinline_for_stack
int bch2_check_alloc_key ( struct btree_trans * trans ,
struct bkey_s_c alloc_k ,
struct btree_iter * alloc_iter ,
struct btree_iter * discard_iter ,
struct btree_iter * freespace_iter ,
struct btree_iter * bucket_gens_iter )
2022-02-17 11:11:39 +03:00
{
struct bch_fs * c = trans - > c ;
2022-04-05 20:44:18 +03:00
struct bch_dev * ca ;
2023-01-31 04:58:43 +03:00
struct bch_alloc_v4 a_convert ;
const struct bch_alloc_v4 * a ;
2022-02-17 11:11:39 +03:00
unsigned discard_key_type , freespace_key_type ;
2022-11-26 07:14:30 +03:00
unsigned gens_offset ;
2022-11-30 21:25:17 +03:00
struct bkey_s_c k ;
2022-02-17 11:11:39 +03:00
struct printbuf buf = PRINTBUF ;
int ret ;
2022-04-05 20:44:18 +03:00
if ( fsck_err_on ( ! bch2_dev_bucket_exists ( c , alloc_k . k - > p ) , c ,
2023-10-25 03:44:36 +03:00
alloc_key_to_missing_dev_bucket ,
2022-04-09 22:15:36 +03:00
" alloc key for invalid device:bucket %llu:%llu " ,
alloc_k . k - > p . inode , alloc_k . k - > p . offset ) )
2022-04-05 20:44:18 +03:00
return bch2_btree_delete_at ( trans , alloc_iter , 0 ) ;
ca = bch_dev_bkey_exists ( c , alloc_k . k - > p . inode ) ;
if ( ! ca - > mi . freespace_initialized )
return 0 ;
2023-01-31 04:58:43 +03:00
a = bch2_alloc_to_v4 ( alloc_k , & a_convert ) ;
2022-04-05 20:44:18 +03:00
2022-11-26 07:14:30 +03:00
discard_key_type = a - > data_type = = BCH_DATA_need_discard ? KEY_TYPE_set : 0 ;
2022-06-20 05:43:00 +03:00
bch2_btree_iter_set_pos ( discard_iter , alloc_k . k - > p ) ;
k = bch2_btree_iter_peek_slot ( discard_iter ) ;
2022-02-17 11:11:39 +03:00
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
2022-07-12 04:06:52 +03:00
if ( k . k - > type ! = discard_key_type & &
( c - > opts . reconstruct_alloc | |
2023-10-25 03:44:36 +03:00
fsck_err ( c , need_discard_key_wrong ,
" incorrect key in need_discard btree (got %s should be %s) \n "
2022-07-12 04:06:52 +03:00
" %s " ,
bch2_bkey_types [ k . k - > type ] ,
bch2_bkey_types [ discard_key_type ] ,
( bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) ) {
2022-02-17 11:11:39 +03:00
struct bkey_i * update =
bch2_trans_kmalloc ( trans , sizeof ( * update ) ) ;
ret = PTR_ERR_OR_ZERO ( update ) ;
if ( ret )
goto err ;
bkey_init ( & update - > k ) ;
update - > k . type = discard_key_type ;
2022-06-20 05:43:00 +03:00
update - > k . p = discard_iter - > pos ;
2022-02-17 11:11:39 +03:00
2022-06-20 05:43:00 +03:00
ret = bch2_trans_update ( trans , discard_iter , update , 0 ) ;
2022-02-17 11:11:39 +03:00
if ( ret )
goto err ;
}
2022-11-26 07:14:30 +03:00
freespace_key_type = a - > data_type = = BCH_DATA_free ? KEY_TYPE_set : 0 ;
bch2_btree_iter_set_pos ( freespace_iter , alloc_freespace_pos ( alloc_k . k - > p , * a ) ) ;
2022-06-20 05:43:00 +03:00
k = bch2_btree_iter_peek_slot ( freespace_iter ) ;
2022-02-17 11:11:39 +03:00
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
2022-07-12 04:06:52 +03:00
if ( k . k - > type ! = freespace_key_type & &
( c - > opts . reconstruct_alloc | |
2023-10-25 03:44:36 +03:00
fsck_err ( c , freespace_key_wrong ,
" incorrect key in freespace btree (got %s should be %s) \n "
2022-07-12 04:06:52 +03:00
" %s " ,
bch2_bkey_types [ k . k - > type ] ,
bch2_bkey_types [ freespace_key_type ] ,
( printbuf_reset ( & buf ) ,
bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) ) {
2022-02-17 11:11:39 +03:00
struct bkey_i * update =
bch2_trans_kmalloc ( trans , sizeof ( * update ) ) ;
ret = PTR_ERR_OR_ZERO ( update ) ;
if ( ret )
goto err ;
bkey_init ( & update - > k ) ;
update - > k . type = freespace_key_type ;
2022-06-20 05:43:00 +03:00
update - > k . p = freespace_iter - > pos ;
2022-02-17 11:11:39 +03:00
bch2_key_resize ( & update - > k , 1 ) ;
2022-06-20 05:43:00 +03:00
ret = bch2_trans_update ( trans , freespace_iter , update , 0 ) ;
2022-02-17 11:11:39 +03:00
if ( ret )
goto err ;
}
2022-11-26 07:14:30 +03:00
bch2_btree_iter_set_pos ( bucket_gens_iter , alloc_gens_pos ( alloc_k . k - > p , & gens_offset ) ) ;
k = bch2_btree_iter_peek_slot ( bucket_gens_iter ) ;
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
if ( a - > gen ! = alloc_gen ( k , gens_offset ) & &
( c - > opts . reconstruct_alloc | |
2023-10-25 03:44:36 +03:00
fsck_err ( c , bucket_gens_key_wrong ,
" incorrect gen in bucket_gens btree (got %u should be %u) \n "
2022-11-26 07:14:30 +03:00
" %s " ,
alloc_gen ( k , gens_offset ) , a - > gen ,
( printbuf_reset ( & buf ) ,
bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) ) {
struct bkey_i_bucket_gens * g =
bch2_trans_kmalloc ( trans , sizeof ( * g ) ) ;
ret = PTR_ERR_OR_ZERO ( g ) ;
if ( ret )
goto err ;
if ( k . k - > type = = KEY_TYPE_bucket_gens ) {
bkey_reassemble ( & g - > k_i , k ) ;
} else {
bkey_bucket_gens_init ( & g - > k_i ) ;
g - > k . p = alloc_gens_pos ( alloc_k . k - > p , & gens_offset ) ;
}
g - > v . gens [ gens_offset ] = a - > gen ;
ret = bch2_trans_update ( trans , bucket_gens_iter , & g - > k_i , 0 ) ;
if ( ret )
goto err ;
}
2022-02-17 11:11:39 +03:00
err :
fsck_err :
printbuf_exit ( & buf ) ;
return ret ;
}
2023-06-27 05:26:04 +03:00
static noinline_for_stack
int bch2_check_alloc_hole_freespace ( struct btree_trans * trans ,
struct bpos start ,
struct bpos * end ,
struct btree_iter * freespace_iter )
2022-11-30 21:25:17 +03:00
{
struct bch_fs * c = trans - > c ;
struct bch_dev * ca ;
struct bkey_s_c k ;
struct printbuf buf = PRINTBUF ;
int ret ;
ca = bch_dev_bkey_exists ( c , start . inode ) ;
if ( ! ca - > mi . freespace_initialized )
return 0 ;
bch2_btree_iter_set_pos ( freespace_iter , start ) ;
k = bch2_btree_iter_peek_slot ( freespace_iter ) ;
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
* end = bkey_min ( k . k - > p , * end ) ;
if ( k . k - > type ! = KEY_TYPE_set & &
( c - > opts . reconstruct_alloc | |
2023-10-25 03:44:36 +03:00
fsck_err ( c , freespace_hole_missing ,
" hole in alloc btree missing in freespace btree \n "
2022-11-30 21:25:17 +03:00
" device %llu buckets %llu-%llu " ,
freespace_iter - > pos . inode ,
freespace_iter - > pos . offset ,
end - > offset ) ) ) {
struct bkey_i * update =
bch2_trans_kmalloc ( trans , sizeof ( * update ) ) ;
ret = PTR_ERR_OR_ZERO ( update ) ;
if ( ret )
goto err ;
bkey_init ( & update - > k ) ;
update - > k . type = KEY_TYPE_set ;
update - > k . p = freespace_iter - > pos ;
bch2_key_resize ( & update - > k ,
min_t ( u64 , U32_MAX , end - > offset -
freespace_iter - > pos . offset ) ) ;
ret = bch2_trans_update ( trans , freespace_iter , update , 0 ) ;
if ( ret )
goto err ;
}
err :
fsck_err :
printbuf_exit ( & buf ) ;
return ret ;
}
2023-06-27 05:26:04 +03:00
static noinline_for_stack
int bch2_check_alloc_hole_bucket_gens ( struct btree_trans * trans ,
struct bpos start ,
struct bpos * end ,
struct btree_iter * bucket_gens_iter )
2022-11-26 07:14:30 +03:00
{
struct bch_fs * c = trans - > c ;
struct bkey_s_c k ;
struct printbuf buf = PRINTBUF ;
unsigned i , gens_offset , gens_end_offset ;
int ret ;
2023-06-29 02:59:56 +03:00
if ( c - > sb . version < bcachefs_metadata_version_bucket_gens )
2022-11-26 07:14:30 +03:00
return 0 ;
bch2_btree_iter_set_pos ( bucket_gens_iter , alloc_gens_pos ( start , & gens_offset ) ) ;
k = bch2_btree_iter_peek_slot ( bucket_gens_iter ) ;
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
if ( bkey_cmp ( alloc_gens_pos ( start , & gens_offset ) ,
alloc_gens_pos ( * end , & gens_end_offset ) ) )
gens_end_offset = KEY_TYPE_BUCKET_GENS_NR ;
if ( k . k - > type = = KEY_TYPE_bucket_gens ) {
struct bkey_i_bucket_gens g ;
bool need_update = false ;
bkey_reassemble ( & g . k_i , k ) ;
for ( i = gens_offset ; i < gens_end_offset ; i + + ) {
if ( fsck_err_on ( g . v . gens [ i ] , c ,
2023-10-25 03:44:36 +03:00
bucket_gens_hole_wrong ,
2022-11-26 07:14:30 +03:00
" hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u) " ,
bucket_gens_pos_to_alloc ( k . k - > p , i ) . inode ,
bucket_gens_pos_to_alloc ( k . k - > p , i ) . offset ,
g . v . gens [ i ] ) ) {
g . v . gens [ i ] = 0 ;
need_update = true ;
}
}
if ( need_update ) {
2023-09-13 01:41:22 +03:00
struct bkey_i * u = bch2_trans_kmalloc ( trans , sizeof ( g ) ) ;
2022-11-26 07:14:30 +03:00
2023-09-13 01:41:22 +03:00
ret = PTR_ERR_OR_ZERO ( u ) ;
2022-11-26 07:14:30 +03:00
if ( ret )
goto err ;
2023-09-13 01:41:22 +03:00
memcpy ( u , & g , sizeof ( g ) ) ;
2022-11-26 07:14:30 +03:00
2023-09-13 01:41:22 +03:00
ret = bch2_trans_update ( trans , bucket_gens_iter , u , 0 ) ;
2022-11-26 07:14:30 +03:00
if ( ret )
goto err ;
}
}
* end = bkey_min ( * end , bucket_gens_pos_to_alloc ( bpos_nosnap_successor ( k . k - > p ) , 0 ) ) ;
err :
fsck_err :
printbuf_exit ( & buf ) ;
return ret ;
}
2023-11-17 01:28:16 +03:00
static noinline_for_stack int bch2_check_discard_freespace_key ( struct btree_trans * trans ,
2023-06-18 20:25:35 +03:00
struct btree_iter * iter )
2022-02-17 11:11:39 +03:00
{
struct bch_fs * c = trans - > c ;
struct btree_iter alloc_iter ;
2022-07-17 07:44:19 +03:00
struct bkey_s_c alloc_k ;
2023-01-31 04:58:43 +03:00
struct bch_alloc_v4 a_convert ;
const struct bch_alloc_v4 * a ;
2022-02-17 11:11:39 +03:00
u64 genbits ;
struct bpos pos ;
2022-04-01 08:29:59 +03:00
enum bch_data_type state = iter - > btree_id = = BTREE_ID_need_discard
? BCH_DATA_need_discard
: BCH_DATA_free ;
2022-02-17 11:11:39 +03:00
struct printbuf buf = PRINTBUF ;
int ret ;
2022-04-05 20:44:18 +03:00
pos = iter - > pos ;
2022-02-17 11:11:39 +03:00
pos . offset & = ~ ( ~ 0ULL < < 56 ) ;
2022-04-05 20:44:18 +03:00
genbits = iter - > pos . offset & ( ~ 0ULL < < 56 ) ;
2022-02-17 11:11:39 +03:00
2023-04-30 02:33:09 +03:00
alloc_k = bch2_bkey_get_iter ( trans , & alloc_iter , BTREE_ID_alloc , pos , 0 ) ;
ret = bkey_err ( alloc_k ) ;
if ( ret )
return ret ;
2022-02-17 11:11:39 +03:00
if ( fsck_err_on ( ! bch2_dev_bucket_exists ( c , pos ) , c ,
2023-10-25 03:44:36 +03:00
need_discard_freespace_key_to_invalid_dev_bucket ,
2022-04-09 22:15:36 +03:00
" entry in %s btree for nonexistant dev:bucket %llu:%llu " ,
2023-10-20 05:49:08 +03:00
bch2_btree_id_str ( iter - > btree_id ) , pos . inode , pos . offset ) )
2022-02-17 11:11:39 +03:00
goto delete ;
2023-01-31 04:58:43 +03:00
a = bch2_alloc_to_v4 ( alloc_k , & a_convert ) ;
2022-02-17 11:11:39 +03:00
2023-01-31 04:58:43 +03:00
if ( fsck_err_on ( a - > data_type ! = state | |
2022-04-01 08:29:59 +03:00
( state = = BCH_DATA_free & &
2023-01-31 04:58:43 +03:00
genbits ! = alloc_freespace_genbits ( * a ) ) , c ,
2023-10-25 03:44:36 +03:00
need_discard_freespace_key_bad ,
2023-06-18 20:25:35 +03:00
" %s \n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu) " ,
2022-07-17 07:44:19 +03:00
( bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ,
2023-10-20 05:49:08 +03:00
bch2_btree_id_str ( iter - > btree_id ) ,
2023-06-18 20:25:35 +03:00
iter - > pos . inode ,
iter - > pos . offset ,
2023-01-31 04:58:43 +03:00
a - > data_type = = state ,
genbits > > 56 , alloc_freespace_genbits ( * a ) > > 56 ) )
2022-02-17 11:11:39 +03:00
goto delete ;
out :
fsck_err :
2023-06-18 20:25:35 +03:00
set_btree_iter_dontneed ( & alloc_iter ) ;
2022-02-17 11:11:39 +03:00
bch2_trans_iter_exit ( trans , & alloc_iter ) ;
printbuf_exit ( & buf ) ;
return ret ;
delete :
2023-06-18 20:25:35 +03:00
ret = bch2_btree_delete_extent_at ( trans , iter ,
iter - > btree_id = = BTREE_ID_freespace ? 1 : 0 , 0 ) ? :
bch2_trans_commit ( trans , NULL , NULL ,
2023-11-29 00:36:54 +03:00
BCH_TRANS_COMMIT_no_enospc ) ;
2022-02-17 11:11:39 +03:00
goto out ;
}
2022-11-26 07:14:30 +03:00
/*
* We ' ve already checked that generation numbers in the bucket_gens btree are
* valid for buckets that exist ; this just checks for keys for nonexistent
* buckets .
*/
2023-06-27 05:26:04 +03:00
static noinline_for_stack
int bch2_check_bucket_gens_key ( struct btree_trans * trans ,
struct btree_iter * iter ,
struct bkey_s_c k )
2022-11-26 07:14:30 +03:00
{
struct bch_fs * c = trans - > c ;
struct bkey_i_bucket_gens g ;
struct bch_dev * ca ;
u64 start = bucket_gens_pos_to_alloc ( k . k - > p , 0 ) . offset ;
u64 end = bucket_gens_pos_to_alloc ( bpos_nosnap_successor ( k . k - > p ) , 0 ) . offset ;
u64 b ;
2023-04-18 20:05:47 +03:00
bool need_update = false , dev_exists ;
2022-11-26 07:14:30 +03:00
struct printbuf buf = PRINTBUF ;
int ret = 0 ;
BUG_ON ( k . k - > type ! = KEY_TYPE_bucket_gens ) ;
bkey_reassemble ( & g . k_i , k ) ;
2023-04-18 20:05:47 +03:00
/* if no bch_dev, skip out whether we repair or not */
dev_exists = bch2_dev_exists2 ( c , k . k - > p . inode ) ;
if ( ! dev_exists ) {
if ( fsck_err_on ( ! dev_exists , c ,
2023-10-25 03:44:36 +03:00
bucket_gens_to_invalid_dev ,
2023-04-18 20:05:47 +03:00
" bucket_gens key for invalid device: \n %s " ,
( bch2_bkey_val_to_text ( & buf , c , k ) , buf . buf ) ) ) {
ret = bch2_btree_delete_at ( trans , iter , 0 ) ;
}
2022-11-26 07:14:30 +03:00
goto out ;
}
ca = bch_dev_bkey_exists ( c , k . k - > p . inode ) ;
if ( fsck_err_on ( end < = ca - > mi . first_bucket | |
start > = ca - > mi . nbuckets , c ,
2023-10-25 03:44:36 +03:00
bucket_gens_to_invalid_buckets ,
2022-11-26 07:14:30 +03:00
" bucket_gens key for invalid buckets: \n %s " ,
( bch2_bkey_val_to_text ( & buf , c , k ) , buf . buf ) ) ) {
ret = bch2_btree_delete_at ( trans , iter , 0 ) ;
goto out ;
}
for ( b = start ; b < ca - > mi . first_bucket ; b + + )
if ( fsck_err_on ( g . v . gens [ b & KEY_TYPE_BUCKET_GENS_MASK ] , c ,
2023-10-25 03:44:36 +03:00
bucket_gens_nonzero_for_invalid_buckets ,
2022-11-26 07:14:30 +03:00
" bucket_gens key has nonzero gen for invalid bucket " ) ) {
g . v . gens [ b & KEY_TYPE_BUCKET_GENS_MASK ] = 0 ;
need_update = true ;
}
for ( b = ca - > mi . nbuckets ; b < end ; b + + )
if ( fsck_err_on ( g . v . gens [ b & KEY_TYPE_BUCKET_GENS_MASK ] , c ,
2023-10-25 03:44:36 +03:00
bucket_gens_nonzero_for_invalid_buckets ,
2022-11-26 07:14:30 +03:00
" bucket_gens key has nonzero gen for invalid bucket " ) ) {
g . v . gens [ b & KEY_TYPE_BUCKET_GENS_MASK ] = 0 ;
need_update = true ;
}
if ( need_update ) {
2023-09-13 01:41:22 +03:00
struct bkey_i * u = bch2_trans_kmalloc ( trans , sizeof ( g ) ) ;
2022-11-26 07:14:30 +03:00
2023-09-13 01:41:22 +03:00
ret = PTR_ERR_OR_ZERO ( u ) ;
2022-11-26 07:14:30 +03:00
if ( ret )
goto out ;
2023-09-13 01:41:22 +03:00
memcpy ( u , & g , sizeof ( g ) ) ;
ret = bch2_trans_update ( trans , iter , u , 0 ) ;
2022-11-26 07:14:30 +03:00
}
out :
fsck_err :
printbuf_exit ( & buf ) ;
return ret ;
}
2022-04-05 20:44:18 +03:00
int bch2_check_alloc_info ( struct bch_fs * c )
2022-02-17 11:11:39 +03:00
{
2023-09-13 00:16:02 +03:00
struct btree_trans * trans = bch2_trans_get ( c ) ;
2022-11-26 07:14:30 +03:00
struct btree_iter iter , discard_iter , freespace_iter , bucket_gens_iter ;
2022-11-30 21:25:17 +03:00
struct bkey hole ;
2022-07-17 07:44:19 +03:00
struct bkey_s_c k ;
2022-04-05 20:44:18 +03:00
int ret = 0 ;
2022-02-17 11:11:39 +03:00
2023-09-13 00:16:02 +03:00
bch2_trans_iter_init ( trans , & iter , BTREE_ID_alloc , POS_MIN ,
2022-06-20 05:43:00 +03:00
BTREE_ITER_PREFETCH ) ;
2023-09-13 00:16:02 +03:00
bch2_trans_iter_init ( trans , & discard_iter , BTREE_ID_need_discard , POS_MIN ,
2022-06-20 05:43:00 +03:00
BTREE_ITER_PREFETCH ) ;
2023-09-13 00:16:02 +03:00
bch2_trans_iter_init ( trans , & freespace_iter , BTREE_ID_freespace , POS_MIN ,
2022-06-20 05:43:00 +03:00
BTREE_ITER_PREFETCH ) ;
2023-09-13 00:16:02 +03:00
bch2_trans_iter_init ( trans , & bucket_gens_iter , BTREE_ID_bucket_gens , POS_MIN ,
2022-11-26 07:14:30 +03:00
BTREE_ITER_PREFETCH ) ;
2022-11-30 21:25:17 +03:00
2022-06-20 05:43:00 +03:00
while ( 1 ) {
2022-11-30 21:25:17 +03:00
struct bpos next ;
2023-09-13 00:16:02 +03:00
bch2_trans_begin ( trans ) ;
2022-11-30 21:25:17 +03:00
k = bch2_get_key_or_real_bucket_hole ( & iter , & hole ) ;
ret = bkey_err ( k ) ;
2022-04-05 20:44:18 +03:00
if ( ret )
2022-11-30 21:25:17 +03:00
goto bkey_err ;
if ( ! k . k )
2022-04-05 20:44:18 +03:00
break ;
2022-06-20 05:43:00 +03:00
2022-11-30 21:25:17 +03:00
if ( k . k - > type ) {
next = bpos_nosnap_successor ( k . k - > p ) ;
2023-09-13 00:16:02 +03:00
ret = bch2_check_alloc_key ( trans ,
2022-11-30 21:25:17 +03:00
k , & iter ,
& discard_iter ,
2022-11-26 07:14:30 +03:00
& freespace_iter ,
& bucket_gens_iter ) ;
2022-11-30 21:25:17 +03:00
if ( ret )
2022-11-26 07:14:30 +03:00
goto bkey_err ;
2022-11-30 21:25:17 +03:00
} else {
next = k . k - > p ;
2023-09-13 00:16:02 +03:00
ret = bch2_check_alloc_hole_freespace ( trans ,
2022-11-30 21:25:17 +03:00
bkey_start_pos ( k . k ) ,
& next ,
2022-11-26 07:14:30 +03:00
& freespace_iter ) ? :
2023-09-13 00:16:02 +03:00
bch2_check_alloc_hole_bucket_gens ( trans ,
2022-11-26 07:14:30 +03:00
bkey_start_pos ( k . k ) ,
& next ,
& bucket_gens_iter ) ;
2022-11-30 21:25:17 +03:00
if ( ret )
goto bkey_err ;
}
2023-09-13 00:16:02 +03:00
ret = bch2_trans_commit ( trans , NULL , NULL ,
2023-11-29 00:36:54 +03:00
BCH_TRANS_COMMIT_no_enospc ) ;
2022-11-30 21:25:17 +03:00
if ( ret )
goto bkey_err ;
bch2_btree_iter_set_pos ( & iter , next ) ;
bkey_err :
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
continue ;
if ( ret )
break ;
2022-04-05 20:44:18 +03:00
}
2023-09-13 00:16:02 +03:00
bch2_trans_iter_exit ( trans , & bucket_gens_iter ) ;
bch2_trans_iter_exit ( trans , & freespace_iter ) ;
bch2_trans_iter_exit ( trans , & discard_iter ) ;
bch2_trans_iter_exit ( trans , & iter ) ;
2022-02-17 11:11:39 +03:00
2022-06-20 05:43:00 +03:00
if ( ret < 0 )
2022-04-05 20:44:18 +03:00
goto err ;
2022-02-17 11:11:39 +03:00
2023-09-13 00:16:02 +03:00
ret = for_each_btree_key2 ( trans , iter ,
2022-07-17 07:44:19 +03:00
BTREE_ID_need_discard , POS_MIN ,
BTREE_ITER_PREFETCH , k ,
2023-11-17 01:28:16 +03:00
bch2_check_discard_freespace_key ( trans , & iter ) ) ;
if ( ret )
goto err ;
bch2_trans_iter_init ( trans , & iter , BTREE_ID_freespace , POS_MIN ,
BTREE_ITER_PREFETCH ) ;
while ( 1 ) {
bch2_trans_begin ( trans ) ;
k = bch2_btree_iter_peek ( & iter ) ;
if ( ! k . k )
break ;
ret = bkey_err ( k ) ? :
bch2_check_discard_freespace_key ( trans , & iter ) ;
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) ) {
ret = 0 ;
continue ;
}
if ( ret ) {
struct printbuf buf = PRINTBUF ;
bch2_bkey_val_to_text ( & buf , c , k ) ;
bch_err ( c , " while checking %s " , buf . buf ) ;
printbuf_exit ( & buf ) ;
break ;
}
bch2_btree_iter_set_pos ( & iter , bpos_nosnap_successor ( iter . pos ) ) ;
}
bch2_trans_iter_exit ( trans , & iter ) ;
if ( ret )
goto err ;
ret = for_each_btree_key_commit ( trans , iter ,
2022-11-26 07:14:30 +03:00
BTREE_ID_bucket_gens , POS_MIN ,
BTREE_ITER_PREFETCH , k ,
2023-11-29 00:36:54 +03:00
NULL , NULL , BCH_TRANS_COMMIT_no_enospc ,
2023-09-13 00:16:02 +03:00
bch2_check_bucket_gens_key ( trans , & iter , k ) ) ;
2022-02-17 11:11:39 +03:00
err :
2023-09-13 00:16:02 +03:00
bch2_trans_put ( trans ) ;
2023-06-20 20:49:25 +03:00
if ( ret )
bch_err_fn ( c , ret ) ;
return ret ;
2022-02-17 11:11:39 +03:00
}
static int bch2_check_alloc_to_lru_ref ( struct btree_trans * trans ,
struct btree_iter * alloc_iter )
{
struct bch_fs * c = trans - > c ;
struct btree_iter lru_iter ;
2023-01-31 04:58:43 +03:00
struct bch_alloc_v4 a_convert ;
const struct bch_alloc_v4 * a ;
2023-04-30 02:33:09 +03:00
struct bkey_s_c alloc_k , lru_k ;
2022-02-17 11:11:39 +03:00
struct printbuf buf = PRINTBUF ;
int ret ;
alloc_k = bch2_btree_iter_peek ( alloc_iter ) ;
if ( ! alloc_k . k )
return 0 ;
ret = bkey_err ( alloc_k ) ;
if ( ret )
return ret ;
2023-01-31 04:58:43 +03:00
a = bch2_alloc_to_v4 ( alloc_k , & a_convert ) ;
2022-02-17 11:11:39 +03:00
2023-01-31 04:58:43 +03:00
if ( a - > data_type ! = BCH_DATA_cached )
2022-02-17 11:11:39 +03:00
return 0 ;
2023-11-25 23:46:02 +03:00
if ( fsck_err_on ( ! a - > io_time [ READ ] , c ,
alloc_key_cached_but_read_time_zero ,
" cached bucket with read_time 0 \n "
" %s " ,
( printbuf_reset ( & buf ) ,
bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) {
struct bkey_i_alloc_v4 * a_mut =
bch2_alloc_to_v4_mut ( trans , alloc_k ) ;
ret = PTR_ERR_OR_ZERO ( a_mut ) ;
if ( ret )
goto err ;
a_mut - > v . io_time [ READ ] = atomic64_read ( & c - > io_clock [ READ ] . now ) ;
ret = bch2_trans_update ( trans , alloc_iter ,
& a_mut - > k_i , BTREE_TRIGGER_NORUN ) ;
if ( ret )
goto err ;
a = & a_mut - > v ;
}
2023-04-30 02:33:09 +03:00
lru_k = bch2_bkey_get_iter ( trans , & lru_iter , BTREE_ID_lru ,
2022-12-06 00:49:13 +03:00
lru_pos ( alloc_k . k - > p . inode ,
bucket_to_u64 ( alloc_k . k - > p ) ,
a - > io_time [ READ ] ) , 0 ) ;
2023-04-30 02:33:09 +03:00
ret = bkey_err ( lru_k ) ;
2022-02-17 11:11:39 +03:00
if ( ret )
2023-04-30 02:33:09 +03:00
return ret ;
2022-02-17 11:11:39 +03:00
2023-11-25 23:46:02 +03:00
if ( fsck_err_on ( lru_k . k - > type ! = KEY_TYPE_set , c ,
2023-10-25 03:44:36 +03:00
alloc_key_to_missing_lru_entry ,
2022-12-06 00:49:13 +03:00
" missing lru entry \n "
2022-02-17 11:11:39 +03:00
" %s " ,
( printbuf_reset ( & buf ) ,
2022-12-06 00:49:13 +03:00
bch2_bkey_val_to_text ( & buf , c , alloc_k ) , buf . buf ) ) ) {
2022-04-11 02:59:26 +03:00
ret = bch2_lru_set ( trans ,
alloc_k . k - > p . inode ,
2022-12-06 00:49:13 +03:00
bucket_to_u64 ( alloc_k . k - > p ) ,
2023-11-25 23:46:02 +03:00
a - > io_time [ READ ] ) ;
2022-02-17 11:11:39 +03:00
if ( ret )
goto err ;
}
err :
fsck_err :
bch2_trans_iter_exit ( trans , & lru_iter ) ;
printbuf_exit ( & buf ) ;
return ret ;
}
int bch2_check_alloc_to_lru_refs ( struct bch_fs * c )
{
struct btree_iter iter ;
struct bkey_s_c k ;
2023-11-25 23:46:02 +03:00
int ret = bch2_trans_run ( c ,
2023-09-13 00:16:02 +03:00
for_each_btree_key_commit ( trans , iter , BTREE_ID_alloc ,
2023-06-20 20:49:25 +03:00
POS_MIN , BTREE_ITER_PREFETCH , k ,
2023-11-29 00:36:54 +03:00
NULL , NULL , BCH_TRANS_COMMIT_no_enospc ,
2023-09-13 00:16:02 +03:00
bch2_check_alloc_to_lru_ref ( trans , & iter ) ) ) ;
2023-11-25 23:46:02 +03:00
bch_err_fn ( c , ret ) ;
2023-06-20 20:49:25 +03:00
return ret ;
2022-02-17 11:11:39 +03:00
}
2022-07-17 07:31:40 +03:00
/*
 * Process the need_discard entry at @need_discard_iter: issue a discard for
 * the bucket if the device has discards enabled, then clear the bucket's
 * need_discard state in the alloc btree and commit.
 *
 * @discard_pos_done records the last position a discard was issued for, so
 * that a retry of the same position does not issue it again.
 * @seen, @open, @need_journal_commit and @discarded are counters updated for
 * the caller's statistics; @seen is not bumped when the device's io_ref
 * cannot be taken, in which case the iterator is moved to the next device.
 *
 * Returns 0 or a negative error code.
 */
static int bch2_discard_one_bucket(struct btree_trans *trans,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   u64 *seen,
				   u64 *open,
				   u64 *need_journal_commit,
				   u64 *discarded)
{
	struct bch_fs *c = trans->c;
	struct bpos pos = need_discard_iter->pos;
	/* Zeroed so the exit path may call bch2_trans_iter_exit() before setup */
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct bkey_i_alloc_v4 *alloc;
	struct printbuf buf = PRINTBUF;
	struct bch_dev *ca = bch_dev_bkey_exists(c, pos.inode);
	int ret = 0;

	if (!percpu_ref_tryget(&ca->io_ref)) {
		/* No io_ref for this device: move on to the next device */
		bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
		return 0;
	}

	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
		(*open)++;
		goto out;
	}

	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
			c->journal.flushed_seq_ondisk,
			pos.inode, pos.offset)) {
		(*need_journal_commit)++;
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     need_discard_iter->pos,
				     BTREE_ITER_CACHED);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	alloc = bch2_alloc_to_v4_mut(trans, alloc_k);
	ret = PTR_ERR_OR_ZERO(alloc);
	if (ret)
		goto out;

	/* A pending gen increment is applied and written on its own */
	if (BCH_ALLOC_V4_NEED_INC_GEN(&alloc->v)) {
		alloc->v.gen++;
		SET_BCH_ALLOC_V4_NEED_INC_GEN(&alloc->v, false);
		goto write;
	}

	if (alloc->v.journal_seq > c->journal.flushed_seq_ondisk) {
		/* Only treated as an inconsistency once past check_alloc_info */
		if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
			bch2_trans_inconsistent(trans,
				" clearing need_discard but journal_seq %llu > flushed_seq %llu \n "
				" %s ",
				alloc->v.journal_seq,
				c->journal.flushed_seq_ondisk,
				(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf));
			ret = -EIO;
		}
		goto out;
	}

	if (alloc->v.data_type != BCH_DATA_need_discard) {
		if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
			bch2_trans_inconsistent(trans,
				" bucket incorrectly set in need_discard btree \n "
				" %s ",
				(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf));
			ret = -EIO;
		}
		goto out;
	}

	if (!bkey_eq(*discard_pos_done, alloc_iter.pos) &&
	    ca->mi.discard && !c->opts.nochanges) {
		/*
		 * This works without any other locks because this is the only
		 * thread that removes items from the need_discard tree
		 */
		bch2_trans_unlock(trans);
		blkdev_issue_discard(ca->disk_sb.bdev,
				     alloc_k.k->p.offset * ca->mi.bucket_size,
				     ca->mi.bucket_size,
				     GFP_KERNEL);
		*discard_pos_done = alloc_iter.pos;

		ret = bch2_trans_relock_notrace(trans);
		if (ret)
			goto out;
	}

	SET_BCH_ALLOC_V4_NEED_DISCARD(&alloc->v, false);
	alloc->v.data_type = alloc_data_type(alloc->v, alloc->v.data_type);
write:
	ret =   bch2_trans_update(trans, &alloc_iter, &alloc->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_WATERMARK_btree|
				  BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;

	/* Counted only after the commit has succeeded */
	count_event(c, bucket_discard);
	(*discarded)++;
out:
	(*seen)++;
	bch2_trans_iter_exit(trans, &alloc_iter);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
static void bch2_do_discards_work ( struct work_struct * work )
{
struct bch_fs * c = container_of ( work , struct bch_fs , discard_work ) ;
struct btree_iter iter ;
struct bkey_s_c k ;
u64 seen = 0 , open = 0 , need_journal_commit = 0 , discarded = 0 ;
2022-07-17 07:31:40 +03:00
struct bpos discard_pos_done = POS_MAX ;
2022-02-10 12:32:19 +03:00
int ret ;
2022-07-17 07:31:40 +03:00
/*
* We ' re doing the commit in bch2_discard_one_bucket instead of using
* for_each_btree_key_commit ( ) so that we can increment counters after
* successful commit :
*/
2023-09-13 00:16:02 +03:00
ret = bch2_trans_run ( c ,
for_each_btree_key2 ( trans , iter ,
BTREE_ID_need_discard , POS_MIN , 0 , k ,
bch2_discard_one_bucket ( trans , & iter , & discard_pos_done ,
& seen ,
& open ,
& need_journal_commit ,
& discarded ) ) ) ;
2022-02-10 12:32:19 +03:00
if ( need_journal_commit * 2 > seen )
bch2_journal_flush_async ( & c - > journal , NULL ) ;
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_discard ) ;
2022-02-10 12:32:19 +03:00
2022-07-18 05:31:21 +03:00
trace_discard_buckets ( c , seen , open , need_journal_commit , discarded ,
bch2_err_str ( ret ) ) ;
2022-02-10 12:32:19 +03:00
}
void bch2_do_discards ( struct bch_fs * c )
{
2023-02-09 20:21:45 +03:00
if ( bch2_write_ref_tryget ( c , BCH_WRITE_REF_discard ) & &
2023-03-23 21:09:05 +03:00
! queue_work ( c - > write_ref_wq , & c - > discard_work ) )
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_discard ) ;
2022-02-10 12:32:19 +03:00
}
2022-07-17 07:31:40 +03:00
static int invalidate_one_bucket ( struct btree_trans * trans ,
2022-12-06 00:49:13 +03:00
struct btree_iter * lru_iter ,
2023-01-04 07:54:10 +03:00
struct bkey_s_c lru_k ,
2022-12-06 00:49:13 +03:00
s64 * nr_to_invalidate )
2022-02-11 02:18:41 +03:00
{
struct bch_fs * c = trans - > c ;
2022-07-17 07:31:40 +03:00
struct btree_iter alloc_iter = { NULL } ;
2023-01-04 07:54:10 +03:00
struct bkey_i_alloc_v4 * a = NULL ;
2022-04-12 00:23:39 +03:00
struct printbuf buf = PRINTBUF ;
2023-01-04 07:54:10 +03:00
struct bpos bucket = u64_to_bucket ( lru_k . k - > p . offset ) ;
2022-07-17 07:31:40 +03:00
unsigned cached_sectors ;
int ret = 0 ;
2022-02-11 02:18:41 +03:00
2022-12-06 00:49:13 +03:00
if ( * nr_to_invalidate < = 0 )
2022-07-17 07:31:40 +03:00
return 1 ;
2022-02-11 02:18:41 +03:00
2023-01-04 07:54:10 +03:00
if ( ! bch2_dev_bucket_exists ( c , bucket ) ) {
prt_str ( & buf , " lru entry points to invalid bucket " ) ;
goto err ;
}
2023-02-18 04:33:12 +03:00
if ( bch2_bucket_is_open_safe ( c , bucket . inode , bucket . offset ) )
return 0 ;
2022-07-17 07:31:40 +03:00
a = bch2_trans_start_alloc_update ( trans , & alloc_iter , bucket ) ;
2022-02-11 02:18:41 +03:00
ret = PTR_ERR_OR_ZERO ( a ) ;
if ( ret )
goto out ;
2023-02-07 02:51:42 +03:00
/* We expect harmless races here due to the btree write buffer: */
2022-12-05 18:24:19 +03:00
if ( lru_pos_time ( lru_iter - > pos ) ! = alloc_lru_idx_read ( a - > v ) )
2023-02-07 02:51:42 +03:00
goto out ;
2022-07-17 07:31:40 +03:00
2023-02-07 02:51:42 +03:00
BUG_ON ( a - > v . data_type ! = BCH_DATA_cached ) ;
2022-02-11 02:18:41 +03:00
2022-06-21 05:26:41 +03:00
if ( ! a - > v . cached_sectors )
bch_err ( c , " invalidating empty bucket, confused " ) ;
2022-07-17 07:31:40 +03:00
cached_sectors = a - > v . cached_sectors ;
2022-06-21 05:26:41 +03:00
2022-02-11 02:18:41 +03:00
SET_BCH_ALLOC_V4_NEED_INC_GEN ( & a - > v , false ) ;
a - > v . gen + + ;
a - > v . data_type = 0 ;
a - > v . dirty_sectors = 0 ;
a - > v . cached_sectors = 0 ;
a - > v . io_time [ READ ] = atomic64_read ( & c - > io_clock [ READ ] . now ) ;
a - > v . io_time [ WRITE ] = atomic64_read ( & c - > io_clock [ WRITE ] . now ) ;
2022-07-17 07:31:40 +03:00
ret = bch2_trans_update ( trans , & alloc_iter , & a - > k_i ,
BTREE_TRIGGER_BUCKET_INVALIDATE ) ? :
bch2_trans_commit ( trans , NULL , NULL ,
2023-06-28 00:32:48 +03:00
BCH_WATERMARK_btree |
2023-11-12 00:31:50 +03:00
BCH_TRANS_COMMIT_no_enospc ) ;
2022-04-18 01:06:31 +03:00
if ( ret )
goto out ;
2022-07-17 07:31:40 +03:00
2022-08-27 19:48:36 +03:00
trace_and_count ( c , bucket_invalidate , c , bucket . inode , bucket . offset , cached_sectors ) ;
2022-07-17 07:31:40 +03:00
- - * nr_to_invalidate ;
2022-02-11 02:18:41 +03:00
out :
bch2_trans_iter_exit ( trans , & alloc_iter ) ;
2022-04-12 00:23:39 +03:00
printbuf_exit ( & buf ) ;
2022-02-11 02:18:41 +03:00
return ret ;
2023-01-04 07:54:10 +03:00
err :
prt_str ( & buf , " \n lru key: " ) ;
bch2_bkey_val_to_text ( & buf , c , lru_k ) ;
prt_str ( & buf , " \n lru entry: " ) ;
bch2_lru_pos_to_text ( & buf , lru_iter - > pos ) ;
prt_str ( & buf , " \n alloc key: " ) ;
if ( ! a )
bch2_bpos_to_text ( & buf , bucket ) ;
else
bch2_bkey_val_to_text ( & buf , c , bkey_i_to_s_c ( & a - > k_i ) ) ;
bch_err ( c , " %s " , buf . buf ) ;
2023-07-07 09:42:28 +03:00
if ( c - > curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus ) {
2023-01-04 07:54:10 +03:00
bch2_inconsistent_error ( c ) ;
ret = - EINVAL ;
}
goto out ;
2022-02-11 02:18:41 +03:00
}
static void bch2_do_invalidates_work ( struct work_struct * work )
{
struct bch_fs * c = container_of ( work , struct bch_fs , invalidate_work ) ;
struct bch_dev * ca ;
2023-09-13 00:16:02 +03:00
struct btree_trans * trans = bch2_trans_get ( c ) ;
2022-07-17 07:31:40 +03:00
struct btree_iter iter ;
struct bkey_s_c k ;
unsigned i ;
2022-02-11 02:18:41 +03:00
int ret = 0 ;
2023-11-03 03:36:00 +03:00
ret = bch2_btree_write_buffer_tryflush ( trans ) ;
2023-02-07 02:51:42 +03:00
if ( ret )
goto err ;
2022-04-01 08:29:59 +03:00
for_each_member_device ( ca , c , i ) {
s64 nr_to_invalidate =
should_invalidate_buckets ( ca , bch2_dev_usage_read ( ca ) ) ;
2023-09-13 00:16:02 +03:00
ret = for_each_btree_key2_upto ( trans , iter , BTREE_ID_lru ,
2022-12-06 00:49:13 +03:00
lru_pos ( ca - > dev_idx , 0 , 0 ) ,
lru_pos ( ca - > dev_idx , U64_MAX , LRU_TIME_MAX ) ,
BTREE_ITER_INTENT , k ,
2023-09-13 00:16:02 +03:00
invalidate_one_bucket ( trans , & iter , k , & nr_to_invalidate ) ) ;
2022-06-14 02:45:07 +03:00
2022-07-17 07:31:40 +03:00
if ( ret < 0 ) {
percpu_ref_put ( & ca - > ref ) ;
break ;
2022-06-14 02:45:07 +03:00
}
2022-04-01 08:29:59 +03:00
}
2023-02-07 02:51:42 +03:00
err :
2023-09-13 00:16:02 +03:00
bch2_trans_put ( trans ) ;
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_invalidate ) ;
2022-02-11 02:18:41 +03:00
}
void bch2_do_invalidates ( struct bch_fs * c )
{
2023-02-09 20:21:45 +03:00
if ( bch2_write_ref_tryget ( c , BCH_WRITE_REF_invalidate ) & &
2023-03-23 21:09:05 +03:00
! queue_work ( c - > write_ref_wq , & c - > invalidate_work ) )
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_invalidate ) ;
2022-02-11 02:18:41 +03:00
}
2023-09-29 00:57:21 +03:00
/*
 * Populate the freespace-related btrees for a range of buckets on @ca.
 *
 * @c:			filesystem
 * @ca:			member device to scan
 * @bucket_start:	first bucket to consider (clamped up to
 *			ca->mi.first_bucket)
 * @bucket_end:		end of the range; must be <= ca->mi.nbuckets and
 *			>= @bucket_start (both enforced with BUG_ON)
 *
 * Walks the alloc btree from the start position up to @bucket_end.  Existing
 * alloc keys are passed one at a time to bch2_bucket_do_index(); for each hole
 * reported by bch2_get_key_or_hole() a single KEY_TYPE_set key with the hole's
 * position and size is inserted into the freespace btree.  Each step is
 * committed on its own, and transaction restarts simply retry the current
 * position.
 *
 * On success the FREESPACE_INITIALIZED flag is set on the device's member
 * entry under sb_lock; this function does not call bch2_write_super() itself.
 *
 * Returns 0 on success or a negative error code.
 */
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey hole;
	struct bpos end = POS(ca->dev_idx, bucket_end);
	struct bch_member *m;
	unsigned long last_updated = jiffies;
	int ret;

	BUG_ON(bucket_start > bucket_end);
	BUG_ON(bucket_end > ca->mi.nbuckets);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
		BTREE_ITER_PREFETCH);
	/*
	 * Scan the alloc btree for every bucket on @ca, and add buckets to the
	 * freespace/need_discard/need_gc_gens btrees as needed:
	 */
	while (1) {
		/*
		 * Progress report at most once every ten seconds.  time_after()
		 * keeps the comparison correct across jiffies wraparound, which
		 * a plain '<' on the raw counters does not.
		 */
		if (time_after(jiffies, last_updated + HZ * 10)) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			last_updated = jiffies;
		}

		bch2_trans_begin(trans);

		if (bkey_ge(iter.pos, end)) {
			ret = 0;
			break;
		}

		k = bch2_get_key_or_hole(&iter, end, &hole);
		ret = bkey_err(k);
		if (ret)
			goto bkey_err;

		if (k.k->type) {
			/*
			 * We process live keys in the alloc btree one at a
			 * time:
			 */
			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			ret =   bch2_bucket_do_index(trans, k, a, true) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			bch2_btree_iter_advance(&iter);
		} else {
			/*
			 * Hole in the alloc btree: record the whole extent in
			 * the freespace btree with a single key.
			 */
			struct bkey_i *freespace;

			freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
			ret = PTR_ERR_OR_ZERO(freespace);
			if (ret)
				goto bkey_err;

			bkey_init(&freespace->k);
			freespace->k.type	= KEY_TYPE_set;
			freespace->k.p		= k.k->p;
			freespace->k.size	= k.k->size;

			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			/* Resume the scan at the end of the hole */
			bch2_btree_iter_set_pos(&iter, k.k->p);
		}
bkey_err:
		/* Restarts retry from the (unchanged) iterator position */
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	if (ret < 0) {
		bch_err_msg(ca, ret, "initializing free space");
		return ret;
	}

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
	mutex_unlock(&c->sb_lock);

	return 0;
}
int bch2_fs_freespace_init ( struct bch_fs * c )
{
struct bch_dev * ca ;
unsigned i ;
int ret = 0 ;
bool doing_init = false ;
/*
* We can crash during the device add path , so we need to check this on
* every mount :
*/
for_each_member_device ( ca , c , i ) {
if ( ca - > mi . freespace_initialized )
continue ;
if ( ! doing_init ) {
bch_info ( c , " initializing freespace " ) ;
doing_init = true ;
}
2023-09-29 00:57:21 +03:00
ret = bch2_dev_freespace_init ( c , ca , 0 , ca - > mi . nbuckets ) ;
2021-12-12 01:13:09 +03:00
if ( ret ) {
percpu_ref_put ( & ca - > ref ) ;
2023-06-20 20:49:25 +03:00
bch_err_fn ( c , ret ) ;
2021-12-12 01:13:09 +03:00
return ret ;
}
}
if ( doing_init ) {
mutex_lock ( & c - > sb_lock ) ;
bch2_write_super ( c ) ;
mutex_unlock ( & c - > sb_lock ) ;
bch_verbose ( c , " done initializing freespace " ) ;
}
2023-06-20 20:49:25 +03:00
return 0 ;
2021-12-12 01:13:09 +03:00
}
2017-03-17 09:18:50 +03:00
/* Bucket IO clocks: */
2020-10-17 04:39:16 +03:00
/*
 * Set the io_time[@rw] field of a bucket's alloc key to the current value of
 * the filesystem's @rw IO clock.
 *
 * @trans:	btree transaction to use
 * @dev:	device index of the bucket
 * @bucket_nr:	bucket number on @dev
 * @rw:		index into io_time[] / io_clock[]
 *
 * If the stored time already matches the clock, nothing is updated or
 * committed.  Returns 0 on success or an error code.
 */
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret;

	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = atomic64_read(&c->io_clock[rw].now);

	if (a->v.io_time[rw] != now) {
		a->v.io_time[rw] = now;

		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
		if (!ret)
			ret = bch2_trans_commit(trans, NULL, NULL, 0);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2017-03-17 09:18:50 +03:00
/* Startup/shutdown (ro/rw): */
void bch2_recalc_capacity ( struct bch_fs * c )
{
struct bch_dev * ca ;
2021-04-13 16:49:23 +03:00
u64 capacity = 0 , reserved_sectors = 0 , gc_reserve ;
2018-11-05 05:55:35 +03:00
unsigned bucket_size_max = 0 ;
2017-03-17 09:18:50 +03:00
unsigned long ra_pages = 0 ;
2022-01-10 04:48:31 +03:00
unsigned i ;
2017-03-17 09:18:50 +03:00
lockdep_assert_held ( & c - > state_lock ) ;
for_each_online_member ( ca , c , i ) {
struct backing_dev_info * bdi = ca - > disk_sb . bdev - > bd_disk - > bdi ;
ra_pages + = bdi - > ra_pages ;
}
bch2_set_ra_pages ( c , ra_pages ) ;
for_each_rw_member ( ca , c , i ) {
2018-07-24 21:55:05 +03:00
u64 dev_reserve = 0 ;
2017-03-17 09:18:50 +03:00
/*
* We need to reserve buckets ( from the number
* of currently available buckets ) against
* foreground writes so that mainly copygc can
* make forward progress .
*
* We need enough to refill the various reserves
* from scratch - copygc will use its entire
* reserve all at once , then run against when
* its reserve is refilled ( from the formerly
* available buckets ) .
*
* This reserve is just used when considering if
* allocations for foreground writes must wait -
* not - ENOSPC calculations .
*/
2022-01-10 04:48:31 +03:00
dev_reserve + = ca - > nr_btree_reserve * 2 ;
dev_reserve + = ca - > mi . nbuckets > > 6 ; /* copygc reserve */
2017-03-17 09:18:50 +03:00
2018-08-01 21:26:55 +03:00
dev_reserve + = 1 ; /* btree write point */
dev_reserve + = 1 ; /* copygc write point */
dev_reserve + = 1 ; /* rebalance write point */
2017-03-17 09:18:50 +03:00
2018-08-01 21:26:55 +03:00
dev_reserve * = ca - > mi . bucket_size ;
2017-03-17 09:18:50 +03:00
2018-07-24 21:55:05 +03:00
capacity + = bucket_to_sector ( ca , ca - > mi . nbuckets -
ca - > mi . first_bucket ) ;
2017-03-17 09:18:50 +03:00
2018-07-24 21:55:05 +03:00
reserved_sectors + = dev_reserve * 2 ;
2018-11-05 05:55:35 +03:00
bucket_size_max = max_t ( unsigned , bucket_size_max ,
ca - > mi . bucket_size ) ;
2018-08-01 21:26:55 +03:00
}
2017-03-17 09:18:50 +03:00
2018-07-24 21:55:05 +03:00
gc_reserve = c - > opts . gc_reserve_bytes
? c - > opts . gc_reserve_bytes > > 9
: div64_u64 ( capacity * c - > opts . gc_reserve_percent , 100 ) ;
reserved_sectors = max ( gc_reserve , reserved_sectors ) ;
2017-03-17 09:18:50 +03:00
2018-07-24 21:55:05 +03:00
reserved_sectors = min ( reserved_sectors , capacity ) ;
2017-03-17 09:18:50 +03:00
2018-08-01 21:26:55 +03:00
c - > capacity = capacity - reserved_sectors ;
2017-03-17 09:18:50 +03:00
2018-11-05 05:55:35 +03:00
c - > bucket_size_max = bucket_size_max ;
2017-03-17 09:18:50 +03:00
/* Wake up case someone was waiting for buckets */
closure_wake_up ( & c - > freelist_wait ) ;
}
2023-10-30 20:15:36 +03:00
u64 bch2_min_rw_member_capacity ( struct bch_fs * c )
{
struct bch_dev * ca ;
unsigned i ;
u64 ret = U64_MAX ;
for_each_rw_member ( ca , c , i )
ret = min ( ret , ca - > mi . nbuckets * ca - > mi . bucket_size ) ;
return ret ;
}
2017-03-17 09:18:50 +03:00
static bool bch2_dev_has_open_write_point ( struct bch_fs * c , struct bch_dev * ca )
{
struct open_bucket * ob ;
bool ret = false ;
for ( ob = c - > open_buckets ;
ob < c - > open_buckets + ARRAY_SIZE ( c - > open_buckets ) ;
ob + + ) {
spin_lock ( & ob - > lock ) ;
if ( ob - > valid & & ! ob - > on_partial_list & &
2021-12-26 05:21:46 +03:00
ob - > dev = = ca - > dev_idx )
2017-03-17 09:18:50 +03:00
ret = true ;
spin_unlock ( & ob - > lock ) ;
}
return ret ;
}
/* device goes ro: */
void bch2_dev_allocator_remove ( struct bch_fs * c , struct bch_dev * ca )
{
unsigned i ;
/* First, remove device from allocation groups: */
for ( i = 0 ; i < ARRAY_SIZE ( c - > rw_devs ) ; i + + )
clear_bit ( ca - > dev_idx , c - > rw_devs [ i ] . d ) ;
/*
* Capacity is calculated based off of devices in allocation groups :
*/
bch2_recalc_capacity ( c ) ;
2023-03-14 05:01:47 +03:00
bch2_open_buckets_stop ( c , ca , false ) ;
2018-11-01 22:13:19 +03:00
2017-03-17 09:18:50 +03:00
/*
* Wake up threads that were blocked on allocation , so they can notice
* the device can no longer be removed and the capacity has changed :
*/
closure_wake_up ( & c - > freelist_wait ) ;
/*
* journal_res_get ( ) can block waiting for free space in the journal -
* it needs to notice there may not be devices to allocate from anymore :
*/
wake_up ( & c - > journal . wait ) ;
/* Now wait for any in flight writes: */
closure_wait_event ( & c - > open_buckets_wait ,
! bch2_dev_has_open_write_point ( c , ca ) ) ;
}
/* device goes rw: */
void bch2_dev_allocator_add ( struct bch_fs * c , struct bch_dev * ca )
{
unsigned i ;
for ( i = 0 ; i < ARRAY_SIZE ( c - > rw_devs ) ; i + + )
if ( ca - > mi . data_allowed & ( 1 < < i ) )
set_bit ( ca - > dev_idx , c - > rw_devs [ i ] . d ) ;
}
2018-11-05 05:55:35 +03:00
void bch2_fs_allocator_background_init ( struct bch_fs * c )
2017-03-17 09:18:50 +03:00
{
spin_lock_init ( & c - > freelist_lock ) ;
2022-02-10 12:32:19 +03:00
INIT_WORK ( & c - > discard_work , bch2_do_discards_work ) ;
2022-02-11 02:18:41 +03:00
INIT_WORK ( & c - > invalidate_work , bch2_do_invalidates_work ) ;
2017-03-17 09:18:50 +03:00
}