2018-11-01 22:13:19 +03:00
// SPDX-License-Identifier: GPL-2.0
/* erasure coding */
# include "bcachefs.h"
# include "alloc_foreground.h"
2022-10-09 07:29:51 +03:00
# include "backpointers.h"
2020-12-17 23:08:58 +03:00
# include "bkey_buf.h"
2018-11-01 22:13:19 +03:00
# include "bset.h"
# include "btree_gc.h"
# include "btree_update.h"
2022-10-09 07:29:51 +03:00
# include "btree_write_buffer.h"
2018-11-01 22:13:19 +03:00
# include "buckets.h"
# include "disk_groups.h"
# include "ec.h"
# include "error.h"
# include "io.h"
# include "keylist.h"
2019-04-12 05:39:39 +03:00
# include "recovery.h"
2021-11-30 00:38:27 +03:00
# include "replicas.h"
2018-11-01 22:13:19 +03:00
# include "super-io.h"
# include "util.h"
2018-11-23 10:06:18 +03:00
# include <linux/sort.h>
# ifdef __KERNEL__
2018-11-01 22:13:19 +03:00
# include <linux/raid/pq.h>
# include <linux/raid/xor.h>
2018-11-23 10:06:18 +03:00
/*
 * Rebuild block @failed_idx as the XOR of every other block in @data.
 *
 * @disks:	number of block pointers in @data
 * @failed_idx:	index of the block to rebuild; must be < @disks
 * @size:	size of each block, in bytes
 * @data:	array of @disks block pointers
 *
 * The failed block is swapped into slot 0 for the duration of the computation
 * so that all source blocks sit in data[1..disks-1]; the original ordering of
 * @data is restored before returning.
 */
static void raid5_recov(unsigned disks, unsigned failed_idx,
			size_t size, void **data)
{
	unsigned pos, count;

	BUG_ON(failed_idx >= disks);

	swap(data[0], data[failed_idx]);

	/* Seed the destination with the first source, then XOR in the rest: */
	memcpy(data[0], data[1], size);

	/* xor_blocks() is fed in batches of at most MAX_XOR_BLOCKS sources: */
	for (pos = 2; pos < disks; pos += count) {
		count = min_t(unsigned, disks - pos, MAX_XOR_BLOCKS);
		xor_blocks(count, size, data[0], data + pos);
	}

	swap(data[0], data[failed_idx]);
}
/*
 * Generate the redundancy blocks of a stripe.
 *
 * @nd:		number of data blocks
 * @np:		number of redundancy blocks; more than 2 trips the BUG_ON below
 * @size:	size of each block, in bytes
 * @v:		array of @nd + @np block pointers
 *
 * With at least one redundancy block, block @nd is computed by raid5_recov();
 * with two, raid6_call.gen_syndrome() is additionally run over the stripe.
 */
static void raid_gen(int nd, int np, size_t size, void **v)
{
	const int nr_blocks = nd + np;

	if (np >= 1)
		raid5_recov(nr_blocks, nd, size, v);

	if (np >= 2)
		raid6_call.gen_syndrome(nr_blocks, size, v);

	BUG_ON(np > 2);
}
/*
 * Reconstruct failed blocks of a stripe.
 *
 * @nr:		number of failed blocks listed in @ir (0, 1 or 2; anything
 *		else is a BUG)
 * @ir:		indices of the failed blocks
 * @nd:		number of data blocks
 * @np:		number of redundancy blocks
 * @size:	size of each block, in bytes
 * @v:		array of @nd + @np block pointers
 *
 * NOTE(review): the two-failure branches compare ir[1] against @nd first,
 * which presumably relies on @ir being sorted ascending - confirm with callers.
 */
static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
{
	const int nr_blocks = nd + np;

	if (nr == 0)
		return;

	if (nr == 1) {
		if (ir[0] < nd + 1)
			raid5_recov(nd + 1, ir[0], size, v);
		else
			raid6_call.gen_syndrome(nr_blocks, size, v);
		return;
	}

	/* Only single and double failures can be handled: */
	if (nr != 2)
		BUG();

	if (ir[1] < nd) {
		/* data + data failure */
		raid6_2data_recov(nr_blocks, size, ir[0], ir[1], v);
		return;
	}

	if (ir[0] >= nd) {
		/* both failed blocks are redundancy blocks: regenerate them */
		raid_gen(nd, np, size, v);
		return;
	}

	/* data + p/q failure */
	if (ir[1] == nd) {
		/* data + p failure */
		raid6_datap_recov(nr_blocks, size, ir[0], v);
	} else {
		/* data + q failure */
		raid5_recov(nd + 1, ir[0], size, v);
		raid6_call.gen_syndrome(nr_blocks, size, v);
	}
}
# else
# include <raid/raid.h>
# endif
2018-11-01 22:13:19 +03:00
/*
 * Per-bio state for stripe block IO; ec_block_endio() gets back to this
 * wrapper from the embedded bio via container_of().
 */
struct ec_bio {
	struct bch_dev		*ca;	/* device the bio is issued to */
	struct ec_stripe_buf	*buf;	/* stripe buffer being read/written */
	size_t			idx;	/* block index within the stripe */
	struct bio		bio;	/* embedded bio */
};
/* Stripes btree keys: */
2022-04-04 00:50:01 +03:00
int bch2_stripe_invalid ( const struct bch_fs * c , struct bkey_s_c k ,
2022-12-21 03:58:16 +03:00
unsigned flags , struct printbuf * err )
2018-11-01 22:13:19 +03:00
{
2018-11-01 22:10:01 +03:00
const struct bch_stripe * s = bkey_s_c_to_stripe ( k ) . v ;
2022-11-24 11:12:22 +03:00
if ( bkey_eq ( k . k - > p , POS_MIN ) ) {
2023-02-04 05:01:40 +03:00
prt_printf ( err , " stripe at POS_MIN " ) ;
2022-11-20 06:39:08 +03:00
return - BCH_ERR_invalid_bkey ;
2022-04-04 00:50:01 +03:00
}
2021-01-23 02:01:07 +03:00
2022-04-04 00:50:01 +03:00
if ( k . k - > p . inode ) {
2023-02-04 05:01:40 +03:00
prt_printf ( err , " nonzero inode field " ) ;
2022-11-20 06:39:08 +03:00
return - BCH_ERR_invalid_bkey ;
2022-04-04 00:50:01 +03:00
}
2018-11-01 22:13:19 +03:00
2022-04-04 00:50:01 +03:00
if ( bkey_val_bytes ( k . k ) < sizeof ( * s ) ) {
2023-02-04 05:01:40 +03:00
prt_printf ( err , " incorrect value size (%zu < %zu) " ,
2022-04-04 00:50:01 +03:00
bkey_val_bytes ( k . k ) , sizeof ( * s ) ) ;
2022-11-20 06:39:08 +03:00
return - BCH_ERR_invalid_bkey ;
2022-04-04 00:50:01 +03:00
}
2018-11-01 22:13:19 +03:00
2022-04-04 00:50:01 +03:00
if ( bkey_val_u64s ( k . k ) < stripe_val_u64s ( s ) ) {
2023-02-04 05:01:40 +03:00
prt_printf ( err , " incorrect value size (%zu < %u) " ,
2022-04-04 00:50:01 +03:00
bkey_val_u64s ( k . k ) , stripe_val_u64s ( s ) ) ;
2022-11-20 06:39:08 +03:00
return - BCH_ERR_invalid_bkey ;
2022-04-04 00:50:01 +03:00
}
2018-11-01 22:13:19 +03:00
2022-12-21 03:58:16 +03:00
return bch2_bkey_ptrs_invalid ( c , k , flags , err ) ;
2018-11-01 22:13:19 +03:00
}
2018-11-01 22:10:01 +03:00
void bch2_stripe_to_text ( struct printbuf * out , struct bch_fs * c ,
2018-11-01 22:13:19 +03:00
struct bkey_s_c k )
{
2018-11-01 22:10:01 +03:00
const struct bch_stripe * s = bkey_s_c_to_stripe ( k ) . v ;
2023-02-25 03:26:03 +03:00
unsigned i , nr_data = s - > nr_blocks - s - > nr_redundant ;
2018-11-01 22:10:01 +03:00
2023-02-04 05:01:40 +03:00
prt_printf ( out , " algo %u sectors %u blocks %u:%u csum %u gran %u " ,
2018-11-01 22:10:01 +03:00
s - > algorithm ,
le16_to_cpu ( s - > sectors ) ,
2023-02-25 03:26:03 +03:00
nr_data ,
2018-11-01 22:10:01 +03:00
s - > nr_redundant ,
s - > csum_type ,
1U < < s - > csum_granularity_bits ) ;
2023-02-25 03:26:03 +03:00
for ( i = 0 ; i < s - > nr_blocks ; i + + ) {
const struct bch_extent_ptr * ptr = s - > ptrs + i ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , ptr - > dev ) ;
u32 offset ;
u64 b = sector_to_bucket_and_offset ( ca , ptr - > offset , & offset ) ;
prt_printf ( out , " %u:%llu:%u " , ptr - > dev , b , offset ) ;
if ( i < nr_data )
prt_printf ( out , " #%u " , stripe_blockcount_get ( s , i ) ) ;
if ( ptr_stale ( ca , ptr ) )
prt_printf ( out , " stale " ) ;
}
2018-11-01 22:13:19 +03:00
}
2021-01-11 21:51:23 +03:00
/* Returns the extent pointer that matched the stripe (NULL if none); the matched block number is stored in *block: */
2021-12-09 22:19:18 +03:00
static const struct bch_extent_ptr * bkey_matches_stripe ( struct bch_stripe * s ,
struct bkey_s_c k , unsigned * block )
2018-11-01 22:13:19 +03:00
{
2021-01-11 21:51:23 +03:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const struct bch_extent_ptr * ptr ;
unsigned i , nr_data = s - > nr_blocks - s - > nr_redundant ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
bkey_for_each_ptr ( ptrs , ptr )
for ( i = 0 ; i < nr_data ; i + + )
2021-03-13 00:55:28 +03:00
if ( __bch2_ptr_matches_stripe ( & s - > ptrs [ i ] , ptr ,
2021-12-09 22:19:18 +03:00
le16_to_cpu ( s - > sectors ) ) ) {
* block = i ;
return ptr ;
}
2018-11-01 22:13:19 +03:00
2021-12-09 22:19:18 +03:00
return NULL ;
2018-11-01 22:13:19 +03:00
}
2019-05-13 07:30:02 +03:00
/*
 * Returns true if @k is a KEY_TYPE_extent key that already carries a stripe
 * pointer entry referring to stripe @idx; any other key type yields false.
 */
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
	struct bkey_s_c_extent e;
	const union bch_extent_entry *entry;

	if (k.k->type != KEY_TYPE_extent)
		return false;

	e = bkey_s_c_to_extent(k);

	extent_for_each_entry(e, entry) {
		if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr &&
		    entry->stripe_ptr.idx == idx)
			return true;
	}

	return false;
}
2020-12-15 03:41:03 +03:00
/* Stripe bufs: */
2021-01-11 21:51:23 +03:00
static void ec_stripe_buf_exit ( struct ec_stripe_buf * buf )
2020-12-15 03:41:03 +03:00
{
unsigned i ;
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < buf - > key . v . nr_blocks ; i + + ) {
kvpfree ( buf - > data [ i ] , buf - > size < < 9 ) ;
buf - > data [ i ] = NULL ;
2020-12-15 03:41:03 +03:00
}
}
2023-03-05 10:52:40 +03:00
/* XXX: this is a non-mempoolified memory allocation: */
2021-01-11 21:51:23 +03:00
static int ec_stripe_buf_init ( struct ec_stripe_buf * buf ,
2023-03-05 10:52:40 +03:00
unsigned offset , unsigned size )
2020-12-15 03:41:03 +03:00
{
2021-01-11 21:51:23 +03:00
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1U < < v - > csum_granularity_bits ;
unsigned end = offset + size ;
2020-12-15 03:41:03 +03:00
unsigned i ;
2021-01-11 21:51:23 +03:00
BUG_ON ( end > le16_to_cpu ( v - > sectors ) ) ;
offset = round_down ( offset , csum_granularity ) ;
end = min_t ( unsigned , le16_to_cpu ( v - > sectors ) ,
round_up ( end , csum_granularity ) ) ;
2020-12-15 03:41:03 +03:00
2021-01-11 21:51:23 +03:00
buf - > offset = offset ;
buf - > size = end - offset ;
memset ( buf - > valid , 0xFF , sizeof ( buf - > valid ) ) ;
for ( i = 0 ; i < buf - > key . v . nr_blocks ; i + + ) {
buf - > data [ i ] = kvpmalloc ( buf - > size < < 9 , GFP_KERNEL ) ;
if ( ! buf - > data [ i ] )
2020-12-15 03:41:03 +03:00
goto err ;
}
return 0 ;
err :
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( buf ) ;
2023-03-05 10:52:40 +03:00
return - BCH_ERR_ENOMEM_stripe_buf ;
2020-12-15 03:41:03 +03:00
}
2018-11-01 22:13:19 +03:00
/* Checksumming: */
2021-01-11 21:51:23 +03:00
static struct bch_csum ec_block_checksum ( struct ec_stripe_buf * buf ,
unsigned block , unsigned offset )
2018-11-01 22:13:19 +03:00
{
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1 < < v - > csum_granularity_bits ;
2021-01-11 21:51:23 +03:00
unsigned end = buf - > offset + buf - > size ;
unsigned len = min ( csum_granularity , end - offset ) ;
BUG_ON ( offset > = end ) ;
BUG_ON ( offset < buf - > offset ) ;
BUG_ON ( offset & ( csum_granularity - 1 ) ) ;
BUG_ON ( offset + len ! = le16_to_cpu ( v - > sectors ) & &
( len & ( csum_granularity - 1 ) ) ) ;
return bch2_checksum ( NULL , v - > csum_type ,
null_nonce ( ) ,
buf - > data [ block ] + ( ( offset - buf - > offset ) < < 9 ) ,
len < < 9 ) ;
}
static void ec_generate_checksums ( struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned i , j , csums_per_device = stripe_csums_per_device ( v ) ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( ! v - > csum_type )
2018-11-01 22:13:19 +03:00
return ;
BUG_ON ( buf - > offset ) ;
BUG_ON ( buf - > size ! = le16_to_cpu ( v - > sectors ) ) ;
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
for ( j = 0 ; j < csums_per_device ; j + + )
stripe_csum_set ( v , i , j ,
ec_block_checksum ( buf , i , j < < v - > csum_granularity_bits ) ) ;
2018-11-01 22:13:19 +03:00
}
static void ec_validate_checksums ( struct bch_fs * c , struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1 < < v - > csum_granularity_bits ;
unsigned i ;
2021-01-11 21:51:23 +03:00
if ( ! v - > csum_type )
2018-11-01 22:13:19 +03:00
return ;
for ( i = 0 ; i < v - > nr_blocks ; i + + ) {
unsigned offset = buf - > offset ;
unsigned end = buf - > offset + buf - > size ;
if ( ! test_bit ( i , buf - > valid ) )
continue ;
while ( offset < end ) {
unsigned j = offset > > v - > csum_granularity_bits ;
unsigned len = min ( csum_granularity , end - offset ) ;
2021-01-11 21:51:23 +03:00
struct bch_csum want = stripe_csum_get ( v , i , j ) ;
struct bch_csum got = ec_block_checksum ( buf , i , offset ) ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( bch2_crc_cmp ( want , got ) ) {
2022-02-25 21:18:19 +03:00
struct printbuf buf2 = PRINTBUF ;
2021-01-23 02:01:07 +03:00
2022-02-25 21:18:19 +03:00
bch2_bkey_val_to_text ( & buf2 , c , bkey_i_to_s_c ( & buf - > key . k_i ) ) ;
2021-01-23 02:01:07 +03:00
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2021-01-23 02:01:07 +03:00
" stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx \n %s " ,
( void * ) _RET_IP_ , i , j , v - > csum_type ,
2022-02-25 21:18:19 +03:00
want . lo , got . lo , buf2 . buf ) ;
printbuf_exit ( & buf2 ) ;
2018-11-01 22:13:19 +03:00
clear_bit ( i , buf - > valid ) ;
break ;
}
offset + = len ;
}
}
}
/* Erasure coding: */
static void ec_generate_ec ( struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned nr_data = v - > nr_blocks - v - > nr_redundant ;
unsigned bytes = le16_to_cpu ( v - > sectors ) < < 9 ;
2018-11-23 10:06:18 +03:00
raid_gen ( nr_data , v - > nr_redundant , bytes , buf - > data ) ;
2018-11-01 22:13:19 +03:00
}
static unsigned ec_nr_failed ( struct ec_stripe_buf * buf )
{
2020-12-15 03:41:03 +03:00
return buf - > key . v . nr_blocks -
bitmap_weight ( buf - > valid , buf - > key . v . nr_blocks ) ;
2018-11-01 22:13:19 +03:00
}
static int ec_do_recov ( struct bch_fs * c , struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
2020-12-16 22:23:27 +03:00
unsigned i , failed [ BCH_BKEY_PTRS_MAX ] , nr_failed = 0 ;
2018-11-01 22:13:19 +03:00
unsigned nr_data = v - > nr_blocks - v - > nr_redundant ;
unsigned bytes = buf - > size < < 9 ;
if ( ec_nr_failed ( buf ) > v - > nr_redundant ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2018-11-01 22:13:19 +03:00
" error doing reconstruct read: unable to read enough blocks " ) ;
return - 1 ;
}
for ( i = 0 ; i < nr_data ; i + + )
if ( ! test_bit ( i , buf - > valid ) )
failed [ nr_failed + + ] = i ;
2018-11-23 10:06:18 +03:00
raid_rec ( nr_failed , failed , nr_data , v - > nr_redundant , bytes , buf - > data ) ;
2018-11-01 22:13:19 +03:00
return 0 ;
}
/* IO: */
static void ec_block_endio ( struct bio * bio )
{
struct ec_bio * ec_bio = container_of ( bio , struct ec_bio , bio ) ;
2021-01-23 02:01:07 +03:00
struct bch_stripe * v = & ec_bio - > buf - > key . v ;
struct bch_extent_ptr * ptr = & v - > ptrs [ ec_bio - > idx ] ;
2018-11-01 22:13:19 +03:00
struct bch_dev * ca = ec_bio - > ca ;
struct closure * cl = bio - > bi_private ;
2020-12-03 21:57:22 +03:00
if ( bch2_dev_io_err_on ( bio - > bi_status , ca , " erasure coding %s error: %s " ,
2020-07-02 20:43:58 +03:00
bio_data_dir ( bio ) ? " write " : " read " ,
2020-07-21 20:34:22 +03:00
bch2_blk_status_to_str ( bio - > bi_status ) ) )
2018-11-01 22:13:19 +03:00
clear_bit ( ec_bio - > idx , ec_bio - > buf - > valid ) ;
2021-01-23 02:01:07 +03:00
if ( ptr_stale ( ca , ptr ) ) {
bch_err_ratelimited ( ca - > fs ,
" error %s stripe: stale pointer after io " ,
bio_data_dir ( bio ) = = READ ? " reading from " : " writing to " ) ;
clear_bit ( ec_bio - > idx , ec_bio - > buf - > valid ) ;
}
2018-11-01 22:13:19 +03:00
bio_put ( & ec_bio - > bio ) ;
percpu_ref_put ( & ca - > io_ref ) ;
closure_put ( cl ) ;
}
static void ec_block_io ( struct bch_fs * c , struct ec_stripe_buf * buf ,
unsigned rw , unsigned idx , struct closure * cl )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned offset = 0 , bytes = buf - > size < < 9 ;
struct bch_extent_ptr * ptr = & v - > ptrs [ idx ] ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , ptr - > dev ) ;
2020-07-10 01:31:51 +03:00
enum bch_data_type data_type = idx < buf - > key . v . nr_blocks - buf - > key . v . nr_redundant
? BCH_DATA_user
: BCH_DATA_parity ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( ptr_stale ( ca , ptr ) ) {
bch_err_ratelimited ( c ,
" error %s stripe: stale pointer " ,
rw = = READ ? " reading from " : " writing to " ) ;
clear_bit ( idx , buf - > valid ) ;
return ;
}
2018-11-01 22:13:19 +03:00
if ( ! bch2_dev_get_ioref ( ca , rw ) ) {
clear_bit ( idx , buf - > valid ) ;
return ;
}
2020-07-10 01:31:51 +03:00
this_cpu_add ( ca - > io_done - > sectors [ rw ] [ data_type ] , buf - > size ) ;
2018-11-01 22:13:19 +03:00
while ( offset < bytes ) {
unsigned nr_iovecs = min_t ( size_t , BIO_MAX_VECS ,
DIV_ROUND_UP ( bytes , PAGE_SIZE ) ) ;
unsigned b = min_t ( size_t , bytes - offset ,
nr_iovecs < < PAGE_SHIFT ) ;
struct ec_bio * ec_bio ;
ec_bio = container_of ( bio_alloc_bioset ( ca - > disk_sb . bdev ,
nr_iovecs ,
rw ,
GFP_KERNEL ,
& c - > ec_bioset ) ,
struct ec_bio , bio ) ;
ec_bio - > ca = ca ;
ec_bio - > buf = buf ;
ec_bio - > idx = idx ;
ec_bio - > bio . bi_iter . bi_sector = ptr - > offset + buf - > offset + ( offset > > 9 ) ;
ec_bio - > bio . bi_end_io = ec_block_endio ;
ec_bio - > bio . bi_private = cl ;
2019-07-04 02:27:42 +03:00
bch2_bio_map ( & ec_bio - > bio , buf - > data [ idx ] + offset , b ) ;
2018-11-01 22:13:19 +03:00
closure_get ( cl ) ;
percpu_ref_get ( & ca - > io_ref ) ;
submit_bio ( & ec_bio - > bio ) ;
offset + = b ;
}
percpu_ref_put ( & ca - > io_ref ) ;
}
2023-02-23 03:28:58 +03:00
/*
 * Look up stripe @idx in the stripes btree and copy its key into stripe->key.
 *
 * Returns 0 on success, the lookup error if peeking the slot fails, or
 * -ENOENT if the slot does not hold a stripe key.
 */
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
				struct ec_stripe_buf *stripe)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
			     POS(0, idx), BTREE_ITER_SLOTS);

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (!ret) {
		if (k.k->type == KEY_TYPE_stripe)
			bkey_reassemble(&stripe->key.k_i, k);
		else
			ret = -ENOENT;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2023-02-23 03:28:58 +03:00
/*
 * Wrapper around get_stripe_key_trans() for callers that do not already have
 * a btree transaction: the lookup is run via bch2_trans_run().
 */
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
	int ret = bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe));

	return ret;
}
2021-01-11 21:51:23 +03:00
/* recovery read path: */
int bch2_ec_read_extent ( struct bch_fs * c , struct bch_read_bio * rbio )
{
2018-11-01 22:13:19 +03:00
struct ec_stripe_buf * buf ;
struct closure cl ;
struct bch_stripe * v ;
2021-01-11 21:51:23 +03:00
unsigned i , offset ;
int ret = 0 ;
2018-11-01 22:13:19 +03:00
closure_init_stack ( & cl ) ;
2019-10-09 01:45:29 +03:00
BUG_ON ( ! rbio - > pick . has_ec ) ;
2018-11-01 22:13:19 +03:00
buf = kzalloc ( sizeof ( * buf ) , GFP_NOIO ) ;
if ( ! buf )
return - ENOMEM ;
2021-01-11 21:51:23 +03:00
ret = get_stripe_key ( c , rbio - > pick . ec . idx , buf ) ;
if ( ret ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2021-01-11 21:51:23 +03:00
" error doing reconstruct read: error %i looking up stripe " , ret ) ;
2018-11-01 22:13:19 +03:00
kfree ( buf ) ;
2021-01-11 21:51:23 +03:00
return - EIO ;
2018-11-01 22:13:19 +03:00
}
v = & buf - > key . v ;
2021-01-11 21:51:23 +03:00
if ( ! bch2_ptr_matches_stripe ( v , rbio - > pick ) ) {
bch_err_ratelimited ( c ,
" error doing reconstruct read: pointer doesn't match stripe " ) ;
ret = - EIO ;
goto err ;
2018-11-01 22:13:19 +03:00
}
2021-01-11 21:51:23 +03:00
offset = rbio - > bio . bi_iter . bi_sector - v - > ptrs [ rbio - > pick . ec . block ] . offset ;
if ( offset + bio_sectors ( & rbio - > bio ) > le16_to_cpu ( v - > sectors ) ) {
bch_err_ratelimited ( c ,
" error doing reconstruct read: read is bigger than stripe " ) ;
ret = - EIO ;
goto err ;
}
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
ret = ec_stripe_buf_init ( buf , offset , bio_sectors ( & rbio - > bio ) ) ;
if ( ret )
goto err ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
2018-11-01 22:13:19 +03:00
ec_block_io ( c , buf , REQ_OP_READ , i , & cl ) ;
closure_sync ( & cl ) ;
if ( ec_nr_failed ( buf ) > v - > nr_redundant ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2018-11-01 22:13:19 +03:00
" error doing reconstruct read: unable to read enough blocks " ) ;
ret = - EIO ;
goto err ;
}
ec_validate_checksums ( c , buf ) ;
ret = ec_do_recov ( c , buf ) ;
if ( ret )
goto err ;
memcpy_to_bio ( & rbio - > bio , rbio - > bio . bi_iter ,
2021-01-11 21:51:23 +03:00
buf - > data [ rbio - > pick . ec . block ] + ( ( offset - buf - > offset ) < < 9 ) ) ;
2018-11-01 22:13:19 +03:00
err :
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( buf ) ;
2018-11-01 22:13:19 +03:00
kfree ( buf ) ;
return ret ;
}
2018-11-25 01:09:44 +03:00
/* stripe bucket accounting: */
2018-11-01 22:13:19 +03:00
/*
 * Make sure the in-memory structures can hold stripe @idx: grow the stripes
 * heap if @idx is beyond its current size, and allocate the entry in
 * c->stripes (and in c->gc_stripes while gc is running).
 *
 * @gfp: allocation flags used for every allocation made here
 *
 * Returns 0 on success or -ENOMEM if any allocation fails.
 */
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	if (idx >= heap->size) {
		ec_stripes_heap bigger;

		if (!init_heap(&bigger, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
			return -ENOMEM;

		mutex_lock(&c->ec_stripes_heap_lock);
		/* Recheck under the lock before switching heaps: */
		if (bigger.size > heap->size) {
			memcpy(bigger.data, heap->data,
			       heap->used * sizeof(heap->data[0]));
			bigger.used = heap->used;
			swap(*heap, bigger);
		}
		mutex_unlock(&c->ec_stripes_heap_lock);

		/* After the swap this is the old heap (or the unused new one): */
		free_heap(&bigger);
	}

	if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
		return -ENOMEM;

	if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
	    !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
		return -ENOMEM;

	return 0;
}
2021-08-25 04:30:06 +03:00
static int ec_stripe_mem_alloc ( struct btree_trans * trans ,
2018-11-01 22:13:19 +03:00
struct btree_iter * iter )
{
size_t idx = iter - > pos . offset ;
2021-08-25 04:30:06 +03:00
if ( ! __ec_stripe_mem_alloc ( trans - > c , idx , GFP_NOWAIT | __GFP_NOWARN ) )
2022-07-18 06:06:38 +03:00
return 0 ;
2018-11-01 22:13:19 +03:00
2021-08-25 04:30:06 +03:00
bch2_trans_unlock ( trans ) ;
2019-03-11 21:59:58 +03:00
2022-07-18 06:06:38 +03:00
return __ec_stripe_mem_alloc ( trans - > c , idx , GFP_KERNEL ) ? :
bch2_trans_relock ( trans ) ;
2018-11-01 22:13:19 +03:00
}
2023-02-19 05:07:25 +03:00
/*
 * Hash table of open stripes:
 * Stripes that are being created or modified are kept in a hash table, so that
 * stripe deletion can skip them.
 */
/*
 * Returns true if stripe @idx is present in the c->ec_stripes_new hash table.
 * Takes no locks itself; the callers in this file hold
 * c->ec_stripes_new_lock around it.
 */
static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx)
{
	unsigned bucket = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
	struct ec_stripe_new *open;

	hlist_for_each_entry(open, &c->ec_stripes_new[bucket], hash) {
		if (open->idx == idx)
			return true;
	}

	return false;
}
/*
 * Locked variant of __bch2_stripe_is_open(): takes c->ec_stripes_new_lock
 * around the hash table lookup.
 */
static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
{
	bool is_open;

	spin_lock(&c->ec_stripes_new_lock);
	is_open = __bch2_stripe_is_open(c, idx);
	spin_unlock(&c->ec_stripes_new_lock);

	return is_open;
}
/*
 * Try to register @s as the open stripe for index @idx.
 *
 * If no open stripe with that index exists, s->idx is set and @s is added to
 * the hash table, and true is returned. Otherwise @s is left untouched and
 * false is returned. The check and insert happen under
 * c->ec_stripes_new_lock.
 */
static bool bch2_try_open_stripe(struct bch_fs *c,
				 struct ec_stripe_new *s,
				 u64 idx)
{
	bool opened;

	spin_lock(&c->ec_stripes_new_lock);

	opened = !__bch2_stripe_is_open(c, idx);
	if (opened) {
		unsigned bucket = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));

		s->idx = idx;
		hlist_add_head(&s->hash, &c->ec_stripes_new[bucket]);
	}

	spin_unlock(&c->ec_stripes_new_lock);

	return opened;
}
static void bch2_stripe_close ( struct bch_fs * c , struct ec_stripe_new * s )
{
BUG_ON ( ! s - > idx ) ;
spin_lock ( & c - > ec_stripes_new_lock ) ;
hlist_del_init ( & s - > hash ) ;
spin_unlock ( & c - > ec_stripes_new_lock ) ;
s - > idx = 0 ;
}
/* Heap of all existing stripes, ordered by blocks_nonempty */
2023-02-19 05:31:07 +03:00
static u64 stripe_idx_to_delete ( struct bch_fs * c )
2018-11-01 22:13:19 +03:00
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
2023-02-19 05:07:25 +03:00
size_t heap_idx ;
lockdep_assert_held ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
2023-02-19 05:07:25 +03:00
for ( heap_idx = 0 ; heap_idx < h - > used ; heap_idx + + )
if ( h - > data [ heap_idx ] . blocks_nonempty = = 0 & &
! bch2_stripe_is_open ( c , h - > data [ heap_idx ] . idx ) )
return h - > data [ heap_idx ] . idx ;
return 0 ;
2018-11-01 22:13:19 +03:00
}
/*
 * Heap comparison function: orders entries by blocks_nonempty. Returns 1, -1
 * or 0 when @l has more, fewer or the same number of nonempty blocks as @r.
 * @h is unused.
 */
static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
				      struct ec_stripe_heap_entry l,
				      struct ec_stripe_heap_entry r)
{
	if (l.blocks_nonempty > r.blocks_nonempty)
		return 1;
	if (l.blocks_nonempty < r.blocks_nonempty)
		return -1;
	return 0;
}
/*
 * Heap callback: record in the in-memory stripe (c->stripes) that its heap
 * entry now lives at position @i of the heap.
 */
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
						   size_t i)
{
	struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
	size_t stripe_idx = h->data[i].idx;

	genradix_ptr(&c->stripes, stripe_idx)->heap_idx = i;
}
/*
 * Sanity check that stripe @idx and its heap entry agree: the stripe's
 * heap_idx must be within the used part of the heap, and the entry found
 * there must refer back to @idx.
 */
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;
	struct stripe *stripe = genradix_ptr(&c->stripes, idx);

	BUG_ON(stripe->heap_idx >= heap->used);
	BUG_ON(heap->data[stripe->heap_idx].idx != idx);
}
/*
 * Remove stripe @idx (in-memory state @m) from the stripes heap, under
 * c->ec_stripes_heap_lock. The stripe's backpointer is verified first.
 */
void bch2_stripes_heap_del(struct bch_fs *c,
			   struct stripe *m, size_t idx)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	mutex_lock(&c->ec_stripes_heap_lock);

	heap_verify_backpointer(c, idx);
	heap_del(heap, m->heap_idx,
		 ec_stripes_heap_cmp,
		 ec_stripes_heap_set_backpointer);

	mutex_unlock(&c->ec_stripes_heap_lock);
}
/*
 * Add stripe @idx to the stripes heap, keyed by m->blocks_nonempty, under
 * c->ec_stripes_heap_lock. The heap must not be full; the backpointer is
 * verified after insertion.
 */
void bch2_stripes_heap_insert(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	mutex_lock(&c->ec_stripes_heap_lock);

	BUG_ON(heap_full(heap));

	heap_add(heap, ((struct ec_stripe_heap_entry) {
			.idx			= idx,
			.blocks_nonempty	= m->blocks_nonempty,
		 }),
		 ec_stripes_heap_cmp,
		 ec_stripes_heap_set_backpointer);

	heap_verify_backpointer(c, idx);

	mutex_unlock(&c->ec_stripes_heap_lock);
}
2020-07-07 03:18:13 +03:00
/*
 * Refresh stripe @idx's heap entry after m->blocks_nonempty changed, and
 * restore heap order. If afterwards some stripe is eligible for deletion,
 * stripe deletion is kicked off (after dropping the heap lock).
 */
void bch2_stripes_heap_update(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;
	bool have_deletable;
	size_t pos;

	mutex_lock(&c->ec_stripes_heap_lock);

	heap_verify_backpointer(c, idx);

	heap->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;

	/* Both sifts start from the entry's position before the update: */
	pos = m->heap_idx;
	heap_sift_up(heap, pos, ec_stripes_heap_cmp,
		     ec_stripes_heap_set_backpointer);
	heap_sift_down(heap, pos, ec_stripes_heap_cmp,
		       ec_stripes_heap_set_backpointer);

	heap_verify_backpointer(c, idx);

	have_deletable = stripe_idx_to_delete(c) != 0;

	mutex_unlock(&c->ec_stripes_heap_lock);

	if (have_deletable)
		bch2_do_stripe_deletes(c);
}
2018-11-25 01:09:44 +03:00
/* stripe deletion */
2023-02-19 05:31:07 +03:00
/*
 * Delete stripe @idx from the stripes btree, within @trans.
 *
 * The key must exist and every block count in it must be zero; otherwise the
 * filesystem is flagged inconsistent and -EINVAL is returned. Also returns
 * any error from looking up the key or from bch2_btree_delete_at().
 */
static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_stripe stripe;
	unsigned blk;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, POS(0, idx),
			     BTREE_ITER_INTENT);

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto out;

	if (k.k->type != KEY_TYPE_stripe) {
		bch2_fs_inconsistent(c, " attempting to delete nonexistent stripe %llu ", idx);
		ret = -EINVAL;
		goto out;
	}

	stripe = bkey_s_c_to_stripe(k);

	for (blk = 0; blk < stripe.v->nr_blocks; blk++) {
		struct printbuf buf;

		if (!stripe_blockcount_get(stripe.v, blk))
			continue;

		/* A block still has data in it: refuse to delete */
		buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, k);
		bch2_fs_inconsistent(c, " attempting to delete nonempty stripe %s ", buf.buf);
		printbuf_exit(&buf);
		ret = -EINVAL;
		goto out;
	}

	ret = bch2_btree_delete_at(trans, &iter, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
static void ec_stripe_delete_work ( struct work_struct * work )
{
struct bch_fs * c =
container_of ( work , struct bch_fs , ec_stripe_delete_work ) ;
2023-02-19 05:31:07 +03:00
struct btree_trans trans ;
int ret ;
u64 idx ;
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2018-11-01 22:13:19 +03:00
while ( 1 ) {
2023-02-19 04:49:37 +03:00
mutex_lock ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
idx = stripe_idx_to_delete ( c ) ;
2023-02-19 04:49:37 +03:00
mutex_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
2023-02-19 05:31:07 +03:00
if ( ! idx )
break ;
ret = commit_do ( & trans , NULL , NULL , BTREE_INSERT_NOFAIL ,
ec_stripe_delete ( & trans , idx ) ) ;
if ( ret ) {
bch_err ( c , " %s: err %s " , __func__ , bch2_err_str ( ret ) ) ;
2019-07-10 23:04:58 +03:00
break ;
2023-02-19 05:31:07 +03:00
}
2018-11-01 22:13:19 +03:00
}
2023-02-09 20:22:58 +03:00
2023-02-19 05:31:07 +03:00
bch2_trans_exit ( & trans ) ;
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_stripe_delete ) ;
2023-02-09 20:22:58 +03:00
}
void bch2_do_stripe_deletes ( struct bch_fs * c )
{
2023-02-09 20:21:45 +03:00
if ( bch2_write_ref_tryget ( c , BCH_WRITE_REF_stripe_delete ) & &
2023-02-09 20:22:58 +03:00
! schedule_work ( & c - > ec_stripe_delete_work ) )
2023-02-09 20:21:45 +03:00
bch2_write_ref_put ( c , BCH_WRITE_REF_stripe_delete ) ;
2018-11-01 22:13:19 +03:00
}
2018-11-25 01:09:44 +03:00
/* stripe creation: */
2023-02-19 05:07:25 +03:00
/*
 * Create or update a stripe key in the stripes btree, within @trans.
 *
 * @new:	the stripe key to write
 * @create:	true when creating (the slot must currently be empty), false
 *		when updating (the slot must already hold a stripe)
 *
 * When updating, the block counts of the existing key are carried over into
 * @new; a block with a nonzero count must keep the same dev/gen/offset.
 *
 * Returns 0 on success, -EINVAL if the existing key is not of the expected
 * type or nr_blocks differs, or an error from the btree lookup/update.
 */
static int ec_stripe_key_update(struct btree_trans *trans,
				struct bkey_i_stripe *new,
				bool create)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	unsigned expected_type = create ? KEY_TYPE_deleted : KEY_TYPE_stripe;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
			     new->k.p, BTREE_ITER_INTENT);

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto out;

	if (k.k->type != expected_type) {
		bch2_fs_inconsistent(c, " error %s stripe: got existing key type %s ",
				     create ? " creating " : " updating ",
				     bch2_bkey_types[k.k->type]);
		ret = -EINVAL;
		goto out;
	}

	if (k.k->type == KEY_TYPE_stripe) {
		const struct bch_stripe *old = bkey_s_c_to_stripe(k).v;
		unsigned blk;

		if (old->nr_blocks != new->v.nr_blocks) {
			bch_err(c, " error updating stripe: nr_blocks does not match ");
			ret = -EINVAL;
			goto out;
		}

		for (blk = 0; blk < new->v.nr_blocks; blk++) {
			unsigned count = stripe_blockcount_get(old, blk);

			/* Blocks that hold data must not be relocated: */
			BUG_ON(count &&
			       (old->ptrs[blk].dev	!= new->v.ptrs[blk].dev ||
				old->ptrs[blk].gen	!= new->v.ptrs[blk].gen ||
				old->ptrs[blk].offset	!= new->v.ptrs[blk].offset));

			stripe_blockcount_set(&new->v, blk, count);
		}
	}

	ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2022-07-17 07:44:19 +03:00
static int ec_stripe_update_extent ( struct btree_trans * trans ,
2022-10-09 07:29:51 +03:00
struct bpos bucket , u8 gen ,
2022-07-17 07:44:19 +03:00
struct ec_stripe_buf * s ,
2022-10-09 07:29:51 +03:00
u64 * bp_offset )
2022-07-17 07:44:19 +03:00
{
2022-10-09 07:29:51 +03:00
struct bch_fs * c = trans - > c ;
struct bch_backpointer bp ;
struct btree_iter iter ;
struct bkey_s_c k ;
2022-07-17 07:44:19 +03:00
const struct bch_extent_ptr * ptr_c ;
struct bch_extent_ptr * ptr , * ec_ptr = NULL ;
2023-02-23 01:57:59 +03:00
struct bch_extent_stripe_ptr stripe_ptr ;
2022-07-17 07:44:19 +03:00
struct bkey_i * n ;
int ret , dev , block ;
2022-10-14 14:02:36 +03:00
ret = bch2_get_next_backpointer ( trans , bucket , gen ,
bp_offset , & bp , BTREE_ITER_CACHED ) ;
2022-10-09 07:29:51 +03:00
if ( ret )
return ret ;
if ( * bp_offset = = U64_MAX )
return 0 ;
2022-07-17 07:44:19 +03:00
2023-03-01 07:11:36 +03:00
if ( bp . level ) {
struct printbuf buf = PRINTBUF ;
struct btree_iter node_iter ;
struct btree * b ;
b = bch2_backpointer_get_node ( trans , & node_iter , bucket , * bp_offset , bp ) ;
bch2_trans_iter_exit ( trans , & node_iter ) ;
prt_printf ( & buf , " found btree node in erasure coded bucket: b=%px \n " , b ) ;
bch2_backpointer_to_text ( & buf , & bp ) ;
bch2_fs_inconsistent ( c , " %s " , buf . buf ) ;
printbuf_exit ( & buf ) ;
2022-10-09 07:29:51 +03:00
return - EIO ;
2023-03-01 07:11:36 +03:00
}
2022-10-09 07:29:51 +03:00
k = bch2_backpointer_get_key ( trans , & iter , bucket , * bp_offset , bp ) ;
ret = bkey_err ( k ) ;
if ( ret )
return ret ;
if ( ! k . k ) {
/*
* extent no longer exists - we could flush the btree
* write buffer and retry to verify , but no need :
*/
2022-07-17 07:44:19 +03:00
return 0 ;
2022-10-09 07:29:51 +03:00
}
if ( extent_has_stripe_ptr ( k , s - > key . k . p . offset ) )
goto out ;
2022-07-17 07:44:19 +03:00
ptr_c = bkey_matches_stripe ( & s - > key . v , k , & block ) ;
/*
* It doesn ' t generally make sense to erasure code cached ptrs :
* XXX : should we be incrementing a counter ?
*/
if ( ! ptr_c | | ptr_c - > cached )
2022-10-09 07:29:51 +03:00
goto out ;
2022-07-17 07:44:19 +03:00
dev = s - > key . v . ptrs [ block ] . dev ;
2023-02-23 01:57:59 +03:00
n = bch2_trans_kmalloc ( trans , bkey_bytes ( k . k ) + sizeof ( stripe_ptr ) ) ;
2022-07-17 07:44:19 +03:00
ret = PTR_ERR_OR_ZERO ( n ) ;
if ( ret )
2022-10-09 07:29:51 +03:00
goto out ;
2018-11-01 22:13:19 +03:00
2023-02-23 01:57:59 +03:00
bkey_reassemble ( n , k ) ;
2022-07-17 07:44:19 +03:00
bch2_bkey_drop_ptrs ( bkey_i_to_s ( n ) , ptr , ptr - > dev ! = dev ) ;
ec_ptr = ( void * ) bch2_bkey_has_device ( bkey_i_to_s_c ( n ) , dev ) ;
BUG_ON ( ! ec_ptr ) ;
2018-11-01 22:13:19 +03:00
2023-02-23 01:57:59 +03:00
stripe_ptr = ( struct bch_extent_stripe_ptr ) {
. type = 1 < < BCH_EXTENT_ENTRY_stripe_ptr ,
. block = block ,
. redundancy = s - > key . v . nr_redundant ,
. idx = s - > key . k . p . offset ,
} ;
__extent_entry_insert ( n ,
( union bch_extent_entry * ) ec_ptr ,
( union bch_extent_entry * ) & stripe_ptr ) ;
2021-08-30 02:34:37 +03:00
2022-10-09 07:29:51 +03:00
ret = bch2_trans_update ( trans , & iter , n , 0 ) ;
out :
bch2_trans_iter_exit ( trans , & iter ) ;
return ret ;
2022-07-17 07:44:19 +03:00
}
2018-11-01 22:13:19 +03:00
2022-10-09 07:29:51 +03:00
static int ec_stripe_update_bucket ( struct btree_trans * trans , struct ec_stripe_buf * s ,
unsigned block )
2022-07-17 07:44:19 +03:00
{
2022-10-09 07:29:51 +03:00
struct bch_fs * c = trans - > c ;
struct bch_extent_ptr bucket = s - > key . v . ptrs [ block ] ;
struct bpos bucket_pos = PTR_BUCKET_POS ( c , & bucket ) ;
u64 bp_offset = 0 ;
int ret = 0 ;
while ( 1 ) {
ret = commit_do ( trans , NULL , NULL ,
BTREE_INSERT_NOFAIL ,
ec_stripe_update_extent ( trans , bucket_pos , bucket . gen ,
s , & bp_offset ) ) ;
if ( ret )
break ;
if ( bp_offset = = U64_MAX )
break ;
bp_offset + + ;
}
return ret ;
}
static int ec_stripe_update_extents ( struct bch_fs * c , struct ec_stripe_buf * s )
{
struct btree_trans trans ;
struct bch_stripe * v = & s - > key . v ;
unsigned i , nr_data = v - > nr_blocks - v - > nr_redundant ;
int ret = 0 ;
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2019-03-14 03:49:16 +03:00
2022-10-09 07:29:51 +03:00
ret = bch2_btree_write_buffer_flush ( & trans ) ;
if ( ret )
goto err ;
for ( i = 0 ; i < nr_data ; i + + ) {
ret = ec_stripe_update_bucket ( & trans , s , i ) ;
if ( ret )
break ;
}
err :
bch2_trans_exit ( & trans ) ;
return ret ;
2018-11-01 22:13:19 +03:00
}
2023-02-27 01:12:36 +03:00
static void zero_out_rest_of_ec_bucket ( struct bch_fs * c ,
struct ec_stripe_new * s ,
unsigned block ,
struct open_bucket * ob )
{
struct bch_dev * ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
unsigned offset = ca - > mi . bucket_size - ob - > sectors_free ;
int ret ;
if ( ! bch2_dev_get_ioref ( ca , WRITE ) ) {
s - > err = - EROFS ;
return ;
}
memset ( s - > new_stripe . data [ block ] + ( offset < < 9 ) ,
0 ,
ob - > sectors_free < < 9 ) ;
ret = blkdev_issue_zeroout ( ca - > disk_sb . bdev ,
ob - > bucket * ca - > mi . bucket_size + offset ,
ob - > sectors_free ,
GFP_KERNEL , 0 ) ;
percpu_ref_put ( & ca - > io_ref ) ;
if ( ret )
s - > err = ret ;
}
2018-11-01 22:13:19 +03:00
/*
* data buckets of new stripe all written : create the stripe
*/
static void ec_stripe_create ( struct ec_stripe_new * s )
{
struct bch_fs * c = s - > c ;
struct open_bucket * ob ;
2020-12-15 03:41:03 +03:00
struct bch_stripe * v = & s - > new_stripe . key . v ;
2018-11-01 22:13:19 +03:00
unsigned i , nr_data = v - > nr_blocks - v - > nr_redundant ;
int ret ;
BUG_ON ( s - > h - > s = = s ) ;
2020-12-15 03:41:03 +03:00
closure_sync ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
2023-02-27 01:12:36 +03:00
for ( i = 0 ; i < nr_data ; i + + )
if ( s - > blocks [ i ] ) {
ob = c - > open_buckets + s - > blocks [ i ] ;
if ( ob - > sectors_free )
zero_out_rest_of_ec_bucket ( c , s , i , ob ) ;
}
2018-11-01 22:13:19 +03:00
if ( s - > err ) {
2022-12-12 04:37:11 +03:00
if ( ! bch2_err_matches ( s - > err , EROFS ) )
2020-07-07 03:59:46 +03:00
bch_err ( c , " error creating stripe: error writing data buckets " ) ;
2018-11-01 22:13:19 +03:00
goto err ;
}
2020-12-15 03:41:03 +03:00
if ( s - > have_existing_stripe ) {
ec_validate_checksums ( c , & s - > existing_stripe ) ;
if ( ec_do_recov ( c , & s - > existing_stripe ) ) {
bch_err ( c , " error creating stripe: error reading existing stripe " ) ;
goto err ;
}
for ( i = 0 ; i < nr_data ; i + + )
if ( stripe_blockcount_get ( & s - > existing_stripe . key . v , i ) )
swap ( s - > new_stripe . data [ i ] ,
s - > existing_stripe . data [ i ] ) ;
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( & s - > existing_stripe ) ;
2020-12-15 03:41:03 +03:00
}
2020-07-07 05:33:54 +03:00
BUG_ON ( ! s - > allocated ) ;
2023-03-05 10:52:40 +03:00
BUG_ON ( ! s - > idx ) ;
2020-07-07 05:33:54 +03:00
2020-12-15 03:41:03 +03:00
ec_generate_ec ( & s - > new_stripe ) ;
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
ec_generate_checksums ( & s - > new_stripe ) ;
2018-11-01 22:13:19 +03:00
/* write p/q: */
for ( i = nr_data ; i < v - > nr_blocks ; i + + )
2020-12-15 03:41:03 +03:00
ec_block_io ( c , & s - > new_stripe , REQ_OP_WRITE , i , & s - > iodone ) ;
closure_sync ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
if ( ec_nr_failed ( & s - > new_stripe ) ) {
bch_err ( c , " error creating stripe: error writing redundancy buckets " ) ;
2023-02-19 05:10:13 +03:00
goto err ;
2020-12-15 03:41:03 +03:00
}
2018-11-01 22:13:19 +03:00
2021-12-05 07:07:33 +03:00
ret = bch2_trans_do ( c , & s - > res , NULL , BTREE_INSERT_NOFAIL ,
2023-02-19 05:07:25 +03:00
ec_stripe_key_update ( & trans , & s - > new_stripe . key ,
! s - > have_existing_stripe ) ) ;
2018-11-01 22:13:19 +03:00
if ( ret ) {
bch_err ( c , " error creating stripe: error creating stripe key " ) ;
2023-02-19 05:10:13 +03:00
goto err ;
2018-11-01 22:13:19 +03:00
}
2022-10-09 07:29:51 +03:00
ret = ec_stripe_update_extents ( c , & s - > new_stripe ) ;
2023-02-19 05:07:25 +03:00
if ( ret ) {
2022-10-09 07:29:51 +03:00
bch_err ( c , " error creating stripe: error updating pointers: %s " ,
bch2_err_str ( ret ) ) ;
2023-02-19 05:07:25 +03:00
goto err ;
}
2018-11-01 22:13:19 +03:00
err :
2020-07-10 01:31:51 +03:00
bch2_disk_reservation_put ( c , & s - > res ) ;
2021-01-19 07:26:42 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
if ( s - > blocks [ i ] ) {
ob = c - > open_buckets + s - > blocks [ i ] ;
if ( i < nr_data ) {
ob - > ec = NULL ;
__bch2_open_bucket_put ( c , ob ) ;
} else {
bch2_open_bucket_put ( c , ob ) ;
}
}
2018-11-01 22:13:19 +03:00
2023-03-08 11:57:32 +03:00
mutex_lock ( & c - > ec_stripe_new_lock ) ;
list_del ( & s - > list ) ;
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
2023-03-05 10:52:40 +03:00
if ( s - > idx )
bch2_stripe_close ( c , s ) ;
2023-02-28 06:30:54 +03:00
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( & s - > existing_stripe ) ;
ec_stripe_buf_exit ( & s - > new_stripe ) ;
2020-12-15 03:41:03 +03:00
closure_debug_destroy ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
kfree ( s ) ;
}
2023-02-19 05:10:13 +03:00
static struct ec_stripe_new * get_pending_stripe ( struct bch_fs * c )
2018-11-01 22:13:19 +03:00
{
2023-02-19 05:10:13 +03:00
struct ec_stripe_new * s ;
2020-07-07 03:59:46 +03:00
mutex_lock ( & c - > ec_stripe_new_lock ) ;
2023-02-19 05:10:13 +03:00
list_for_each_entry ( s , & c - > ec_stripe_new_list , list )
2023-03-08 11:57:32 +03:00
if ( ! atomic_read ( & s - > pin ) )
2023-02-19 05:10:13 +03:00
goto out ;
s = NULL ;
out :
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
2023-02-19 05:10:13 +03:00
return s ;
}
/*
 * Work function: create stripes for as long as get_pending_stripe() returns
 * one, then drop the BCH_WRITE_REF_stripe_create write ref.
 */
static void ec_stripe_create_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work,
		struct bch_fs, ec_stripe_create_work);
	struct ec_stripe_new *s;

	for (s = get_pending_stripe(c); s; s = get_pending_stripe(c))
		ec_stripe_create(s);

	bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
}
void bch2_ec_do_stripe_creates ( struct bch_fs * c )
{
bch2_write_ref_get ( c , BCH_WRITE_REF_stripe_create ) ;
if ( ! queue_work ( system_long_wq , & c - > ec_stripe_create_work ) )
bch2_write_ref_put ( c , BCH_WRITE_REF_stripe_create ) ;
2020-07-07 03:59:46 +03:00
}
2018-11-01 22:13:19 +03:00
2020-07-07 03:59:46 +03:00
static void ec_stripe_new_put ( struct bch_fs * c , struct ec_stripe_new * s )
{
BUG_ON ( atomic_read ( & s - > pin ) < = 0 ) ;
2023-03-05 10:52:40 +03:00
BUG_ON ( ! s - > err & & ! s - > idx ) ;
2018-11-01 22:13:19 +03:00
2023-02-19 05:10:13 +03:00
if ( atomic_dec_and_test ( & s - > pin ) )
bch2_ec_do_stripe_creates ( c ) ;
2018-11-01 22:13:19 +03:00
}
2020-07-07 03:59:46 +03:00
static void ec_stripe_set_pending ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
2020-07-07 03:59:46 +03:00
struct ec_stripe_new * s = h - > s ;
2020-07-07 05:33:54 +03:00
BUG_ON ( ! s - > allocated & & ! s - > err ) ;
2020-07-07 03:59:46 +03:00
h - > s = NULL ;
s - > pending = true ;
mutex_lock ( & c - > ec_stripe_new_lock ) ;
list_add ( & s - > list , & c - > ec_stripe_new_list ) ;
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
ec_stripe_new_put ( c , s ) ;
2018-11-01 22:13:19 +03:00
}
/* have a full bucket - hand it off to be erasure coded: */
void bch2_ec_bucket_written ( struct bch_fs * c , struct open_bucket * ob )
{
struct ec_stripe_new * s = ob - > ec ;
2020-07-07 03:59:46 +03:00
ec_stripe_new_put ( c , s ) ;
2018-11-01 22:13:19 +03:00
}
void bch2_ec_bucket_cancel ( struct bch_fs * c , struct open_bucket * ob )
{
struct ec_stripe_new * s = ob - > ec ;
s - > err = - EIO ;
}
void * bch2_writepoint_ec_buf ( struct bch_fs * c , struct write_point * wp )
{
struct open_bucket * ob = ec_open_bucket ( c , & wp - > ptrs ) ;
struct bch_dev * ca ;
unsigned offset ;
if ( ! ob )
return NULL ;
2023-03-05 10:52:40 +03:00
BUG_ON ( ! ob - > ec - > new_stripe . data [ ob - > ec_idx ] ) ;
2021-12-26 05:21:46 +03:00
ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
2018-11-01 22:13:19 +03:00
offset = ca - > mi . bucket_size - ob - > sectors_free ;
2020-12-15 03:41:03 +03:00
return ob - > ec - > new_stripe . data [ ob - > ec_idx ] + ( offset < < 9 ) ;
2018-11-01 22:13:19 +03:00
}
/* sort() comparison callback: orders two unsigned values via cmp_int(). */
static int unsigned_cmp(const void *_l, const void *_r)
{
	const unsigned *lp = _l;
	const unsigned *rp = _r;
	unsigned lhs = *lp;
	unsigned rhs = *rp;

	return cmp_int(lhs, rhs);
}
/*
 * Pick the most common bucket size among the devices in @devs.
 *
 * The sizes are sorted and scanned as runs of equal values; the size of the
 * longest run is returned. On a tie the earlier (smaller) size wins, since a
 * later run must be strictly longer to replace it. Returns 0 if the mask
 * yields no devices.
 */
static unsigned pick_blocksize(struct bch_fs *c,
			       struct bch_devs_mask *devs)
{
	struct bch_dev *ca;
	unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX];
	unsigned run_size = 0, run_len = 0;
	unsigned best_size = 0, best_len = 0;

	for_each_member_device_rcu(ca, c, i, devs)
		sizes[nr++] = ca->mi.bucket_size;

	sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);

	for (i = 0; i < nr; i++) {
		if (sizes[i] != run_size) {
			/* A run just ended: remember it if it is the longest */
			if (run_len > best_len) {
				best_size = run_size;
				best_len = run_len;
			}

			run_len = 0;
			run_size = sizes[i];
		}

		run_len++;
	}

	/* Account for the final run */
	if (run_len > best_len) {
		best_size = run_size;
		best_len = run_len;
	}

	return best_size;
}
2020-06-30 21:44:19 +03:00
/*
 * Policy hook consulted by get_existing_stripe(); currently hard-wired to
 * false and @c is not examined.
 */
static bool may_create_new_stripe(struct bch_fs *c)
{
	return false;
}
2020-07-07 05:33:54 +03:00
static void ec_stripe_key_init ( struct bch_fs * c ,
struct bkey_i_stripe * s ,
unsigned nr_data ,
unsigned nr_parity ,
unsigned stripe_size )
{
unsigned u64s ;
bkey_stripe_init ( & s - > k_i ) ;
s - > v . sectors = cpu_to_le16 ( stripe_size ) ;
s - > v . algorithm = 0 ;
s - > v . nr_blocks = nr_data + nr_parity ;
s - > v . nr_redundant = nr_parity ;
2021-12-14 22:34:03 +03:00
s - > v . csum_granularity_bits = ilog2 ( c - > opts . encoded_extent_max > > 9 ) ;
2021-11-11 20:11:33 +03:00
s - > v . csum_type = BCH_CSUM_crc32c ;
2020-07-07 05:33:54 +03:00
s - > v . pad = 0 ;
while ( ( u64s = stripe_val_u64s ( & s - > v ) ) > BKEY_VAL_U64s_MAX ) {
BUG_ON ( 1 < < s - > v . csum_granularity_bits > =
le16_to_cpu ( s - > v . sectors ) | |
s - > v . csum_granularity_bits = = U8_MAX ) ;
s - > v . csum_granularity_bits + + ;
}
set_bkey_val_u64s ( & s - > k , u64s ) ;
}
static int ec_new_stripe_alloc ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
struct ec_stripe_new * s ;
lockdep_assert_held ( & h - > lock ) ;
s = kzalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( ! s )
return - ENOMEM ;
mutex_init ( & s - > lock ) ;
2020-12-15 03:41:03 +03:00
closure_init ( & s - > iodone , NULL ) ;
2018-11-01 22:13:19 +03:00
atomic_set ( & s - > pin , 1 ) ;
s - > c = c ;
s - > h = h ;
2020-07-07 05:33:54 +03:00
s - > nr_data = min_t ( unsigned , h - > nr_active_devs ,
2020-12-16 22:23:27 +03:00
BCH_BKEY_PTRS_MAX ) - h - > redundancy ;
2020-07-07 05:33:54 +03:00
s - > nr_parity = h - > redundancy ;
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
ec_stripe_key_init ( c , & s - > new_stripe . key , s - > nr_data ,
2020-07-07 05:33:54 +03:00
s - > nr_parity , h - > blocksize ) ;
2018-11-01 22:13:19 +03:00
h - > s = s ;
return 0 ;
}
static struct ec_stripe_head *
ec_new_stripe_head_alloc ( struct bch_fs * c , unsigned target ,
2020-12-15 20:53:30 +03:00
unsigned algo , unsigned redundancy ,
2023-03-03 10:43:39 +03:00
enum alloc_reserve reserve )
2018-11-01 22:13:19 +03:00
{
struct ec_stripe_head * h ;
struct bch_dev * ca ;
unsigned i ;
h = kzalloc ( sizeof ( * h ) , GFP_KERNEL ) ;
if ( ! h )
return NULL ;
mutex_init ( & h - > lock ) ;
2023-02-18 06:43:47 +03:00
BUG_ON ( ! mutex_trylock ( & h - > lock ) ) ;
2018-11-01 22:13:19 +03:00
h - > target = target ;
h - > algo = algo ;
h - > redundancy = redundancy ;
2023-03-03 10:43:39 +03:00
h - > reserve = reserve ;
2018-11-01 22:13:19 +03:00
rcu_read_lock ( ) ;
2020-07-10 01:28:11 +03:00
h - > devs = target_rw_devs ( c , BCH_DATA_user , target ) ;
2018-11-01 22:13:19 +03:00
for_each_member_device_rcu ( ca , c , i , & h - > devs )
if ( ! ca - > mi . durability )
__clear_bit ( i , h - > devs . d ) ;
h - > blocksize = pick_blocksize ( c , & h - > devs ) ;
for_each_member_device_rcu ( ca , c , i , & h - > devs )
if ( ca - > mi . bucket_size = = h - > blocksize )
h - > nr_active_devs + + ;
rcu_read_unlock ( ) ;
2020-07-07 03:59:46 +03:00
list_add ( & h - > list , & c - > ec_stripe_head_list ) ;
2018-11-01 22:13:19 +03:00
return h ;
}
2020-07-07 03:59:46 +03:00
void bch2_ec_stripe_head_put ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
if ( h - > s & &
2020-07-07 05:33:54 +03:00
h - > s - > allocated & &
2018-11-01 22:13:19 +03:00
bitmap_weight ( h - > s - > blocks_allocated ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ) = = h - > s - > nr_data )
2020-07-07 03:59:46 +03:00
ec_stripe_set_pending ( c , h ) ;
2018-11-01 22:13:19 +03:00
mutex_unlock ( & h - > lock ) ;
}
2023-02-18 05:04:46 +03:00
struct ec_stripe_head * __bch2_ec_stripe_head_get ( struct btree_trans * trans ,
2020-12-15 20:53:30 +03:00
unsigned target ,
unsigned algo ,
unsigned redundancy ,
2023-03-03 10:43:39 +03:00
enum alloc_reserve reserve )
2018-11-01 22:13:19 +03:00
{
2023-02-18 05:04:46 +03:00
struct bch_fs * c = trans - > c ;
2018-11-01 22:13:19 +03:00
struct ec_stripe_head * h ;
2023-02-18 05:04:46 +03:00
int ret ;
2018-11-01 22:13:19 +03:00
if ( ! redundancy )
return NULL ;
2023-02-18 06:43:47 +03:00
ret = bch2_trans_mutex_lock ( trans , & c - > ec_stripe_head_lock ) ;
if ( ret )
return ERR_PTR ( ret ) ;
2023-02-18 05:04:46 +03:00
2020-07-07 03:59:46 +03:00
list_for_each_entry ( h , & c - > ec_stripe_head_list , list )
2018-11-01 22:13:19 +03:00
if ( h - > target = = target & &
h - > algo = = algo & &
2020-12-15 20:53:30 +03:00
h - > redundancy = = redundancy & &
2023-03-03 10:43:39 +03:00
h - > reserve = = reserve ) {
2023-02-18 06:43:47 +03:00
ret = bch2_trans_mutex_lock ( trans , & h - > lock ) ;
if ( ret )
h = ERR_PTR ( ret ) ;
2018-11-01 22:13:19 +03:00
goto found ;
}
2023-03-03 10:43:39 +03:00
h = ec_new_stripe_head_alloc ( c , target , algo , redundancy , reserve ) ;
2018-11-01 22:13:19 +03:00
found :
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
2018-11-01 22:13:19 +03:00
return h ;
}
2023-02-18 04:50:55 +03:00
static int new_stripe_alloc_buckets ( struct btree_trans * trans , struct ec_stripe_head * h ,
2023-03-03 10:43:39 +03:00
enum alloc_reserve reserve , struct closure * cl )
2020-07-07 05:33:54 +03:00
{
2023-02-18 04:50:55 +03:00
struct bch_fs * c = trans - > c ;
2021-01-19 07:26:42 +03:00
struct bch_devs_mask devs = h - > devs ;
2020-07-07 05:33:54 +03:00
struct open_bucket * ob ;
2021-01-19 07:26:42 +03:00
struct open_buckets buckets ;
unsigned i , j , nr_have_parity = 0 , nr_have_data = 0 ;
2020-07-07 05:33:54 +03:00
bool have_cache = true ;
2021-11-28 21:42:05 +03:00
int ret = 0 ;
2020-07-07 05:33:54 +03:00
2023-03-05 10:52:40 +03:00
BUG_ON ( h - > s - > new_stripe . key . v . nr_blocks ! = h - > s - > nr_data + h - > s - > nr_parity ) ;
BUG_ON ( h - > s - > new_stripe . key . v . nr_redundant ! = h - > s - > nr_parity ) ;
2023-03-03 10:43:39 +03:00
for_each_set_bit ( i , h - > s - > blocks_gotten , h - > s - > new_stripe . key . v . nr_blocks ) {
__clear_bit ( h - > s - > new_stripe . key . v . ptrs [ i ] . dev , devs . d ) ;
if ( i < h - > s - > nr_data )
nr_have_data + + ;
else
nr_have_parity + + ;
2020-07-07 05:33:54 +03:00
}
2021-01-19 07:26:42 +03:00
BUG_ON ( nr_have_data > h - > s - > nr_data ) ;
BUG_ON ( nr_have_parity > h - > s - > nr_parity ) ;
2020-07-07 05:33:54 +03:00
2021-01-19 07:26:42 +03:00
buckets . nr = 0 ;
if ( nr_have_parity < h - > s - > nr_parity ) {
2023-02-18 04:50:55 +03:00
ret = bch2_bucket_alloc_set_trans ( trans , & buckets ,
2020-07-07 05:33:54 +03:00
& h - > parity_stripe ,
& devs ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_parity ,
& nr_have_parity ,
2020-07-07 05:33:54 +03:00
& have_cache ,
2023-03-03 10:43:39 +03:00
reserve ,
2020-07-07 05:33:54 +03:00
0 ,
2020-12-15 20:38:17 +03:00
cl ) ;
2021-01-19 07:26:42 +03:00
open_bucket_for_each ( c , & buckets , ob , i ) {
j = find_next_zero_bit ( h - > s - > blocks_gotten ,
h - > s - > nr_data + h - > s - > nr_parity ,
h - > s - > nr_data ) ;
BUG_ON ( j > = h - > s - > nr_data + h - > s - > nr_parity ) ;
h - > s - > blocks [ j ] = buckets . v [ i ] ;
2021-12-26 05:21:46 +03:00
h - > s - > new_stripe . key . v . ptrs [ j ] = bch2_ob_ptr ( c , ob ) ;
2021-01-19 07:26:42 +03:00
__set_bit ( j , h - > s - > blocks_gotten ) ;
}
2020-07-07 05:33:54 +03:00
if ( ret )
2022-01-10 04:48:31 +03:00
return ret ;
2020-07-07 05:33:54 +03:00
}
2021-01-19 07:26:42 +03:00
buckets . nr = 0 ;
if ( nr_have_data < h - > s - > nr_data ) {
2023-02-18 04:50:55 +03:00
ret = bch2_bucket_alloc_set_trans ( trans , & buckets ,
2020-07-07 05:33:54 +03:00
& h - > block_stripe ,
& devs ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ,
& nr_have_data ,
2020-07-07 05:33:54 +03:00
& have_cache ,
2023-03-03 10:43:39 +03:00
reserve ,
2020-07-07 05:33:54 +03:00
0 ,
2020-12-15 20:38:17 +03:00
cl ) ;
2021-01-19 07:26:42 +03:00
open_bucket_for_each ( c , & buckets , ob , i ) {
j = find_next_zero_bit ( h - > s - > blocks_gotten ,
h - > s - > nr_data , 0 ) ;
BUG_ON ( j > = h - > s - > nr_data ) ;
h - > s - > blocks [ j ] = buckets . v [ i ] ;
2021-12-26 05:21:46 +03:00
h - > s - > new_stripe . key . v . ptrs [ j ] = bch2_ob_ptr ( c , ob ) ;
2021-01-19 07:26:42 +03:00
__set_bit ( j , h - > s - > blocks_gotten ) ;
}
2020-07-07 05:33:54 +03:00
if ( ret )
2022-01-10 04:48:31 +03:00
return ret ;
2020-07-07 05:33:54 +03:00
}
2022-01-10 04:48:31 +03:00
return 0 ;
2020-07-07 05:33:54 +03:00
}
2020-06-30 21:44:19 +03:00
/* XXX: doesn't obey target: */
static s64 get_existing_stripe ( struct bch_fs * c ,
2021-01-11 21:51:23 +03:00
struct ec_stripe_head * head )
2020-06-30 21:44:19 +03:00
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
struct stripe * m ;
size_t heap_idx ;
u64 stripe_idx ;
2021-01-23 02:01:07 +03:00
s64 ret = - 1 ;
2020-06-30 21:44:19 +03:00
if ( may_create_new_stripe ( c ) )
return - 1 ;
2023-02-19 04:49:37 +03:00
mutex_lock ( & c - > ec_stripes_heap_lock ) ;
2020-06-30 21:44:19 +03:00
for ( heap_idx = 0 ; heap_idx < h - > used ; heap_idx + + ) {
2021-01-23 02:01:07 +03:00
/* No blocks worth reusing, stripe will just be deleted: */
2020-06-30 21:44:19 +03:00
if ( ! h - > data [ heap_idx ] . blocks_nonempty )
continue ;
stripe_idx = h - > data [ heap_idx ] . idx ;
2023-02-19 05:07:25 +03:00
2021-12-05 07:07:33 +03:00
m = genradix_ptr ( & c - > stripes , stripe_idx ) ;
2020-06-30 21:44:19 +03:00
2021-01-11 21:51:23 +03:00
if ( m - > algorithm = = head - > algo & &
m - > nr_redundant = = head - > redundancy & &
m - > sectors = = head - > blocksize & &
2023-02-19 06:11:50 +03:00
m - > blocks_nonempty < m - > nr_blocks - m - > nr_redundant & &
bch2_try_open_stripe ( c , head - > s , stripe_idx ) ) {
2021-01-23 02:01:07 +03:00
ret = stripe_idx ;
break ;
2020-06-30 21:44:19 +03:00
}
}
2023-02-19 04:49:37 +03:00
mutex_unlock ( & c - > ec_stripes_heap_lock ) ;
2021-01-23 02:01:07 +03:00
return ret ;
2020-06-30 21:44:19 +03:00
}
2023-02-23 03:28:58 +03:00
static int __bch2_ec_stripe_head_reuse ( struct btree_trans * trans , struct ec_stripe_head * h )
2021-02-10 03:18:13 +03:00
{
2023-02-23 03:28:58 +03:00
struct bch_fs * c = trans - > c ;
2021-02-10 03:18:13 +03:00
unsigned i ;
s64 idx ;
int ret ;
2023-03-05 10:52:40 +03:00
/*
* If we can ' t allocate a new stripe , and there ' s no stripes with empty
* blocks for us to reuse , that means we have to wait on copygc :
*/
2021-02-10 03:18:13 +03:00
idx = get_existing_stripe ( c , h ) ;
2022-10-09 09:30:50 +03:00
if ( idx < 0 )
2023-03-05 10:52:40 +03:00
return - BCH_ERR_stripe_alloc_blocked ;
2021-02-10 03:18:13 +03:00
2023-02-23 03:28:58 +03:00
ret = get_stripe_key_trans ( trans , idx , & h - > s - > existing_stripe ) ;
2021-02-10 03:18:13 +03:00
if ( ret ) {
2023-03-03 11:11:06 +03:00
bch2_stripe_close ( c , h - > s ) ;
if ( ! bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
bch2_fs_fatal_error ( c , " error reading stripe key: %s " , bch2_err_str ( ret ) ) ;
2021-02-10 03:18:13 +03:00
return ret ;
}
2023-03-05 10:52:40 +03:00
BUG_ON ( h - > s - > existing_stripe . key . v . nr_redundant ! = h - > s - > nr_parity ) ;
h - > s - > nr_data = h - > s - > existing_stripe . key . v . nr_blocks -
h - > s - > existing_stripe . key . v . nr_redundant ;
ret = ec_stripe_buf_init ( & h - > s - > existing_stripe , 0 , h - > blocksize ) ;
if ( ret ) {
bch2_stripe_close ( c , h - > s ) ;
return ret ;
2021-02-10 03:18:13 +03:00
}
BUG_ON ( h - > s - > existing_stripe . size ! = h - > blocksize ) ;
BUG_ON ( h - > s - > existing_stripe . size ! = h - > s - > existing_stripe . key . v . sectors ) ;
2023-03-02 09:54:17 +03:00
/*
* Free buckets we initially allocated - they might conflict with
* blocks from the stripe we ' re reusing :
*/
for_each_set_bit ( i , h - > s - > blocks_gotten , h - > s - > new_stripe . key . v . nr_blocks ) {
bch2_open_bucket_put ( c , c - > open_buckets + h - > s - > blocks [ i ] ) ;
h - > s - > blocks [ i ] = 0 ;
}
memset ( h - > s - > blocks_gotten , 0 , sizeof ( h - > s - > blocks_gotten ) ) ;
memset ( h - > s - > blocks_allocated , 0 , sizeof ( h - > s - > blocks_allocated ) ) ;
2021-02-10 03:18:13 +03:00
for ( i = 0 ; i < h - > s - > existing_stripe . key . v . nr_blocks ; i + + ) {
if ( stripe_blockcount_get ( & h - > s - > existing_stripe . key . v , i ) ) {
__set_bit ( i , h - > s - > blocks_gotten ) ;
__set_bit ( i , h - > s - > blocks_allocated ) ;
}
ec_block_io ( c , & h - > s - > existing_stripe , READ , i , & h - > s - > iodone ) ;
}
2023-03-03 11:11:06 +03:00
bkey_copy ( & h - > s - > new_stripe . key . k_i , & h - > s - > existing_stripe . key . k_i ) ;
h - > s - > have_existing_stripe = true ;
2021-02-10 03:18:13 +03:00
return 0 ;
}
2023-02-19 05:07:25 +03:00
static int __bch2_ec_stripe_head_reserve ( struct btree_trans * trans , struct ec_stripe_head * h )
2021-02-10 03:18:13 +03:00
{
2023-02-19 05:07:25 +03:00
struct bch_fs * c = trans - > c ;
struct btree_iter iter ;
struct bkey_s_c k ;
struct bpos min_pos = POS ( 0 , 1 ) ;
struct bpos start_pos = bpos_max ( min_pos , POS ( 0 , c - > ec_stripe_hint ) ) ;
int ret ;
2023-03-03 11:11:06 +03:00
if ( ! h - > s - > res . sectors ) {
ret = bch2_disk_reservation_get ( c , & h - > s - > res ,
2023-02-19 05:07:25 +03:00
h - > blocksize ,
2023-03-03 11:11:06 +03:00
h - > s - > nr_parity ,
BCH_DISK_RESERVATION_NOFAIL ) ;
if ( ret )
return ret ;
}
2023-02-19 05:07:25 +03:00
for_each_btree_key_norestart ( trans , iter , BTREE_ID_stripes , start_pos ,
BTREE_ITER_SLOTS | BTREE_ITER_INTENT , k , ret ) {
if ( bkey_gt ( k . k - > p , POS ( 0 , U32_MAX ) ) ) {
if ( start_pos . offset ) {
start_pos = min_pos ;
bch2_btree_iter_set_pos ( & iter , start_pos ) ;
continue ;
}
ret = - BCH_ERR_ENOSPC_stripe_create ;
break ;
}
if ( bkey_deleted ( k . k ) & &
bch2_try_open_stripe ( c , h - > s , k . k - > p . offset ) )
break ;
}
c - > ec_stripe_hint = iter . pos . offset ;
if ( ret )
goto err ;
ret = ec_stripe_mem_alloc ( trans , & iter ) ;
if ( ret ) {
bch2_stripe_close ( c , h - > s ) ;
goto err ;
}
h - > s - > new_stripe . key . k . p = iter . pos ;
out :
bch2_trans_iter_exit ( trans , & iter ) ;
return ret ;
err :
bch2_disk_reservation_put ( c , & h - > s - > res ) ;
goto out ;
2021-02-10 03:18:13 +03:00
}
2023-02-18 04:50:55 +03:00
struct ec_stripe_head * bch2_ec_stripe_head_get ( struct btree_trans * trans ,
2020-07-07 05:33:54 +03:00
unsigned target ,
unsigned algo ,
2020-12-15 20:38:17 +03:00
unsigned redundancy ,
2023-03-03 10:43:39 +03:00
enum alloc_reserve reserve ,
2020-12-15 20:38:17 +03:00
struct closure * cl )
2020-07-07 05:33:54 +03:00
{
2023-02-18 04:50:55 +03:00
struct bch_fs * c = trans - > c ;
2020-07-07 05:33:54 +03:00
struct ec_stripe_head * h ;
2023-03-02 09:54:17 +03:00
bool waiting = false ;
2020-07-10 01:31:51 +03:00
int ret ;
2020-07-07 05:33:54 +03:00
2023-03-03 10:43:39 +03:00
h = __bch2_ec_stripe_head_get ( trans , target , algo , redundancy , reserve ) ;
2023-02-18 05:04:46 +03:00
if ( ! h )
2020-12-15 03:41:03 +03:00
bch_err ( c , " no stripe head " ) ;
2023-02-18 05:04:46 +03:00
if ( IS_ERR_OR_NULL ( h ) )
return h ;
2020-07-07 05:33:54 +03:00
2023-03-02 09:54:17 +03:00
if ( ! h - > s ) {
2020-12-09 21:39:30 +03:00
if ( ec_new_stripe_alloc ( c , h ) ) {
2021-02-10 03:18:13 +03:00
ret = - ENOMEM ;
2020-12-15 03:41:03 +03:00
bch_err ( c , " failed to allocate new stripe " ) ;
2021-02-10 03:18:13 +03:00
goto err ;
2020-12-15 03:41:03 +03:00
}
2020-12-09 21:39:30 +03:00
}
2020-06-30 21:44:19 +03:00
2023-03-02 09:54:17 +03:00
if ( h - > s - > allocated )
goto allocated ;
2023-02-19 05:07:25 +03:00
2023-03-02 09:54:17 +03:00
if ( h - > s - > have_existing_stripe )
goto alloc_existing ;
/* First, try to allocate a full stripe: */
ret = new_stripe_alloc_buckets ( trans , h , RESERVE_stripe , NULL ) ? :
__bch2_ec_stripe_head_reserve ( trans , h ) ;
if ( ! ret )
2023-03-05 10:52:40 +03:00
goto allocate_buf ;
2023-03-02 09:54:17 +03:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) | |
bch2_err_matches ( ret , ENOMEM ) )
2021-02-10 03:18:13 +03:00
goto err ;
2020-07-10 01:31:51 +03:00
2023-03-02 09:54:17 +03:00
/*
* Not enough buckets available for a full stripe : we must reuse an
* existing stripe :
*/
while ( 1 ) {
ret = __bch2_ec_stripe_head_reuse ( trans , h ) ;
if ( ! ret )
break ;
if ( waiting | | ! cl | | ret ! = - BCH_ERR_stripe_alloc_blocked )
2021-02-10 03:18:13 +03:00
goto err ;
2020-07-07 05:33:54 +03:00
2023-03-02 09:54:17 +03:00
/* XXX freelist_wait? */
closure_wait ( & c - > freelist_wait , cl ) ;
waiting = true ;
2020-07-07 05:33:54 +03:00
}
2021-02-10 03:18:13 +03:00
2023-03-02 09:54:17 +03:00
if ( waiting )
closure_wake_up ( & c - > freelist_wait ) ;
alloc_existing :
/*
* Retry allocating buckets , with the reserve watermark for this
* particular write :
*/
ret = new_stripe_alloc_buckets ( trans , h , reserve , cl ) ;
if ( ret )
goto err ;
2023-03-05 10:52:40 +03:00
allocate_buf :
ret = ec_stripe_buf_init ( & h - > s - > new_stripe , 0 , h - > blocksize ) ;
if ( ret )
goto err ;
2023-03-02 09:54:17 +03:00
h - > s - > allocated = true ;
2023-03-05 10:52:40 +03:00
allocated :
2023-03-02 09:54:17 +03:00
BUG_ON ( ! h - > s - > idx ) ;
2023-03-05 10:52:40 +03:00
BUG_ON ( ! h - > s - > new_stripe . data [ 0 ] ) ;
2023-02-19 05:07:25 +03:00
BUG_ON ( trans - > restarted ) ;
2020-07-07 05:33:54 +03:00
return h ;
2021-02-10 03:18:13 +03:00
err :
bch2_ec_stripe_head_put ( c , h ) ;
2021-11-28 21:42:05 +03:00
return ERR_PTR ( ret ) ;
2020-07-07 05:33:54 +03:00
}
2018-11-01 22:13:19 +03:00
void bch2_ec_stop_dev ( struct bch_fs * c , struct bch_dev * ca )
{
struct ec_stripe_head * h ;
struct open_bucket * ob ;
unsigned i ;
2020-07-07 03:59:46 +03:00
mutex_lock ( & c - > ec_stripe_head_lock ) ;
list_for_each_entry ( h , & c - > ec_stripe_head_list , list ) {
2018-11-01 22:13:19 +03:00
mutex_lock ( & h - > lock ) ;
if ( ! h - > s )
goto unlock ;
2021-01-19 07:26:42 +03:00
for ( i = 0 ; i < h - > s - > new_stripe . key . v . nr_blocks ; i + + ) {
if ( ! h - > s - > blocks [ i ] )
continue ;
ob = c - > open_buckets + h - > s - > blocks [ i ] ;
2021-12-26 05:21:46 +03:00
if ( ob - > dev = = ca - > dev_idx )
2018-11-01 22:13:19 +03:00
goto found ;
2021-01-19 07:26:42 +03:00
}
2018-11-01 22:13:19 +03:00
goto unlock ;
found :
2020-07-07 03:59:46 +03:00
h - > s - > err = - EROFS ;
ec_stripe_set_pending ( c , h ) ;
2018-11-01 22:13:19 +03:00
unlock :
mutex_unlock ( & h - > lock ) ;
}
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
2018-11-01 22:13:19 +03:00
}
2021-12-26 04:07:00 +03:00
int bch2_stripes_read ( struct bch_fs * c )
2018-11-26 04:53:51 +03:00
{
2021-12-26 04:07:00 +03:00
struct btree_trans trans ;
struct btree_iter iter ;
struct bkey_s_c k ;
2021-11-30 00:38:27 +03:00
const struct bch_stripe * s ;
struct stripe * m ;
unsigned i ;
2021-12-26 04:07:00 +03:00
int ret ;
2019-07-13 00:08:32 +03:00
2021-12-26 04:07:00 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-11-30 00:38:27 +03:00
2021-12-26 04:07:00 +03:00
for_each_btree_key ( & trans , iter , BTREE_ID_stripes , POS_MIN ,
BTREE_ITER_PREFETCH , k , ret ) {
if ( k . k - > type ! = KEY_TYPE_stripe )
continue ;
2021-11-28 22:31:19 +03:00
2021-12-26 04:07:00 +03:00
ret = __ec_stripe_mem_alloc ( c , k . k - > p . offset , GFP_KERNEL ) ;
if ( ret )
break ;
2021-11-30 00:38:27 +03:00
2021-12-26 04:07:00 +03:00
s = bkey_s_c_to_stripe ( k ) . v ;
2021-11-30 00:38:27 +03:00
2021-12-26 04:07:00 +03:00
m = genradix_ptr ( & c - > stripes , k . k - > p . offset ) ;
m - > sectors = le16_to_cpu ( s - > sectors ) ;
m - > algorithm = s - > algorithm ;
m - > nr_blocks = s - > nr_blocks ;
m - > nr_redundant = s - > nr_redundant ;
m - > blocks_nonempty = 0 ;
2021-11-30 00:38:27 +03:00
2021-12-26 04:07:00 +03:00
for ( i = 0 ; i < s - > nr_blocks ; i + + )
m - > blocks_nonempty + = ! ! stripe_blockcount_get ( s , i ) ;
2018-11-26 04:53:51 +03:00
2023-02-19 06:11:50 +03:00
bch2_stripes_heap_insert ( c , m , k . k - > p . offset ) ;
2021-12-26 04:07:00 +03:00
}
bch2_trans_iter_exit ( & trans , & iter ) ;
2021-10-30 01:43:18 +03:00
bch2_trans_exit ( & trans ) ;
2021-12-26 04:07:00 +03:00
2020-05-24 21:06:10 +03:00
if ( ret )
2019-04-17 22:49:28 +03:00
bch_err ( c , " error reading stripes: %i " , ret ) ;
2018-11-26 04:53:51 +03:00
2020-05-24 21:06:10 +03:00
return ret ;
2018-11-26 04:53:51 +03:00
}
2020-07-07 03:18:13 +03:00
void bch2_stripes_heap_to_text ( struct printbuf * out , struct bch_fs * c )
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
struct stripe * m ;
size_t i ;
2023-02-19 04:49:37 +03:00
mutex_lock ( & c - > ec_stripes_heap_lock ) ;
2020-11-05 20:16:05 +03:00
for ( i = 0 ; i < min_t ( size_t , h - > used , 20 ) ; i + + ) {
2021-12-05 07:07:33 +03:00
m = genradix_ptr ( & c - > stripes , h - > data [ i ] . idx ) ;
2020-07-07 03:18:13 +03:00
2023-02-04 05:01:40 +03:00
prt_printf ( out , " %zu %u/%u+%u \n " , h - > data [ i ] . idx ,
2020-07-07 03:18:13 +03:00
h - > data [ i ] . blocks_nonempty ,
m - > nr_blocks - m - > nr_redundant ,
m - > nr_redundant ) ;
}
2023-02-19 04:49:37 +03:00
mutex_unlock ( & c - > ec_stripes_heap_lock ) ;
2020-07-07 03:18:13 +03:00
}
2020-07-26 00:06:11 +03:00
void bch2_new_stripes_to_text ( struct printbuf * out , struct bch_fs * c )
{
struct ec_stripe_head * h ;
struct ec_stripe_new * s ;
mutex_lock ( & c - > ec_stripe_head_lock ) ;
list_for_each_entry ( h , & c - > ec_stripe_head_list , list ) {
2023-02-04 05:01:40 +03:00
prt_printf ( out , " target %u algo %u redundancy %u: \n " ,
2020-07-26 00:06:11 +03:00
h - > target , h - > algo , h - > redundancy ) ;
if ( h - > s )
2023-03-08 11:57:32 +03:00
prt_printf ( out , " \t pending: idx %llu blocks %u+%u allocated %u \n " ,
h - > s - > idx , h - > s - > nr_data , h - > s - > nr_parity ,
2020-07-26 00:06:11 +03:00
bitmap_weight ( h - > s - > blocks_allocated ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ) ) ;
2020-07-26 00:06:11 +03:00
}
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
mutex_lock ( & c - > ec_stripe_new_lock ) ;
2020-08-05 06:12:49 +03:00
list_for_each_entry ( s , & c - > ec_stripe_new_list , list ) {
2023-03-08 11:57:32 +03:00
prt_printf ( out , " \t in flight: idx %llu blocks %u+%u pin %u \n " ,
s - > idx , s - > nr_data , s - > nr_parity ,
atomic_read ( & s - > pin ) ) ;
2020-07-26 00:06:11 +03:00
}
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
}
2018-11-01 22:13:19 +03:00
void bch2_fs_ec_exit ( struct bch_fs * c )
{
struct ec_stripe_head * h ;
2023-02-23 02:35:51 +03:00
unsigned i ;
2018-11-01 22:13:19 +03:00
while ( 1 ) {
2020-07-07 03:59:46 +03:00
mutex_lock ( & c - > ec_stripe_head_lock ) ;
h = list_first_entry_or_null ( & c - > ec_stripe_head_list ,
2018-11-01 22:13:19 +03:00
struct ec_stripe_head , list ) ;
if ( h )
list_del ( & h - > list ) ;
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
2018-11-01 22:13:19 +03:00
if ( ! h )
break ;
2023-02-23 02:35:51 +03:00
if ( h - > s ) {
for ( i = 0 ; i < h - > s - > new_stripe . key . v . nr_blocks ; i + + )
BUG_ON ( h - > s - > blocks [ i ] ) ;
kfree ( h - > s ) ;
}
2018-11-01 22:13:19 +03:00
kfree ( h ) ;
}
2020-07-07 03:59:46 +03:00
BUG_ON ( ! list_empty ( & c - > ec_stripe_new_list ) ) ;
2018-11-01 22:13:19 +03:00
free_heap ( & c - > ec_stripes_heap ) ;
2021-12-05 07:07:33 +03:00
genradix_free ( & c - > stripes ) ;
2018-11-01 22:13:19 +03:00
bioset_exit ( & c - > ec_bioset ) ;
}
2022-04-09 08:23:50 +03:00
void bch2_fs_ec_init_early ( struct bch_fs * c )
2018-11-01 22:13:19 +03:00
{
2020-07-07 03:59:46 +03:00
INIT_WORK ( & c - > ec_stripe_create_work , ec_stripe_create_work ) ;
2018-11-01 22:13:19 +03:00
INIT_WORK ( & c - > ec_stripe_delete_work , ec_stripe_delete_work ) ;
2022-04-09 08:23:50 +03:00
}
2018-11-01 22:13:19 +03:00
2022-04-09 08:23:50 +03:00
int bch2_fs_ec_init ( struct bch_fs * c )
{
2023-02-19 05:07:25 +03:00
spin_lock_init ( & c - > ec_stripes_new_lock ) ;
2018-11-01 22:13:19 +03:00
return bioset_init ( & c - > ec_bioset , 1 , offsetof ( struct ec_bio , bio ) ,
BIOSET_NEED_BVECS ) ;
}