2018-11-01 22:13:19 +03:00
// SPDX-License-Identifier: GPL-2.0
/* erasure coding */
# include "bcachefs.h"
# include "alloc_foreground.h"
2020-12-17 23:08:58 +03:00
# include "bkey_buf.h"
2018-11-01 22:13:19 +03:00
# include "bset.h"
# include "btree_gc.h"
# include "btree_update.h"
# include "buckets.h"
# include "disk_groups.h"
# include "ec.h"
# include "error.h"
# include "io.h"
# include "keylist.h"
2019-04-12 05:39:39 +03:00
# include "recovery.h"
2018-11-01 22:13:19 +03:00
# include "super-io.h"
# include "util.h"
2018-11-23 10:06:18 +03:00
# include <linux/sort.h>
# ifdef __KERNEL__
2018-11-01 22:13:19 +03:00
# include <linux/raid/pq.h>
# include <linux/raid/xor.h>
2018-11-23 10:06:18 +03:00
/*
 * Rebuild block @failed_idx of @data as the XOR of the other @disks - 1 blocks,
 * each @size bytes long.
 *
 * The failed block's pointer is swapped into slot 0 so it can be used as the
 * accumulator; the swap is undone before returning, so the order of @data is
 * unchanged on exit.
 */
static void raid5_recov(unsigned disks, unsigned failed_idx,
			size_t size, void **data)
{
	unsigned pos;

	BUG_ON(failed_idx >= disks);

	swap(data[0], data[failed_idx]);

	/* Seed the accumulator with block 1, then fold in blocks 2..disks-1: */
	memcpy(data[0], data[1], size);

	for (pos = 2; pos < disks; ) {
		/* xor_blocks() is fed at most MAX_XOR_BLOCKS sources per call */
		unsigned chunk = min_t(unsigned, disks - pos, MAX_XOR_BLOCKS);

		xor_blocks(chunk, size, data[0], data + pos);
		pos += chunk;
	}

	swap(data[0], data[failed_idx]);
}
/*
 * Generate parity for a stripe of @nd data blocks and @np parity blocks
 * (@np may be 0, 1 or 2; anything larger is a BUG), each @size bytes, laid out
 * in @v as data blocks followed by P and then Q.
 *
 * P is the XOR of the @nd data blocks only, so the XOR pass is run over
 * nd + 1 blocks (data + P), the same way raid_rec() does it.  Running it over
 * nd + np blocks would, for np == 2, also fold the not yet generated Q buffer
 * into P; that result was only harmless because gen_syndrome() rewrites both
 * P and Q afterwards.
 */
static void raid_gen(int nd, int np, size_t size, void **v)
{
	if (np >= 1)
		raid5_recov(nd + 1, nd, size, v);
	if (np >= 2)
		raid6_call.gen_syndrome(nd + np, size, v);
	BUG_ON(np > 2);
}
/*
 * Reconstruct up to two failed blocks of a stripe.
 *
 * @nr:   number of failed blocks (0, 1 or 2; anything else is a BUG)
 * @ir:   indices of the failed blocks; the two-failure case tests ir[1]
 *        against @nd first, so ir[0] < ir[1] is presumably expected
 * @nd:   number of data blocks
 * @np:   number of parity blocks
 * @size: block size in bytes
 * @v:    block buffers, data blocks first
 */
static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
{
	int nr_blocks = nd + np;

	switch (nr) {
	case 0:
		return;
	case 1:
		if (ir[0] < nd + 1)
			raid5_recov(nd + 1, ir[0], size, v);
		else
			raid6_call.gen_syndrome(nr_blocks, size, v);
		return;
	case 2:
		if (ir[1] < nd) {
			/* data+data failure. */
			raid6_2data_recov(nr_blocks, size, ir[0], ir[1], v);
			return;
		}

		if (!(ir[0] < nd)) {
			/* neither failed index is below nd: regenerate parity */
			raid_gen(nd, np, size, v);
			return;
		}

		/* data + p/q failure */
		if (ir[1] == nd) {
			/* data + p failure */
			raid6_datap_recov(nr_blocks, size, ir[0], v);
			return;
		}

		/* data + q failure */
		raid5_recov(nd + 1, ir[0], size, v);
		raid6_call.gen_syndrome(nr_blocks, size, v);
		return;
	default:
		BUG();
	}
}
# else
# include <raid/raid.h>
# endif
2018-11-01 22:13:19 +03:00
/*
 * Per-bio state for stripe block IO: ec_block_io() fills this in and
 * ec_block_endio() recovers it from the embedded bio with container_of().
 */
struct ec_bio {
/* device the bio was submitted to; its io_ref is dropped at completion */
struct bch_dev * ca ;
/* stripe buffer being read/written; its valid bitmap is updated on error */
struct ec_stripe_buf * buf ;
/* index of the block within the stripe (indexes the stripe's ptrs[]) */
size_t idx ;
/* embedded bio, allocated from c->ec_bioset */
struct bio bio ;
} ;
/* Stripes btree keys: */
2018-11-01 22:10:01 +03:00
const char * bch2_stripe_invalid ( const struct bch_fs * c , struct bkey_s_c k )
2018-11-01 22:13:19 +03:00
{
2018-11-01 22:10:01 +03:00
const struct bch_stripe * s = bkey_s_c_to_stripe ( k ) . v ;
2021-01-23 02:01:07 +03:00
if ( ! bkey_cmp ( k . k - > p , POS_MIN ) )
return " stripe at pos 0 " ;
2018-11-01 22:13:19 +03:00
if ( k . k - > p . inode )
return " invalid stripe key " ;
2018-11-01 22:10:01 +03:00
if ( bkey_val_bytes ( k . k ) < sizeof ( * s ) )
return " incorrect value size " ;
2018-11-01 22:13:19 +03:00
2018-11-25 03:01:45 +03:00
if ( bkey_val_bytes ( k . k ) < sizeof ( * s ) | |
bkey_val_u64s ( k . k ) < stripe_val_u64s ( s ) )
2018-11-01 22:10:01 +03:00
return " incorrect value size " ;
2018-11-01 22:13:19 +03:00
2019-05-12 00:32:07 +03:00
return bch2_bkey_ptrs_invalid ( c , k ) ;
2018-11-01 22:13:19 +03:00
}
2018-11-01 22:10:01 +03:00
void bch2_stripe_to_text ( struct printbuf * out , struct bch_fs * c ,
2018-11-01 22:13:19 +03:00
struct bkey_s_c k )
{
2018-11-01 22:10:01 +03:00
const struct bch_stripe * s = bkey_s_c_to_stripe ( k ) . v ;
unsigned i ;
pr_buf ( out , " algo %u sectors %u blocks %u:%u csum %u gran %u " ,
s - > algorithm ,
le16_to_cpu ( s - > sectors ) ,
s - > nr_blocks - s - > nr_redundant ,
s - > nr_redundant ,
s - > csum_type ,
1U < < s - > csum_granularity_bits ) ;
for ( i = 0 ; i < s - > nr_blocks ; i + + )
2018-11-26 04:53:51 +03:00
pr_buf ( out , " %u:%llu:%u " , s - > ptrs [ i ] . dev ,
( u64 ) s - > ptrs [ i ] . offset ,
stripe_blockcount_get ( s , i ) ) ;
2018-11-01 22:13:19 +03:00
}
2021-01-11 21:51:23 +03:00
/* returns blocknr in stripe that we matched: */
static int bkey_matches_stripe ( struct bch_stripe * s ,
struct bkey_s_c k )
2018-11-01 22:13:19 +03:00
{
2021-01-11 21:51:23 +03:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const struct bch_extent_ptr * ptr ;
unsigned i , nr_data = s - > nr_blocks - s - > nr_redundant ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
bkey_for_each_ptr ( ptrs , ptr )
for ( i = 0 ; i < nr_data ; i + + )
2021-03-13 00:55:28 +03:00
if ( __bch2_ptr_matches_stripe ( & s - > ptrs [ i ] , ptr ,
le16_to_cpu ( s - > sectors ) ) )
2021-01-11 21:51:23 +03:00
return i ;
2018-11-01 22:13:19 +03:00
return - 1 ;
}
2019-05-13 07:30:02 +03:00
/*
 * Returns true if @k is an extent that already carries a stripe pointer
 * entry referring to stripe @idx; false for any other key type.
 */
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
	struct bkey_s_c_extent e;
	const union bch_extent_entry *entry;

	if (k.k->type != KEY_TYPE_extent)
		return false;

	e = bkey_s_c_to_extent(k);

	extent_for_each_entry(e, entry) {
		if (extent_entry_type(entry) != BCH_EXTENT_ENTRY_stripe_ptr)
			continue;

		if (entry->stripe_ptr.idx == idx)
			return true;
	}

	return false;
}
2020-12-15 03:41:03 +03:00
/* Stripe bufs: */
2021-01-11 21:51:23 +03:00
static void ec_stripe_buf_exit ( struct ec_stripe_buf * buf )
2020-12-15 03:41:03 +03:00
{
unsigned i ;
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < buf - > key . v . nr_blocks ; i + + ) {
kvpfree ( buf - > data [ i ] , buf - > size < < 9 ) ;
buf - > data [ i ] = NULL ;
2020-12-15 03:41:03 +03:00
}
}
2021-01-11 21:51:23 +03:00
static int ec_stripe_buf_init ( struct ec_stripe_buf * buf ,
unsigned offset , unsigned size )
2020-12-15 03:41:03 +03:00
{
2021-01-11 21:51:23 +03:00
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1U < < v - > csum_granularity_bits ;
unsigned end = offset + size ;
2020-12-15 03:41:03 +03:00
unsigned i ;
2021-01-11 21:51:23 +03:00
BUG_ON ( end > le16_to_cpu ( v - > sectors ) ) ;
offset = round_down ( offset , csum_granularity ) ;
end = min_t ( unsigned , le16_to_cpu ( v - > sectors ) ,
round_up ( end , csum_granularity ) ) ;
2020-12-15 03:41:03 +03:00
2021-01-11 21:51:23 +03:00
buf - > offset = offset ;
buf - > size = end - offset ;
memset ( buf - > valid , 0xFF , sizeof ( buf - > valid ) ) ;
for ( i = 0 ; i < buf - > key . v . nr_blocks ; i + + ) {
buf - > data [ i ] = kvpmalloc ( buf - > size < < 9 , GFP_KERNEL ) ;
if ( ! buf - > data [ i ] )
2020-12-15 03:41:03 +03:00
goto err ;
}
return 0 ;
err :
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( buf ) ;
2020-12-15 03:41:03 +03:00
return - ENOMEM ;
}
2018-11-01 22:13:19 +03:00
/* Checksumming: */
2021-01-11 21:51:23 +03:00
static struct bch_csum ec_block_checksum ( struct ec_stripe_buf * buf ,
unsigned block , unsigned offset )
2018-11-01 22:13:19 +03:00
{
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1 < < v - > csum_granularity_bits ;
2021-01-11 21:51:23 +03:00
unsigned end = buf - > offset + buf - > size ;
unsigned len = min ( csum_granularity , end - offset ) ;
BUG_ON ( offset > = end ) ;
BUG_ON ( offset < buf - > offset ) ;
BUG_ON ( offset & ( csum_granularity - 1 ) ) ;
BUG_ON ( offset + len ! = le16_to_cpu ( v - > sectors ) & &
( len & ( csum_granularity - 1 ) ) ) ;
return bch2_checksum ( NULL , v - > csum_type ,
null_nonce ( ) ,
buf - > data [ block ] + ( ( offset - buf - > offset ) < < 9 ) ,
len < < 9 ) ;
}
static void ec_generate_checksums ( struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned i , j , csums_per_device = stripe_csums_per_device ( v ) ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( ! v - > csum_type )
2018-11-01 22:13:19 +03:00
return ;
BUG_ON ( buf - > offset ) ;
BUG_ON ( buf - > size ! = le16_to_cpu ( v - > sectors ) ) ;
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
for ( j = 0 ; j < csums_per_device ; j + + )
stripe_csum_set ( v , i , j ,
ec_block_checksum ( buf , i , j < < v - > csum_granularity_bits ) ) ;
2018-11-01 22:13:19 +03:00
}
static void ec_validate_checksums ( struct bch_fs * c , struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned csum_granularity = 1 < < v - > csum_granularity_bits ;
unsigned i ;
2021-01-11 21:51:23 +03:00
if ( ! v - > csum_type )
2018-11-01 22:13:19 +03:00
return ;
for ( i = 0 ; i < v - > nr_blocks ; i + + ) {
unsigned offset = buf - > offset ;
unsigned end = buf - > offset + buf - > size ;
if ( ! test_bit ( i , buf - > valid ) )
continue ;
while ( offset < end ) {
unsigned j = offset > > v - > csum_granularity_bits ;
unsigned len = min ( csum_granularity , end - offset ) ;
2021-01-11 21:51:23 +03:00
struct bch_csum want = stripe_csum_get ( v , i , j ) ;
struct bch_csum got = ec_block_checksum ( buf , i , offset ) ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( bch2_crc_cmp ( want , got ) ) {
2021-01-23 02:01:07 +03:00
char buf2 [ 200 ] ;
bch2_bkey_val_to_text ( & PBUF ( buf2 ) , c , bkey_i_to_s_c ( & buf - > key . k_i ) ) ;
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2021-01-23 02:01:07 +03:00
" stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx \n %s " ,
( void * ) _RET_IP_ , i , j , v - > csum_type ,
want . lo , got . lo , buf2 ) ;
2018-11-01 22:13:19 +03:00
clear_bit ( i , buf - > valid ) ;
break ;
}
offset + = len ;
}
}
}
/* Erasure coding: */
static void ec_generate_ec ( struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned nr_data = v - > nr_blocks - v - > nr_redundant ;
unsigned bytes = le16_to_cpu ( v - > sectors ) < < 9 ;
2018-11-23 10:06:18 +03:00
raid_gen ( nr_data , v - > nr_redundant , bytes , buf - > data ) ;
2018-11-01 22:13:19 +03:00
}
static unsigned ec_nr_failed ( struct ec_stripe_buf * buf )
{
2020-12-15 03:41:03 +03:00
return buf - > key . v . nr_blocks -
bitmap_weight ( buf - > valid , buf - > key . v . nr_blocks ) ;
2018-11-01 22:13:19 +03:00
}
static int ec_do_recov ( struct bch_fs * c , struct ec_stripe_buf * buf )
{
struct bch_stripe * v = & buf - > key . v ;
2020-12-16 22:23:27 +03:00
unsigned i , failed [ BCH_BKEY_PTRS_MAX ] , nr_failed = 0 ;
2018-11-01 22:13:19 +03:00
unsigned nr_data = v - > nr_blocks - v - > nr_redundant ;
unsigned bytes = buf - > size < < 9 ;
if ( ec_nr_failed ( buf ) > v - > nr_redundant ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2018-11-01 22:13:19 +03:00
" error doing reconstruct read: unable to read enough blocks " ) ;
return - 1 ;
}
for ( i = 0 ; i < nr_data ; i + + )
if ( ! test_bit ( i , buf - > valid ) )
failed [ nr_failed + + ] = i ;
2018-11-23 10:06:18 +03:00
raid_rec ( nr_failed , failed , nr_data , v - > nr_redundant , bytes , buf - > data ) ;
2018-11-01 22:13:19 +03:00
return 0 ;
}
/* IO: */
static void ec_block_endio ( struct bio * bio )
{
struct ec_bio * ec_bio = container_of ( bio , struct ec_bio , bio ) ;
2021-01-23 02:01:07 +03:00
struct bch_stripe * v = & ec_bio - > buf - > key . v ;
struct bch_extent_ptr * ptr = & v - > ptrs [ ec_bio - > idx ] ;
2018-11-01 22:13:19 +03:00
struct bch_dev * ca = ec_bio - > ca ;
struct closure * cl = bio - > bi_private ;
2020-12-03 21:57:22 +03:00
if ( bch2_dev_io_err_on ( bio - > bi_status , ca , " erasure coding %s error: %s " ,
2020-07-02 20:43:58 +03:00
bio_data_dir ( bio ) ? " write " : " read " ,
2020-07-21 20:34:22 +03:00
bch2_blk_status_to_str ( bio - > bi_status ) ) )
2018-11-01 22:13:19 +03:00
clear_bit ( ec_bio - > idx , ec_bio - > buf - > valid ) ;
2021-01-23 02:01:07 +03:00
if ( ptr_stale ( ca , ptr ) ) {
bch_err_ratelimited ( ca - > fs ,
" error %s stripe: stale pointer after io " ,
bio_data_dir ( bio ) = = READ ? " reading from " : " writing to " ) ;
clear_bit ( ec_bio - > idx , ec_bio - > buf - > valid ) ;
}
2018-11-01 22:13:19 +03:00
bio_put ( & ec_bio - > bio ) ;
percpu_ref_put ( & ca - > io_ref ) ;
closure_put ( cl ) ;
}
static void ec_block_io ( struct bch_fs * c , struct ec_stripe_buf * buf ,
unsigned rw , unsigned idx , struct closure * cl )
{
struct bch_stripe * v = & buf - > key . v ;
unsigned offset = 0 , bytes = buf - > size < < 9 ;
struct bch_extent_ptr * ptr = & v - > ptrs [ idx ] ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , ptr - > dev ) ;
2020-07-10 01:31:51 +03:00
enum bch_data_type data_type = idx < buf - > key . v . nr_blocks - buf - > key . v . nr_redundant
? BCH_DATA_user
: BCH_DATA_parity ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
if ( ptr_stale ( ca , ptr ) ) {
bch_err_ratelimited ( c ,
" error %s stripe: stale pointer " ,
rw = = READ ? " reading from " : " writing to " ) ;
clear_bit ( idx , buf - > valid ) ;
return ;
}
2018-11-01 22:13:19 +03:00
if ( ! bch2_dev_get_ioref ( ca , rw ) ) {
clear_bit ( idx , buf - > valid ) ;
return ;
}
2020-07-10 01:31:51 +03:00
this_cpu_add ( ca - > io_done - > sectors [ rw ] [ data_type ] , buf - > size ) ;
2018-11-01 22:13:19 +03:00
while ( offset < bytes ) {
unsigned nr_iovecs = min_t ( size_t , BIO_MAX_VECS ,
DIV_ROUND_UP ( bytes , PAGE_SIZE ) ) ;
unsigned b = min_t ( size_t , bytes - offset ,
nr_iovecs < < PAGE_SHIFT ) ;
struct ec_bio * ec_bio ;
ec_bio = container_of ( bio_alloc_bioset ( ca - > disk_sb . bdev ,
nr_iovecs ,
rw ,
GFP_KERNEL ,
& c - > ec_bioset ) ,
struct ec_bio , bio ) ;
ec_bio - > ca = ca ;
ec_bio - > buf = buf ;
ec_bio - > idx = idx ;
ec_bio - > bio . bi_iter . bi_sector = ptr - > offset + buf - > offset + ( offset > > 9 ) ;
ec_bio - > bio . bi_end_io = ec_block_endio ;
ec_bio - > bio . bi_private = cl ;
2019-07-04 02:27:42 +03:00
bch2_bio_map ( & ec_bio - > bio , buf - > data [ idx ] + offset , b ) ;
2018-11-01 22:13:19 +03:00
closure_get ( cl ) ;
percpu_ref_get ( & ca - > io_ref ) ;
submit_bio ( & ec_bio - > bio ) ;
offset + = b ;
}
percpu_ref_put ( & ca - > io_ref ) ;
}
2021-01-11 21:51:23 +03:00
/*
 * Look up stripe @idx in the stripes btree and copy its key into
 * stripe->key.
 *
 * Returns 0 on success, -ENOENT if the slot does not hold a stripe key, or
 * the error from the btree lookup.
 */
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_SLOTS);
	k = bch2_btree_iter_peek_slot(iter);

	ret = bkey_err(k);
	if (!ret && k.k->type != KEY_TYPE_stripe)
		ret = -ENOENT;

	if (!ret)
		bkey_reassemble(&stripe->key.k_i, k);

	bch2_trans_exit(&trans);
	return ret;
}
/* recovery read path: */
int bch2_ec_read_extent ( struct bch_fs * c , struct bch_read_bio * rbio )
{
2018-11-01 22:13:19 +03:00
struct ec_stripe_buf * buf ;
struct closure cl ;
struct bch_stripe * v ;
2021-01-11 21:51:23 +03:00
unsigned i , offset ;
int ret = 0 ;
2018-11-01 22:13:19 +03:00
closure_init_stack ( & cl ) ;
2019-10-09 01:45:29 +03:00
BUG_ON ( ! rbio - > pick . has_ec ) ;
2018-11-01 22:13:19 +03:00
buf = kzalloc ( sizeof ( * buf ) , GFP_NOIO ) ;
if ( ! buf )
return - ENOMEM ;
2021-01-11 21:51:23 +03:00
ret = get_stripe_key ( c , rbio - > pick . ec . idx , buf ) ;
if ( ret ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2021-01-11 21:51:23 +03:00
" error doing reconstruct read: error %i looking up stripe " , ret ) ;
2018-11-01 22:13:19 +03:00
kfree ( buf ) ;
2021-01-11 21:51:23 +03:00
return - EIO ;
2018-11-01 22:13:19 +03:00
}
v = & buf - > key . v ;
2021-01-11 21:51:23 +03:00
if ( ! bch2_ptr_matches_stripe ( v , rbio - > pick ) ) {
bch_err_ratelimited ( c ,
" error doing reconstruct read: pointer doesn't match stripe " ) ;
ret = - EIO ;
goto err ;
2018-11-01 22:13:19 +03:00
}
2021-01-11 21:51:23 +03:00
offset = rbio - > bio . bi_iter . bi_sector - v - > ptrs [ rbio - > pick . ec . block ] . offset ;
if ( offset + bio_sectors ( & rbio - > bio ) > le16_to_cpu ( v - > sectors ) ) {
bch_err_ratelimited ( c ,
" error doing reconstruct read: read is bigger than stripe " ) ;
ret = - EIO ;
goto err ;
}
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
ret = ec_stripe_buf_init ( buf , offset , bio_sectors ( & rbio - > bio ) ) ;
if ( ret )
goto err ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
2018-11-01 22:13:19 +03:00
ec_block_io ( c , buf , REQ_OP_READ , i , & cl ) ;
closure_sync ( & cl ) ;
if ( ec_nr_failed ( buf ) > v - > nr_redundant ) {
2020-12-03 21:57:22 +03:00
bch_err_ratelimited ( c ,
2018-11-01 22:13:19 +03:00
" error doing reconstruct read: unable to read enough blocks " ) ;
ret = - EIO ;
goto err ;
}
ec_validate_checksums ( c , buf ) ;
ret = ec_do_recov ( c , buf ) ;
if ( ret )
goto err ;
memcpy_to_bio ( & rbio - > bio , rbio - > bio . bi_iter ,
2021-01-11 21:51:23 +03:00
buf - > data [ rbio - > pick . ec . block ] + ( ( offset - buf - > offset ) < < 9 ) ) ;
2018-11-01 22:13:19 +03:00
err :
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( buf ) ;
2018-11-01 22:13:19 +03:00
kfree ( buf ) ;
return ret ;
}
2018-11-25 01:09:44 +03:00
/* stripe bucket accounting: */
2018-11-01 22:13:19 +03:00
/*
 * Make sure the in-memory state for stripe @idx exists: grow the stripes heap
 * so it has room for index @idx, and allocate the genradix slot in
 * c->stripes[0] (and in c->stripes[1] while gc is running).
 *
 * Returns 0 on success, -ENOMEM if any allocation with @gfp fails.
 */
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
	ec_stripes_heap *h = &c->ec_stripes_heap;

	if (idx >= h->size) {
		ec_stripes_heap n;

		if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
			return -ENOMEM;

		spin_lock(&c->ec_stripes_heap_lock);
		/* re-check under the lock; only swap in if still bigger */
		if (n.size > h->size) {
			memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
			n.used = h->used;
			swap(*h, n);
		}
		spin_unlock(&c->ec_stripes_heap_lock);

		/* frees whichever heap ended up in @n */
		free_heap(&n);
	}

	if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp))
		return -ENOMEM;

	if (c->gc_pos.phase == GC_PHASE_NOT_RUNNING)
		return 0;

	if (!genradix_ptr_alloc(&c->stripes[1], idx, gfp))
		return -ENOMEM;

	return 0;
}
2021-08-25 04:30:06 +03:00
static int ec_stripe_mem_alloc ( struct btree_trans * trans ,
2018-11-01 22:13:19 +03:00
struct btree_iter * iter )
{
size_t idx = iter - > pos . offset ;
2019-03-11 21:59:58 +03:00
int ret = 0 ;
2018-11-01 22:13:19 +03:00
2021-08-25 04:30:06 +03:00
if ( ! __ec_stripe_mem_alloc ( trans - > c , idx , GFP_NOWAIT | __GFP_NOWARN ) )
2019-03-11 21:59:58 +03:00
return ret ;
2018-11-01 22:13:19 +03:00
2021-08-25 04:30:06 +03:00
bch2_trans_unlock ( trans ) ;
2019-03-11 21:59:58 +03:00
ret = - EINTR ;
2018-11-01 22:13:19 +03:00
2021-08-25 04:30:06 +03:00
if ( ! __ec_stripe_mem_alloc ( trans - > c , idx , GFP_KERNEL ) )
2019-03-11 21:59:58 +03:00
return ret ;
2018-11-01 22:13:19 +03:00
return - ENOMEM ;
}
static ssize_t stripe_idx_to_delete ( struct bch_fs * c )
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
2019-07-11 19:45:59 +03:00
return h - > used & & h - > data [ 0 ] . blocks_nonempty = = 0
? h - > data [ 0 ] . idx : - 1 ;
2018-11-01 22:13:19 +03:00
}
/*
 * Heap comparison: orders entries by blocks_nonempty.
 * Returns 1 if @l has more nonempty blocks than @r, -1 if fewer, 0 if equal.
 */
static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
				      struct ec_stripe_heap_entry l,
				      struct ec_stripe_heap_entry r)
{
	if (l.blocks_nonempty > r.blocks_nonempty)
		return 1;
	if (l.blocks_nonempty < r.blocks_nonempty)
		return -1;
	return 0;
}
/*
 * Heap callback: record in the in-memory stripe that its heap entry now
 * lives at position @i.
 */
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
						   size_t i)
{
	struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
	size_t stripe_idx = h->data[i].idx;

	genradix_ptr(&c->stripes[0], stripe_idx)->heap_idx = i;
}
/*
 * Sanity check: stripe @idx must be alive, and its heap_idx must name a used
 * heap slot that refers back to @idx.
 */
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
	struct stripe *m = genradix_ptr(&c->stripes[0], idx);
	ec_stripes_heap *heap = &c->ec_stripes_heap;

	BUG_ON(!m->alive);
	BUG_ON(m->heap_idx >= heap->used);
	BUG_ON(heap->data[m->heap_idx].idx != idx);
}
/*
 * Remove stripe @m (index @idx) from the stripes heap; a no-op if it is not
 * currently on the heap.
 */
void bch2_stripes_heap_del(struct bch_fs *c,
			   struct stripe *m, size_t idx)
{
	if (m->on_heap) {
		m->on_heap = false;

		heap_verify_backpointer(c, idx);

		heap_del(&c->ec_stripes_heap, m->heap_idx,
			 ec_stripes_heap_cmp,
			 ec_stripes_heap_set_backpointer);
	}
}
/*
 * Add stripe @m (index @idx) to the stripes heap, keyed by its current
 * blocks_nonempty count; a no-op if it is already on the heap.  The heap
 * must have room.
 */
void bch2_stripes_heap_insert(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	struct ec_stripe_heap_entry entry;

	if (m->on_heap)
		return;

	BUG_ON(heap_full(&c->ec_stripes_heap));

	m->on_heap = true;

	entry = (struct ec_stripe_heap_entry) {
		.idx			= idx,
		.blocks_nonempty	= m->blocks_nonempty,
	};

	heap_add(&c->ec_stripes_heap, entry,
		 ec_stripes_heap_cmp,
		 ec_stripes_heap_set_backpointer);

	heap_verify_backpointer(c, idx);
}
2020-07-07 03:18:13 +03:00
/*
 * Stripe @m's blocks_nonempty changed: refresh its heap entry and restore
 * heap order.  A no-op if the stripe is not on the heap.
 *
 * Afterwards, if the top of the heap is now a deletable stripe and the
 * filesystem's write ref is not dying, the stripe delete work is scheduled.
 */
void bch2_stripes_heap_update(struct bch_fs *c,
			      struct stripe *m, size_t idx)
{
	ec_stripes_heap *heap = &c->ec_stripes_heap;
	size_t pos;

	if (!m->on_heap)
		return;

	heap_verify_backpointer(c, idx);

	heap->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;

	/* sift from a local copy of the position, not from m->heap_idx */
	pos = m->heap_idx;
	heap_sift_up(heap, pos, ec_stripes_heap_cmp,
		     ec_stripes_heap_set_backpointer);
	heap_sift_down(heap, pos, ec_stripes_heap_cmp,
		       ec_stripes_heap_set_backpointer);

	heap_verify_backpointer(c, idx);

	if (stripe_idx_to_delete(c) < 0)
		return;

	if (!percpu_ref_is_dying(&c->writes))
		schedule_work(&c->ec_stripe_delete_work);
}
2018-11-25 01:09:44 +03:00
/* stripe deletion */
2019-03-14 03:49:16 +03:00
/*
 * Delete the key for stripe @idx from the stripes btree by deleting the
 * single-slot range [idx, idx + 1).  Returns the result of the range delete.
 */
static int ec_stripe_delete(struct bch_fs *c, size_t idx)
{
	return bch2_btree_delete_range(c, BTREE_ID_stripes,
				       POS(0, idx), POS(0, idx + 1),
				       NULL);
}
static void ec_stripe_delete_work ( struct work_struct * work )
{
struct bch_fs * c =
container_of ( work , struct bch_fs , ec_stripe_delete_work ) ;
ssize_t idx ;
while ( 1 ) {
spin_lock ( & c - > ec_stripes_heap_lock ) ;
idx = stripe_idx_to_delete ( c ) ;
2020-07-07 03:18:13 +03:00
if ( idx < 0 ) {
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
break ;
2020-07-07 03:18:13 +03:00
}
bch2_stripes_heap_del ( c , genradix_ptr ( & c - > stripes [ 0 ] , idx ) , idx ) ;
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
2019-07-10 23:04:58 +03:00
if ( ec_stripe_delete ( c , idx ) )
break ;
2018-11-01 22:13:19 +03:00
}
}
2018-11-25 01:09:44 +03:00
/* stripe creation: */
2018-11-01 22:13:19 +03:00
static int ec_stripe_bkey_insert ( struct bch_fs * c ,
2021-01-18 01:43:49 +03:00
struct bkey_i_stripe * stripe ,
struct disk_reservation * res )
2018-11-01 22:13:19 +03:00
{
2019-03-14 03:49:16 +03:00
struct btree_trans trans ;
struct btree_iter * iter ;
2018-11-01 22:13:19 +03:00
struct bkey_s_c k ;
2021-01-19 04:20:24 +03:00
struct bpos min_pos = POS ( 0 , 1 ) ;
struct bpos start_pos = bpos_max ( min_pos , POS ( 0 , c - > ec_stripe_hint ) ) ;
2018-11-01 22:13:19 +03:00
int ret ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2018-11-01 22:13:19 +03:00
retry :
2019-03-14 03:49:16 +03:00
bch2_trans_begin ( & trans ) ;
2021-02-21 03:27:37 +03:00
for_each_btree_key ( & trans , iter , BTREE_ID_stripes , start_pos ,
2019-04-17 22:49:28 +03:00
BTREE_ITER_SLOTS | BTREE_ITER_INTENT , k , ret ) {
2019-08-23 00:09:16 +03:00
if ( bkey_cmp ( k . k - > p , POS ( 0 , U32_MAX ) ) > 0 ) {
if ( start_pos . offset ) {
2021-01-19 04:20:24 +03:00
start_pos = min_pos ;
2019-08-23 00:09:16 +03:00
bch2_btree_iter_set_pos ( iter , start_pos ) ;
continue ;
}
ret = - ENOSPC ;
2019-03-14 03:49:16 +03:00
break ;
2019-08-23 00:09:16 +03:00
}
2018-11-01 22:13:19 +03:00
if ( bkey_deleted ( k . k ) )
goto found_slot ;
}
2019-03-11 21:59:58 +03:00
goto err ;
2018-11-01 22:13:19 +03:00
found_slot :
2019-08-23 00:09:16 +03:00
start_pos = iter - > pos ;
2021-08-25 04:30:06 +03:00
ret = ec_stripe_mem_alloc ( & trans , iter ) ;
2018-11-01 22:13:19 +03:00
if ( ret )
2019-03-11 21:59:58 +03:00
goto err ;
2018-11-01 22:13:19 +03:00
2019-03-14 03:49:16 +03:00
stripe - > k . p = iter - > pos ;
2018-11-01 22:13:19 +03:00
2021-06-02 07:15:07 +03:00
ret = bch2_trans_update ( & trans , iter , & stripe - > k_i , 0 ) ? :
bch2_trans_commit ( & trans , res , NULL ,
2019-03-11 21:59:58 +03:00
BTREE_INSERT_NOFAIL ) ;
err :
2020-02-26 23:39:46 +03:00
bch2_trans_iter_put ( & trans , iter ) ;
2019-03-11 21:59:58 +03:00
if ( ret = = - EINTR )
goto retry ;
2019-08-23 00:09:16 +03:00
c - > ec_stripe_hint = ret ? start_pos . offset : start_pos . offset + 1 ;
2019-03-14 03:49:16 +03:00
bch2_trans_exit ( & trans ) ;
2018-11-01 22:13:19 +03:00
return ret ;
}
2021-01-18 01:43:49 +03:00
static int ec_stripe_bkey_update ( struct btree_trans * trans ,
struct bkey_i_stripe * new )
{
struct btree_iter * iter ;
struct bkey_s_c k ;
const struct bch_stripe * existing ;
unsigned i ;
int ret ;
2021-02-21 03:27:37 +03:00
iter = bch2_trans_get_iter ( trans , BTREE_ID_stripes ,
2021-01-18 01:43:49 +03:00
new - > k . p , BTREE_ITER_INTENT ) ;
k = bch2_btree_iter_peek_slot ( iter ) ;
ret = bkey_err ( k ) ;
if ( ret )
goto err ;
if ( ! k . k | | k . k - > type ! = KEY_TYPE_stripe ) {
2021-01-28 03:36:09 +03:00
bch_err ( trans - > c , " error updating stripe: not found " ) ;
2021-01-18 01:43:49 +03:00
ret = - ENOENT ;
goto err ;
}
existing = bkey_s_c_to_stripe ( k ) . v ;
if ( existing - > nr_blocks ! = new - > v . nr_blocks ) {
2021-01-28 03:36:09 +03:00
bch_err ( trans - > c , " error updating stripe: nr_blocks does not match " ) ;
2021-01-18 01:43:49 +03:00
ret = - EINVAL ;
goto err ;
}
for ( i = 0 ; i < new - > v . nr_blocks ; i + + )
stripe_blockcount_set ( & new - > v , i ,
stripe_blockcount_get ( existing , i ) ) ;
2021-06-02 07:15:07 +03:00
ret = bch2_trans_update ( trans , iter , & new - > k_i , 0 ) ;
2021-01-18 01:43:49 +03:00
err :
bch2_trans_iter_put ( trans , iter ) ;
return ret ;
}
2018-11-01 22:13:19 +03:00
static void extent_stripe_ptr_add ( struct bkey_s_extent e ,
struct ec_stripe_buf * s ,
struct bch_extent_ptr * ptr ,
unsigned block )
{
struct bch_extent_stripe_ptr * dst = ( void * ) ptr ;
union bch_extent_entry * end = extent_entry_last ( e ) ;
memmove_u64s_up ( dst + 1 , dst , ( u64 * ) end - ( u64 * ) dst ) ;
e . k - > u64s + = sizeof ( * dst ) / sizeof ( u64 ) ;
* dst = ( struct bch_extent_stripe_ptr ) {
. type = 1 < < BCH_EXTENT_ENTRY_stripe_ptr ,
. block = block ,
2021-01-23 02:01:07 +03:00
. redundancy = s - > key . v . nr_redundant ,
2018-11-01 22:13:19 +03:00
. idx = s - > key . k . p . offset ,
} ;
}
static int ec_stripe_update_ptrs ( struct bch_fs * c ,
struct ec_stripe_buf * s ,
struct bkey * pos )
{
2019-03-14 03:49:16 +03:00
struct btree_trans trans ;
struct btree_iter * iter ;
2018-11-01 22:13:19 +03:00
struct bkey_s_c k ;
struct bkey_s_extent e ;
2020-12-17 23:08:58 +03:00
struct bkey_buf sk ;
2021-08-30 02:34:37 +03:00
struct bpos next_pos ;
2021-01-11 21:51:23 +03:00
int ret = 0 , dev , block ;
2018-11-01 22:13:19 +03:00
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_init ( & sk ) ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , BTREE_ITER_MAX , 0 ) ;
2018-11-01 22:13:19 +03:00
2020-06-30 21:44:19 +03:00
/* XXX this doesn't support the reflink btree */
2021-02-21 03:27:37 +03:00
iter = bch2_trans_get_iter ( & trans , BTREE_ID_extents ,
2019-03-14 03:49:16 +03:00
bkey_start_pos ( pos ) ,
BTREE_ITER_INTENT ) ;
while ( ( k = bch2_btree_iter_peek ( iter ) ) . k & &
2019-03-28 05:03:30 +03:00
! ( ret = bkey_err ( k ) ) & &
2018-11-01 22:13:19 +03:00
bkey_cmp ( bkey_start_pos ( k . k ) , pos - > p ) < 0 ) {
2019-11-16 04:40:15 +03:00
struct bch_extent_ptr * ptr , * ec_ptr = NULL ;
2019-05-13 07:30:02 +03:00
if ( extent_has_stripe_ptr ( k , s - > key . k . p . offset ) ) {
2021-03-21 23:55:25 +03:00
bch2_btree_iter_advance ( iter ) ;
2019-05-13 07:30:02 +03:00
continue ;
}
2021-01-11 21:51:23 +03:00
block = bkey_matches_stripe ( & s - > key . v , k ) ;
if ( block < 0 ) {
2021-03-21 23:55:25 +03:00
bch2_btree_iter_advance ( iter ) ;
2018-11-01 22:13:19 +03:00
continue ;
}
2021-01-11 21:51:23 +03:00
dev = s - > key . v . ptrs [ block ] . dev ;
2018-11-01 22:13:19 +03:00
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_reassemble ( & sk , c , k ) ;
2019-11-10 00:01:15 +03:00
e = bkey_i_to_s_extent ( sk . k ) ;
2018-11-01 22:13:19 +03:00
2020-07-11 20:23:17 +03:00
bch2_bkey_drop_ptrs ( e . s , ptr , ptr - > dev ! = dev ) ;
ec_ptr = ( void * ) bch2_bkey_has_device ( e . s_c , dev ) ;
BUG_ON ( ! ec_ptr ) ;
2018-11-01 22:13:19 +03:00
2021-01-11 21:51:23 +03:00
extent_stripe_ptr_add ( e , s , ec_ptr , block ) ;
2018-11-01 22:13:19 +03:00
2020-03-30 21:05:05 +03:00
bch2_btree_iter_set_pos ( iter , bkey_start_pos ( & sk . k - > k ) ) ;
2021-08-30 02:34:37 +03:00
next_pos = sk . k - > k . p ;
2021-06-17 06:21:23 +03:00
ret = bch2_btree_iter_traverse ( iter ) ? :
bch2_trans_update ( & trans , iter , sk . k , 0 ) ? :
2021-06-02 07:15:07 +03:00
bch2_trans_commit ( & trans , NULL , NULL ,
2020-12-22 01:17:18 +03:00
BTREE_INSERT_NOFAIL ) ;
2021-08-30 02:34:37 +03:00
if ( ! ret )
bch2_btree_iter_set_pos ( iter , next_pos ) ;
2018-11-01 22:13:19 +03:00
if ( ret = = - EINTR )
ret = 0 ;
if ( ret )
break ;
}
2021-03-20 03:29:11 +03:00
bch2_trans_iter_put ( & trans , iter ) ;
2018-11-01 22:13:19 +03:00
2019-03-14 03:49:16 +03:00
bch2_trans_exit ( & trans ) ;
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_exit ( & sk , c ) ;
2019-03-14 03:49:16 +03:00
return ret ;
2018-11-01 22:13:19 +03:00
}
/*
* data buckets of new stripe all written : create the stripe
*/
static void ec_stripe_create ( struct ec_stripe_new * s )
{
struct bch_fs * c = s - > c ;
struct open_bucket * ob ;
struct bkey_i * k ;
2020-07-07 03:18:13 +03:00
struct stripe * m ;
2020-12-15 03:41:03 +03:00
struct bch_stripe * v = & s - > new_stripe . key . v ;
2018-11-01 22:13:19 +03:00
unsigned i , nr_data = v - > nr_blocks - v - > nr_redundant ;
int ret ;
BUG_ON ( s - > h - > s = = s ) ;
2020-12-15 03:41:03 +03:00
closure_sync ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
if ( s - > err ) {
2020-07-07 03:59:46 +03:00
if ( s - > err ! = - EROFS )
bch_err ( c , " error creating stripe: error writing data buckets " ) ;
2018-11-01 22:13:19 +03:00
goto err ;
}
2020-12-15 03:41:03 +03:00
if ( s - > have_existing_stripe ) {
ec_validate_checksums ( c , & s - > existing_stripe ) ;
if ( ec_do_recov ( c , & s - > existing_stripe ) ) {
bch_err ( c , " error creating stripe: error reading existing stripe " ) ;
goto err ;
}
for ( i = 0 ; i < nr_data ; i + + )
if ( stripe_blockcount_get ( & s - > existing_stripe . key . v , i ) )
swap ( s - > new_stripe . data [ i ] ,
s - > existing_stripe . data [ i ] ) ;
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( & s - > existing_stripe ) ;
2020-12-15 03:41:03 +03:00
}
2020-07-07 05:33:54 +03:00
BUG_ON ( ! s - > allocated ) ;
2018-11-01 22:13:19 +03:00
if ( ! percpu_ref_tryget ( & c - > writes ) )
goto err ;
2020-12-15 03:41:03 +03:00
ec_generate_ec ( & s - > new_stripe ) ;
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
ec_generate_checksums ( & s - > new_stripe ) ;
2018-11-01 22:13:19 +03:00
/* write p/q: */
for ( i = nr_data ; i < v - > nr_blocks ; i + + )
2020-12-15 03:41:03 +03:00
ec_block_io ( c , & s - > new_stripe , REQ_OP_WRITE , i , & s - > iodone ) ;
closure_sync ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
if ( ec_nr_failed ( & s - > new_stripe ) ) {
bch_err ( c , " error creating stripe: error writing redundancy buckets " ) ;
goto err_put_writes ;
}
2018-11-01 22:13:19 +03:00
2020-12-15 03:41:03 +03:00
ret = s - > have_existing_stripe
2021-01-18 01:43:49 +03:00
? bch2_trans_do ( c , & s - > res , NULL , BTREE_INSERT_NOFAIL ,
ec_stripe_bkey_update ( & trans , & s - > new_stripe . key ) )
: ec_stripe_bkey_insert ( c , & s - > new_stripe . key , & s - > res ) ;
2018-11-01 22:13:19 +03:00
if ( ret ) {
bch_err ( c , " error creating stripe: error creating stripe key " ) ;
2020-07-07 03:18:13 +03:00
goto err_put_writes ;
2018-11-01 22:13:19 +03:00
}
for_each_keylist_key ( & s - > keys , k ) {
2020-12-15 03:41:03 +03:00
ret = ec_stripe_update_ptrs ( c , & s - > new_stripe , & k - > k ) ;
2020-06-30 21:44:19 +03:00
if ( ret ) {
2020-12-09 21:39:30 +03:00
bch_err ( c , " error creating stripe: error %i updating pointers " , ret ) ;
2018-11-01 22:13:19 +03:00
break ;
2020-06-30 21:44:19 +03:00
}
2018-11-01 22:13:19 +03:00
}
2020-07-07 03:18:13 +03:00
spin_lock ( & c - > ec_stripes_heap_lock ) ;
2020-12-15 03:41:03 +03:00
m = genradix_ptr ( & c - > stripes [ 0 ] , s - > new_stripe . key . k . p . offset ) ;
2021-01-18 01:43:49 +03:00
2020-07-07 03:18:13 +03:00
BUG_ON ( m - > on_heap ) ;
2020-12-15 03:41:03 +03:00
bch2_stripes_heap_insert ( c , m , s - > new_stripe . key . k . p . offset ) ;
2020-07-07 03:18:13 +03:00
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-01 22:13:19 +03:00
err_put_writes :
percpu_ref_put ( & c - > writes ) ;
err :
2020-07-10 01:31:51 +03:00
bch2_disk_reservation_put ( c , & s - > res ) ;
2021-01-19 07:26:42 +03:00
for ( i = 0 ; i < v - > nr_blocks ; i + + )
if ( s - > blocks [ i ] ) {
ob = c - > open_buckets + s - > blocks [ i ] ;
if ( i < nr_data ) {
ob - > ec = NULL ;
__bch2_open_bucket_put ( c , ob ) ;
} else {
bch2_open_bucket_put ( c , ob ) ;
}
}
2018-11-01 22:13:19 +03:00
bch2_keylist_free ( & s - > keys , s - > inline_keys ) ;
2021-01-11 21:51:23 +03:00
ec_stripe_buf_exit ( & s - > existing_stripe ) ;
ec_stripe_buf_exit ( & s - > new_stripe ) ;
2020-12-15 03:41:03 +03:00
closure_debug_destroy ( & s - > iodone ) ;
2018-11-01 22:13:19 +03:00
kfree ( s ) ;
}
2020-07-07 03:59:46 +03:00
static void ec_stripe_create_work ( struct work_struct * work )
2018-11-01 22:13:19 +03:00
{
2020-07-07 03:59:46 +03:00
struct bch_fs * c = container_of ( work ,
struct bch_fs , ec_stripe_create_work ) ;
struct ec_stripe_new * s , * n ;
restart :
mutex_lock ( & c - > ec_stripe_new_lock ) ;
list_for_each_entry_safe ( s , n , & c - > ec_stripe_new_list , list )
if ( ! atomic_read ( & s - > pin ) ) {
list_del ( & s - > list ) ;
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
ec_stripe_create ( s ) ;
goto restart ;
}
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
}
2018-11-01 22:13:19 +03:00
2020-07-07 03:59:46 +03:00
static void ec_stripe_new_put ( struct bch_fs * c , struct ec_stripe_new * s )
{
BUG_ON ( atomic_read ( & s - > pin ) < = 0 ) ;
2018-11-01 22:13:19 +03:00
2020-07-07 03:59:46 +03:00
if ( atomic_dec_and_test ( & s - > pin ) ) {
BUG_ON ( ! s - > pending ) ;
queue_work ( system_long_wq , & c - > ec_stripe_create_work ) ;
}
2018-11-01 22:13:19 +03:00
}
2020-07-07 03:59:46 +03:00
static void ec_stripe_set_pending ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
2020-07-07 03:59:46 +03:00
struct ec_stripe_new * s = h - > s ;
2020-07-07 05:33:54 +03:00
BUG_ON ( ! s - > allocated & & ! s - > err ) ;
2020-07-07 03:59:46 +03:00
h - > s = NULL ;
s - > pending = true ;
mutex_lock ( & c - > ec_stripe_new_lock ) ;
list_add ( & s - > list , & c - > ec_stripe_new_list ) ;
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
ec_stripe_new_put ( c , s ) ;
2018-11-01 22:13:19 +03:00
}
/* have a full bucket - hand it off to be erasure coded: */
void bch2_ec_bucket_written ( struct bch_fs * c , struct open_bucket * ob )
{
struct ec_stripe_new * s = ob - > ec ;
if ( ob - > sectors_free )
s - > err = - 1 ;
2020-07-07 03:59:46 +03:00
ec_stripe_new_put ( c , s ) ;
2018-11-01 22:13:19 +03:00
}
void bch2_ec_bucket_cancel ( struct bch_fs * c , struct open_bucket * ob )
{
struct ec_stripe_new * s = ob - > ec ;
s - > err = - EIO ;
}
void * bch2_writepoint_ec_buf ( struct bch_fs * c , struct write_point * wp )
{
struct open_bucket * ob = ec_open_bucket ( c , & wp - > ptrs ) ;
struct bch_dev * ca ;
unsigned offset ;
if ( ! ob )
return NULL ;
ca = bch_dev_bkey_exists ( c , ob - > ptr . dev ) ;
offset = ca - > mi . bucket_size - ob - > sectors_free ;
2020-12-15 03:41:03 +03:00
return ob - > ec - > new_stripe . data [ ob - > ec_idx ] + ( offset < < 9 ) ;
2018-11-01 22:13:19 +03:00
}
void bch2_ec_add_backpointer ( struct bch_fs * c , struct write_point * wp ,
struct bpos pos , unsigned sectors )
{
struct open_bucket * ob = ec_open_bucket ( c , & wp - > ptrs ) ;
struct ec_stripe_new * ec ;
if ( ! ob )
return ;
ec = ob - > ec ;
mutex_lock ( & ec - > lock ) ;
if ( bch2_keylist_realloc ( & ec - > keys , ec - > inline_keys ,
ARRAY_SIZE ( ec - > inline_keys ) ,
BKEY_U64s ) ) {
BUG ( ) ;
}
bkey_init ( & ec - > keys . top - > k ) ;
ec - > keys . top - > k . p = pos ;
bch2_key_resize ( & ec - > keys . top - > k , sectors ) ;
bch2_keylist_push ( & ec - > keys ) ;
mutex_unlock ( & ec - > lock ) ;
}
/*
 * sort() comparison callback for arrays of unsigned: returns a negative value,
 * zero or a positive value when *_l is less than, equal to or greater than
 * *_r.
 */
static int unsigned_cmp(const void *_l, const void *_r)
{
	const unsigned *lhs = _l;
	const unsigned *rhs = _r;

	return (*lhs > *rhs) - (*lhs < *rhs);
}
/*
 * Return the bucket size shared by the largest number of devices in @devs.
 *
 * The sizes are collected and sorted, then runs of equal values are counted;
 * on a tie the smaller size (the earlier run) wins. Returns 0 when @devs
 * contains no member devices.
 */
static unsigned pick_blocksize(struct bch_fs *c,
			       struct bch_devs_mask *devs)
{
	struct bch_dev *ca;
	unsigned sizes[BCH_SB_MEMBERS_MAX];
	unsigned i, nr = 0;
	unsigned run_size = 0, run_len = 0;
	unsigned best_size = 0, best_len = 0;

	for_each_member_device_rcu(ca, c, i, devs)
		sizes[nr++] = ca->mi.bucket_size;

	sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);

	for (i = 0; i < nr; i++) {
		if (sizes[i] != run_size) {
			/* A run just ended: remember it if it is the longest */
			if (run_len > best_len) {
				best_len = run_len;
				best_size = run_size;
			}

			run_len = 0;
			run_size = sizes[i];
		}

		run_len++;
	}

	/* Account for the final run */
	if (run_len > best_len)
		best_size = run_size;

	return best_size;
}
2020-06-30 21:44:19 +03:00
/*
 * Policy hook consulted by get_existing_stripe(): currently always answers
 * "no", regardless of @c.
 */
static bool may_create_new_stripe(struct bch_fs *c)
{
	return false;
}
2020-07-07 05:33:54 +03:00
static void ec_stripe_key_init ( struct bch_fs * c ,
struct bkey_i_stripe * s ,
unsigned nr_data ,
unsigned nr_parity ,
unsigned stripe_size )
{
unsigned u64s ;
bkey_stripe_init ( & s - > k_i ) ;
s - > v . sectors = cpu_to_le16 ( stripe_size ) ;
s - > v . algorithm = 0 ;
s - > v . nr_blocks = nr_data + nr_parity ;
s - > v . nr_redundant = nr_parity ;
s - > v . csum_granularity_bits = ilog2 ( c - > sb . encoded_extent_max ) ;
s - > v . csum_type = BCH_CSUM_CRC32C ;
s - > v . pad = 0 ;
while ( ( u64s = stripe_val_u64s ( & s - > v ) ) > BKEY_VAL_U64s_MAX ) {
BUG_ON ( 1 < < s - > v . csum_granularity_bits > =
le16_to_cpu ( s - > v . sectors ) | |
s - > v . csum_granularity_bits = = U8_MAX ) ;
s - > v . csum_granularity_bits + + ;
}
set_bkey_val_u64s ( & s - > k , u64s ) ;
}
static int ec_new_stripe_alloc ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
struct ec_stripe_new * s ;
lockdep_assert_held ( & h - > lock ) ;
s = kzalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( ! s )
return - ENOMEM ;
mutex_init ( & s - > lock ) ;
2020-12-15 03:41:03 +03:00
closure_init ( & s - > iodone , NULL ) ;
2018-11-01 22:13:19 +03:00
atomic_set ( & s - > pin , 1 ) ;
s - > c = c ;
s - > h = h ;
2020-07-07 05:33:54 +03:00
s - > nr_data = min_t ( unsigned , h - > nr_active_devs ,
2020-12-16 22:23:27 +03:00
BCH_BKEY_PTRS_MAX ) - h - > redundancy ;
2020-07-07 05:33:54 +03:00
s - > nr_parity = h - > redundancy ;
2018-11-01 22:13:19 +03:00
bch2_keylist_init ( & s - > keys , s - > inline_keys ) ;
2020-12-15 03:41:03 +03:00
ec_stripe_key_init ( c , & s - > new_stripe . key , s - > nr_data ,
2020-07-07 05:33:54 +03:00
s - > nr_parity , h - > blocksize ) ;
2018-11-01 22:13:19 +03:00
h - > s = s ;
return 0 ;
}
/*
 * Allocate a stripe head for the given target/algorithm/redundancy/copygc
 * combination and add it to c->ec_stripe_head_list.
 *
 * The head's device mask is the set of rw user-data devices for @target,
 * minus devices with zero durability; its blocksize comes from
 * pick_blocksize(), and nr_active_devs counts the devices whose bucket size
 * equals that blocksize.
 *
 * Returns the new head with head->lock held, or NULL on allocation failure.
 */
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
			 unsigned algo, unsigned redundancy,
			 bool copygc)
{
	struct ec_stripe_head *head;
	struct bch_dev *ca;
	unsigned dev;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (!head)
		return NULL;

	mutex_init(&head->lock);
	mutex_lock(&head->lock);

	head->target = target;
	head->algo = algo;
	head->redundancy = redundancy;
	head->copygc = copygc;

	rcu_read_lock();
	head->devs = target_rw_devs(c, BCH_DATA_user, target);

	/* Drop devices that contribute no durability */
	for_each_member_device_rcu(ca, c, dev, &head->devs) {
		if (!ca->mi.durability)
			__clear_bit(dev, head->devs.d);
	}

	head->blocksize = pick_blocksize(c, &head->devs);

	for_each_member_device_rcu(ca, c, dev, &head->devs) {
		if (ca->mi.bucket_size == head->blocksize)
			head->nr_active_devs++;
	}

	rcu_read_unlock();

	list_add(&head->list, &c->ec_stripe_head_list);
	return head;
}
2020-07-07 03:59:46 +03:00
void bch2_ec_stripe_head_put ( struct bch_fs * c , struct ec_stripe_head * h )
2018-11-01 22:13:19 +03:00
{
if ( h - > s & &
2020-07-07 05:33:54 +03:00
h - > s - > allocated & &
2018-11-01 22:13:19 +03:00
bitmap_weight ( h - > s - > blocks_allocated ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ) = = h - > s - > nr_data )
2020-07-07 03:59:46 +03:00
ec_stripe_set_pending ( c , h ) ;
2018-11-01 22:13:19 +03:00
mutex_unlock ( & h - > lock ) ;
}
2020-07-07 05:33:54 +03:00
/*
 * Find the stripe head matching @target, @algo, @redundancy and @copygc,
 * allocating a new one if none exists.
 *
 * Returns the head with its lock held, or NULL if @redundancy is zero or a
 * new head could not be allocated.
 */
struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
						 unsigned target,
						 unsigned algo,
						 unsigned redundancy,
						 bool copygc)
{
	struct ec_stripe_head *pos, *head = NULL;

	if (!redundancy)
		return NULL;

	mutex_lock(&c->ec_stripe_head_lock);

	list_for_each_entry(pos, &c->ec_stripe_head_list, list) {
		if (pos->target != target ||
		    pos->algo != algo ||
		    pos->redundancy != redundancy ||
		    pos->copygc != copygc)
			continue;

		mutex_lock(&pos->lock);
		head = pos;
		break;
	}

	/* No match: the allocator returns the new head already locked */
	if (!head)
		head = ec_new_stripe_head_alloc(c, target, algo, redundancy, copygc);

	mutex_unlock(&c->ec_stripe_head_lock);
	return head;
}
2020-12-15 20:38:17 +03:00
static enum bucket_alloc_ret
new_stripe_alloc_buckets ( struct bch_fs * c , struct ec_stripe_head * h ,
struct closure * cl )
2020-07-07 05:33:54 +03:00
{
2021-01-19 07:26:42 +03:00
struct bch_devs_mask devs = h - > devs ;
2020-07-07 05:33:54 +03:00
struct open_bucket * ob ;
2021-01-19 07:26:42 +03:00
struct open_buckets buckets ;
unsigned i , j , nr_have_parity = 0 , nr_have_data = 0 ;
2020-07-07 05:33:54 +03:00
bool have_cache = true ;
2020-12-15 20:38:17 +03:00
enum bucket_alloc_ret ret = ALLOC_SUCCESS ;
2020-07-07 05:33:54 +03:00
2021-01-19 07:26:42 +03:00
for ( i = 0 ; i < h - > s - > new_stripe . key . v . nr_blocks ; i + + ) {
if ( test_bit ( i , h - > s - > blocks_gotten ) ) {
__clear_bit ( h - > s - > new_stripe . key . v . ptrs [ i ] . dev , devs . d ) ;
if ( i < h - > s - > nr_data )
nr_have_data + + ;
else
nr_have_parity + + ;
}
2020-07-07 05:33:54 +03:00
}
2021-01-19 07:26:42 +03:00
BUG_ON ( nr_have_data > h - > s - > nr_data ) ;
BUG_ON ( nr_have_parity > h - > s - > nr_parity ) ;
2020-07-07 05:33:54 +03:00
percpu_down_read ( & c - > mark_lock ) ;
rcu_read_lock ( ) ;
2021-01-19 07:26:42 +03:00
buckets . nr = 0 ;
if ( nr_have_parity < h - > s - > nr_parity ) {
ret = bch2_bucket_alloc_set ( c , & buckets ,
2020-07-07 05:33:54 +03:00
& h - > parity_stripe ,
& devs ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_parity ,
& nr_have_parity ,
2020-07-07 05:33:54 +03:00
& have_cache ,
2020-12-15 20:53:30 +03:00
h - > copygc
? RESERVE_MOVINGGC
: RESERVE_NONE ,
2020-07-07 05:33:54 +03:00
0 ,
2020-12-15 20:38:17 +03:00
cl ) ;
2021-01-19 07:26:42 +03:00
open_bucket_for_each ( c , & buckets , ob , i ) {
j = find_next_zero_bit ( h - > s - > blocks_gotten ,
h - > s - > nr_data + h - > s - > nr_parity ,
h - > s - > nr_data ) ;
BUG_ON ( j > = h - > s - > nr_data + h - > s - > nr_parity ) ;
h - > s - > blocks [ j ] = buckets . v [ i ] ;
h - > s - > new_stripe . key . v . ptrs [ j ] = ob - > ptr ;
__set_bit ( j , h - > s - > blocks_gotten ) ;
}
2020-07-07 05:33:54 +03:00
if ( ret )
goto err ;
}
2021-01-19 07:26:42 +03:00
buckets . nr = 0 ;
if ( nr_have_data < h - > s - > nr_data ) {
ret = bch2_bucket_alloc_set ( c , & buckets ,
2020-07-07 05:33:54 +03:00
& h - > block_stripe ,
& devs ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ,
& nr_have_data ,
2020-07-07 05:33:54 +03:00
& have_cache ,
2020-12-15 20:53:30 +03:00
h - > copygc
? RESERVE_MOVINGGC
: RESERVE_NONE ,
2020-07-07 05:33:54 +03:00
0 ,
2020-12-15 20:38:17 +03:00
cl ) ;
2021-01-19 07:26:42 +03:00
open_bucket_for_each ( c , & buckets , ob , i ) {
j = find_next_zero_bit ( h - > s - > blocks_gotten ,
h - > s - > nr_data , 0 ) ;
BUG_ON ( j > = h - > s - > nr_data ) ;
h - > s - > blocks [ j ] = buckets . v [ i ] ;
h - > s - > new_stripe . key . v . ptrs [ j ] = ob - > ptr ;
__set_bit ( j , h - > s - > blocks_gotten ) ;
}
2020-07-07 05:33:54 +03:00
if ( ret )
goto err ;
}
err :
rcu_read_unlock ( ) ;
percpu_up_read ( & c - > mark_lock ) ;
return ret ;
}
2020-06-30 21:44:19 +03:00
/* XXX: doesn't obey target: */
static s64 get_existing_stripe ( struct bch_fs * c ,
2021-01-11 21:51:23 +03:00
struct ec_stripe_head * head )
2020-06-30 21:44:19 +03:00
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
struct stripe * m ;
size_t heap_idx ;
u64 stripe_idx ;
2021-01-23 02:01:07 +03:00
s64 ret = - 1 ;
2020-06-30 21:44:19 +03:00
if ( may_create_new_stripe ( c ) )
return - 1 ;
spin_lock ( & c - > ec_stripes_heap_lock ) ;
for ( heap_idx = 0 ; heap_idx < h - > used ; heap_idx + + ) {
2021-01-23 02:01:07 +03:00
/* No blocks worth reusing, stripe will just be deleted: */
2020-06-30 21:44:19 +03:00
if ( ! h - > data [ heap_idx ] . blocks_nonempty )
continue ;
stripe_idx = h - > data [ heap_idx ] . idx ;
m = genradix_ptr ( & c - > stripes [ 0 ] , stripe_idx ) ;
2021-01-11 21:51:23 +03:00
if ( m - > algorithm = = head - > algo & &
m - > nr_redundant = = head - > redundancy & &
m - > sectors = = head - > blocksize & &
2020-06-30 21:44:19 +03:00
m - > blocks_nonempty < m - > nr_blocks - m - > nr_redundant ) {
bch2_stripes_heap_del ( c , m , stripe_idx ) ;
2021-01-23 02:01:07 +03:00
ret = stripe_idx ;
break ;
2020-06-30 21:44:19 +03:00
}
}
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2021-01-23 02:01:07 +03:00
return ret ;
2020-06-30 21:44:19 +03:00
}
2021-02-10 03:18:13 +03:00
static int __bch2_ec_stripe_head_reuse ( struct bch_fs * c ,
struct ec_stripe_head * h )
{
unsigned i ;
s64 idx ;
int ret ;
idx = get_existing_stripe ( c , h ) ;
if ( idx < 0 ) {
bch_err ( c , " failed to find an existing stripe " ) ;
return - ENOSPC ;
}
h - > s - > have_existing_stripe = true ;
ret = get_stripe_key ( c , idx , & h - > s - > existing_stripe ) ;
if ( ret ) {
bch2_fs_fatal_error ( c , " error reading stripe key: %i " , ret ) ;
return ret ;
}
if ( ec_stripe_buf_init ( & h - > s - > existing_stripe , 0 , h - > blocksize ) ) {
/*
* this is a problem : we have deleted from the
* stripes heap already
*/
BUG ( ) ;
}
BUG_ON ( h - > s - > existing_stripe . size ! = h - > blocksize ) ;
BUG_ON ( h - > s - > existing_stripe . size ! = h - > s - > existing_stripe . key . v . sectors ) ;
for ( i = 0 ; i < h - > s - > existing_stripe . key . v . nr_blocks ; i + + ) {
if ( stripe_blockcount_get ( & h - > s - > existing_stripe . key . v , i ) ) {
__set_bit ( i , h - > s - > blocks_gotten ) ;
__set_bit ( i , h - > s - > blocks_allocated ) ;
}
ec_block_io ( c , & h - > s - > existing_stripe , READ , i , & h - > s - > iodone ) ;
}
bkey_copy ( & h - > s - > new_stripe . key . k_i ,
& h - > s - > existing_stripe . key . k_i ) ;
return 0 ;
}
static int __bch2_ec_stripe_head_reserve ( struct bch_fs * c ,
struct ec_stripe_head * h )
{
int ret ;
ret = bch2_disk_reservation_get ( c , & h - > s - > res ,
h - > blocksize ,
h - > s - > nr_parity , 0 ) ;
if ( ret ) {
/*
* This means we need to wait for copygc to
* empty out buckets from existing stripes :
*/
bch_err ( c , " failed to reserve stripe " ) ;
}
return ret ;
}
2020-07-07 05:33:54 +03:00
struct ec_stripe_head * bch2_ec_stripe_head_get ( struct bch_fs * c ,
unsigned target ,
unsigned algo ,
2020-12-15 20:38:17 +03:00
unsigned redundancy ,
2020-12-15 20:53:30 +03:00
bool copygc ,
2020-12-15 20:38:17 +03:00
struct closure * cl )
2020-07-07 05:33:54 +03:00
{
struct ec_stripe_head * h ;
2020-07-10 01:31:51 +03:00
int ret ;
2021-02-10 03:18:13 +03:00
bool needs_stripe_new ;
2020-07-07 05:33:54 +03:00
2020-12-15 20:53:30 +03:00
h = __bch2_ec_stripe_head_get ( c , target , algo , redundancy , copygc ) ;
2020-12-15 03:41:03 +03:00
if ( ! h ) {
bch_err ( c , " no stripe head " ) ;
2020-07-07 05:33:54 +03:00
return NULL ;
2020-12-15 03:41:03 +03:00
}
2020-07-07 05:33:54 +03:00
2021-02-10 03:18:13 +03:00
needs_stripe_new = ! h - > s ;
if ( needs_stripe_new ) {
2020-12-09 21:39:30 +03:00
if ( ec_new_stripe_alloc ( c , h ) ) {
2021-02-10 03:18:13 +03:00
ret = - ENOMEM ;
2020-12-15 03:41:03 +03:00
bch_err ( c , " failed to allocate new stripe " ) ;
2021-02-10 03:18:13 +03:00
goto err ;
2020-12-15 03:41:03 +03:00
}
2021-02-10 03:18:13 +03:00
if ( ec_stripe_buf_init ( & h - > s - > new_stripe , 0 , h - > blocksize ) )
2020-12-15 03:41:03 +03:00
BUG ( ) ;
2020-12-09 21:39:30 +03:00
}
2020-06-30 21:44:19 +03:00
2021-02-10 03:18:13 +03:00
/*
* Try reserve a new stripe before reusing an
* existing stripe . This will prevent unnecessary
* read amplification during write oriented workloads .
*/
ret = 0 ;
if ( ! h - > s - > allocated & & ! h - > s - > res . sectors & & ! h - > s - > have_existing_stripe )
ret = __bch2_ec_stripe_head_reserve ( c , h ) ;
if ( ret & & needs_stripe_new )
ret = __bch2_ec_stripe_head_reuse ( c , h ) ;
if ( ret )
goto err ;
2020-07-10 01:31:51 +03:00
2021-02-10 03:18:13 +03:00
if ( ! h - > s - > allocated ) {
2020-12-15 20:38:17 +03:00
ret = new_stripe_alloc_buckets ( c , h , cl ) ;
2021-02-10 03:18:13 +03:00
if ( ret )
goto err ;
2020-07-07 05:33:54 +03:00
h - > s - > allocated = true ;
}
2021-02-10 03:18:13 +03:00
2020-07-07 05:33:54 +03:00
return h ;
2021-02-10 03:18:13 +03:00
err :
bch2_ec_stripe_head_put ( c , h ) ;
return ERR_PTR ( - ret ) ;
2020-07-07 05:33:54 +03:00
}
2018-11-01 22:13:19 +03:00
void bch2_ec_stop_dev ( struct bch_fs * c , struct bch_dev * ca )
{
struct ec_stripe_head * h ;
struct open_bucket * ob ;
unsigned i ;
2020-07-07 03:59:46 +03:00
mutex_lock ( & c - > ec_stripe_head_lock ) ;
list_for_each_entry ( h , & c - > ec_stripe_head_list , list ) {
2018-11-01 22:13:19 +03:00
mutex_lock ( & h - > lock ) ;
if ( ! h - > s )
goto unlock ;
2021-01-19 07:26:42 +03:00
for ( i = 0 ; i < h - > s - > new_stripe . key . v . nr_blocks ; i + + ) {
if ( ! h - > s - > blocks [ i ] )
continue ;
ob = c - > open_buckets + h - > s - > blocks [ i ] ;
2018-11-01 22:13:19 +03:00
if ( ob - > ptr . dev = = ca - > dev_idx )
goto found ;
2021-01-19 07:26:42 +03:00
}
2018-11-01 22:13:19 +03:00
goto unlock ;
found :
2020-07-07 03:59:46 +03:00
h - > s - > err = - EROFS ;
ec_stripe_set_pending ( c , h ) ;
2018-11-01 22:13:19 +03:00
unlock :
mutex_unlock ( & h - > lock ) ;
}
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
2018-11-01 22:13:19 +03:00
}
2021-01-15 00:19:23 +03:00
void bch2_stripes_heap_start ( struct bch_fs * c )
{
struct genradix_iter iter ;
struct stripe * m ;
genradix_for_each ( & c - > stripes [ 0 ] , iter , m )
if ( m - > alive )
bch2_stripes_heap_insert ( c , m , iter . pos ) ;
}
2019-03-14 03:49:16 +03:00
/*
 * Bring the on-disk block sector counts of stripe @idx in line with the
 * in-memory stripe @m.
 *
 * The key at POS(0, idx) is looked up through @iter; if any of its per-block
 * counts differs from m->block_sectors[], the key is copied into @new_key,
 * all counts are overwritten from @m, and the update is queued on @trans.
 *
 * Returns 0 if nothing needed writing, -EIO if the slot does not hold a
 * stripe key, or the error from the lookup / bch2_trans_update().
 */
static int __bch2_stripe_write_key(struct btree_trans *trans,
				   struct btree_iter *iter,
				   struct stripe *m,
				   size_t idx,
				   struct bkey_i_stripe *new_key)
{
	const struct bch_stripe *old;
	struct bkey_s_c k;
	unsigned blk;
	bool dirty = false;
	int ret;

	bch2_btree_iter_set_pos(iter, POS(0, idx));

	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		return ret;

	if (k.k->type != KEY_TYPE_stripe)
		return -EIO;

	/* Stop comparing at the first block whose count differs */
	old = bkey_s_c_to_stripe(k).v;
	for (blk = 0; blk < old->nr_blocks && !dirty; blk++)
		dirty = m->block_sectors[blk] != stripe_blockcount_get(old, blk);

	if (!dirty)
		return 0;

	bkey_reassemble(&new_key->k_i, k);

	for (blk = 0; blk < new_key->v.nr_blocks; blk++)
		stripe_blockcount_set(&new_key->v, blk,
				      m->block_sectors[blk]);

	return bch2_trans_update(trans, iter, &new_key->k_i, 0);
}
2020-10-17 04:36:26 +03:00
int bch2_stripes_write ( struct bch_fs * c , unsigned flags )
2018-11-26 04:53:51 +03:00
{
2019-03-14 03:49:16 +03:00
struct btree_trans trans ;
struct btree_iter * iter ;
2018-11-26 04:53:51 +03:00
struct genradix_iter giter ;
struct bkey_i_stripe * new_key ;
struct stripe * m ;
int ret = 0 ;
new_key = kmalloc ( 255 * sizeof ( u64 ) , GFP_KERNEL ) ;
BUG_ON ( ! new_key ) ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2019-03-14 03:49:16 +03:00
2021-02-21 03:27:37 +03:00
iter = bch2_trans_get_iter ( & trans , BTREE_ID_stripes , POS_MIN ,
2019-03-14 03:49:16 +03:00
BTREE_ITER_SLOTS | BTREE_ITER_INTENT ) ;
2018-11-26 04:53:51 +03:00
genradix_for_each ( & c - > stripes [ 0 ] , giter , m ) {
2021-01-18 00:45:19 +03:00
if ( ! m - > alive )
2018-11-26 04:53:51 +03:00
continue ;
2020-02-26 23:39:46 +03:00
ret = __bch2_trans_do ( & trans , NULL , NULL ,
BTREE_INSERT_NOFAIL | flags ,
__bch2_stripe_write_key ( & trans , iter , m ,
giter . pos , new_key ) ) ;
2019-12-23 07:04:30 +03:00
2018-11-26 04:53:51 +03:00
if ( ret )
break ;
}
2021-04-21 03:21:39 +03:00
bch2_trans_iter_put ( & trans , iter ) ;
2018-11-26 04:53:51 +03:00
2019-03-14 03:49:16 +03:00
bch2_trans_exit ( & trans ) ;
2018-11-26 04:53:51 +03:00
kfree ( new_key ) ;
return ret ;
}
2021-04-29 22:37:47 +03:00
static int bch2_stripes_read_fn ( struct bch_fs * c , struct bkey_s_c k )
2018-11-26 04:53:51 +03:00
{
2020-05-24 21:06:10 +03:00
int ret = 0 ;
2019-07-13 00:08:32 +03:00
2021-04-29 22:37:47 +03:00
if ( k . k - > type = = KEY_TYPE_stripe )
2020-05-24 21:06:10 +03:00
ret = __ec_stripe_mem_alloc ( c , k . k - > p . offset , GFP_KERNEL ) ? :
2021-06-11 04:44:27 +03:00
bch2_mark_key ( c , k ,
BTREE_TRIGGER_INSERT |
2020-05-24 21:06:10 +03:00
BTREE_TRIGGER_NOATOMIC ) ;
2019-10-09 05:56:33 +03:00
2020-05-24 21:06:10 +03:00
return ret ;
}
2018-11-26 04:53:51 +03:00
2021-04-29 22:37:47 +03:00
int bch2_stripes_read ( struct bch_fs * c )
2020-05-24 21:06:10 +03:00
{
2021-04-29 22:37:47 +03:00
int ret = bch2_btree_and_journal_walk ( c , BTREE_ID_stripes ,
bch2_stripes_read_fn ) ;
2020-05-24 21:06:10 +03:00
if ( ret )
2019-04-17 22:49:28 +03:00
bch_err ( c , " error reading stripes: %i " , ret ) ;
2018-11-26 04:53:51 +03:00
2020-05-24 21:06:10 +03:00
return ret ;
2018-11-26 04:53:51 +03:00
}
2018-11-25 01:09:44 +03:00
/*
 * Preallocate in-memory stripe state sized for the stripes btree.
 *
 * The highest stripe index is found by stepping back from POS(0, U64_MAX);
 * one slot per index up to and including it is then allocated in
 * c->stripes[gc]. When @gc is false the stripes heap is also initialised.
 *
 * NOTE(review): a lookup error from bch2_btree_iter_prev() is treated the
 * same as "no stripes" here - confirm that is intended.
 *
 * Returns 0 on success (including when there are no stripes), the error from
 * bch2_trans_exit(), or -ENOMEM.
 */
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c last;
	size_t slot, nr = 0;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0);

	last = bch2_btree_iter_prev(iter);
	if (!IS_ERR_OR_NULL(last.k))
		nr = last.k->p.offset + 1;

	bch2_trans_iter_put(&trans, iter);

	ret = bch2_trans_exit(&trans);
	if (ret)
		return ret;

	if (!nr)
		return 0;

	if (!gc &&
	    !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(nr),
		       GFP_KERNEL))
		return -ENOMEM;
#if 0
	ret = genradix_prealloc(&c->stripes[gc], nr, GFP_KERNEL);
#else
	for (slot = 0; slot < nr; slot++)
		if (!genradix_ptr_alloc(&c->stripes[gc], slot, GFP_KERNEL))
			return -ENOMEM;
#endif
	return 0;
}
2020-07-07 03:18:13 +03:00
void bch2_stripes_heap_to_text ( struct printbuf * out , struct bch_fs * c )
{
ec_stripes_heap * h = & c - > ec_stripes_heap ;
struct stripe * m ;
size_t i ;
spin_lock ( & c - > ec_stripes_heap_lock ) ;
2020-11-05 20:16:05 +03:00
for ( i = 0 ; i < min_t ( size_t , h - > used , 20 ) ; i + + ) {
2020-07-07 03:18:13 +03:00
m = genradix_ptr ( & c - > stripes [ 0 ] , h - > data [ i ] . idx ) ;
pr_buf ( out , " %zu %u/%u+%u \n " , h - > data [ i ] . idx ,
h - > data [ i ] . blocks_nonempty ,
m - > nr_blocks - m - > nr_redundant ,
m - > nr_redundant ) ;
}
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
}
2020-07-26 00:06:11 +03:00
void bch2_new_stripes_to_text ( struct printbuf * out , struct bch_fs * c )
{
struct ec_stripe_head * h ;
struct ec_stripe_new * s ;
mutex_lock ( & c - > ec_stripe_head_lock ) ;
list_for_each_entry ( h , & c - > ec_stripe_head_list , list ) {
pr_buf ( out , " target %u algo %u redundancy %u: \n " ,
h - > target , h - > algo , h - > redundancy ) ;
if ( h - > s )
2021-01-19 07:26:42 +03:00
pr_buf ( out , " \t pending: blocks %u+%u allocated %u \n " ,
h - > s - > nr_data , h - > s - > nr_parity ,
2020-07-26 00:06:11 +03:00
bitmap_weight ( h - > s - > blocks_allocated ,
2021-01-19 07:26:42 +03:00
h - > s - > nr_data ) ) ;
2020-07-26 00:06:11 +03:00
}
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
mutex_lock ( & c - > ec_stripe_new_lock ) ;
2020-08-05 06:12:49 +03:00
list_for_each_entry ( s , & c - > ec_stripe_new_list , list ) {
2021-01-19 07:26:42 +03:00
pr_buf ( out , " \t in flight: blocks %u+%u pin %u \n " ,
s - > nr_data , s - > nr_parity ,
2020-07-26 00:06:11 +03:00
atomic_read ( & s - > pin ) ) ;
}
mutex_unlock ( & c - > ec_stripe_new_lock ) ;
}
2018-11-01 22:13:19 +03:00
void bch2_fs_ec_exit ( struct bch_fs * c )
{
struct ec_stripe_head * h ;
while ( 1 ) {
2020-07-07 03:59:46 +03:00
mutex_lock ( & c - > ec_stripe_head_lock ) ;
h = list_first_entry_or_null ( & c - > ec_stripe_head_list ,
2018-11-01 22:13:19 +03:00
struct ec_stripe_head , list ) ;
if ( h )
list_del ( & h - > list ) ;
2020-07-07 03:59:46 +03:00
mutex_unlock ( & c - > ec_stripe_head_lock ) ;
2018-11-01 22:13:19 +03:00
if ( ! h )
break ;
BUG_ON ( h - > s ) ;
kfree ( h ) ;
}
2020-07-07 03:59:46 +03:00
BUG_ON ( ! list_empty ( & c - > ec_stripe_new_list ) ) ;
2018-11-01 22:13:19 +03:00
free_heap ( & c - > ec_stripes_heap ) ;
2018-11-25 01:09:44 +03:00
genradix_free ( & c - > stripes [ 0 ] ) ;
2018-11-01 22:13:19 +03:00
bioset_exit ( & c - > ec_bioset ) ;
}
int bch2_fs_ec_init ( struct bch_fs * c )
{
2020-07-07 03:59:46 +03:00
INIT_WORK ( & c - > ec_stripe_create_work , ec_stripe_create_work ) ;
2018-11-01 22:13:19 +03:00
INIT_WORK ( & c - > ec_stripe_delete_work , ec_stripe_delete_work ) ;
return bioset_init ( & c - > ec_bioset , 1 , offsetof ( struct ec_bio , bio ) ,
BIOSET_NEED_BVECS ) ;
}