2017-03-16 22:18:50 -08:00
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Code for managing the extent btree and dynamically updating the writeback
 * dirty sector count.
 */
# include "bcachefs.h"
# include "bkey_methods.h"
# include "btree_gc.h"
# include "btree_update.h"
# include "btree_update_interior.h"
# include "buckets.h"
# include "checksum.h"
# include "debug.h"
# include "dirent.h"
# include "disk_groups.h"
# include "error.h"
# include "extents.h"
# include "inode.h"
# include "journal.h"
# include "replicas.h"
# include "super.h"
# include "super-io.h"
# include "trace.h"
# include "util.h"
# include "xattr.h"
2018-11-01 15:10:01 -04:00
unsigned bch2_bkey_nr_ptrs ( struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c p = bch2_bkey_ptrs_c ( k ) ;
2017-03-16 22:18:50 -08:00
const struct bch_extent_ptr * ptr ;
unsigned nr_ptrs = 0 ;
2018-11-01 15:10:01 -04:00
bkey_for_each_ptr ( p , ptr )
2017-03-16 22:18:50 -08:00
nr_ptrs + + ;
return nr_ptrs ;
}
2018-11-01 15:10:01 -04:00
unsigned bch2_bkey_nr_dirty_ptrs ( struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
unsigned nr_ptrs = 0 ;
switch ( k . k - > type ) {
2018-11-01 15:10:01 -04:00
case KEY_TYPE_btree_ptr :
2019-08-22 16:23:10 -04:00
case KEY_TYPE_extent :
case KEY_TYPE_reflink_v : {
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c p = bch2_bkey_ptrs_c ( k ) ;
const struct bch_extent_ptr * ptr ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
bkey_for_each_ptr ( p , ptr )
2017-03-16 22:18:50 -08:00
nr_ptrs + = ! ptr - > cached ;
2018-11-01 15:10:01 -04:00
BUG_ON ( ! nr_ptrs ) ;
2017-03-16 22:18:50 -08:00
break ;
2018-11-01 15:10:01 -04:00
}
case KEY_TYPE_reservation :
2017-03-16 22:18:50 -08:00
nr_ptrs = bkey_s_c_to_reservation ( k ) . v - > nr_replicas ;
break ;
}
return nr_ptrs ;
}
2018-11-01 15:13:19 -04:00
static unsigned bch2_extent_ptr_durability ( struct bch_fs * c ,
struct extent_ptr_decoded p )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:13:19 -04:00
unsigned i , durability = 0 ;
2017-03-16 22:18:50 -08:00
struct bch_dev * ca ;
2018-11-01 15:13:19 -04:00
if ( p . ptr . cached )
2017-03-16 22:18:50 -08:00
return 0 ;
2018-11-01 15:13:19 -04:00
ca = bch_dev_bkey_exists ( c , p . ptr . dev ) ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:13:19 -04:00
if ( ca - > mi . state ! = BCH_MEMBER_STATE_FAILED )
durability = max_t ( unsigned , durability , ca - > mi . durability ) ;
for ( i = 0 ; i < p . ec_nr ; i + + ) {
2018-11-24 17:09:44 -05:00
struct stripe * s =
genradix_ptr ( & c - > stripes [ 0 ] , p . idx ) ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:13:19 -04:00
if ( WARN_ON ( ! s ) )
continue ;
durability = max_t ( unsigned , durability , s - > nr_redundant ) ;
}
return durability ;
2017-03-16 22:18:50 -08:00
}
2018-11-01 15:10:01 -04:00
unsigned bch2_bkey_durability ( struct bch_fs * c , struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
2018-11-01 15:13:19 -04:00
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
2017-03-16 22:18:50 -08:00
unsigned durability = 0 ;
2018-11-01 15:10:01 -04:00
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry )
2018-11-01 15:13:19 -04:00
durability + = bch2_extent_ptr_durability ( c , p ) ;
2017-03-16 22:18:50 -08:00
return durability ;
}
2018-11-01 15:10:01 -04:00
static struct bch_dev_io_failures * dev_io_failures ( struct bch_io_failures * f ,
unsigned dev )
{
struct bch_dev_io_failures * i ;
for ( i = f - > devs ; i < f - > devs + f - > nr ; i + + )
if ( i - > dev = = dev )
return i ;
return NULL ;
}
void bch2_mark_io_failure ( struct bch_io_failures * failed ,
struct extent_ptr_decoded * p )
{
struct bch_dev_io_failures * f = dev_io_failures ( failed , p - > ptr . dev ) ;
if ( ! f ) {
BUG_ON ( failed - > nr > = ARRAY_SIZE ( failed - > devs ) ) ;
f = & failed - > devs [ failed - > nr + + ] ;
f - > dev = p - > ptr . dev ;
f - > idx = p - > idx ;
f - > nr_failed = 1 ;
f - > nr_retries = 0 ;
} else if ( p - > idx ! = f - > idx ) {
f - > idx = p - > idx ;
f - > nr_failed = 1 ;
f - > nr_retries = 0 ;
} else {
f - > nr_failed + + ;
}
}
/*
* returns true if p1 is better than p2 :
*/
static inline bool ptr_better ( struct bch_fs * c ,
const struct extent_ptr_decoded p1 ,
const struct extent_ptr_decoded p2 )
{
if ( likely ( ! p1 . idx & & ! p2 . idx ) ) {
struct bch_dev * dev1 = bch_dev_bkey_exists ( c , p1 . ptr . dev ) ;
struct bch_dev * dev2 = bch_dev_bkey_exists ( c , p2 . ptr . dev ) ;
u64 l1 = atomic64_read ( & dev1 - > cur_latency [ READ ] ) ;
u64 l2 = atomic64_read ( & dev2 - > cur_latency [ READ ] ) ;
/* Pick at random, biased in favor of the faster device: */
return bch2_rand_range ( l1 + l2 ) > l1 ;
}
if ( force_reconstruct_read ( c ) )
return p1 . idx > p2 . idx ;
return p1 . idx < p2 . idx ;
}
/*
* This picks a non - stale pointer , preferably from a device other than @ avoid .
* Avoid can be NULL , meaning pick any . If there are no non - stale pointers to
* other devices , it will still pick a pointer from avoid .
*/
int bch2_bkey_pick_read_device ( struct bch_fs * c , struct bkey_s_c k ,
struct bch_io_failures * failed ,
struct extent_ptr_decoded * pick )
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
struct bch_dev_io_failures * f ;
struct bch_dev * ca ;
int ret = 0 ;
if ( k . k - > type = = KEY_TYPE_error )
return - EIO ;
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry ) {
ca = bch_dev_bkey_exists ( c , p . ptr . dev ) ;
/*
* If there are any dirty pointers it ' s an error if we can ' t
* read :
*/
if ( ! ret & & ! p . ptr . cached )
ret = - EIO ;
if ( p . ptr . cached & & ptr_stale ( ca , & p . ptr ) )
continue ;
f = failed ? dev_io_failures ( failed , p . ptr . dev ) : NULL ;
if ( f )
p . idx = f - > nr_failed < f - > nr_retries
? f - > idx
: f - > idx + 1 ;
if ( ! p . idx & &
! bch2_dev_is_readable ( ca ) )
p . idx + + ;
if ( force_reconstruct_read ( c ) & &
! p . idx & & p . ec_nr )
p . idx + + ;
if ( p . idx > = p . ec_nr + 1 )
continue ;
if ( ret > 0 & & ! ptr_better ( c , p , * pick ) )
continue ;
* pick = p ;
ret = 1 ;
}
return ret ;
}
void bch2_bkey_append_ptr ( struct bkey_i * k ,
struct bch_extent_ptr ptr )
{
EBUG_ON ( bch2_bkey_has_device ( bkey_i_to_s_c ( k ) , ptr . dev ) ) ;
switch ( k - > k . type ) {
case KEY_TYPE_btree_ptr :
case KEY_TYPE_extent :
EBUG_ON ( bkey_val_u64s ( & k - > k ) > = BKEY_EXTENT_VAL_U64s_MAX ) ;
ptr . type = 1 < < BCH_EXTENT_ENTRY_ptr ;
memcpy ( ( void * ) & k - > v + bkey_val_bytes ( & k - > k ) ,
& ptr ,
sizeof ( ptr ) ) ;
k - > u64s + + ;
break ;
default :
BUG ( ) ;
}
}
void bch2_bkey_drop_device ( struct bkey_s k , unsigned dev )
{
struct bch_extent_ptr * ptr ;
bch2_bkey_drop_ptrs ( k , ptr , ptr - > dev = = dev ) ;
}
2019-07-25 13:52:14 -04:00
const struct bch_extent_ptr *
bch2_bkey_has_device ( struct bkey_s_c k , unsigned dev )
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const struct bch_extent_ptr * ptr ;
bkey_for_each_ptr ( ptrs , ptr )
if ( ptr - > dev = = dev )
return ptr ;
return NULL ;
}
bool bch2_bkey_has_target ( struct bch_fs * c , struct bkey_s_c k , unsigned target )
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const struct bch_extent_ptr * ptr ;
bkey_for_each_ptr ( ptrs , ptr )
if ( bch2_dev_in_target ( c , ptr - > dev , target ) & &
( ! ptr - > cached | |
! ptr_stale ( bch_dev_bkey_exists ( c , ptr - > dev ) , ptr ) ) )
return true ;
return false ;
}
2018-11-01 15:10:01 -04:00
/* extent specific utility code */
const struct bch_extent_ptr *
bch2_extent_has_device ( struct bkey_s_c_extent e , unsigned dev )
{
const struct bch_extent_ptr * ptr ;
extent_for_each_ptr ( e , ptr )
if ( ptr - > dev = = dev )
return ptr ;
return NULL ;
}
/*
 * Return the first pointer of extent @e whose device belongs to @group, or
 * NULL. A device's mi.group of 0 never matches; otherwise mi.group - 1 is
 * compared against @group.
 */
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
{
	const struct bch_extent_ptr *cur;

	extent_for_each_ptr(e, cur) {
		struct bch_dev *dev = bch_dev_bkey_exists(c, cur->dev);

		if (!dev->mi.group)
			continue;

		if (dev->mi.group - 1 == group)
			return cur;
	}

	return NULL;
}
2017-03-16 22:18:50 -08:00
unsigned bch2_extent_is_compressed ( struct bkey_s_c k )
{
2019-08-22 16:23:10 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
2017-03-16 22:18:50 -08:00
unsigned ret = 0 ;
2019-08-22 16:23:10 -04:00
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry )
if ( ! p . ptr . cached & &
p . crc . compression_type ! = BCH_COMPRESSION_NONE )
ret + = p . crc . compressed_size ;
2017-03-16 22:18:50 -08:00
return ret ;
}
2019-07-25 13:52:14 -04:00
/*
 * Return true if @k contains a pointer with the same device and generation
 * as @m, and whose offset (plus its crc offset, minus the key's start offset)
 * equals m.offset - @offset.
 */
bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
			   struct bch_extent_ptr m, u64 offset)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded decoded;

	bkey_for_each_ptr_decode(k.k, ptrs, decoded, entry) {
		if (decoded.ptr.dev != m.dev)
			continue;

		if (decoded.ptr.gen != m.gen)
			continue;

		if ((s64) decoded.ptr.offset + decoded.crc.offset - bkey_start_offset(k.k) ==
		    (s64) m.offset - offset)
			return true;
	}

	return false;
}
2018-11-01 15:10:01 -04:00
static union bch_extent_entry * extent_entry_prev ( struct bkey_ptrs ptrs ,
2018-11-01 15:13:19 -04:00
union bch_extent_entry * entry )
{
2018-11-01 15:10:01 -04:00
union bch_extent_entry * i = ptrs . start ;
2018-11-01 15:13:19 -04:00
if ( i = = entry )
return NULL ;
while ( extent_entry_next ( i ) ! = entry )
i = extent_entry_next ( i ) ;
return i ;
}
2018-11-01 15:10:01 -04:00
union bch_extent_entry * bch2_bkey_drop_ptr ( struct bkey_s k ,
struct bch_extent_ptr * ptr )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( k ) ;
2018-11-01 15:13:19 -04:00
union bch_extent_entry * dst , * src , * prev ;
bool drop_crc = true ;
2018-09-30 18:28:23 -04:00
2018-11-01 15:10:01 -04:00
EBUG_ON ( ptr < & ptrs . start - > ptr | |
ptr > = & ptrs . end - > ptr ) ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( ptr - > type ! = 1 < < BCH_EXTENT_ENTRY_ptr ) ;
2018-11-01 15:13:19 -04:00
src = extent_entry_next ( to_entry ( ptr ) ) ;
2018-11-01 15:10:01 -04:00
if ( src ! = ptrs . end & &
2018-11-01 15:13:19 -04:00
! extent_entry_is_crc ( src ) )
drop_crc = false ;
2018-09-30 18:28:23 -04:00
2018-11-01 15:13:19 -04:00
dst = to_entry ( ptr ) ;
2018-11-01 15:10:01 -04:00
while ( ( prev = extent_entry_prev ( ptrs , dst ) ) ) {
2018-11-01 15:13:19 -04:00
if ( extent_entry_is_ptr ( prev ) )
break ;
if ( extent_entry_is_crc ( prev ) ) {
if ( drop_crc )
dst = prev ;
break ;
2018-09-30 18:28:23 -04:00
}
2018-11-01 15:13:19 -04:00
dst = prev ;
2018-09-30 18:28:23 -04:00
}
memmove_u64s_down ( dst , src ,
2018-11-01 15:10:01 -04:00
( u64 * ) ptrs . end - ( u64 * ) src ) ;
k . k - > u64s - = ( u64 * ) src - ( u64 * ) dst ;
2018-09-30 18:28:23 -04:00
return dst ;
2017-03-16 22:18:50 -08:00
}
static inline bool can_narrow_crc ( struct bch_extent_crc_unpacked u ,
struct bch_extent_crc_unpacked n )
{
return ! u . compression_type & &
u . csum_type & &
u . uncompressed_size > u . live_size & &
bch2_csum_type_is_encryption ( u . csum_type ) = =
bch2_csum_type_is_encryption ( n . csum_type ) ;
}
2019-07-25 13:52:14 -04:00
bool bch2_can_narrow_extent_crcs ( struct bkey_s_c k ,
2017-03-16 22:18:50 -08:00
struct bch_extent_crc_unpacked n )
{
2019-07-25 13:52:14 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
2017-03-16 22:18:50 -08:00
struct bch_extent_crc_unpacked crc ;
const union bch_extent_entry * i ;
if ( ! n . csum_type )
return false ;
2019-07-25 13:52:14 -04:00
bkey_for_each_crc ( k . k , ptrs , crc , i )
2017-03-16 22:18:50 -08:00
if ( can_narrow_crc ( crc , n ) )
return true ;
return false ;
}
/*
* We ' re writing another replica for this extent , so while we ' ve got the data in
* memory we ' ll be computing a new checksum for the currently live data .
*
* If there are other replicas we aren ' t moving , and they are checksummed but
* not compressed , we can modify them to point to only the data that is
* currently live ( so that readers won ' t have to bounce ) while we ' ve got the
* checksum we need :
*/
2019-07-25 13:52:14 -04:00
bool bch2_bkey_narrow_crcs ( struct bkey_i * k , struct bch_extent_crc_unpacked n )
2017-03-16 22:18:50 -08:00
{
2019-07-25 13:52:14 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( bkey_i_to_s ( k ) ) ;
2017-03-16 22:18:50 -08:00
struct bch_extent_crc_unpacked u ;
2018-09-27 21:08:39 -04:00
struct extent_ptr_decoded p ;
2017-03-16 22:18:50 -08:00
union bch_extent_entry * i ;
2018-09-27 21:08:39 -04:00
bool ret = false ;
2017-03-16 22:18:50 -08:00
/* Find a checksum entry that covers only live data: */
2018-09-27 21:08:39 -04:00
if ( ! n . csum_type ) {
2019-07-25 13:52:14 -04:00
bkey_for_each_crc ( & k - > k , ptrs , u , i )
2017-03-16 22:18:50 -08:00
if ( ! u . compression_type & &
u . csum_type & &
u . live_size = = u . uncompressed_size ) {
n = u ;
2018-09-27 21:08:39 -04:00
goto found ;
2017-03-16 22:18:50 -08:00
}
return false ;
2018-09-27 21:08:39 -04:00
}
found :
2017-03-16 22:18:50 -08:00
BUG_ON ( n . compression_type ) ;
BUG_ON ( n . offset ) ;
2019-07-25 13:52:14 -04:00
BUG_ON ( n . live_size ! = k - > k . size ) ;
2017-03-16 22:18:50 -08:00
restart_narrow_pointers :
2019-08-21 18:55:07 -04:00
ptrs = bch2_bkey_ptrs ( bkey_i_to_s ( k ) ) ;
2019-07-25 13:52:14 -04:00
bkey_for_each_ptr_decode ( & k - > k , ptrs , p , i )
2018-09-27 21:08:39 -04:00
if ( can_narrow_crc ( p . crc , n ) ) {
2019-07-25 13:52:14 -04:00
bch2_bkey_drop_ptr ( bkey_i_to_s ( k ) , & i - > ptr ) ;
2018-09-27 21:08:39 -04:00
p . ptr . offset + = p . crc . offset ;
p . crc = n ;
2019-07-25 13:52:14 -04:00
bch2_extent_ptr_decoded_append ( k , & p ) ;
2018-09-27 21:08:39 -04:00
ret = true ;
2017-03-16 22:18:50 -08:00
goto restart_narrow_pointers ;
}
2018-09-27 21:08:39 -04:00
return ret ;
2017-03-16 22:18:50 -08:00
}
/* returns true if not equal */
static inline bool bch2_crc_unpacked_cmp ( struct bch_extent_crc_unpacked l ,
struct bch_extent_crc_unpacked r )
{
return ( l . csum_type ! = r . csum_type | |
l . compression_type ! = r . compression_type | |
l . compressed_size ! = r . compressed_size | |
l . uncompressed_size ! = r . uncompressed_size | |
l . offset ! = r . offset | |
l . live_size ! = r . live_size | |
l . nonce ! = r . nonce | |
bch2_crc_cmp ( l . csum , r . csum ) ) ;
}
void bch2_ptr_swab ( const struct bkey_format * f , struct bkey_packed * k )
{
2018-11-01 15:10:01 -04:00
union bch_extent_entry * entry ;
u64 * d = ( u64 * ) bkeyp_val ( f , k ) ;
unsigned i ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
for ( i = 0 ; i < bkeyp_val_u64s ( f , k ) ; i + + )
d [ i ] = swab64 ( d [ i ] ) ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
for ( entry = ( union bch_extent_entry * ) d ;
entry < ( union bch_extent_entry * ) ( d + bkeyp_val_u64s ( f , k ) ) ;
entry = extent_entry_next ( entry ) ) {
switch ( extent_entry_type ( entry ) ) {
2018-11-01 15:13:19 -04:00
case BCH_EXTENT_ENTRY_ptr :
break ;
2017-03-16 22:18:50 -08:00
case BCH_EXTENT_ENTRY_crc32 :
2018-11-01 15:10:01 -04:00
entry - > crc32 . csum = swab32 ( entry - > crc32 . csum ) ;
break ;
2017-03-16 22:18:50 -08:00
case BCH_EXTENT_ENTRY_crc64 :
2018-11-01 15:10:01 -04:00
entry - > crc64 . csum_hi = swab16 ( entry - > crc64 . csum_hi ) ;
entry - > crc64 . csum_lo = swab64 ( entry - > crc64 . csum_lo ) ;
break ;
2017-03-16 22:18:50 -08:00
case BCH_EXTENT_ENTRY_crc128 :
2018-11-01 15:10:01 -04:00
entry - > crc128 . csum . hi = ( __force __le64 )
swab64 ( ( __force u64 ) entry - > crc128 . csum . hi ) ;
entry - > crc128 . csum . lo = ( __force __le64 )
swab64 ( ( __force u64 ) entry - > crc128 . csum . lo ) ;
2017-03-16 22:18:50 -08:00
break ;
2018-11-01 15:13:19 -04:00
case BCH_EXTENT_ENTRY_stripe_ptr :
2017-03-16 22:18:50 -08:00
break ;
}
}
}
2019-05-11 17:32:07 -04:00
void bch2_bkey_ptrs_to_text ( struct printbuf * out , struct bch_fs * c ,
struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
2018-09-27 21:08:39 -04:00
const union bch_extent_entry * entry ;
2018-11-01 15:10:01 -04:00
struct bch_extent_crc_unpacked crc ;
const struct bch_extent_ptr * ptr ;
const struct bch_extent_stripe_ptr * ec ;
2017-03-16 22:18:50 -08:00
struct bch_dev * ca ;
2018-11-01 15:10:01 -04:00
bool first = true ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
bkey_extent_entry_for_each ( ptrs , entry ) {
if ( ! first )
pr_buf ( out , " " ) ;
2018-11-01 15:13:19 -04:00
2018-11-01 15:10:01 -04:00
switch ( __extent_entry_type ( entry ) ) {
case BCH_EXTENT_ENTRY_ptr :
ptr = entry_to_ptr ( entry ) ;
ca = ptr - > dev < c - > sb . nr_devices & & c - > devs [ ptr - > dev ]
? bch_dev_bkey_exists ( c , ptr - > dev )
: NULL ;
2018-11-01 15:13:19 -04:00
2018-11-01 15:10:01 -04:00
pr_buf ( out , " ptr: %u:%llu gen %u%s%s " , ptr - > dev ,
( u64 ) ptr - > offset , ptr - > gen ,
ptr - > cached ? " cached " : " " ,
ca & & ptr_stale ( ca , ptr )
? " stale " : " " ) ;
break ;
case BCH_EXTENT_ENTRY_crc32 :
case BCH_EXTENT_ENTRY_crc64 :
case BCH_EXTENT_ENTRY_crc128 :
crc = bch2_extent_crc_unpack ( k . k , entry_to_crc ( entry ) ) ;
2018-11-01 15:13:19 -04:00
2018-11-01 15:10:01 -04:00
pr_buf ( out , " crc: c_size %u size %u offset %u nonce %u csum %u compress %u " ,
crc . compressed_size ,
crc . uncompressed_size ,
crc . offset , crc . nonce ,
crc . csum_type ,
crc . compression_type ) ;
break ;
case BCH_EXTENT_ENTRY_stripe_ptr :
ec = & entry - > stripe_ptr ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
pr_buf ( out , " ec: idx %llu block %u " ,
( u64 ) ec - > idx , ec - > block ) ;
break ;
default :
pr_buf ( out , " (invalid extent entry %.16llx) " , * ( ( u64 * ) entry ) ) ;
return ;
}
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
first = false ;
2017-03-16 22:18:50 -08:00
}
}
2019-05-11 17:32:07 -04:00
/*
 * Validate a single pointer @ptr of key @k.
 *
 * Checks that the device exists, that no other pointer of @k uses the same
 * device, and that [offset, offset + @size_ondisk) lies within the device's
 * bucket range and within a single bucket. @metadata is accepted but not used
 * here.
 *
 * Returns NULL if the pointer is valid, otherwise a string describing the
 * first problem found.
 */
static const char *extent_ptr_invalid(const struct bch_fs *c,
				      struct bkey_s_c k,
				      const struct bch_extent_ptr *ptr,
				      unsigned size_ondisk,
				      bool metadata)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *other;
	struct bch_dev *dev;

	if (!bch2_dev_exists2(c, ptr->dev))
		return " pointer to invalid device ";

	dev = bch_dev_bkey_exists(c, ptr->dev);
	if (!dev)
		return " pointer to invalid device ";

	bkey_for_each_ptr(ptrs, other) {
		if (other == ptr)
			continue;

		if (other->dev == ptr->dev)
			return " multiple pointers to same device ";
	}

	if (ptr->offset + size_ondisk > bucket_to_sector(dev, dev->mi.nbuckets))
		return " offset past end of device ";

	if (ptr->offset < bucket_to_sector(dev, dev->mi.first_bucket))
		return " offset before first bucket ";

	if (bucket_remainder(dev, ptr->offset) +
	    size_ondisk > dev->mi.bucket_size)
		return " spans multiple buckets ";

	return NULL;
}
const char * bch2_bkey_ptrs_invalid ( const struct bch_fs * c , struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const union bch_extent_entry * entry ;
2019-05-11 17:32:07 -04:00
struct bch_extent_crc_unpacked crc ;
unsigned size_ondisk = k . k - > size ;
2018-11-01 15:10:01 -04:00
const char * reason ;
2019-05-11 17:32:07 -04:00
unsigned nonce = UINT_MAX ;
2017-03-16 22:18:50 -08:00
2019-05-11 17:32:07 -04:00
if ( k . k - > type = = KEY_TYPE_btree_ptr )
size_ondisk = c - > opts . btree_node_size ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
bkey_extent_entry_for_each ( ptrs , entry ) {
if ( __extent_entry_type ( entry ) > = BCH_EXTENT_ENTRY_MAX )
return " invalid extent entry type " ;
2017-03-16 22:18:50 -08:00
2019-05-11 17:32:07 -04:00
if ( k . k - > type = = KEY_TYPE_btree_ptr & &
! extent_entry_is_ptr ( entry ) )
2018-11-01 15:10:01 -04:00
return " has non ptr field " ;
2017-03-16 22:18:50 -08:00
2019-05-11 17:32:07 -04:00
switch ( extent_entry_type ( entry ) ) {
case BCH_EXTENT_ENTRY_ptr :
reason = extent_ptr_invalid ( c , k , & entry - > ptr ,
size_ondisk , false ) ;
if ( reason )
return reason ;
break ;
case BCH_EXTENT_ENTRY_crc32 :
case BCH_EXTENT_ENTRY_crc64 :
case BCH_EXTENT_ENTRY_crc128 :
crc = bch2_extent_crc_unpack ( k . k , entry_to_crc ( entry ) ) ;
if ( crc . offset + crc . live_size >
crc . uncompressed_size )
return " checksum offset + key size > uncompressed size " ;
size_ondisk = crc . compressed_size ;
if ( ! bch2_checksum_type_valid ( c , crc . csum_type ) )
return " invalid checksum type " ;
if ( crc . compression_type > = BCH_COMPRESSION_NR )
return " invalid compression type " ;
if ( bch2_csum_type_is_encryption ( crc . csum_type ) ) {
if ( nonce = = UINT_MAX )
nonce = crc . offset + crc . nonce ;
else if ( nonce ! = crc . offset + crc . nonce )
return " incorrect nonce " ;
}
break ;
case BCH_EXTENT_ENTRY_stripe_ptr :
break ;
}
2017-03-16 22:18:50 -08:00
}
2018-11-01 15:10:01 -04:00
return NULL ;
2017-03-16 22:18:50 -08:00
}
2019-05-11 17:32:07 -04:00
/* Btree ptrs */
/*
 * Validate a btree pointer key: reject values larger than
 * BKEY_BTREE_PTR_VAL_U64s_MAX, then defer to bch2_bkey_ptrs_invalid().
 *
 * Returns NULL if valid, otherwise a string describing the problem.
 */
const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	return bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX
		? " value too big "
		: bch2_bkey_ptrs_invalid(c, k);
}
2017-03-16 22:18:50 -08:00
void bch2_btree_ptr_debugcheck ( struct bch_fs * c , struct btree * b ,
struct bkey_s_c k )
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
2017-03-16 22:18:50 -08:00
const struct bch_extent_ptr * ptr ;
const char * err ;
char buf [ 160 ] ;
struct bucket_mark mark ;
struct bch_dev * ca ;
2018-12-06 10:24:22 -05:00
bch2_fs_bug_on ( ! test_bit ( BCH_FS_REBUILD_REPLICAS , & c - > flags ) & &
! bch2_bkey_replicas_marked ( c , k , false ) , c ,
" btree key bad (replicas not marked in superblock): \n %s " ,
( bch2_bkey_val_to_text ( & PBUF ( buf ) , c , k ) , buf ) ) ;
if ( ! test_bit ( BCH_FS_INITIAL_GC_DONE , & c - > flags ) )
return ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
bkey_for_each_ptr ( ptrs , ptr ) {
2017-03-16 22:18:50 -08:00
ca = bch_dev_bkey_exists ( c , ptr - > dev ) ;
2018-12-06 10:24:22 -05:00
mark = ptr_bucket_mark ( ca , ptr ) ;
2017-03-16 22:18:50 -08:00
err = " stale " ;
2018-12-06 10:24:22 -05:00
if ( gen_after ( mark . gen , ptr - > gen ) )
2017-03-16 22:18:50 -08:00
goto err ;
err = " inconsistent " ;
2018-12-06 10:24:22 -05:00
if ( mark . data_type ! = BCH_DATA_BTREE | |
mark . dirty_sectors < c - > opts . btree_node_size )
2017-03-16 22:18:50 -08:00
goto err ;
}
return ;
err :
2018-11-01 15:10:01 -04:00
bch2_bkey_val_to_text ( & PBUF ( buf ) , c , k ) ;
2018-11-09 01:24:07 -05:00
bch2_fs_bug ( c , " %s btree pointer %s: bucket %zi gen %i mark %08x " ,
err , buf , PTR_BUCKET_NR ( ca , ptr ) ,
mark . gen , ( unsigned ) mark . v . counter ) ;
2017-03-16 22:18:50 -08:00
}
2018-11-09 01:24:07 -05:00
/*
 * Print btree pointer key @k into @out; this simply forwards to
 * bch2_bkey_ptrs_to_text().
 */
void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
	bch2_bkey_ptrs_to_text(out, c, k);
}
/* Extents */
2019-07-18 17:21:21 -04:00
void __bch2_cut_front ( struct bpos where , struct bkey_s k )
2017-03-16 22:18:50 -08:00
{
2019-07-18 17:21:21 -04:00
u64 sub ;
2017-03-16 22:18:50 -08:00
if ( bkey_cmp ( where , bkey_start_pos ( k . k ) ) < = 0 )
2019-07-18 17:21:21 -04:00
return ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( bkey_cmp ( where , k . k - > p ) > 0 ) ;
2019-07-18 17:21:21 -04:00
sub = where . offset - bkey_start_offset ( k . k ) ;
2017-03-16 22:18:50 -08:00
2019-07-18 17:21:21 -04:00
k . k - > size - = sub ;
2017-03-16 22:18:50 -08:00
2019-07-18 17:21:21 -04:00
if ( ! k . k - > size )
2018-11-01 15:10:01 -04:00
k . k - > type = KEY_TYPE_deleted ;
2019-07-18 17:21:21 -04:00
switch ( k . k - > type ) {
case KEY_TYPE_deleted :
case KEY_TYPE_discard :
case KEY_TYPE_error :
case KEY_TYPE_cookie :
break ;
2019-08-16 09:59:56 -04:00
case KEY_TYPE_extent :
case KEY_TYPE_reflink_v : {
2019-07-18 17:21:21 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( k ) ;
2017-03-16 22:18:50 -08:00
union bch_extent_entry * entry ;
bool seen_crc = false ;
2019-07-18 17:21:21 -04:00
bkey_extent_entry_for_each ( ptrs , entry ) {
2017-03-16 22:18:50 -08:00
switch ( extent_entry_type ( entry ) ) {
case BCH_EXTENT_ENTRY_ptr :
if ( ! seen_crc )
2019-07-18 17:21:21 -04:00
entry - > ptr . offset + = sub ;
2017-03-16 22:18:50 -08:00
break ;
case BCH_EXTENT_ENTRY_crc32 :
2019-07-18 17:21:21 -04:00
entry - > crc32 . offset + = sub ;
2017-03-16 22:18:50 -08:00
break ;
case BCH_EXTENT_ENTRY_crc64 :
2019-07-18 17:21:21 -04:00
entry - > crc64 . offset + = sub ;
2017-03-16 22:18:50 -08:00
break ;
case BCH_EXTENT_ENTRY_crc128 :
2019-07-18 17:21:21 -04:00
entry - > crc128 . offset + = sub ;
2017-03-16 22:18:50 -08:00
break ;
2018-11-01 15:13:19 -04:00
case BCH_EXTENT_ENTRY_stripe_ptr :
break ;
2017-03-16 22:18:50 -08:00
}
if ( extent_entry_is_crc ( entry ) )
seen_crc = true ;
}
2019-07-18 17:21:21 -04:00
break ;
}
2019-08-16 09:59:56 -04:00
case KEY_TYPE_reflink_p : {
struct bkey_s_reflink_p p = bkey_s_to_reflink_p ( k ) ;
le64_add_cpu ( & p . v - > idx , sub ) ;
break ;
}
2019-07-18 17:21:21 -04:00
case KEY_TYPE_reservation :
break ;
default :
BUG ( ) ;
}
2017-03-16 22:18:50 -08:00
}
bool bch2_cut_back ( struct bpos where , struct bkey * k )
{
u64 len = 0 ;
if ( bkey_cmp ( where , k - > p ) > = 0 )
return false ;
EBUG_ON ( bkey_cmp ( where , bkey_start_pos ( k ) ) < 0 ) ;
len = where . offset - bkey_start_offset ( k ) ;
k - > p = where ;
k - > size = len ;
if ( ! len )
2018-11-01 15:10:01 -04:00
k - > type = KEY_TYPE_deleted ;
2017-03-16 22:18:50 -08:00
return true ;
}
2016-07-21 19:05:06 -08:00
static bool extent_i_save ( struct btree * b , struct bkey_packed * dst ,
struct bkey_i * src )
2017-03-16 22:18:50 -08:00
{
2016-07-21 19:05:06 -08:00
struct bkey_format * f = & b - > format ;
struct bkey_i * dst_unpacked ;
struct bkey_packed tmp ;
if ( ( dst_unpacked = packed_to_bkey ( dst ) ) )
dst_unpacked - > k = src - > k ;
else if ( bch2_bkey_pack_key ( & tmp , & src - > k , f ) )
memcpy_u64s ( dst , & tmp , f - > key_u64s ) ;
else
return false ;
memcpy_u64s ( bkeyp_val ( f , dst ) , & src - > v , bkey_val_u64s ( & src - > k ) ) ;
return true ;
2017-03-16 22:18:50 -08:00
}
/*
 * Forward declaration: extent_bset_insert() below calls this with the
 * filesystem, the btree iterator, two packed keys and a bool flag. The
 * definition is not part of this section of the file.
 */
static bool bch2_extent_merge_inline ( struct bch_fs * ,
struct btree_iter * ,
struct bkey_packed * ,
struct bkey_packed * ,
bool ) ;
2019-03-28 01:51:47 -04:00
/*
 * Debug-only check that @insert does not overlap its neighbours in node @b.
 *
 * Compiled to nothing unless CONFIG_BCACHEFS_DEBUG is set, and skipped at
 * runtime unless expensive_debug_checks() is enabled. Working on copies of
 * @_iter, it BUGs if the key returned by the prev_filter step ends after
 * @insert starts, and dumps the node and panics if @insert ends after the
 * key returned by the peek_filter step starts.
 */
static void verify_extent_nonoverlapping(struct bch_fs *c,
					 struct btree *b,
					 struct btree_node_iter *_iter,
					 struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree_node_iter iter;
	struct bkey_packed *k;
	struct bkey uk;

	if (!expensive_debug_checks(c))
		return;

	iter = *_iter;
	k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
	if (k) {
		uk = bkey_unpack_key(b, k);
		BUG_ON(bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0);
	}

	iter = *_iter;
	k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
	if (!k)
		return;

	uk = bkey_unpack_key(b, k);
	if (bkey_cmp(insert->k.p, bkey_start_pos(&uk)) > 0) {
		char buf1[100];
		char buf2[100];

		bch2_bkey_to_text(&PBUF(buf1), &insert->k);
		bch2_bkey_to_text(&PBUF(buf2), &uk);

		bch2_dump_btree_node(b);
		panic(" insert > next : \n "
		      " insert %s \n "
		      " next %s \n ",
		      buf1, buf2);
	}
#endif
}
static void verify_modified_extent ( struct btree_iter * iter ,
struct bkey_packed * k )
{
bch2_btree_iter_verify ( iter , iter - > l [ 0 ] . b ) ;
bch2_verify_insert_pos ( iter - > l [ 0 ] . b , k , k , k - > u64s ) ;
}
2017-03-16 22:18:50 -08:00
static void extent_bset_insert ( struct bch_fs * c , struct btree_iter * iter ,
struct bkey_i * insert )
{
struct btree_iter_level * l = & iter - > l [ 0 ] ;
2018-08-05 15:21:52 -04:00
struct btree_node_iter node_iter ;
struct bkey_packed * k ;
BUG_ON ( insert - > k . u64s > bch_btree_keys_u64s_remaining ( c , l - > b ) ) ;
2017-03-16 22:18:50 -08:00
EBUG_ON ( bkey_deleted ( & insert - > k ) | | ! insert - > k . size ) ;
2019-03-28 01:51:47 -04:00
verify_extent_nonoverlapping ( c , l - > b , & l - > iter , insert ) ;
2016-07-21 19:05:06 -08:00
2018-08-05 15:21:52 -04:00
node_iter = l - > iter ;
2018-11-01 15:10:01 -04:00
k = bch2_btree_node_iter_prev_filter ( & node_iter , l - > b , KEY_TYPE_discard ) ;
2018-08-05 15:21:52 -04:00
if ( k & & ! bkey_written ( l - > b , k ) & &
bch2_extent_merge_inline ( c , iter , k , bkey_to_packed ( insert ) , true ) )
return ;
2017-03-16 22:18:50 -08:00
2018-08-05 15:21:52 -04:00
node_iter = l - > iter ;
2018-11-01 15:10:01 -04:00
k = bch2_btree_node_iter_peek_filter ( & node_iter , l - > b , KEY_TYPE_discard ) ;
2018-08-05 15:21:52 -04:00
if ( k & & ! bkey_written ( l - > b , k ) & &
bch2_extent_merge_inline ( c , iter , bkey_to_packed ( insert ) , k , false ) )
return ;
2017-03-16 22:18:50 -08:00
2019-08-20 17:46:22 -04:00
/*
* may have skipped past some deleted extents greater than the insert
* key , before we got to a non deleted extent and knew we could bail out
* rewind the iterator a bit if necessary :
*/
node_iter = l - > iter ;
while ( ( k = bch2_btree_node_iter_prev_all ( & node_iter , l - > b ) ) & &
bkey_cmp_left_packed ( l - > b , k , & insert - > k . p ) > 0 )
l - > iter = node_iter ;
2018-08-11 19:12:05 -04:00
k = bch2_btree_node_iter_bset_pos ( & l - > iter , l - > b , bset_tree_last ( l - > b ) ) ;
2017-03-16 22:18:50 -08:00
2018-08-05 15:21:52 -04:00
bch2_bset_insert ( l - > b , & l - > iter , k , insert , 0 ) ;
2018-08-11 19:12:05 -04:00
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter , k , 0 , k - > u64s ) ;
2016-07-21 19:05:06 -08:00
bch2_btree_iter_verify ( iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
}
2019-03-11 14:59:58 -04:00
/*
 * Count the pointer and stripe pointer entries of @k; entries of any other
 * type are ignored.
 */
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	unsigned count = 0;

	bkey_extent_entry_for_each(ptrs, entry) {
		switch (__extent_entry_type(entry)) {
		case BCH_EXTENT_ENTRY_ptr:
		case BCH_EXTENT_ENTRY_stripe_ptr:
			count++;
			break;
		default:
			break;
		}
	}

	return count;
}
2019-08-16 09:58:07 -04:00
static int __bch2_extent_atomic_end ( struct btree_trans * trans ,
struct bkey_s_c k ,
unsigned offset ,
struct bpos * end ,
unsigned * nr_iters ,
unsigned max_iters )
{
int ret = 0 ;
switch ( k . k - > type ) {
case KEY_TYPE_extent :
2019-08-22 16:23:10 -04:00
case KEY_TYPE_reflink_v :
2019-08-16 09:58:07 -04:00
* nr_iters + = bch2_bkey_nr_alloc_ptrs ( k ) ;
if ( * nr_iters > = max_iters ) {
* end = bpos_min ( * end , k . k - > p ) ;
return 0 ;
}
break ;
2019-08-16 09:59:56 -04:00
case KEY_TYPE_reflink_p : {
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p ( k ) ;
u64 idx = le64_to_cpu ( p . v - > idx ) ;
unsigned sectors = end - > offset - bkey_start_offset ( p . k ) ;
struct btree_iter * iter ;
struct bkey_s_c r_k ;
for_each_btree_key ( trans , iter ,
BTREE_ID_REFLINK , POS ( 0 , idx + offset ) ,
BTREE_ITER_SLOTS , r_k , ret ) {
if ( bkey_cmp ( bkey_start_pos ( r_k . k ) ,
POS ( 0 , idx + sectors ) ) > = 0 )
break ;
* nr_iters + = 1 ;
if ( * nr_iters > = max_iters ) {
struct bpos pos = bkey_start_pos ( k . k ) ;
pos . offset + = r_k . k - > p . offset - idx ;
* end = bpos_min ( * end , pos ) ;
break ;
}
}
bch2_trans_iter_put ( trans , iter ) ;
break ;
}
2019-08-16 09:58:07 -04:00
}
return ret ;
}
2019-09-07 18:03:56 -04:00
int bch2_extent_atomic_end ( struct btree_iter * iter ,
2019-08-16 09:58:07 -04:00
struct bkey_i * insert ,
struct bpos * end )
2018-08-05 17:46:41 -04:00
{
2019-09-07 18:03:56 -04:00
struct btree_trans * trans = iter - > trans ;
2018-08-05 17:46:41 -04:00
struct btree * b = iter - > l [ 0 ] . b ;
2019-03-11 14:59:58 -04:00
struct btree_node_iter node_iter = iter - > l [ 0 ] . iter ;
struct bkey_packed * _k ;
2019-08-16 09:58:07 -04:00
unsigned nr_iters =
2019-03-11 14:59:58 -04:00
bch2_bkey_nr_alloc_ptrs ( bkey_i_to_s_c ( insert ) ) ;
2019-08-16 09:58:07 -04:00
int ret = 0 ;
2018-08-05 17:46:41 -04:00
BUG_ON ( iter - > uptodate > BTREE_ITER_NEED_PEEK ) ;
2019-03-11 14:59:58 -04:00
BUG_ON ( bkey_cmp ( bkey_start_pos ( & insert - > k ) , b - > data - > min_key ) < 0 ) ;
2019-08-16 09:58:07 -04:00
* end = bpos_min ( insert - > k . p , b - > key . k . p ) ;
ret = __bch2_extent_atomic_end ( trans , bkey_i_to_s_c ( insert ) ,
0 , end , & nr_iters , 10 ) ;
if ( ret )
return ret ;
while ( nr_iters < 20 & &
( _k = bch2_btree_node_iter_peek_filter ( & node_iter , b ,
2019-03-11 14:59:58 -04:00
KEY_TYPE_discard ) ) ) {
struct bkey unpacked ;
struct bkey_s_c k = bkey_disassemble ( b , _k , & unpacked ) ;
2019-08-16 09:58:07 -04:00
unsigned offset = 0 ;
2019-03-11 14:59:58 -04:00
2019-08-16 09:58:07 -04:00
if ( bkey_cmp ( bkey_start_pos ( k . k ) , * end ) > = 0 )
2019-03-11 14:59:58 -04:00
break ;
2019-08-16 09:58:07 -04:00
if ( bkey_cmp ( bkey_start_pos ( & insert - > k ) ,
bkey_start_pos ( k . k ) ) > 0 )
offset = bkey_start_offset ( & insert - > k ) -
bkey_start_offset ( k . k ) ;
2019-03-11 14:59:58 -04:00
2019-08-16 09:58:07 -04:00
ret = __bch2_extent_atomic_end ( trans , k , offset ,
end , & nr_iters , 20 ) ;
if ( ret )
return ret ;
if ( nr_iters > = 20 )
break ;
2019-03-11 14:59:58 -04:00
bch2_btree_node_iter_advance ( & node_iter , b ) ;
}
2018-08-05 17:46:41 -04:00
2019-08-16 09:58:07 -04:00
return 0 ;
2019-03-16 14:27:40 -04:00
}
2018-08-05 17:46:41 -04:00
2019-08-16 09:58:07 -04:00
int bch2_extent_trim_atomic ( struct bkey_i * k , struct btree_iter * iter )
2019-03-16 14:27:40 -04:00
{
2019-08-16 09:58:07 -04:00
struct bpos end ;
int ret ;
2019-09-07 18:03:56 -04:00
ret = bch2_extent_atomic_end ( iter , k , & end ) ;
2019-08-16 09:58:07 -04:00
if ( ret )
return ret ;
bch2_cut_back ( end , & k - > k ) ;
return 0 ;
2019-03-16 14:27:40 -04:00
}
2019-08-16 09:58:07 -04:00
int bch2_extent_is_atomic ( struct bkey_i * k , struct btree_iter * iter )
2019-03-16 14:27:40 -04:00
{
2019-08-16 09:58:07 -04:00
struct bpos end ;
int ret ;
2019-09-07 18:03:56 -04:00
ret = bch2_extent_atomic_end ( iter , k , & end ) ;
2019-08-16 09:58:07 -04:00
if ( ret )
return ret ;
return ! bkey_cmp ( end , k - > k . p ) ;
2018-08-05 17:46:41 -04:00
}
2018-08-03 19:41:44 -04:00
enum btree_insert_ret
2019-03-13 22:44:04 -04:00
bch2_extent_can_insert ( struct btree_trans * trans ,
2018-08-03 19:41:44 -04:00
struct btree_insert_entry * insert ,
unsigned * u64s )
2017-03-16 22:18:50 -08:00
{
2018-08-03 19:41:44 -04:00
struct btree_iter_level * l = & insert - > iter - > l [ 0 ] ;
struct btree_node_iter node_iter = l - > iter ;
enum bch_extent_overlap overlap ;
struct bkey_packed * _k ;
struct bkey unpacked ;
struct bkey_s_c k ;
int sectors ;
2018-08-05 15:21:52 -04:00
/*
* We avoid creating whiteouts whenever possible when deleting , but
* those optimizations mean we may potentially insert two whiteouts
* instead of one ( when we overlap with the front of one extent and the
* back of another ) :
*/
if ( bkey_whiteout ( & insert - > k - > k ) )
* u64s + = BKEY_U64s ;
2018-08-03 19:41:44 -04:00
_k = bch2_btree_node_iter_peek_filter ( & node_iter , l - > b ,
2018-11-01 15:10:01 -04:00
KEY_TYPE_discard ) ;
2018-08-03 19:41:44 -04:00
if ( ! _k )
return BTREE_INSERT_OK ;
k = bkey_disassemble ( l - > b , _k , & unpacked ) ;
overlap = bch2_extent_overlap ( & insert - > k - > k , k . k ) ;
/* account for having to split existing extent: */
if ( overlap = = BCH_EXTENT_OVERLAP_MIDDLE )
* u64s + = _k - > u64s ;
2017-03-16 22:18:50 -08:00
if ( overlap = = BCH_EXTENT_OVERLAP_MIDDLE & &
( sectors = bch2_extent_is_compressed ( k ) ) ) {
2019-02-11 22:08:09 -05:00
int flags = trans - > flags & BTREE_INSERT_NOFAIL
? BCH_DISK_RESERVATION_NOFAIL : 0 ;
2017-03-16 22:18:50 -08:00
2018-08-03 19:41:44 -04:00
switch ( bch2_disk_reservation_add ( trans - > c ,
trans - > disk_res ,
2018-11-04 23:10:09 -05:00
sectors , flags ) ) {
2017-03-16 22:18:50 -08:00
case 0 :
break ;
case - ENOSPC :
return BTREE_INSERT_ENOSPC ;
default :
BUG ( ) ;
}
}
return BTREE_INSERT_OK ;
}
2018-08-05 15:28:29 -04:00
/*
 * extent_squash - resolve the overlap between @insert and one existing key
 *
 * @c:		filesystem, passed through to extent_bset_insert()
 * @iter:	btree iterator; only level 0 (iter->l[0]) is touched
 * @insert:	the key being inserted
 * @_k:		packed, in-node form of the existing key
 * @k:		unpacked form of the same existing key
 * @overlap:	how @insert overlaps @k, as computed by the caller
 *
 * The existing key is trimmed, deleted or split in place so that it no longer
 * overlaps @insert; after each in-node modification the node iterators are
 * repaired with bch2_btree_node_iter_fix().
 */
static void
2019-03-16 14:27:40 -04:00
extent_squash ( struct bch_fs * c , struct btree_iter * iter ,
struct bkey_i * insert ,
2018-08-11 19:12:05 -04:00
struct bkey_packed * _k , struct bkey_s k ,
2017-03-16 22:18:50 -08:00
enum bch_extent_overlap overlap )
{
struct btree_iter_level * l = & iter - > l [ 0 ] ;
switch ( overlap ) {
case BCH_EXTENT_OVERLAP_FRONT :
/* insert overlaps with start of k: drop the front of k up to insert's end */
2018-11-05 02:31:48 -05:00
__bch2_cut_front ( insert - > k . p , k ) ;
2017-03-16 22:18:50 -08:00
BUG_ON ( bkey_deleted ( k . k ) ) ;
2018-08-11 19:12:05 -04:00
/* write the trimmed key back over its packed form */
extent_save ( l - > b , _k , k . k ) ;
2019-09-14 10:45:46 -04:00
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter ,
_k , _k - > u64s , _k - > u64s ) ;
2018-08-05 15:21:52 -04:00
verify_modified_extent ( iter , _k ) ;
2017-03-16 22:18:50 -08:00
break ;
case BCH_EXTENT_OVERLAP_BACK :
/* insert overlaps with end of k: cut k back to where insert starts */
2018-11-05 02:31:48 -05:00
bch2_cut_back ( bkey_start_pos ( & insert - > k ) , k . k ) ;
2017-03-16 22:18:50 -08:00
BUG_ON ( bkey_deleted ( k . k ) ) ;
2018-08-11 19:12:05 -04:00
extent_save ( l - > b , _k , k . k ) ;
2017-03-16 22:18:50 -08:00
/*
 * As the auxiliary tree is indexed by the end of the
 * key and we've just changed the end, update the
 * auxiliary tree.
 */
2018-08-11 19:12:05 -04:00
bch2_bset_fix_invalidated_key ( l - > b , _k ) ;
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter ,
2016-07-21 19:05:06 -08:00
_k , _k - > u64s , _k - > u64s ) ;
2018-08-05 15:21:52 -04:00
verify_modified_extent ( iter , _k ) ;
2017-03-16 22:18:50 -08:00
break ;
case BCH_EXTENT_OVERLAP_ALL : {
/* The insert key completely covers k, invalidate k */
if ( ! bkey_whiteout ( k . k ) )
2018-08-11 19:12:05 -04:00
btree_account_key_drop ( l - > b , _k ) ;
2017-03-16 22:18:50 -08:00
2018-11-05 02:31:48 -05:00
k . k - > size = 0 ;
2018-11-01 15:10:01 -04:00
k . k - > type = KEY_TYPE_deleted ;
2017-03-16 22:18:50 -08:00
2018-08-11 19:12:05 -04:00
/*
 * If _k lives in the node's last bset it is physically removed from
 * the bset; otherwise it is overwritten in place with the deleted,
 * zero size key.
 */
if ( _k > = btree_bset_last ( l - > b ) - > start ) {
2016-07-21 19:05:06 -08:00
/* saved up front: passed to the iterator fixup after the delete */
unsigned u64s = _k - > u64s ;
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
bch2_bset_delete ( l - > b , _k , _k - > u64s ) ;
2018-08-11 19:12:05 -04:00
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter ,
2016-07-21 19:05:06 -08:00
_k , u64s , 0 ) ;
2018-08-11 19:12:05 -04:00
bch2_btree_iter_verify ( iter , l - > b ) ;
2017-03-16 22:18:50 -08:00
} else {
2018-08-11 19:12:05 -04:00
extent_save ( l - > b , _k , k . k ) ;
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter ,
2016-07-21 19:05:06 -08:00
_k , _k - > u64s , _k - > u64s ) ;
2018-08-05 15:21:52 -04:00
verify_modified_extent ( iter , _k ) ;
2017-03-16 22:18:50 -08:00
}
break ;
}
case BCH_EXTENT_OVERLAP_MIDDLE : {
BKEY_PADDED ( k ) split ;
/*
 * The insert key falls 'in the middle' of k
 * The insert key splits k in 3:
 * - start only in k, preserve
 * - middle common section, invalidate in k
 * - end only in k, preserve
 *
 * As written below: a copy of k (split) is cut back to become the
 * preserved start, k itself is cut at the front to become the
 * preserved end and is saved in place, and the copy is then inserted
 * with extent_bset_insert().
 *
 * modify k _before_ doing the insert (which will move
 * what k points to)
 */
bkey_reassemble ( & split . k , k . s_c ) ;
2018-08-11 19:12:05 -04:00
split . k . k . needs_whiteout | = bkey_written ( l - > b , _k ) ;
2017-03-16 22:18:50 -08:00
bch2_cut_back ( bkey_start_pos ( & insert - > k ) , & split . k . k ) ;
BUG_ON ( bkey_deleted ( & split . k . k ) ) ;
2018-11-05 02:31:48 -05:00
__bch2_cut_front ( insert - > k . p , k ) ;
2017-03-16 22:18:50 -08:00
BUG_ON ( bkey_deleted ( k . k ) ) ;
2018-08-11 19:12:05 -04:00
extent_save ( l - > b , _k , k . k ) ;
2019-09-14 10:45:46 -04:00
bch2_btree_node_iter_fix ( iter , l - > b , & l - > iter ,
_k , _k - > u64s , _k - > u64s ) ;
2018-08-05 15:21:52 -04:00
verify_modified_extent ( iter , _k ) ;
2017-03-16 22:18:50 -08:00
extent_bset_insert ( c , iter , & split . k ) ;
break ;
}
}
}
2019-03-16 14:27:40 -04:00
/*
 * State carried through an extent insert (see bch2_insert_fixup_extent() and
 * __bch2_insert_fixup_extent()).
 */
struct extent_insert_state {
/* copy of the insert key; this is what gets journalled when deleting */
struct bkey_i whiteout ;
/* true if a key should be passed to bch2_btree_journal_key() */
bool update_journal ;
/* true if a key should be added to the node with extent_bset_insert() */
bool update_btree ;
/* true if the insert key is itself a whiteout, i.e. this is a deletion */
bool deleting ;
} ;
/*
 * __bch2_insert_fixup_extent - walk the existing keys that @insert overlaps
 * and fix each of them up
 *
 * @c:		filesystem
 * @iter:	btree iterator; keys are peeked from iter->l[0]
 * @insert:	key being inserted; may be trimmed from the front as we go
 * @s:		insert state; update_journal / update_btree may be switched on
 *
 * For every key returned by the node iterator that starts before the end of
 * @insert, decide whether the journal and/or the btree need updating, then
 * hand the overlap to extent_squash(). The walk stops after a FRONT or MIDDLE
 * overlap, after the in-place deletion shortcut, or when the next key starts
 * at or past the end of @insert.
 */
static void __bch2_insert_fixup_extent ( struct bch_fs * c ,
struct btree_iter * iter ,
struct bkey_i * insert ,
struct extent_insert_state * s )
2017-03-16 22:18:50 -08:00
{
struct btree_iter_level * l = & iter - > l [ 0 ] ;
struct bkey_packed * _k ;
struct bkey unpacked ;
2019-03-16 14:27:40 -04:00
while ( ( _k = bch2_btree_node_iter_peek_filter ( & l - > iter , l - > b ,
2018-11-01 15:10:01 -04:00
KEY_TYPE_discard ) ) ) {
2018-08-11 19:12:05 -04:00
struct bkey_s k = __bkey_disassemble ( l - > b , _k , & unpacked ) ;
2019-03-16 14:27:40 -04:00
/* end of the region shared by insert and k */
struct bpos cur_end = bpos_min ( insert - > k . p , k . k - > p ) ;
enum bch_extent_overlap overlap =
bch2_extent_overlap ( & insert - > k , k . k ) ;
2017-03-16 22:18:50 -08:00
/* k starts at or after the end of insert: nothing left to fix up */
if ( bkey_cmp ( bkey_start_pos ( k . k ) , insert - > k . p ) > = 0 )
break ;
2018-08-05 15:21:52 -04:00
/* overwriting a key that isn't a whiteout must be journalled */
if ( ! bkey_whiteout ( k . k ) )
s - > update_journal = true ;
2016-07-21 19:05:06 -08:00
2018-08-05 15:21:52 -04:00
/*
 * Nothing needs journalling so far: skip this part of the range by
 * trimming both the insert key and the whiteout copy up to cur_end.
 */
if ( ! s - > update_journal ) {
2019-03-16 14:27:40 -04:00
bch2_cut_front ( cur_end , insert ) ;
bch2_cut_front ( cur_end , & s - > whiteout ) ;
bch2_btree_iter_set_pos_same_leaf ( iter , cur_end ) ;
2018-08-05 15:21:52 -04:00
goto next ;
}
2016-07-21 19:05:06 -08:00
2018-08-05 15:21:52 -04:00
/*
 * When deleting, if possible just do it by switching the type
 * of the key we're deleting, instead of creating and inserting
 * a new whiteout (only when insert and k cover exactly the same
 * range and no btree update has been required so far):
 */
if ( s - > deleting & &
! s - > update_btree & &
! bkey_cmp ( insert - > k . p , k . k - > p ) & &
! bkey_cmp ( bkey_start_pos ( & insert - > k ) , bkey_start_pos ( k . k ) ) ) {
if ( ! bkey_whiteout ( k . k ) ) {
2018-08-11 19:12:05 -04:00
btree_account_key_drop ( l - > b , _k ) ;
2018-11-01 15:10:01 -04:00
_k - > type = KEY_TYPE_discard ;
2018-08-11 19:12:05 -04:00
reserve_whiteout ( l - > b , _k ) ;
2018-08-05 15:21:52 -04:00
}
break ;
}
2016-07-21 19:05:06 -08:00
2018-08-11 19:12:05 -04:00
/*
 * k is flagged needs_whiteout or bkey_written() is true for it: the
 * insert key must carry needs_whiteout and must go into the btree.
 */
if ( k . k - > needs_whiteout | | bkey_written ( l - > b , _k ) ) {
2018-08-05 15:21:52 -04:00
insert - > k . needs_whiteout = true ;
s - > update_btree = true ;
}
2017-03-16 22:18:50 -08:00
2018-08-05 15:21:52 -04:00
/*
 * A whiteout that is completely covered by a key we are going to add
 * to the btree gives up its whiteout reservation.
 */
if ( s - > update_btree & &
overlap = = BCH_EXTENT_OVERLAP_ALL & &
bkey_whiteout ( k . k ) & &
k . k - > needs_whiteout ) {
2018-08-11 19:12:05 -04:00
unreserve_whiteout ( l - > b , _k ) ;
2018-08-05 15:21:52 -04:00
_k - > needs_whiteout = false ;
}
2017-03-16 22:18:50 -08:00
2019-03-16 14:27:40 -04:00
extent_squash ( c , iter , insert , _k , k , overlap ) ;
2017-03-16 22:18:50 -08:00
2018-08-05 15:21:52 -04:00
/* no btree update pending: the processed prefix of insert can be dropped */
if ( ! s - > update_btree )
2019-03-16 14:27:40 -04:00
bch2_cut_front ( cur_end , insert ) ;
2018-08-05 15:21:52 -04:00
next :
2018-08-05 15:28:29 -04:00
/* after a FRONT or MIDDLE overlap the walk is finished */
if ( overlap = = BCH_EXTENT_OVERLAP_FRONT | |
2016-07-21 19:05:06 -08:00
overlap = = BCH_EXTENT_OVERLAP_MIDDLE )
2017-03-16 22:18:50 -08:00
break ;
2016-07-21 19:05:06 -08:00
}
2017-03-16 22:18:50 -08:00
}
/**
 * bch2_insert_fixup_extent - insert a new extent and deal with overlaps
 *
 * This may result in not actually doing the insert, or inserting some subset
 * of the insert key. For cmpxchg operations this is where that logic lives.
 *
 * Existing keys that overlap the insert are fixed up first. Then, depending
 * on what that pass decided, the key is added to the leaf node with
 * extent_bset_insert() and/or journalled with bch2_btree_journal_key(). When
 * deleting, the key that is added/journalled has type KEY_TYPE_discard.
 * Finally the iterator is positioned at the end of the insert key and the
 * insert key is trimmed from the front up to its own end position.
 *
 * NOTE(review): earlier wording of this comment described a boolean return
 * value and "@b or @res" filling up; the function as written returns void and
 * takes neither - confirm no caller still relies on that description.
 *
 * BSET INVARIANTS: this function is responsible for maintaining all the
 * invariants for bsets of extents in memory. things get really hairy with 0
 * size extents
 *
 * within one bset:
 *
 * bkey_start_pos(bkey_next(k)) >= k
 * or bkey_start_offset(bkey_next(k)) >= k->offset
 *
 * i.e. strict ordering, no overlapping extents.
 *
 * multiple bsets (i.e. full btree node):
 *
 * ∀ k, j
 *   k.size != 0 ∧ j.size != 0 →
 *     ¬ (k > bkey_start_pos(j) ∧ k < j)
 *
 * i.e. no two overlapping keys _of nonzero size_
 *
 * We can't realistically maintain this invariant for zero size keys because of
 * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
 * there may be another 0 size key between them in another bset, and it will
 * thus overlap with the merged key.
 *
 * In addition, the end of iter->pos indicates how much has been processed.
 * If the end of iter->pos is not the same as the end of insert, then
 * key insertion needs to continue/be retried.
 * NOTE(review): the code below always moves @iter to the end of the insert
 * key, so this last paragraph may be out of date - confirm.
 */
2019-03-13 22:44:04 -04:00
void bch2_insert_fixup_extent ( struct btree_trans * trans ,
2019-03-16 14:27:40 -04:00
struct btree_insert_entry * insert )
2017-03-16 22:18:50 -08:00
{
2019-03-16 14:27:40 -04:00
struct bch_fs * c = trans - > c ;
2018-08-08 19:53:30 -04:00
struct btree_iter * iter = insert - > iter ;
2017-03-16 22:18:50 -08:00
/*
 * A non-whiteout insert always updates both journal and btree; for a
 * whiteout (deletion) both start out false and are only switched on by
 * the fixup pass if it finds something that requires them.
 */
struct extent_insert_state s = {
2018-08-05 15:21:52 -04:00
. whiteout = * insert - > k ,
. update_journal = ! bkey_whiteout ( & insert - > k - > k ) ,
. update_btree = ! bkey_whiteout ( & insert - > k - > k ) ,
2017-03-16 22:18:50 -08:00
. deleting = bkey_whiteout ( & insert - > k - > k ) ,
} ;
2019-03-16 14:27:40 -04:00
BKEY_PADDED ( k ) tmp ;
2017-03-16 22:18:50 -08:00
/* must be a leaf iterator, a nonzero size key, positioned at the key's start */
EBUG_ON ( iter - > level ) ;
EBUG_ON ( ! insert - > k - > k . size ) ;
EBUG_ON ( bkey_cmp ( iter - > pos , bkey_start_pos ( & insert - > k - > k ) ) ) ;
2019-03-16 14:27:40 -04:00
__bch2_insert_fixup_extent ( c , iter , insert - > k , & s ) ;
2017-03-16 22:18:50 -08:00
2019-03-16 14:27:40 -04:00
bch2_btree_iter_set_pos_same_leaf ( iter , insert - > k - > k . p ) ;
2017-03-16 22:18:50 -08:00
2019-03-16 14:27:40 -04:00
/* btree update: insert a copy of what is left of the insert key */
if ( s . update_btree ) {
bkey_copy ( & tmp . k , insert - > k ) ;
if ( s . deleting )
tmp . k . k . type = KEY_TYPE_discard ;
2019-08-22 11:17:04 -04:00
2019-03-16 14:27:40 -04:00
if ( debug_check_bkeys ( c ) )
bch2_bkey_debugcheck ( c , iter - > l [ 0 ] . b ,
bkey_i_to_s_c ( & tmp . k ) ) ;
2019-08-22 11:17:04 -04:00
2019-03-16 14:27:40 -04:00
EBUG_ON ( bkey_deleted ( & tmp . k . k ) | | ! tmp . k . k . size ) ;
extent_bset_insert ( c , iter , & tmp . k ) ;
}
/* journal update: the insert key itself, or the whiteout copy when deleting */
if ( s . update_journal ) {
bkey_copy ( & tmp . k , ! s . deleting ? insert - > k : & s . whiteout ) ;
if ( s . deleting )
tmp . k . k . type = KEY_TYPE_discard ;
EBUG_ON ( bkey_deleted ( & tmp . k . k ) | | ! tmp . k . k . size ) ;
bch2_btree_journal_key ( trans , iter , & tmp . k ) ;
}
/* everything up to the end of the insert key has now been processed */
bch2_cut_front ( insert - > k - > k . p , insert - > k ) ;
2017-03-16 22:18:50 -08:00
}
/*
 * Validate an extent key: the result is whatever bch2_bkey_ptrs_invalid()
 * reports for @k.
 */
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	const char *err = bch2_bkey_ptrs_invalid(c, k);

	return err;
}
2018-11-01 15:10:01 -04:00
/*
 * bch2_extent_debugcheck - runtime consistency checks for an extent key
 *
 * @c:	filesystem
 * @b:	btree node the key came from (not referenced in this function)
 * @k:	key to check; converted with bkey_s_c_to_extent()
 *
 * Checks that the key's replicas are marked, and - once journal replay has
 * finished - that each pointer agrees with the mark of the bucket it points
 * into. Failures are reported through bch2_fs_bug_on().
 */
void bch2_extent_debugcheck ( struct bch_fs * c , struct btree * b ,
struct bkey_s_c k )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_s_c_extent e = bkey_s_c_to_extent ( k ) ;
2018-12-06 10:24:22 -05:00
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
2017-03-16 22:18:50 -08:00
/* scratch buffer for the textual form of the key in error messages */
char buf [ 160 ] ;
/*
 * XXX: we should be doing most/all of these checks at startup time,
 * where we check bch2_bkey_invalid() in btree_node_read_done()
 *
 * But note that we can't check for stale pointers or incorrect gc marks
 * until after journal replay is done (it might be an extent that's
 * going to get overwritten during replay)
 */
2019-08-22 11:17:04 -04:00
/* only a trylock: if mark_lock can't be taken the replicas check is skipped */
if ( percpu_down_read_trylock ( & c - > mark_lock ) ) {
bch2_fs_bug_on ( ! test_bit ( BCH_FS_REBUILD_REPLICAS , & c - > flags ) & &
! bch2_bkey_replicas_marked_locked ( c , e . s_c , false ) , c ,
" extent key bad (replicas not marked in superblock): \n %s " ,
( bch2_bkey_val_to_text ( & PBUF ( buf ) , c , e . s_c ) , buf ) ) ;
percpu_up_read ( & c - > mark_lock ) ;
}
2018-12-06 10:24:22 -05:00
/*
 * If journal replay hasn't finished, we might be seeing keys
 * that will be overwritten by the time journal replay is done:
 */
if ( ! test_bit ( JOURNAL_REPLAY_DONE , & c - > journal . flags ) )
2017-03-16 22:18:50 -08:00
return ;
2018-12-06 10:24:22 -05:00
extent_for_each_ptr_decode ( e , p , entry ) {
struct bch_dev * ca = bch_dev_bkey_exists ( c , p . ptr . dev ) ;
struct bucket_mark mark = ptr_bucket_mark ( ca , & p . ptr ) ;
/* result of gen_after(bucket gen, pointer gen); nonzero is treated as stale */
unsigned stale = gen_after ( mark . gen , p . ptr . gen ) ;
unsigned disk_sectors = ptr_disk_sectors ( p ) ;
/* compare against the sector count matching the pointer's cached/dirty state */
unsigned mark_sectors = p . ptr . cached
? mark . cached_sectors
: mark . dirty_sectors ;
/* NOTE(review): the message below has an unbalanced '(' - confirm and fix */
bch2_fs_bug_on ( stale & & ! p . ptr . cached , c ,
" stale dirty pointer (ptr gen %u bucket %u " ,
p . ptr . gen , mark . gen ) ;
/* NOTE(review): stale is unsigned but is printed with %i */
bch2_fs_bug_on ( stale > 96 , c , " key too stale: %i " , stale ) ;
/* a pointer that isn't stale must be backed by a user-data bucket with enough sectors marked */
bch2_fs_bug_on ( ! stale & &
( mark . data_type ! = BCH_DATA_USER | |
mark_sectors < disk_sectors ) , c ,
" extent pointer not marked: %s: \n "
" type %u sectors %u < %u " ,
( bch2_bkey_val_to_text ( & PBUF ( buf ) , c , e . s_c ) , buf ) ,
mark . data_type ,
mark_sectors , disk_sectors ) ;
2017-03-16 22:18:50 -08:00
}
}
2018-11-09 01:24:07 -05:00
/*
 * Print an extent key's value to @out; all of the formatting is done by
 * bch2_bkey_ptrs_to_text().
 */
void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
			 struct bkey_s_c k)
{
	bch2_bkey_ptrs_to_text(out, c, k);
}
2019-05-12 22:23:30 -04:00
/*
 * Size limit for each crc entry type, indexed by extent entry type.
 * bch2_extent_merge() refuses a merge when the combined uncompressed size
 * minus one would exceed the limit for the entry's type. Only the three crc
 * entry types have an explicit initializer.
 */
static unsigned bch2_crc_field_size_max [ ] = {
[ BCH_EXTENT_ENTRY_crc32 ] = CRC32_SIZE_MAX ,
[ BCH_EXTENT_ENTRY_crc64 ] = CRC64_SIZE_MAX ,
[ BCH_EXTENT_ENTRY_crc128 ] = CRC128_SIZE_MAX ,
} ;
static void bch2_extent_crc_pack ( union bch_extent_crc * dst ,
struct bch_extent_crc_unpacked src )
{
# define set_common_fields(_dst, _src) \
_dst . csum_type = _src . csum_type , \
_dst . compression_type = _src . compression_type , \
_dst . _compressed_size = _src . compressed_size - 1 , \
_dst . _uncompressed_size = _src . uncompressed_size - 1 , \
_dst . offset = _src . offset
switch ( extent_entry_type ( to_entry ( dst ) ) ) {
case BCH_EXTENT_ENTRY_crc32 :
set_common_fields ( dst - > crc32 , src ) ;
dst - > crc32 . csum = * ( ( __le32 * ) & src . csum . lo ) ;
break ;
case BCH_EXTENT_ENTRY_crc64 :
set_common_fields ( dst - > crc64 , src ) ;
dst - > crc64 . nonce = src . nonce ;
dst - > crc64 . csum_lo = src . csum . lo ;
dst - > crc64 . csum_hi = * ( ( __le16 * ) & src . csum . hi ) ;
break ;
case BCH_EXTENT_ENTRY_crc128 :
set_common_fields ( dst - > crc128 , src ) ;
dst - > crc128 . nonce = src . nonce ;
dst - > crc128 . csum = src . csum ;
break ;
default :
BUG ( ) ;
}
# undef set_common_fields
}
2019-07-25 13:52:14 -04:00
void bch2_extent_crc_append ( struct bkey_i * k ,
struct bch_extent_crc_unpacked new )
2017-03-16 22:18:50 -08:00
{
2019-07-25 13:52:14 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( bkey_i_to_s ( k ) ) ;
union bch_extent_crc * crc = ( void * ) ptrs . end ;
2017-03-16 22:18:50 -08:00
if ( bch_crc_bytes [ new . csum_type ] < = 4 & &
2019-05-12 22:23:30 -04:00
new . uncompressed_size - 1 < = CRC32_SIZE_MAX & &
new . nonce < = CRC32_NONCE_MAX )
crc - > type = 1 < < BCH_EXTENT_ENTRY_crc32 ;
else if ( bch_crc_bytes [ new . csum_type ] < = 10 & &
new . uncompressed_size - 1 < = CRC64_SIZE_MAX & &
new . nonce < = CRC64_NONCE_MAX )
crc - > type = 1 < < BCH_EXTENT_ENTRY_crc64 ;
else if ( bch_crc_bytes [ new . csum_type ] < = 16 & &
new . uncompressed_size - 1 < = CRC128_SIZE_MAX & &
new . nonce < = CRC128_NONCE_MAX )
crc - > type = 1 < < BCH_EXTENT_ENTRY_crc128 ;
else
BUG ( ) ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
bch2_extent_crc_pack ( crc , new ) ;
2017-03-16 22:18:50 -08:00
2019-07-25 13:52:14 -04:00
k - > k . u64s + = extent_entry_u64s ( ptrs . end ) ;
EBUG_ON ( bkey_val_u64s ( & k - > k ) > BKEY_EXTENT_VAL_U64s_MAX ) ;
2018-09-27 21:08:39 -04:00
}
2017-03-16 22:18:50 -08:00
2019-07-25 13:52:14 -04:00
static inline void __extent_entry_insert ( struct bkey_i * k ,
2018-09-27 21:08:39 -04:00
union bch_extent_entry * dst ,
union bch_extent_entry * new )
{
2019-07-25 13:52:14 -04:00
union bch_extent_entry * end = bkey_val_end ( bkey_i_to_s ( k ) ) ;
2017-03-16 22:18:50 -08:00
2018-09-27 21:08:39 -04:00
memmove_u64s_up ( ( u64 * ) dst + extent_entry_u64s ( new ) ,
dst , ( u64 * ) end - ( u64 * ) dst ) ;
2019-07-25 13:52:14 -04:00
k - > k . u64s + = extent_entry_u64s ( new ) ;
2018-09-27 21:08:39 -04:00
memcpy_u64s_small ( dst , new , extent_entry_u64s ( new ) ) ;
}
2017-03-16 22:18:50 -08:00
2019-07-25 13:52:14 -04:00
/*
 * bch2_extent_ptr_decoded_append - add a decoded pointer to a key
 *
 * @k:	key to add the pointer to
 * @p:	decoded pointer: the pointer itself, the crc it must be covered by,
 *	and p->ec_nr stripe pointers. The type fields of p->ptr and p->ec[]
 *	are written by this function.
 *
 * The pointer is placed after a crc entry that compares equal to p->crc
 * (the crc obtained by unpacking with a NULL entry counts as matching at the
 * very start of the value); if there is no such crc, a new crc entry is
 * appended first.
 */
void bch2_extent_ptr_decoded_append ( struct bkey_i * k ,
2018-09-27 21:08:39 -04:00
struct extent_ptr_decoded * p )
{
2019-07-25 13:52:14 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( bkey_i_to_s ( k ) ) ;
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack ( & k - > k , NULL ) ;
2018-09-27 21:08:39 -04:00
union bch_extent_entry * pos ;
2018-11-01 15:13:19 -04:00
unsigned i ;
2017-03-16 22:18:50 -08:00
2018-09-27 21:08:39 -04:00
/* matches the crc unpacked from a NULL entry: insert at the very start */
if ( ! bch2_crc_unpacked_cmp ( crc , p - > crc ) ) {
2019-07-25 13:52:14 -04:00
pos = ptrs . start ;
2018-09-27 21:08:39 -04:00
goto found ;
}
2019-07-25 13:52:14 -04:00
/* otherwise look for an existing matching crc entry and insert right after it */
bkey_for_each_crc ( & k - > k , ptrs , crc , pos )
2018-09-27 21:08:39 -04:00
if ( ! bch2_crc_unpacked_cmp ( crc , p - > crc ) ) {
pos = extent_entry_next ( pos ) ;
goto found ;
}
2019-07-25 13:52:14 -04:00
/* no matching crc: append one and insert at the new end of the value */
bch2_extent_crc_append ( k , p - > crc ) ;
pos = bkey_val_end ( bkey_i_to_s ( k ) ) ;
2018-09-27 21:08:39 -04:00
found :
p - > ptr . type = 1 < < BCH_EXTENT_ENTRY_ptr ;
2019-07-25 13:52:14 -04:00
__extent_entry_insert ( k , pos , to_entry ( & p - > ptr ) ) ;
2018-11-01 15:13:19 -04:00
/*
 * Every stripe pointer is inserted at the same pos, so each insertion
 * shifts the entries inserted before it (including the pointer above)
 * further up.
 */
for ( i = 0 ; i < p - > ec_nr ; i + + ) {
p - > ec [ i ] . type = 1 < < BCH_EXTENT_ENTRY_stripe_ptr ;
2019-07-25 13:52:14 -04:00
__extent_entry_insert ( k , pos , to_entry ( & p - > ec [ i ] ) ) ;
2018-11-01 15:13:19 -04:00
}
2017-03-16 22:18:50 -08:00
}
/*
* bch_extent_normalize - clean up an extent , dropping stale pointers etc .
*
* Returns true if @ k should be dropped entirely
*
* For existing keys , only called when btree nodes are being rewritten , not when
* they ' re merely being compacted / resorted in memory .
*/
bool bch2_extent_normalize ( struct bch_fs * c , struct bkey_s k )
{
2018-11-01 15:10:01 -04:00
struct bch_extent_ptr * ptr ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
bch2_bkey_drop_ptrs ( k , ptr ,
ptr - > cached & &
ptr_stale ( bch_dev_bkey_exists ( c , ptr - > dev ) , ptr ) ) ;
2017-03-16 22:18:50 -08:00
2018-11-01 15:10:01 -04:00
/* will only happen if all pointers were cached: */
if ( ! bkey_val_u64s ( k . k ) )
2019-08-21 18:35:15 -04:00
k . k - > type = KEY_TYPE_discard ;
2017-03-16 22:18:50 -08:00
2019-08-21 18:35:15 -04:00
return bkey_whiteout ( k . k ) ;
2017-03-16 22:18:50 -08:00
}
2019-08-16 09:59:56 -04:00
void bch2_bkey_mark_replicas_cached ( struct bch_fs * c , struct bkey_s k ,
unsigned target ,
unsigned nr_desired_replicas )
2017-03-16 22:18:50 -08:00
{
2019-08-16 09:59:56 -04:00
struct bkey_ptrs ptrs = bch2_bkey_ptrs ( k ) ;
2018-11-01 15:13:19 -04:00
union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
2019-08-16 09:59:56 -04:00
int extra = bch2_bkey_durability ( c , k . s_c ) - nr_desired_replicas ;
2017-03-16 22:18:50 -08:00
if ( target & & extra > 0 )
2019-08-16 09:59:56 -04:00
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry ) {
2018-11-01 15:13:19 -04:00
int n = bch2_extent_ptr_durability ( c , p ) ;
2017-03-16 22:18:50 -08:00
if ( n & & n < = extra & &
2018-11-01 15:13:19 -04:00
! bch2_dev_in_target ( c , p . ptr . dev , target ) ) {
entry - > ptr . cached = true ;
2017-03-16 22:18:50 -08:00
extra - = n ;
}
}
if ( extra > 0 )
2019-08-16 09:59:56 -04:00
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry ) {
2018-11-01 15:13:19 -04:00
int n = bch2_extent_ptr_durability ( c , p ) ;
2017-03-16 22:18:50 -08:00
if ( n & & n < = extra ) {
2018-11-01 15:13:19 -04:00
entry - > ptr . cached = true ;
2017-03-16 22:18:50 -08:00
extra - = n ;
}
}
}
2018-11-01 15:10:01 -04:00
/*
 * bch2_extent_merge - try to merge extent @_r onto the end of extent @_l
 *
 * @c:	filesystem
 * @_l:	left key; modified in place on success
 * @_r:	right key
 *
 * A first pass walks the entries of @_l alongside the entries at the same
 * offsets in @_r and returns BCH_MERGE_NOMERGE as soon as a pair cannot be
 * merged. Only if every pair passes does a second pass update the crc entries
 * of @_l and grow the key by @_r's size.
 *
 * Returns BCH_MERGE_MERGE or BCH_MERGE_NOMERGE.
 */
enum merge_result bch2_extent_merge ( struct bch_fs * c ,
2019-06-09 16:56:16 -04:00
struct bkey_s _l , struct bkey_s _r )
2017-03-16 22:18:50 -08:00
{
2019-06-09 16:56:16 -04:00
struct bkey_s_extent l = bkey_s_to_extent ( _l ) ;
struct bkey_s_extent r = bkey_s_to_extent ( _r ) ;
union bch_extent_entry * en_l = l . v - > start ;
union bch_extent_entry * en_r = r . v - > start ;
2019-05-12 22:23:30 -04:00
struct bch_extent_crc_unpacked crc_l , crc_r ;
2017-03-16 22:18:50 -08:00
2019-06-09 16:56:16 -04:00
/* the values must have the same size for an entry-by-entry comparison */
if ( bkey_val_u64s ( l . k ) ! = bkey_val_u64s ( r . k ) )
2019-05-21 10:14:54 -04:00
return BCH_MERGE_NOMERGE ;
2019-06-09 16:56:16 -04:00
/* crc in effect for pointers that come before any crc entry */
crc_l = bch2_extent_crc_unpack ( l . k , NULL ) ;
2017-03-16 22:18:50 -08:00
2019-06-09 16:56:16 -04:00
/* pass 1: check that every pair of entries is mergeable */
extent_for_each_entry ( l , en_l ) {
/* entry of r at the same u64 offset as en_l */
en_r = vstruct_idx ( r . v , ( u64 * ) en_l - l . v - > _data ) ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
if ( extent_entry_type ( en_l ) ! = extent_entry_type ( en_r ) )
2018-11-01 15:10:01 -04:00
return BCH_MERGE_NOMERGE ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
switch ( extent_entry_type ( en_l ) ) {
case BCH_EXTENT_ENTRY_ptr : {
const struct bch_extent_ptr * lp = & en_l - > ptr ;
const struct bch_extent_ptr * rp = & en_r - > ptr ;
struct bch_dev * ca ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
/*
 * r's data must start exactly where l's data ends (using the
 * compressed size of the most recently seen crc of l), on the same
 * device and with the same generation number:
 */
if ( lp - > offset + crc_l . compressed_size ! = rp - > offset | |
lp - > dev ! = rp - > dev | |
lp - > gen ! = rp - > gen )
return BCH_MERGE_NOMERGE ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
/* We don't allow extents to straddle buckets: */
ca = bch_dev_bkey_exists ( c , lp - > dev ) ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
if ( PTR_BUCKET_NR ( ca , lp ) ! = PTR_BUCKET_NR ( ca , rp ) )
return BCH_MERGE_NOMERGE ;
break ;
}
case BCH_EXTENT_ENTRY_stripe_ptr :
/* stripe pointers must be identical */
if ( en_l - > stripe_ptr . block ! = en_r - > stripe_ptr . block | |
en_l - > stripe_ptr . idx ! = en_r - > stripe_ptr . idx )
return BCH_MERGE_NOMERGE ;
break ;
case BCH_EXTENT_ENTRY_crc32 :
case BCH_EXTENT_ENTRY_crc64 :
case BCH_EXTENT_ENTRY_crc128 :
2019-06-09 16:56:16 -04:00
crc_l = bch2_extent_crc_unpack ( l . k , entry_to_crc ( en_l ) ) ;
crc_r = bch2_extent_crc_unpack ( r . k , entry_to_crc ( en_r ) ) ;
2019-05-12 22:23:30 -04:00
/* same checksum type, compression type and nonce */
if ( crc_l . csum_type ! = crc_r . csum_type | |
crc_l . compression_type ! = crc_r . compression_type | |
crc_l . nonce ! = crc_r . nonce )
return BCH_MERGE_NOMERGE ;
/* l's live data must run to the end of its crc region, and r's must start at the beginning of its own */
if ( crc_l . offset + crc_l . live_size ! = crc_l . compressed_size | |
crc_r . offset )
return BCH_MERGE_NOMERGE ;
if ( ! bch2_checksum_mergeable ( crc_l . csum_type ) )
return BCH_MERGE_NOMERGE ;
/* compressed extents are never merged */
if ( crc_l . compression_type )
return BCH_MERGE_NOMERGE ;
/* a checksummed result must not exceed encoded_extent_max */
if ( crc_l . csum_type & &
crc_l . uncompressed_size +
crc_r . uncompressed_size > c - > sb . encoded_extent_max )
return BCH_MERGE_NOMERGE ;
/* the merged size must still fit the size limit for this crc entry type */
if ( crc_l . uncompressed_size + crc_r . uncompressed_size - 1 >
bch2_crc_field_size_max [ extent_entry_type ( en_l ) ] )
return BCH_MERGE_NOMERGE ;
break ;
default :
2017-03-16 22:18:50 -08:00
return BCH_MERGE_NOMERGE ;
2019-05-12 22:23:30 -04:00
}
2017-03-16 22:18:50 -08:00
}
2019-06-09 16:56:16 -04:00
/* pass 2: everything is mergeable - fold r's crcs into l's */
extent_for_each_entry ( l , en_l ) {
2019-05-12 22:23:30 -04:00
/* note: these shadow the function-scope crc_l / crc_r */
struct bch_extent_crc_unpacked crc_l , crc_r ;
2019-06-09 16:56:16 -04:00
en_r = vstruct_idx ( r . v , ( u64 * ) en_l - l . v - > _data ) ;
2017-03-16 22:18:50 -08:00
2019-05-12 22:23:30 -04:00
if ( ! extent_entry_is_crc ( en_l ) )
continue ;
2019-06-09 16:56:16 -04:00
crc_l = bch2_extent_crc_unpack ( l . k , entry_to_crc ( en_l ) ) ;
crc_r = bch2_extent_crc_unpack ( r . k , entry_to_crc ( en_r ) ) ;
2019-05-12 22:23:30 -04:00
/* the length handed to the checksum merge is r's uncompressed size shifted left by 9 */
crc_l . csum = bch2_checksum_merge ( crc_l . csum_type ,
crc_l . csum ,
crc_r . csum ,
crc_r . uncompressed_size < < 9 ) ;
crc_l . uncompressed_size + = crc_r . uncompressed_size ;
crc_l . compressed_size + = crc_r . compressed_size ;
bch2_extent_crc_pack ( entry_to_crc ( en_l ) , crc_l ) ;
2017-03-16 22:18:50 -08:00
}
2019-06-09 16:56:16 -04:00
bch2_key_resize ( l . k , l . k - > size + r . k - > size ) ;
2017-03-16 22:18:50 -08:00
return BCH_MERGE_MERGE ;
}
/*
 * When merging an extent that we're inserting into a btree node, the new merged
 * extent could overlap with an existing 0 size extent - if we don't fix that,
 * it'll break the btree node iterator so this code finds those 0 size extents
 * and shifts them out of the way.
 *
 * Also unpacks and repacks.
 *
 * @c:		filesystem
 * @iter:	btree iterator; the leaf node and node iterator at level 0 are used
 * @l:		packed left key
 * @r:		packed right key
 * @back_merge:	selects which packed key receives the result: @l if true,
 *		@r if false
 *
 * Returns true only if bch2_bkey_merge() reported a complete merge
 * (BCH_MERGE_MERGE) and the result could be saved back into the node.
 */
static bool bch2_extent_merge_inline ( struct bch_fs * c ,
struct btree_iter * iter ,
struct bkey_packed * l ,
struct bkey_packed * r ,
bool back_merge )
{
struct btree * b = iter - > l [ 0 ] . b ;
struct btree_node_iter * node_iter = & iter - > l [ 0 ] . iter ;
2016-07-21 19:05:06 -08:00
BKEY_PADDED ( k ) li , ri ;
/* m: the packed key that gets overwritten; mi: the unpacked copy matching it */
struct bkey_packed * m = back_merge ? l : r ;
struct bkey_i * mi = back_merge ? & li . k : & ri . k ;
struct bset_tree * t = bch2_bkey_to_bset ( b , m ) ;
enum merge_result ret ;
EBUG_ON ( bkey_written ( b , m ) ) ;
2017-03-16 22:18:50 -08:00
2019-05-29 20:06:06 -04:00
/* values this large are not handled: refuse the merge */
if ( bkey_val_u64s ( l ) > BKEY_EXTENT_VAL_U64s_MAX | |
bkey_val_u64s ( r ) > BKEY_EXTENT_VAL_U64s_MAX )
return BCH_MERGE_NOMERGE ;
2017-03-16 22:18:50 -08:00
/*
 * We need to save copies of both l and r, because we might get a
 * partial merge (which modifies both) and then fails to repack
 */
bch2_bkey_unpack ( b , & li . k , l ) ;
bch2_bkey_unpack ( b , & ri . k , r ) ;
2019-06-09 16:56:16 -04:00
ret = bch2_bkey_merge ( c ,
bkey_i_to_s ( & li . k ) ,
bkey_i_to_s ( & ri . k ) ) ;
2016-07-21 19:05:06 -08:00
if ( ret = = BCH_MERGE_NOMERGE )
return false ;
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
/*
 * check if we overlap with deleted extents - would break the sort
 * order:
 */
if ( back_merge ) {
/* a deleted key right after m, positioned at or before the merged key's end */
struct bkey_packed * n = bkey_next ( m ) ;
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
if ( n ! = btree_bkey_last ( b , t ) & &
bkey_cmp_left_packed ( b , n , & li . k . k . p ) < = 0 & &
bkey_deleted ( n ) )
2017-03-16 22:18:50 -08:00
return false ;
2016-07-21 19:05:06 -08:00
} else if ( ret = = BCH_MERGE_MERGE ) {
/* a key before m that is positioned past the merged key's start */
struct bkey_packed * prev = bch2_bkey_prev_all ( b , t , m ) ;
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
if ( prev & &
bkey_cmp_left_packed_byval ( b , prev ,
bkey_start_pos ( & li . k . k ) ) > 0 )
2017-03-16 22:18:50 -08:00
return false ;
2016-07-21 19:05:06 -08:00
}
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
if ( ret = = BCH_MERGE_PARTIAL ) {
/* partial merge: save m's own side, then copy the other side back unpacked */
if ( ! extent_i_save ( b , m , mi ) )
return false ;
2017-03-16 22:18:50 -08:00
if ( ! back_merge )
bkey_copy ( packed_to_bkey ( l ) , & li . k ) ;
else
bkey_copy ( packed_to_bkey ( r ) , & ri . k ) ;
2016-07-21 19:05:06 -08:00
} else {
/* full merge: li holds the merged key */
if ( ! extent_i_save ( b , m , & li . k ) )
2017-03-16 22:18:50 -08:00
return false ;
2016-07-21 19:05:06 -08:00
}
2017-03-16 22:18:50 -08:00
2018-08-11 19:12:05 -04:00
/* m was modified in place: repair the bset's lookup state and the node iterators */
bch2_bset_fix_invalidated_key ( b , m ) ;
2016-07-21 19:05:06 -08:00
bch2_btree_node_iter_fix ( iter , b , node_iter ,
2018-08-11 19:12:05 -04:00
m , m - > u64s , m - > u64s ) ;
2016-07-21 19:05:06 -08:00
verify_modified_extent ( iter , m ) ;
2017-03-16 22:18:50 -08:00
2016-07-21 19:05:06 -08:00
return ret = = BCH_MERGE_MERGE ;
2017-03-16 22:18:50 -08:00
}
2019-01-21 15:32:13 -05:00
/*
 * bch2_check_range_allocated - check whether a range of the extents btree is
 * fully allocated
 *
 * @c:			filesystem
 * @pos:		start of the range
 * @size:		length of the range; added to @pos.offset to get the end
 * @nr_replicas:	number of allocated pointers each key must have
 *
 * Walks the extents btree from @pos and stops at the first key that starts at
 * or past the end of the range.
 *
 * Returns true if every key visited has at least @nr_replicas allocated
 * pointers according to bch2_bkey_nr_ptrs_allocated(). Returns false if some
 * key has fewer, or if the btree walk itself reported an error - a range we
 * could not inspect must not be reported as allocated.
 */
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
				unsigned nr_replicas)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bpos end = pos;
	struct bkey_s_c k;
	bool ret = true;
	int err = 0;

	end.offset += size;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
			   BTREE_ITER_SLOTS, k, err) {
		if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
			break;

		if (nr_replicas > bch2_bkey_nr_ptrs_allocated(k)) {
			ret = false;
			break;
		}
	}

	/* the walk failed part way: the rest of the range is unknown */
	if (err)
		ret = false;

	bch2_trans_exit(&trans);

	return ret;
}
2018-11-01 15:10:01 -04:00
2019-01-21 15:32:13 -05:00
unsigned bch2_bkey_nr_ptrs_allocated ( struct bkey_s_c k )
{
unsigned ret = 0 ;
switch ( k . k - > type ) {
case KEY_TYPE_extent : {
struct bkey_s_c_extent e = bkey_s_c_to_extent ( k ) ;
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
extent_for_each_ptr_decode ( e , p , entry )
ret + = ! p . ptr . cached & &
p . crc . compression_type = = BCH_COMPRESSION_NONE ;
break ;
}
case KEY_TYPE_reservation :
ret = bkey_s_c_to_reservation ( k ) . v - > nr_replicas ;
break ;
}
return ret ;
}
2018-11-01 15:10:01 -04:00
/* KEY_TYPE_reservation: */
const char * bch2_reservation_invalid ( const struct bch_fs * c , struct bkey_s_c k )
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation ( k ) ;
if ( bkey_val_bytes ( k . k ) ! = sizeof ( struct bch_reservation ) )
return " incorrect value size " ;
if ( ! r . v - > nr_replicas | | r . v - > nr_replicas > BCH_REPLICAS_MAX )
return " invalid nr_replicas " ;
return NULL ;
}
void bch2_reservation_to_text ( struct printbuf * out , struct bch_fs * c ,
struct bkey_s_c k )
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation ( k ) ;
pr_buf ( out , " generation %u replicas %u " ,
le32_to_cpu ( r . v - > generation ) ,
r . v - > nr_replicas ) ;
}
enum merge_result bch2_reservation_merge ( struct bch_fs * c ,
2019-06-09 16:56:16 -04:00
struct bkey_s _l , struct bkey_s _r )
2018-11-01 15:10:01 -04:00
{
2019-06-09 16:56:16 -04:00
struct bkey_s_reservation l = bkey_s_to_reservation ( _l ) ;
struct bkey_s_reservation r = bkey_s_to_reservation ( _r ) ;
2018-11-01 15:10:01 -04:00
2019-06-09 16:56:16 -04:00
if ( l . v - > generation ! = r . v - > generation | |
l . v - > nr_replicas ! = r . v - > nr_replicas )
2018-11-01 15:10:01 -04:00
return BCH_MERGE_NOMERGE ;
2019-06-09 16:56:16 -04:00
if ( ( u64 ) l . k - > size + r . k - > size > KEY_SIZE_MAX ) {
bch2_key_resize ( l . k , KEY_SIZE_MAX ) ;
__bch2_cut_front ( l . k - > p , r . s ) ;
2018-11-01 15:10:01 -04:00
return BCH_MERGE_PARTIAL ;
}
2019-06-09 16:56:16 -04:00
bch2_key_resize ( l . k , l . k - > size + r . k - > size ) ;
2018-11-01 15:10:01 -04:00
return BCH_MERGE_MERGE ;
}