// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Code for managing the extent btree and dynamically updating the writeback
 * dirty sector count.
 */

#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "debug.h"
#include "dirent.h"
#include "disk_groups.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
#include "journal.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
#include "trace.h"
#include "util.h"
#include "xattr.h"
/* Common among btree and extent ptrs */

const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
{
	const struct bch_extent_ptr *ptr;

	extent_for_each_ptr(e, ptr)
		if (ptr->dev == dev)
			return ptr;

	return NULL;
}
void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
{
	struct bch_extent_ptr *ptr;

	bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
}
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
{
	const struct bch_extent_ptr *ptr;

	extent_for_each_ptr(e, ptr) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

		if (ca->mi.group &&
		    ca->mi.group - 1 == group)
			return ptr;
	}

	return NULL;
}
const struct bch_extent_ptr *
bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
{
	const struct bch_extent_ptr *ptr;

	extent_for_each_ptr(e, ptr)
		if (bch2_dev_in_target(c, ptr->dev, target) &&
		    (!ptr->cached ||
		     !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
			return ptr;

	return NULL;
}
unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e)
{
	const struct bch_extent_ptr *ptr;
	unsigned nr_ptrs = 0;

	extent_for_each_ptr(e, ptr)
		nr_ptrs++;

	return nr_ptrs;
}
unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
{
	struct bkey_s_c_extent e;
	const struct bch_extent_ptr *ptr;
	unsigned nr_ptrs = 0;

	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		e = bkey_s_c_to_extent(k);

		extent_for_each_ptr(e, ptr)
			nr_ptrs += !ptr->cached;
		break;

	case BCH_RESERVATION:
		nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
		break;
	}

	return nr_ptrs;
}
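
/*
 * How many device failures the data pointed to by @p can survive: the most
 * durable of the pointer's own device durability (zero if the device has
 * failed) and the redundancy of any erasure coded stripe it participates in.
 * Cached pointers contribute nothing, as they aren't counted as replicas.
 */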
static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
					   struct extent_ptr_decoded p)
{
	unsigned i, durability = 0;
	struct bch_dev *ca;

	if (p.ptr.cached)
		return 0;

	ca = bch_dev_bkey_exists(c, p.ptr.dev);

	if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
		durability = max_t(unsigned, durability, ca->mi.durability);

	for (i = 0; i < p.ec_nr; i++) {
		struct stripe *s =
			genradix_ptr(&c->stripes[0], p.idx);

		if (WARN_ON(!s))
			continue;

		durability = max_t(unsigned, durability, s->nr_redundant);
	}

	return durability;
}
unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
{
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned durability = 0;

	extent_for_each_ptr_decode(e, p, entry)
		durability += bch2_extent_ptr_durability(c, p);

	return durability;
}
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
{
	unsigned ret = 0;

	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED: {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const union bch_extent_entry *entry;
		struct extent_ptr_decoded p;

		extent_for_each_ptr_decode(e, p, entry)
			if (!p.ptr.cached &&
			    p.crc.compression_type != BCH_COMPRESSION_NONE &&
			    p.crc.compressed_size < p.crc.live_size)
				ret += p.crc.compressed_size;
	}
	}

	return ret;
}
bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
			     struct bch_extent_ptr m, u64 offset)
{
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	extent_for_each_ptr_decode(e, p, entry)
		if (p.ptr.dev == m.dev &&
		    p.ptr.gen == m.gen &&
		    (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
		    (s64) m.offset - offset)
			return true;

	return false;
}
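
/*
 * Extent entries are variable length and only walkable forwards (each entry
 * gives the offset of the next, not the previous), so finding the entry
 * before @entry means walking from the start of the value.
 */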
static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
						 union bch_extent_entry *entry)
{
	union bch_extent_entry *i = e.v->start;

	if (i == entry)
		return NULL;

	while (extent_entry_next(i) != entry)
		i = extent_entry_next(i);
	return i;
}
union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
					     struct bch_extent_ptr *ptr)
{
	union bch_extent_entry *dst, *src, *prev;
	bool drop_crc = true;

	EBUG_ON(ptr < &e.v->start->ptr ||
		ptr >= &extent_entry_last(e)->ptr);
	EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);

	src = extent_entry_next(to_entry(ptr));
	if (src != extent_entry_last(e) &&
	    !extent_entry_is_crc(src))
		drop_crc = false;

	dst = to_entry(ptr);
	while ((prev = extent_entry_prev(e, dst))) {
		if (extent_entry_is_ptr(prev))
			break;

		if (extent_entry_is_crc(prev)) {
			if (drop_crc)
				dst = prev;
			break;
		}

		dst = prev;
	}

	memmove_u64s_down(dst, src,
			  (u64 *) extent_entry_last(e) - (u64 *) src);
	e.k->u64s -= (u64 *) src - (u64 *) dst;

	return dst;
}
static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
				  struct bch_extent_crc_unpacked n)
{
	return !u.compression_type &&
		u.csum_type &&
		u.uncompressed_size > u.live_size &&
		bch2_csum_type_is_encryption(u.csum_type) ==
		bch2_csum_type_is_encryption(n.csum_type);
}
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e,
				 struct bch_extent_crc_unpacked n)
{
	struct bch_extent_crc_unpacked crc;
	const union bch_extent_entry *i;

	if (!n.csum_type)
		return false;

	extent_for_each_crc(e, crc, i)
		if (can_narrow_crc(crc, n))
			return true;

	return false;
}
/*
 * We're writing another replica for this extent, so while we've got the data
 * in memory we'll be computing a new checksum for the currently live data.
 *
 * If there are other replicas we aren't moving, and they are checksummed but
 * not compressed, we can modify them to point to only the data that is
 * currently live (so that readers won't have to bounce) while we've got the
 * checksum we need:
 */
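/*
 * For example (illustrative numbers): a pointer's existing crc might cover
 * 128 sectors of which only the 96 starting at crc.offset 16 are still live.
 * After narrowing to a checksum computed over just those 96 live sectors, the
 * pointer's offset is advanced by the old crc.offset and the new crc has
 * offset 0 and uncompressed_size == live_size, so readers no longer have to
 * bounce the dead sectors just to verify the checksum.
 */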
bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
			     struct bch_extent_crc_unpacked n)
{
	struct bch_extent_crc_unpacked u;
	struct extent_ptr_decoded p;
	union bch_extent_entry *i;
	bool ret = false;

	/* Find a checksum entry that covers only live data: */
	if (!n.csum_type) {
		extent_for_each_crc(extent_i_to_s(e), u, i)
			if (!u.compression_type &&
			    u.csum_type &&
			    u.live_size == u.uncompressed_size) {
				n = u;
				goto found;
			}
		return false;
	}
found:
	BUG_ON(n.compression_type);
	BUG_ON(n.offset);
	BUG_ON(n.live_size != e->k.size);

restart_narrow_pointers:
	extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
		if (can_narrow_crc(p.crc, n)) {
			bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
			p.ptr.offset += p.crc.offset;
			p.crc = n;
			bch2_extent_ptr_decoded_append(e, &p);
			ret = true;
			goto restart_narrow_pointers;
		}

	return ret;
}
/* returns true if not equal */
static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
					 struct bch_extent_crc_unpacked r)
{
	return (l.csum_type != r.csum_type ||
		l.compression_type != r.compression_type ||
		l.compressed_size != r.compressed_size ||
		l.uncompressed_size != r.uncompressed_size ||
		l.offset != r.offset ||
		l.live_size != r.live_size ||
		l.nonce != r.nonce ||
		bch2_crc_cmp(l.csum, r.csum));
}
static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
{
	struct bch_extent_ptr *ptr;

	bch2_extent_drop_ptrs(e, ptr,
			      ptr->cached &&
			      ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
}
bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
{
	return bch2_extent_normalize(c, k);
}
void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
{
	switch (k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED: {
		union bch_extent_entry *entry;
		u64 *d = (u64 *) bkeyp_val(f, k);
		unsigned i;

		for (i = 0; i < bkeyp_val_u64s(f, k); i++)
			d[i] = swab64(d[i]);

		for (entry = (union bch_extent_entry *) d;
		     entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
		     entry = extent_entry_next(entry)) {
			switch (extent_entry_type(entry)) {
			case BCH_EXTENT_ENTRY_ptr:
				break;
			case BCH_EXTENT_ENTRY_crc32:
				entry->crc32.csum = swab32(entry->crc32.csum);
				break;
			case BCH_EXTENT_ENTRY_crc64:
				entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
				entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
				break;
			case BCH_EXTENT_ENTRY_crc128:
				entry->crc128.csum.hi = (__force __le64)
					swab64((__force u64) entry->crc128.csum.hi);
				entry->crc128.csum.lo = (__force __le64)
					swab64((__force u64) entry->crc128.csum.lo);
				break;
			case BCH_EXTENT_ENTRY_stripe_ptr:
				break;
			}
		}
		break;
	}
	}
}
static const char *extent_ptr_invalid(const struct bch_fs *c,
				      struct bkey_s_c_extent e,
				      const struct bch_extent_ptr *ptr,
				      unsigned size_ondisk,
				      bool metadata)
{
	const struct bch_extent_ptr *ptr2;
	struct bch_dev *ca;

	if (ptr->dev >= c->sb.nr_devices ||
	    !c->devs[ptr->dev])
		return "pointer to invalid device";

	ca = bch_dev_bkey_exists(c, ptr->dev);
	if (!ca)
		return "pointer to invalid device";

	extent_for_each_ptr(e, ptr2)
		if (ptr != ptr2 && ptr->dev == ptr2->dev)
			return "multiple pointers to same device";

	if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
		return "offset past end of device";

	if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
		return "offset before first bucket";

	if (bucket_remainder(ca, ptr->offset) +
	    size_ondisk > ca->mi.bucket_size)
		return "spans multiple buckets";

	return NULL;
}
static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
			      struct bkey_s_c_extent e)
{
	const union bch_extent_entry *entry;
	struct bch_extent_crc_unpacked crc;
	const struct bch_extent_ptr *ptr;
	const struct bch_extent_stripe_ptr *ec;
	struct bch_dev *ca;
	bool first = true;

	extent_for_each_entry(e, entry) {
		if (!first)
			pr_buf(out, " ");

		switch (__extent_entry_type(entry)) {
		case BCH_EXTENT_ENTRY_ptr:
			ptr = entry_to_ptr(entry);
			ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
				? bch_dev_bkey_exists(c, ptr->dev)
				: NULL;

			pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
			       (u64) ptr->offset, ptr->gen,
			       ptr->cached ? " cached" : "",
			       ca && ptr_stale(ca, ptr)
			       ? " stale" : "");
			break;
		case BCH_EXTENT_ENTRY_crc32:
		case BCH_EXTENT_ENTRY_crc64:
		case BCH_EXTENT_ENTRY_crc128:
			crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));

			pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
			       crc.compressed_size,
			       crc.uncompressed_size,
			       crc.offset, crc.nonce,
			       crc.csum_type,
			       crc.compression_type);
			break;
		case BCH_EXTENT_ENTRY_stripe_ptr:
			ec = &entry->stripe_ptr;

			pr_buf(out, "ec: idx %llu block %u",
			       (u64) ec->idx, ec->block);
			break;
		default:
			pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
			goto out;
		}

		first = false;
	}
out:
	if (bkey_extent_is_cached(e.k))
		pr_buf(out, " cached");
}
static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
						   unsigned dev)
{
	struct bch_dev_io_failures *i;

	for (i = f->devs; i < f->devs + f->nr; i++)
		if (i->dev == dev)
			return i;

	return NULL;
}

void bch2_mark_io_failure(struct bch_io_failures *failed,
			  struct extent_ptr_decoded *p)
{
	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);

	if (!f) {
		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));

		f = &failed->devs[failed->nr++];
		f->dev		= p->ptr.dev;
		f->idx		= p->idx;
		f->nr_failed	= 1;
		f->nr_retries	= 0;
	} else if (p->idx != f->idx) {
		f->idx		= p->idx;
		f->nr_failed	= 1;
		f->nr_retries	= 0;
	} else {
		f->nr_failed++;
	}
}
/*
 * returns true if p1 is better than p2:
 */
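/*
 * When both pointers can be read directly, the choice is randomized but
 * weighted by measured read latency: e.g. (illustrative numbers) if p1's
 * device currently has a 1ms read latency and p2's 3ms, then
 * bch2_rand_range(l1 + l2) > l1 picks p1 roughly three times out of four.
 */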
static inline bool ptr_better(struct bch_fs *c,
			      const struct extent_ptr_decoded p1,
			      const struct extent_ptr_decoded p2)
{
	if (likely(!p1.idx && !p2.idx)) {
		struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
		struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);

		u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
		u64 l2 = atomic64_read(&dev2->cur_latency[READ]);

		/* Pick at random, biased in favor of the faster device: */

		return bch2_rand_range(l1 + l2) > l1;
	}

	if (force_reconstruct_read(c))
		return p1.idx > p2.idx;

	return p1.idx < p2.idx;
}
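
/*
 * Walk the extent's pointers and pick the best one to read from. p.idx
 * encodes the read path for a pointer: 0 means reading the replica directly,
 * and higher values select reconstruct reads via the pointer's erasure coded
 * stripes - which is why a pointer is skipped entirely once p.idx exceeds
 * p.ec_nr.
 */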
static int extent_pick_read_device(struct bch_fs *c,
				   struct bkey_s_c_extent e,
				   struct bch_io_failures *failed,
				   struct extent_ptr_decoded *pick)
{
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bch_dev_io_failures *f;
	struct bch_dev *ca;
	int ret = 0;

	extent_for_each_ptr_decode(e, p, entry) {
		ca = bch_dev_bkey_exists(c, p.ptr.dev);

		if (p.ptr.cached && ptr_stale(ca, &p.ptr))
			continue;

		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
		if (f)
			p.idx = f->nr_failed < f->nr_retries
				? f->idx
				: f->idx + 1;

		if (!p.idx &&
		    !bch2_dev_is_readable(ca))
			p.idx++;

		if (force_reconstruct_read(c) &&
		    !p.idx && p.ec_nr)
			p.idx++;

		if (p.idx >= p.ec_nr + 1)
			continue;

		if (ret && !ptr_better(c, p, *pick))
			continue;

		*pick = p;
		ret = 1;
	}

	return ret;
}
/* Btree ptrs */

const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	if (bkey_extent_is_cached(k.k))
		return "cached";

	if (k.k->size)
		return "nonzero key size";

	if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
		return "value too big";

	switch (k.k->type) {
	case BCH_EXTENT: {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const union bch_extent_entry *entry;
		const struct bch_extent_ptr *ptr;
		const char *reason;

		extent_for_each_entry(e, entry) {
			if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
				return "invalid extent entry type";

			if (!extent_entry_is_ptr(entry))
				return "has non ptr field";
		}

		extent_for_each_ptr(e, ptr) {
			reason = extent_ptr_invalid(c, e, ptr,
						    c->opts.btree_node_size,
						    true);
			if (reason)
				return reason;
		}

		return NULL;
	}

	default:
		return "invalid value type";
	}
}
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
			       struct bkey_s_c k)
{
	struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
	const struct bch_extent_ptr *ptr;
	unsigned seq;
	const char *err;
	char buf[160];
	struct bucket_mark mark;
	struct bch_dev *ca;
	unsigned replicas = 0;
	bool bad;

	extent_for_each_ptr(e, ptr) {
		ca = bch_dev_bkey_exists(c, ptr->dev);
		replicas++;

		if (!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags))
			continue;

		err = "stale";
		if (ptr_stale(ca, ptr))
			goto err;

		do {
			seq = read_seqcount_begin(&c->gc_pos_lock);
			mark = ptr_bucket_mark(ca, ptr);

			bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
				(mark.data_type != BCH_DATA_BTREE ||
				 mark.dirty_sectors < c->opts.btree_node_size);
		} while (read_seqcount_retry(&c->gc_pos_lock, seq));

		err = "inconsistent";
		if (bad)
			goto err;
	}

	if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
	    !bch2_bkey_replicas_marked(c, btree_node_type(b),
				       e.s_c, false)) {
		bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
		bch2_fs_bug(c,
			"btree key bad (replicas not marked in superblock):\n%s",
			buf);
		return;
	}

	return;
err:
	bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
	bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x",
		    err, buf, PTR_BUCKET_NR(ca, ptr),
		    mark.gen, (unsigned) mark.v.counter);
}
void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
	const char *invalid;

	if (bkey_extent_is_data(k.k))
		extent_print_ptrs(out, c, bkey_s_c_to_extent(k));

	invalid = bch2_btree_ptr_invalid(c, k);
	if (invalid)
		pr_buf(out, " invalid: %s", invalid);
}
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
			struct bch_io_failures *failed,
			struct extent_ptr_decoded *pick)
{
	return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
				       failed, pick);
}
/* Extents */

bool __bch2_cut_front(struct bpos where, struct bkey_s k)
{
	u64 len = 0;

	if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0)
		return false;

	EBUG_ON(bkey_cmp(where, k.k->p) > 0);

	len = k.k->p.offset - where.offset;

	BUG_ON(len > k.k->size);

	/*
	 * Don't readjust offset if the key size is now 0, because that could
	 * cause offset to point to the next bucket:
	 */
	if (!len)
		k.k->type = KEY_TYPE_DELETED;
	else if (bkey_extent_is_data(k.k)) {
		struct bkey_s_extent e = bkey_s_to_extent(k);
		union bch_extent_entry *entry;
		bool seen_crc = false;

		extent_for_each_entry(e, entry) {
			switch (extent_entry_type(entry)) {
			case BCH_EXTENT_ENTRY_ptr:
				if (!seen_crc)
					entry->ptr.offset += e.k->size - len;
				break;
			case BCH_EXTENT_ENTRY_crc32:
				entry->crc32.offset += e.k->size - len;
				break;
			case BCH_EXTENT_ENTRY_crc64:
				entry->crc64.offset += e.k->size - len;
				break;
			case BCH_EXTENT_ENTRY_crc128:
				entry->crc128.offset += e.k->size - len;
				break;
			case BCH_EXTENT_ENTRY_stripe_ptr:
				break;
			}

			if (extent_entry_is_crc(entry))
				seen_crc = true;
		}
	}

	k.k->size = len;

	return true;
}
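
/*
 * Illustrative example for __bch2_cut_front() above (hypothetical numbers):
 * cutting the front off an extent covering sectors 0..8 at offset 4 leaves
 * 4..8, dropping e.k->size - len == 4 sectors. A bare pointer (one with no
 * preceding crc entry) is advanced by those 4 sectors so it still points at
 * the first live sector; checksummed data can't have its pointer moved
 * without invalidating the checksum, so the crc entry's offset field is
 * bumped instead.
 */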
bool bch2_cut_back(struct bpos where, struct bkey *k)
{
	u64 len = 0;

	if (bkey_cmp(where, k->p) >= 0)
		return false;

	EBUG_ON(bkey_cmp(where, bkey_start_pos(k)) < 0);

	len = where.offset - bkey_start_offset(k);

	BUG_ON(len > k->size);

	k->p = where;
	k->size = len;

	if (!len)
		k->type = KEY_TYPE_DELETED;

	return true;
}
/**
 * bch_key_resize - adjust size of @k
 *
 * bkey_start_offset(k) will be preserved, modifies where the extent ends
 */
void bch2_key_resize(struct bkey *k,
		     unsigned new_size)
{
	k->p.offset -= k->size;
	k->p.offset += new_size;
	k->size = new_size;
}
static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
			  struct bkey_i *src)
{
	struct bkey_format *f = &b->format;
	struct bkey_i *dst_unpacked;
	struct bkey_packed tmp;

	if ((dst_unpacked = packed_to_bkey(dst)))
		dst_unpacked->k = src->k;
	else if (bch2_bkey_pack_key(&tmp, &src->k, f))
		memcpy_u64s(dst, &tmp, f->key_u64s);
	else
		return false;

	memcpy_u64s(bkeyp_val(f, dst), &src->v, bkey_val_u64s(&src->k));
	return true;
}
struct extent_insert_state {
	struct btree_insert		*trans;
	struct btree_insert_entry	*insert;
	struct bpos			committed;

	/* for deleting: */
	struct bkey_i			whiteout;
	bool				update_journal;
	bool				update_btree;
	bool				deleting;
};

static bool bch2_extent_merge_inline(struct bch_fs *,
				     struct btree_iter *,
				     struct bkey_packed *,
				     struct bkey_packed *,
				     bool);
static void verify_extent_nonoverlapping(struct btree *b,
					 struct btree_node_iter *_iter,
					 struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree_node_iter iter;
	struct bkey_packed *k;
	struct bkey uk;

	iter = *_iter;
	k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
	BUG_ON(k &&
	       (uk = bkey_unpack_key(b, k),
		bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));

	iter = *_iter;
	k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
#if 0
	BUG_ON(k &&
	       (uk = bkey_unpack_key(b, k),
		bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
#else
	if (k &&
	    (uk = bkey_unpack_key(b, k),
	     bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
		char buf1[100];
		char buf2[100];

		bch2_bkey_to_text(&PBUF(buf1), &insert->k);
		bch2_bkey_to_text(&PBUF(buf2), &uk);

		bch2_dump_btree_node(b);
		panic("insert > next:\n"
		      "insert %s\n"
		      "next %s\n",
		      buf1, buf2);
	}
#endif
#endif
}
static void verify_modified_extent(struct btree_iter *iter,
				   struct bkey_packed *k)
{
	bch2_btree_iter_verify(iter, iter->l[0].b);
	bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
}
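
/*
 * Insert @insert into the current bset, first attempting to merge it inline
 * with the unwritten keys immediately before and after the insert position;
 * only if neither merge consumes it is it inserted as a new key.
 */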
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
			       struct bkey_i *insert)
{
	struct btree_iter_level *l = &iter->l[0];
	struct btree_node_iter node_iter;
	struct bkey_packed *k;

	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));

	EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
	verify_extent_nonoverlapping(l->b, &l->iter, insert);

	node_iter = l->iter;
	k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
	if (k && !bkey_written(l->b, k) &&
	    bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true))
		return;

	node_iter = l->iter;
	k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
	if (k && !bkey_written(l->b, k) &&
	    bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false))
		return;

	k = bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));

	bch2_bset_insert(l->b, &l->iter, k, insert, 0);
	bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
	bch2_btree_iter_verify(iter, l->b);
}
static void extent_insert_committed(struct extent_insert_state *s)
{
	struct bch_fs *c = s->trans->c;
	struct btree_iter *iter = s->insert->iter;
	struct bkey_i *insert = s->insert->k;
	BKEY_PADDED(k) split;

	EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
	EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);

	bkey_copy(&split.k, insert);
	if (s->deleting)
		split.k.k.type = KEY_TYPE_DISCARD;

	bch2_cut_back(s->committed, &split.k.k);

	if (!bkey_cmp(s->committed, iter->pos))
		return;

	bch2_btree_iter_set_pos_same_leaf(iter, s->committed);

	if (s->update_btree) {
		if (debug_check_bkeys(c))
			bch2_bkey_debugcheck(c, iter->l[0].b,
					     bkey_i_to_s_c(&split.k));

		EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);

		extent_bset_insert(c, iter, &split.k);
	}

	if (s->update_journal) {
		bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
		if (s->deleting)
			split.k.k.type = KEY_TYPE_DISCARD;

		bch2_cut_back(s->committed, &split.k.k);

		EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);

		bch2_btree_journal_key(s->trans, iter, &split.k);
	}

	bch2_cut_front(s->committed, insert);

	insert->k.needs_whiteout = false;
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
	struct btree *b = iter->l[0].b;

	BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);

	bch2_cut_back(b->key.k.p, &k->k);

	BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
}
enum btree_insert_ret
bch2_extent_can_insert(struct btree_insert *trans,
		       struct btree_insert_entry *insert,
		       unsigned *u64s)
{
	struct btree_iter_level *l = &insert->iter->l[0];
	struct btree_node_iter node_iter = l->iter;
	enum bch_extent_overlap overlap;
	struct bkey_packed *_k;
	struct bkey unpacked;
	struct bkey_s_c k;
	int sectors;

	BUG_ON(trans->flags & BTREE_INSERT_ATOMIC &&
	       !bch2_extent_is_atomic(&insert->k->k, insert->iter));

	/*
	 * We avoid creating whiteouts whenever possible when deleting, but
	 * those optimizations mean we may potentially insert two whiteouts
	 * instead of one (when we overlap with the front of one extent and the
	 * back of another):
	 */
	if (bkey_whiteout(&insert->k->k))
		*u64s += BKEY_U64s;

	_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
					      KEY_TYPE_DISCARD);
	if (!_k)
		return BTREE_INSERT_OK;

	k = bkey_disassemble(l->b, _k, &unpacked);

	overlap = bch2_extent_overlap(&insert->k->k, k.k);

	/* account for having to split existing extent: */
	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
		*u64s += _k->u64s;

	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
	    (sectors = bch2_extent_is_compressed(k))) {
		int flags = BCH_DISK_RESERVATION_BTREE_LOCKS_HELD;

		if (trans->flags & BTREE_INSERT_NOFAIL)
			flags |= BCH_DISK_RESERVATION_NOFAIL;

		switch (bch2_disk_reservation_add(trans->c,
				trans->disk_res,
				sectors, flags)) {
		case 0:
			break;
		case -ENOSPC:
			return BTREE_INSERT_ENOSPC;
		case -EINTR:
			return BTREE_INSERT_NEED_GC_LOCK;
		default:
			BUG();
		}
	}

	return BTREE_INSERT_OK;
}
static void
extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
	      struct bkey_packed *_k, struct bkey_s k,
	      enum bch_extent_overlap overlap)
{
	struct bch_fs *c = s->trans->c;
	struct btree_iter *iter = s->insert->iter;
	struct btree_iter_level *l = &iter->l[0];

	switch (overlap) {
	case BCH_EXTENT_OVERLAP_FRONT:
		/* insert overlaps with start of k: */
		__bch2_cut_front(insert->k.p, k);
		BUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);
		verify_modified_extent(iter, _k);
		break;

	case BCH_EXTENT_OVERLAP_BACK:
		/* insert overlaps with end of k: */
		bch2_cut_back(bkey_start_pos(&insert->k), k.k);
		BUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);

		/*
		 * As the auxiliary tree is indexed by the end of the
		 * key and we've just changed the end, update the
		 * auxiliary tree.
		 */
		bch2_bset_fix_invalidated_key(l->b, _k);
		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
					 _k, _k->u64s, _k->u64s);
		verify_modified_extent(iter, _k);
		break;

	case BCH_EXTENT_OVERLAP_ALL: {
		/* The insert key completely covers k, invalidate k */
		if (!bkey_whiteout(k.k))
			btree_account_key_drop(l->b, _k);

		k.k->size = 0;
		k.k->type = KEY_TYPE_DELETED;

		if (_k >= btree_bset_last(l->b)->start) {
			unsigned u64s = _k->u64s;

			bch2_bset_delete(l->b, _k, _k->u64s);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, u64s, 0);
			bch2_btree_iter_verify(iter, l->b);
		} else {
			extent_save(l->b, _k, k.k);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, _k->u64s, _k->u64s);
			verify_modified_extent(iter, _k);
		}

		break;
	}
	case BCH_EXTENT_OVERLAP_MIDDLE: {
		BKEY_PADDED(k) split;
		/*
		 * The insert key falls 'in the middle' of k
		 * The insert key splits k in 3:
		 * - start only in k, preserve
		 * - middle common section, invalidate in k
		 * - end only in k, preserve
		 *
		 * We update the old key to preserve the start,
		 * insert will be the new common section,
		 * we manually insert the end that we are preserving.
		 *
		 * modify k _before_ doing the insert (which will move
		 * what k points to)
		 */
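		/*
		 * For example (hypothetical sector numbers): with k spanning
		 * 20..60 and insert spanning 32..48, the copy of k (split) is
		 * cut back to cover 20..32, k itself is cut front to cover
		 * 48..60, and insert supplies the common section 32..48:
		 *
		 *   k:        |--------------------|   20..60
		 *   insert:         |--------|         32..48
		 *   result:   |-----|--------|-----|   split, insert, k
		 */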
		bkey_reassemble(&split.k, k.s_c);
		split.k.k.needs_whiteout |= bkey_written(l->b, _k);

		bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k);
		BUG_ON(bkey_deleted(&split.k.k));

		__bch2_cut_front(insert->k.p, k);
		BUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);
		verify_modified_extent(iter, _k);

		extent_bset_insert(c, iter, &split.k);
		break;
	}
	}
}
static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
{
	struct btree_iter *iter = s->insert->iter;
	struct btree_iter_level *l = &iter->l[0];
	struct bkey_packed *_k;
	struct bkey unpacked;
	struct bkey_i *insert = s->insert->k;

	while (bkey_cmp(s->committed, insert->k.p) < 0 &&
	       (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
						      KEY_TYPE_DISCARD))) {
		struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
		enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);

		EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);

		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
			break;

		s->committed = bpos_min(s->insert->k->k.p, k.k->p);

		if (!bkey_whiteout(k.k))
			s->update_journal = true;

		if (!s->update_journal) {
			bch2_cut_front(s->committed, insert);
			bch2_cut_front(s->committed, &s->whiteout);
			bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
			goto next;
		}

		/*
		 * When deleting, if possible just do it by switching the type
		 * of the key we're deleting, instead of creating and inserting
		 * a new whiteout:
		 */
		if (s->deleting &&
		    !s->update_btree &&
		    !bkey_cmp(insert->k.p, k.k->p) &&
		    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
			if (!bkey_whiteout(k.k)) {
				btree_account_key_drop(l->b, _k);
				_k->type = KEY_TYPE_DISCARD;
				reserve_whiteout(l->b, _k);
			}
			break;
		}

		if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
			insert->k.needs_whiteout = true;
			s->update_btree = true;
		}

		if (s->update_btree &&
		    overlap == BCH_EXTENT_OVERLAP_ALL &&
		    bkey_whiteout(k.k) &&
		    k.k->needs_whiteout) {
			unreserve_whiteout(l->b, _k);
			_k->needs_whiteout = false;
		}

		extent_squash(s, insert, _k, k, overlap);

		if (!s->update_btree)
			bch2_cut_front(s->committed, insert);
next:
		if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
		    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
			break;
	}

	if (bkey_cmp(s->committed, insert->k.p) < 0)
		s->committed = bpos_min(s->insert->k->k.p, l->b->key.k.p);

	/*
	 * may have skipped past some deleted extents greater than the insert
	 * key, before we got to a non deleted extent and knew we could bail
	 * out; rewind the iterator a bit if necessary:
	 */
	{
		struct btree_node_iter node_iter = l->iter;

		while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
		       bkey_cmp_left_packed(l->b, _k, &s->committed) > 0)
			l->iter = node_iter;
	}
}
/**
 * bch_extent_insert_fixup - insert a new extent and deal with overlaps
 *
 * this may result in not actually doing the insert, or inserting some subset
 * of the insert key. For cmpxchg operations this is where that logic lives.
 *
 * All subsets of @insert that need to be inserted are inserted using
 * bch2_btree_insert_and_journal(). If @b or @res fills up, this function
 * returns false, setting @iter->pos for the prefix of @insert that actually
 * got inserted.
 *
 * BSET INVARIANTS: this function is responsible for maintaining all the
 * invariants for bsets of extents in memory. things get really hairy with 0
 * size extents
 *
 * within one bset:
 *
 * bkey_start_pos(bkey_next(k)) >= k
 * or bkey_start_offset(bkey_next(k)) >= k->offset
 *
 * i.e. strict ordering, no overlapping extents.
 *
 * multiple bsets (i.e. full btree node):
 *
 * ∀ k, j
 *   k.size != 0 ∧ j.size != 0 →
 *   ¬ (k > bkey_start_pos(j) ∧ k < j)
 *
 * i.e. no two overlapping keys _of nonzero size_
 *
 * We can't realistically maintain this invariant for zero size keys because
 * of the key merging done in bch2_btree_insert_key() - for two mergeable keys
 * k, j there may be another 0 size key between them in another bset, and it
 * will thus overlap with the merged key.
 *
 * In addition, the end of iter->pos indicates how much has been processed.
 * If the end of iter->pos is not the same as the end of insert, then
 * key insertion needs to continue/be retried.
 */
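/*
 * Concretely (hypothetical example): if one bset holds a zero size key at
 * offset 48 while mergeable extents 32..48 and 48..64 in another bset get
 * merged into 32..64, the merged key now overlaps the zero size key - which
 * is why the invariant above is only stated for nonzero size keys, and why
 * bch2_extent_merge_inline() has to shift such keys out of the way.
 */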
enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *trans,
			 struct btree_insert_entry *insert)
{
	struct btree_iter *iter = insert->iter;
	struct btree *b = iter->l[0].b;
	struct extent_insert_state s = {
		.trans		= trans,
		.insert		= insert,
		.committed	= iter->pos,

		.whiteout	= *insert->k,
		.update_journal	= !bkey_whiteout(&insert->k->k),
		.update_btree	= !bkey_whiteout(&insert->k->k),
		.deleting	= bkey_whiteout(&insert->k->k),
	};

	EBUG_ON(iter->level);
	EBUG_ON(!insert->k->k.size);

	/*
	 * As we process overlapping extents, we advance @iter->pos both to
	 * signal to our caller (btree_insert_key()) how much of @insert->k has
	 * been inserted, and also to keep @iter->pos consistent with
	 * @insert->k and the node iterator that we're advancing:
	 */
	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));

	__bch2_insert_fixup_extent(&s);

	extent_insert_committed(&s);

	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
	EBUG_ON(bkey_cmp(iter->pos, s.committed));

	if (insert->k->k.size) {
		/* got to the end of this leaf node */
		BUG_ON(bkey_cmp(iter->pos, b->key.k.p));
		return BTREE_INSERT_NEED_TRAVERSE;
	}

	return BTREE_INSERT_OK;
}
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
		return "value too big";

	if (!k.k->size)
		return "zero key size";

	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED: {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const union bch_extent_entry *entry;
		struct bch_extent_crc_unpacked crc;
		const struct bch_extent_ptr *ptr;
		unsigned size_ondisk = e.k->size;
		const char *reason;
		unsigned nonce = UINT_MAX;

		extent_for_each_entry(e, entry) {
			if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
				return "invalid extent entry type";

			switch (extent_entry_type(entry)) {
			case BCH_EXTENT_ENTRY_ptr:
				ptr = entry_to_ptr(entry);

				reason = extent_ptr_invalid(c, e, &entry->ptr,
							    size_ondisk, false);
				if (reason)
					return reason;
				break;
			case BCH_EXTENT_ENTRY_crc32:
			case BCH_EXTENT_ENTRY_crc64:
			case BCH_EXTENT_ENTRY_crc128:
				crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));

				if (crc.offset + e.k->size >
				    crc.uncompressed_size)
					return "checksum offset + key size > uncompressed size";

				size_ondisk = crc.compressed_size;

				if (!bch2_checksum_type_valid(c, crc.csum_type))
					return "invalid checksum type";

				if (crc.compression_type >= BCH_COMPRESSION_NR)
					return "invalid compression type";

				if (bch2_csum_type_is_encryption(crc.csum_type)) {
					if (nonce == UINT_MAX)
						nonce = crc.offset + crc.nonce;
					else if (nonce != crc.offset + crc.nonce)
						return "incorrect nonce";
				}
				break;
			case BCH_EXTENT_ENTRY_stripe_ptr:
				break;
			}
		}

		return NULL;
	}

	case BCH_RESERVATION: {
		struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);

		if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
			return "incorrect value size";

		if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
			return "invalid nr_replicas";

		return NULL;
	}

	default:
		return "invalid value type";
	}
}
static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
					  struct bkey_s_c_extent e)
{
	const struct bch_extent_ptr *ptr;
	struct bch_dev *ca;
	struct bucket_mark mark;
	unsigned seq, stale;
	char buf[160];
	bool bad;
	unsigned replicas = 0;

	/*
	 * XXX: we should be doing most/all of these checks at startup time,
	 * where we check bch2_bkey_invalid() in btree_node_read_done()
	 *
	 * But note that we can't check for stale pointers or incorrect gc marks
	 * until after journal replay is done (it might be an extent that's
	 * going to get overwritten during replay)
	 */

	extent_for_each_ptr(e, ptr) {
		ca = bch_dev_bkey_exists(c, ptr->dev);
		replicas++;

		/*
		 * If journal replay hasn't finished, we might be seeing keys
		 * that will be overwritten by the time journal replay is done:
		 */
		if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
			continue;

		stale = 0;

		do {
			seq = read_seqcount_begin(&c->gc_pos_lock);
			mark = ptr_bucket_mark(ca, ptr);

			/* between mark and bucket gen */
			smp_rmb();

			stale = ptr_stale(ca, ptr);

			bch2_fs_bug_on(stale && !ptr->cached, c,
				       "stale dirty pointer");

			bch2_fs_bug_on(stale > 96, c,
				       "key too stale: %i",
				       stale);

			if (stale)
				break;

			bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
				(mark.data_type != BCH_DATA_USER ||
				 !(ptr->cached
				   ? mark.cached_sectors
				   : mark.dirty_sectors));
		} while (read_seqcount_retry(&c->gc_pos_lock, seq));

		if (bad)
			goto bad_ptr;
	}

	if (replicas > BCH_REPLICAS_MAX) {
		bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
				      e.s_c);
		bch2_fs_bug(c,
			"extent key bad (too many replicas: %u): %s",
			replicas, buf);
		return;
	}

	if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
	    !bch2_bkey_replicas_marked(c, btree_node_type(b),
				       e.s_c, false)) {
		bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
				      e.s_c);
		bch2_fs_bug(c,
			"extent key bad (replicas not marked in superblock):\n%s",
			buf);
		return;
	}

	return;

bad_ptr:
	bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
			      e.s_c);
	bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu "
		    "gen %i type %u", buf,
		    PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type);
}

void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
{
	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
		break;
	case BCH_RESERVATION:
		break;
	default:
		BUG();
	}
}
void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
			 struct bkey_s_c k)
{
	const char *invalid;

	if (bkey_extent_is_data(k.k))
		extent_print_ptrs(out, c, bkey_s_c_to_extent(k));

	invalid = bch2_extent_invalid(c, k);
	if (invalid)
		pr_buf(out, " invalid: %s", invalid);
}
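
/*
 * Pack an unpacked crc into the smallest on-disk representation that can hold
 * it: crc32 entries fit checksums of up to 4 bytes with small enough sizes
 * and nonces, crc64 checksums of up to 10 bytes, crc128 everything else.
 * E.g. a crc32c checksum over a small enough extent packs into a
 * bch_extent_crc32 entry.
 */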
static void bch2_extent_crc_init(union bch_extent_crc *crc,
				 struct bch_extent_crc_unpacked new)
{
#define common_fields(_crc)					\
	.csum_type = _crc.csum_type,				\
	.compression_type = _crc.compression_type,		\
	._compressed_size = _crc.compressed_size - 1,		\
	._uncompressed_size = _crc.uncompressed_size - 1,	\
	.offset = _crc.offset

	if (bch_crc_bytes[new.csum_type] <= 4 &&
	    new.uncompressed_size <= CRC32_SIZE_MAX &&
	    new.nonce <= CRC32_NONCE_MAX) {
		crc->crc32 = (struct bch_extent_crc32) {
			.type = 1 << BCH_EXTENT_ENTRY_crc32,
			common_fields(new),
			.csum = *((__le32 *) &new.csum.lo),
		};
		return;
	}

	if (bch_crc_bytes[new.csum_type] <= 10 &&
	    new.uncompressed_size <= CRC64_SIZE_MAX &&
	    new.nonce <= CRC64_NONCE_MAX) {
		crc->crc64 = (struct bch_extent_crc64) {
			.type = 1 << BCH_EXTENT_ENTRY_crc64,
			common_fields(new),
			.nonce = new.nonce,
			.csum_lo = new.csum.lo,
			.csum_hi = *((__le16 *) &new.csum.hi),
		};
		return;
	}

	if (bch_crc_bytes[new.csum_type] <= 16 &&
	    new.uncompressed_size <= CRC128_SIZE_MAX &&
	    new.nonce <= CRC128_NONCE_MAX) {
		crc->crc128 = (struct bch_extent_crc128) {
			.type = 1 << BCH_EXTENT_ENTRY_crc128,
			common_fields(new),
			.nonce = new.nonce,
			.csum = new.csum,
		};
		return;
	}
#undef common_fields
	BUG();
}
void bch2_extent_crc_append(struct bkey_i_extent *e,
			    struct bch_extent_crc_unpacked new)
{
	bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
	__extent_entry_push(e);
}

static inline void __extent_entry_insert(struct bkey_i_extent *e,
					 union bch_extent_entry *dst,
					 union bch_extent_entry *new)
{
	union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e));

	memmove_u64s_up((u64 *) dst + extent_entry_u64s(new),
			dst, (u64 *) end - (u64 *) dst);
	e->k.u64s += extent_entry_u64s(new);
	memcpy_u64s_small(dst, new, extent_entry_u64s(new));
}
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
				    struct extent_ptr_decoded *p)
{
	struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL);
	union bch_extent_entry *pos;
	unsigned i;

	if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
		pos = e->v.start;
		goto found;
	}

	extent_for_each_crc(extent_i_to_s(e), crc, pos)
		if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
			pos = extent_entry_next(pos);
			goto found;
		}

	bch2_extent_crc_append(e, p->crc);
	pos = extent_entry_last(extent_i_to_s(e));
found:
	p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
	__extent_entry_insert(e, pos, to_entry(&p->ptr));

	for (i = 0; i < p->ec_nr; i++) {
		p->ec[i].type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
		__extent_entry_insert(e, pos, to_entry(&p->ec[i]));
	}
}
/*
 * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
 *
 * Returns true if @k should be dropped entirely
 *
 * For existing keys, only called when btree nodes are being rewritten, not
 * when they're merely being compacted/resorted in memory.
 */
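/*
 * For example: a cached extent whose pointers have all gone stale normalizes
 * to an empty value and becomes a discard key (droppable once its version is
 * zero), whereas a non-cached extent losing all its pointers is real data
 * loss and becomes an error key, so that reads fail explicitly.
 */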
bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
{
	struct bkey_s_extent e;

	switch (k.k->type) {
	case KEY_TYPE_ERROR:
		return false;

	case KEY_TYPE_DELETED:
		return true;
	case KEY_TYPE_DISCARD:
		return bversion_zero(k.k->version);
	case KEY_TYPE_COOKIE:
		return false;

	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		e = bkey_s_to_extent(k);

		bch2_extent_drop_stale(c, e);

		if (!bkey_val_u64s(e.k)) {
			if (bkey_extent_is_cached(e.k)) {
				k.k->type = KEY_TYPE_DISCARD;
				if (bversion_zero(k.k->version))
					return true;
			} else {
				k.k->type = KEY_TYPE_ERROR;
			}
		}

		return false;
	case BCH_RESERVATION:
		return false;
	default:
		BUG();
	}
}
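
/*
 * Demote pointers to cached until the extent's durability drops to
 * @nr_desired_replicas, in two passes: first preferring pointers outside
 * @target (so the copy on the preferred target stays dirty), then, if the
 * extent is still too durable, any remaining pointers.
 */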
void bch2_extent_mark_replicas_cached(struct bch_fs *c,
				      struct bkey_s_extent e,
				      unsigned target,
				      unsigned nr_desired_replicas)
{
	union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;

	if (target && extra > 0)
		extent_for_each_ptr_decode(e, p, entry) {
			int n = bch2_extent_ptr_durability(c, p);

			if (n && n <= extra &&
			    !bch2_dev_in_target(c, p.ptr.dev, target)) {
				entry->ptr.cached = true;
				extra -= n;
			}
		}

	if (extra > 0)
		extent_for_each_ptr_decode(e, p, entry) {
			int n = bch2_extent_ptr_durability(c, p);

			if (n && n <= extra) {
				entry->ptr.cached = true;
				extra -= n;
			}
		}
}
/*
 * Pick a non-stale pointer to read from, taking the IO failures recorded in
 * @failed (which may be NULL) into account; returns -EIO if @k is an error
 * key, or a non-cached extent with no usable pointer.
 */
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
			 struct bch_io_failures *failed,
			 struct extent_ptr_decoded *pick)
{
	int ret;

	switch (k.k->type) {
	case KEY_TYPE_ERROR:
		return -EIO;

	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
					      failed, pick);

		if (!ret && !bkey_extent_is_cached(k.k))
			ret = -EIO;

		return ret;

	default:
		return 0;
	}
}
enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
				    struct bkey_i *l, struct bkey_i *r)
{
	struct bkey_s_extent el, er;
	union bch_extent_entry *en_l, *en_r;

	if (key_merging_disabled(c))
		return BCH_MERGE_NOMERGE;

	/*
	 * Generic header checks
	 * Assumes left and right are in order
	 * Left and right must be exactly aligned
	 */

	if (l->k.u64s != r->k.u64s ||
	    l->k.type != r->k.type ||
	    bversion_cmp(l->k.version, r->k.version) ||
	    bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
		return BCH_MERGE_NOMERGE;

	switch (l->k.type) {
	case KEY_TYPE_DISCARD:
	case KEY_TYPE_ERROR:
		/* These types are mergeable, and no val to check */
		break;

	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		el = bkey_i_to_s_extent(l);
		er = bkey_i_to_s_extent(r);

		extent_for_each_entry(el, en_l) {
			struct bch_extent_ptr *lp, *rp;
			struct bch_dev *ca;

			en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);

			if ((extent_entry_type(en_l) !=
			     extent_entry_type(en_r)) ||
			    !extent_entry_is_ptr(en_l))
				return BCH_MERGE_NOMERGE;

			lp = &en_l->ptr;
			rp = &en_r->ptr;

			if (lp->offset + el.k->size != rp->offset ||
			    lp->dev != rp->dev ||
			    lp->gen != rp->gen)
				return BCH_MERGE_NOMERGE;

			/* We don't allow extents to straddle buckets: */
			ca = bch_dev_bkey_exists(c, lp->dev);

			if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
				return BCH_MERGE_NOMERGE;
		}

		break;
	case BCH_RESERVATION: {
		struct bkey_i_reservation *li = bkey_i_to_reservation(l);
		struct bkey_i_reservation *ri = bkey_i_to_reservation(r);

		if (li->v.generation != ri->v.generation ||
		    li->v.nr_replicas != ri->v.nr_replicas)
			return BCH_MERGE_NOMERGE;
		break;
	}
	default:
		return BCH_MERGE_NOMERGE;
	}

	l->k.needs_whiteout |= r->k.needs_whiteout;

	/* Keys with no pointers aren't restricted to one bucket and could
	 * overflow KEY_SIZE
	 */
	if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
		bch2_key_resize(&l->k, KEY_SIZE_MAX);
		bch2_cut_front(l->k.p, r);
		return BCH_MERGE_PARTIAL;
	}

	bch2_key_resize(&l->k, l->k.size + r->k.size);

	return BCH_MERGE_MERGE;
}
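
/*
 * A partial merge leaves both keys live: l is grown to exactly KEY_SIZE_MAX
 * and r is cut down to the remainder past l's new end - which is why callers
 * such as bch2_extent_merge_inline() below have to save copies of both keys
 * and write both back.
 */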
/*
 * When merging an extent that we're inserting into a btree node, the new
 * merged extent could overlap with an existing 0 size extent - if we don't
 * fix that, it'll break the btree node iterator so this code finds those 0
 * size extents and shifts them out of the way.
 *
 * Also unpacks and repacks.
 */
static bool bch2_extent_merge_inline(struct bch_fs *c,
				     struct btree_iter *iter,
				     struct bkey_packed *l,
				     struct bkey_packed *r,
				     bool back_merge)
{
	struct btree *b = iter->l[0].b;
	struct btree_node_iter *node_iter = &iter->l[0].iter;
	BKEY_PADDED(k) li, ri;
	struct bkey_packed *m	= back_merge ? l : r;
	struct bkey_i *mi	= back_merge ? &li.k : &ri.k;
	struct bset_tree *t	= bch2_bkey_to_bset(b, m);
	enum merge_result ret;

	EBUG_ON(bkey_written(b, m));

	/*
	 * We need to save copies of both l and r, because we might get a
	 * partial merge (which modifies both) and then fail to repack
	 */
	bch2_bkey_unpack(b, &li.k, l);
	bch2_bkey_unpack(b, &ri.k, r);

	ret = bch2_extent_merge(c, b, &li.k, &ri.k);
	if (ret == BCH_MERGE_NOMERGE)
		return false;

	/*
	 * check if we overlap with deleted extents - would break the sort
	 * order:
	 */
	if (back_merge) {
		struct bkey_packed *n = bkey_next(m);

		if (n != btree_bkey_last(b, t) &&
		    bkey_cmp_left_packed(b, n, &li.k.k.p) <= 0 &&
		    bkey_deleted(n))
			return false;
	} else if (ret == BCH_MERGE_MERGE) {
		struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);

		if (prev &&
		    bkey_cmp_left_packed_byval(b, prev,
				bkey_start_pos(&li.k.k)) > 0)
			return false;
	}

	if (ret == BCH_MERGE_PARTIAL) {
		if (!extent_i_save(b, m, mi))
			return false;

		if (!back_merge)
			bkey_copy(packed_to_bkey(l), &li.k);
		else
			bkey_copy(packed_to_bkey(r), &ri.k);
	} else {
		if (!extent_i_save(b, m, &li.k))
			return false;
	}

	bch2_bset_fix_invalidated_key(b, m);
	bch2_btree_node_iter_fix(iter, b, node_iter,
				 m, m->u64s, m->u64s);
	verify_modified_extent(iter, m);

	return ret == BCH_MERGE_MERGE;
}
int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
{
	struct btree_iter iter;
	struct bpos end = pos;
	struct bkey_s_c k;
	int ret = 0;

	end.offset += size;

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos,
			   BTREE_ITER_SLOTS, k) {
		if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
			break;

		if (!bch2_extent_is_fully_allocated(k)) {
			ret = -ENOSPC;
			break;
		}
	}
	bch2_btree_iter_unlock(&iter);

	return ret;
}