2017-03-16 22:18:50 -08:00
// SPDX-License-Identifier: GPL-2.0
/*
* Code for manipulating bucket marks for garbage collection .
*
* Copyright 2014 Datera , Inc .
*
* Bucket states:
* - free bucket: mark == 0
*   The bucket contains no data and will not be read
*
* - allocator bucket: owned_by_allocator == 1
*   The bucket is on a free list, or it is an open bucket
*
* - cached bucket: owned_by_allocator == 0 &&
*                  dirty_sectors == 0 &&
*                  cached_sectors > 0
*   The bucket contains data but may be safely discarded as there are
*   enough replicas of the data on other cache devices, or it has been
*   written back to the backing device
*
* - dirty bucket: owned_by_allocator == 0 &&
*                 dirty_sectors > 0
*   The bucket contains data that we must not discard (either only copy,
*   or one of the 'main copies' for data requiring multiple replicas)
*
* - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
*   This is a btree node, journal or gen/prio bucket
*
* Lifecycle:
*
* bucket invalidated => bucket on freelist => open bucket =>
* [dirty bucket =>] cached bucket => bucket invalidated => ...
*
* Note that cache promotion can skip the dirty bucket step, as data
* is copied from a deeper tier to a shallower tier, onto a cached
* bucket.
* Note also that a cached bucket can spontaneously become dirty --
* see below.
*
* Only a traversal of the key space can determine whether a bucket is
* truly dirty or cached.
*
* Transitions:
*
* - free => allocator: bucket was invalidated
* - cached => allocator: bucket was invalidated
*
* - allocator => dirty: open bucket was filled up
* - allocator => cached: open bucket was filled up
* - allocator => metadata: metadata was allocated
*
* - dirty => cached: dirty sectors were copied to a deeper tier
* - dirty => free: dirty sectors were overwritten or moved (copy gc)
* - cached => free: cached sectors were overwritten
*
* - metadata => free: metadata was freed
*
* Oddities:
* - cached => dirty: a device was removed so formerly replicated data
*   is no longer sufficiently replicated
* - free => cached: cannot happen
* - free => dirty: cannot happen
* - free => metadata: cannot happen
*/
# include "bcachefs.h"
2018-10-06 00:46:55 -04:00
# include "alloc_background.h"
2018-11-05 02:31:48 -05:00
# include "bset.h"
2017-03-16 22:18:50 -08:00
# include "btree_gc.h"
2018-11-05 02:31:48 -05:00
# include "btree_update.h"
2017-03-16 22:18:50 -08:00
# include "buckets.h"
2018-11-01 15:13:19 -04:00
# include "ec.h"
2017-03-16 22:18:50 -08:00
# include "error.h"
# include "movinggc.h"
2019-01-21 15:32:13 -05:00
# include "replicas.h"
2017-03-16 22:18:50 -08:00
# include "trace.h"
# include <linux/preempt.h>
/*
* Clear journal_seq_valid for buckets for which it ' s not needed , to prevent
* wraparound :
*/
void bch2_bucket_seq_cleanup ( struct bch_fs * c )
{
2018-07-21 22:57:20 -04:00
u64 journal_seq = atomic64_read ( & c - > journal . seq ) ;
2017-03-16 22:18:50 -08:00
u16 last_seq_ondisk = c - > journal . last_seq_ondisk ;
struct bch_dev * ca ;
struct bucket_array * buckets ;
struct bucket * g ;
struct bucket_mark m ;
unsigned i ;
2018-07-21 22:57:20 -04:00
if ( journal_seq - c - > last_bucket_seq_cleanup <
( 1U < < ( BUCKET_JOURNAL_SEQ_BITS - 2 ) ) )
return ;
c - > last_bucket_seq_cleanup = journal_seq ;
2017-03-16 22:18:50 -08:00
for_each_member_device ( ca , c , i ) {
down_read ( & ca - > bucket_lock ) ;
buckets = bucket_array ( ca ) ;
for_each_bucket ( g , buckets ) {
bucket_cmpxchg ( g , m , ( {
if ( ! m . journal_seq_valid | |
bucket_needs_journal_commit ( m , last_seq_ondisk ) )
break ;
m . journal_seq_valid = 0 ;
} ) ) ;
}
up_read ( & ca - > bucket_lock ) ;
}
}
2019-01-24 20:25:40 -05:00
void bch2_fs_usage_initialize ( struct bch_fs * c )
{
struct bch_fs_usage * usage ;
2019-02-14 20:39:17 -05:00
unsigned i ;
2019-01-24 20:25:40 -05:00
percpu_down_write ( & c - > mark_lock ) ;
2019-02-14 20:39:17 -05:00
usage = ( void * ) bch2_acc_percpu_u64s ( ( void * ) c - > usage [ 0 ] ,
fs_usage_u64s ( c ) ) ;
2019-01-24 20:25:40 -05:00
2019-02-09 19:20:57 -05:00
for ( i = 0 ; i < BCH_REPLICAS_MAX ; i + + )
2019-02-14 18:38:52 -05:00
usage - > reserved + = usage - > persistent_reserved [ i ] ;
2019-02-09 19:20:57 -05:00
2019-01-24 20:25:40 -05:00
for ( i = 0 ; i < c - > replicas . nr ; i + + ) {
struct bch_replicas_entry * e =
cpu_replicas_entry ( & c - > replicas , i ) ;
switch ( e - > data_type ) {
case BCH_DATA_BTREE :
2019-03-29 22:22:45 -04:00
usage - > btree + = usage - > replicas [ i ] ;
break ;
2019-01-24 20:25:40 -05:00
case BCH_DATA_USER :
2019-02-14 18:38:52 -05:00
usage - > data + = usage - > replicas [ i ] ;
2019-01-24 20:25:40 -05:00
break ;
case BCH_DATA_CACHED :
2019-02-14 18:38:52 -05:00
usage - > cached + = usage - > replicas [ i ] ;
2019-01-24 20:25:40 -05:00
break ;
}
}
percpu_up_write ( & c - > mark_lock ) ;
}
2019-03-15 18:20:46 -04:00
void bch2_fs_usage_scratch_put ( struct bch_fs * c , struct bch_fs_usage * fs_usage )
{
if ( fs_usage = = c - > usage_scratch )
mutex_unlock ( & c - > usage_scratch_lock ) ;
else
kfree ( fs_usage ) ;
}
struct bch_fs_usage * bch2_fs_usage_scratch_get ( struct bch_fs * c )
{
struct bch_fs_usage * ret ;
unsigned bytes = fs_usage_u64s ( c ) * sizeof ( u64 ) ;
ret = kzalloc ( bytes , GFP_NOWAIT ) ;
if ( ret )
return ret ;
if ( mutex_trylock ( & c - > usage_scratch_lock ) )
goto out_pool ;
ret = kzalloc ( bytes , GFP_NOFS ) ;
if ( ret )
return ret ;
mutex_lock ( & c - > usage_scratch_lock ) ;
out_pool :
ret = c - > usage_scratch ;
memset ( ret , 0 , bytes ) ;
return ret ;
}
2017-03-16 22:18:50 -08:00
struct bch_dev_usage bch2_dev_usage_read ( struct bch_fs * c , struct bch_dev * ca )
{
2019-02-14 18:38:52 -05:00
struct bch_dev_usage ret ;
memset ( & ret , 0 , sizeof ( ret ) ) ;
acc_u64s_percpu ( ( u64 * ) & ret ,
( u64 __percpu * ) ca - > usage [ 0 ] ,
sizeof ( ret ) / sizeof ( u64 ) ) ;
return ret ;
2017-03-16 22:18:50 -08:00
}
2019-01-21 15:32:13 -05:00
struct bch_fs_usage * bch2_fs_usage_read ( struct bch_fs * c )
2017-03-16 22:18:50 -08:00
{
2019-01-21 15:32:13 -05:00
struct bch_fs_usage * ret ;
2019-02-14 20:39:17 -05:00
unsigned v , u64s = fs_usage_u64s ( c ) ;
2019-01-21 15:32:13 -05:00
retry :
2019-02-14 20:39:17 -05:00
ret = kzalloc ( u64s * sizeof ( u64 ) , GFP_NOFS ) ;
2019-01-21 15:32:13 -05:00
if ( unlikely ( ! ret ) )
return NULL ;
percpu_down_read ( & c - > mark_lock ) ;
2019-02-14 20:39:17 -05:00
v = fs_usage_u64s ( c ) ;
if ( unlikely ( u64s ! = v ) ) {
u64s = v ;
2019-01-21 15:32:13 -05:00
percpu_up_read ( & c - > mark_lock ) ;
kfree ( ret ) ;
goto retry ;
}
2019-02-14 20:39:17 -05:00
acc_u64s_percpu ( ( u64 * ) ret , ( u64 __percpu * ) c - > usage [ 0 ] , u64s ) ;
2019-01-21 15:32:13 -05:00
return ret ;
2017-03-16 22:18:50 -08:00
}
/* Reservations are padded by 1 / (1 << RESERVE_FACTOR), i.e. 1/64 */
#define RESERVE_FACTOR	6

/*
 * Return @r plus @r / 2^RESERVE_FACTOR, where @r is first rounded up to a
 * multiple of 2^RESERVE_FACTOR so the extra amount is rounded up.
 */
static u64 reserve_factor(u64 r)
{
	u64 padding = round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR;

	return r + padding;
}
/*
 * Scale @r by 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1), i.e. 64/65,
 * truncating toward zero.
 */
static u64 avail_factor(u64 r)
{
	u64 scaled = r << RESERVE_FACTOR;

	return scaled / ((1 << RESERVE_FACTOR) + 1);
}
2019-02-14 18:38:52 -05:00
u64 bch2_fs_sectors_used ( struct bch_fs * c , struct bch_fs_usage * fs_usage )
2017-03-16 22:18:50 -08:00
{
2019-02-14 18:38:52 -05:00
return min ( fs_usage - > hidden +
2019-03-29 22:22:45 -04:00
fs_usage - > btree +
2019-02-14 18:38:52 -05:00
fs_usage - > data +
reserve_factor ( fs_usage - > reserved +
fs_usage - > online_reserved ) ,
2019-01-21 15:32:13 -05:00
c - > capacity ) ;
2018-11-27 08:23:22 -05:00
}
2019-02-14 18:38:52 -05:00
static struct bch_fs_usage_short
__bch2_fs_usage_read_short ( struct bch_fs * c )
{
struct bch_fs_usage_short ret ;
u64 data , reserved ;
ret . capacity = c - > capacity -
percpu_u64_get ( & c - > usage [ 0 ] - > hidden ) ;
2019-03-29 22:22:45 -04:00
data = percpu_u64_get ( & c - > usage [ 0 ] - > data ) +
percpu_u64_get ( & c - > usage [ 0 ] - > btree ) ;
2019-02-14 18:38:52 -05:00
reserved = percpu_u64_get ( & c - > usage [ 0 ] - > reserved ) +
percpu_u64_get ( & c - > usage [ 0 ] - > online_reserved ) ;
ret . used = min ( ret . capacity , data + reserve_factor ( reserved ) ) ;
ret . free = ret . capacity - ret . used ;
ret . nr_inodes = percpu_u64_get ( & c - > usage [ 0 ] - > nr_inodes ) ;
return ret ;
}
2018-11-27 08:23:22 -05:00
struct bch_fs_usage_short
bch2_fs_usage_read_short ( struct bch_fs * c )
{
struct bch_fs_usage_short ret ;
2019-02-14 18:38:52 -05:00
percpu_down_read ( & c - > mark_lock ) ;
ret = __bch2_fs_usage_read_short ( c ) ;
percpu_up_read ( & c - > mark_lock ) ;
2018-11-27 08:23:22 -05:00
return ret ;
2017-03-16 22:18:50 -08:00
}
/* Logical negation of is_available_bucket(), returned as an int (0 or 1). */
static inline int is_unavailable_bucket(struct bucket_mark m)
{
	if (is_available_bucket(m))
		return 0;

	return 1;
}
/*
 * Number of unused sectors in a partially filled user-data bucket.
 *
 * Returns 0 unless the bucket is not owned by the allocator, has data type
 * BCH_DATA_USER and bucket_sectors_used() is nonzero; otherwise returns the
 * device's bucket size minus the used sectors, clamped to be non-negative.
 */
static inline int is_fragmented_bucket(struct bucket_mark m,
				       struct bch_dev *ca)
{
	if (m.owned_by_allocator ||
	    m.data_type != BCH_DATA_USER ||
	    !bucket_sectors_used(m))
		return 0;

	return max_t(int, 0, (int) ca->mi.bucket_size -
		     bucket_sectors_used(m));
}
static inline enum bch_data_type bucket_type ( struct bucket_mark m )
{
return m . cached_sectors & & ! m . dirty_sectors
2018-11-24 17:09:44 -05:00
? BCH_DATA_CACHED
2017-03-16 22:18:50 -08:00
: m . data_type ;
}
2018-07-23 05:32:01 -04:00
static bool bucket_became_unavailable ( struct bucket_mark old ,
2017-03-16 22:18:50 -08:00
struct bucket_mark new )
{
return is_available_bucket ( old ) & &
2018-07-23 05:32:01 -04:00
! is_available_bucket ( new ) ;
2017-03-16 22:18:50 -08:00
}
2019-01-21 15:32:13 -05:00
int bch2_fs_usage_apply ( struct bch_fs * c ,
struct bch_fs_usage * fs_usage ,
2019-02-12 14:58:55 -05:00
struct disk_reservation * disk_res )
2017-03-16 22:18:50 -08:00
{
2019-02-14 18:38:52 -05:00
s64 added = fs_usage - > data + fs_usage - > reserved ;
2018-11-04 23:10:09 -05:00
s64 should_not_have_added ;
2019-01-21 15:32:13 -05:00
int ret = 0 ;
2017-03-16 22:18:50 -08:00
2018-11-26 00:13:33 -05:00
percpu_rwsem_assert_held ( & c - > mark_lock ) ;
2018-11-22 22:50:35 -05:00
2017-03-16 22:18:50 -08:00
/*
* Not allowed to reduce sectors_available except by getting a
* reservation :
*/
2018-11-04 23:10:09 -05:00
should_not_have_added = added - ( s64 ) ( disk_res ? disk_res - > sectors : 0 ) ;
2018-11-05 02:31:48 -05:00
if ( WARN_ONCE ( should_not_have_added > 0 ,
" disk usage increased without a reservation " ) ) {
2018-11-04 23:10:09 -05:00
atomic64_sub ( should_not_have_added , & c - > sectors_available ) ;
added - = should_not_have_added ;
2019-01-21 15:32:13 -05:00
ret = - 1 ;
2018-11-04 23:10:09 -05:00
}
2017-03-16 22:18:50 -08:00
if ( added > 0 ) {
2018-11-27 08:23:22 -05:00
disk_res - > sectors - = added ;
2019-02-14 18:38:52 -05:00
fs_usage - > online_reserved - = added ;
2017-03-16 22:18:50 -08:00
}
preempt_disable ( ) ;
2018-12-17 08:44:56 -05:00
acc_u64s ( ( u64 * ) this_cpu_ptr ( c - > usage [ 0 ] ) ,
2019-02-14 20:39:17 -05:00
( u64 * ) fs_usage , fs_usage_u64s ( c ) ) ;
2017-03-16 22:18:50 -08:00
preempt_enable ( ) ;
2019-01-21 15:32:13 -05:00
return ret ;
2017-03-16 22:18:50 -08:00
}
2018-12-01 11:32:12 -05:00
/*
 * Account @nr buckets of data type @type to @dev_usage. For superblock and
 * journal buckets, @size is additionally added to the filesystem's hidden
 * counter. @nr and @size may be negative to undo earlier accounting.
 */
static inline void account_bucket(struct bch_fs_usage *fs_usage,
				  struct bch_dev_usage *dev_usage,
				  enum bch_data_type type,
				  int nr, s64 size)
{
	switch (type) {
	case BCH_DATA_SB:
	case BCH_DATA_JOURNAL:
		fs_usage->hidden += size;
		break;
	default:
		break;
	}

	dev_usage->buckets[type] += nr;
}
2017-03-16 22:18:50 -08:00
/*
 * Update device (and hidden filesystem) usage for one bucket whose mark
 * changed from @old to @new.
 *
 * @gc selects which copy of the device counters (ca->usage[gc]) is updated.
 * The bucket is removed from the counters of its old bucket_type() and
 * added to those of its new one; the allocator-owned, erasure-coded,
 * unavailable, per-type sector and fragmentation counters are adjusted by
 * the difference between the two marks. If the bucket became available,
 * the allocator is woken.
 *
 * Reports a filesystem inconsistency if both marks carry a nonzero but
 * different data_type. Must be called with c->mark_lock held.
 */
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
				  struct bch_fs_usage *fs_usage,
				  struct bucket_mark old, struct bucket_mark new,
				  bool gc)
{
	struct bch_dev_usage *du;

	percpu_rwsem_assert_held(&c->mark_lock);

	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
				old.data_type != new.data_type, c,
		" different types of data in same bucket: %s, %s ",
		bch2_data_types[old.data_type],
		bch2_data_types[new.data_type]);

	/* Stay on one CPU while touching its per-CPU counters */
	preempt_disable();
	du = this_cpu_ptr(ca->usage[gc]);

	/* Move the bucket from its old accounting type to its new one */
	if (bucket_type(old))
		account_bucket(fs_usage, du, bucket_type(old),
			       -1, -ca->mi.bucket_size);

	if (bucket_type(new))
		account_bucket(fs_usage, du, bucket_type(new),
			       1, ca->mi.bucket_size);

	du->buckets_alloc +=
		(int) new.owned_by_allocator - (int) old.owned_by_allocator;
	du->buckets_ec +=
		(int) new.stripe - (int) old.stripe;
	du->buckets_unavailable +=
		is_unavailable_bucket(new) - is_unavailable_bucket(old);

	du->sectors[old.data_type] -= old.dirty_sectors;
	du->sectors[new.data_type] += new.dirty_sectors;
	du->sectors[BCH_DATA_CACHED] +=
		(int) new.cached_sectors - (int) old.cached_sectors;
	du->sectors_fragmented +=
		is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
	preempt_enable();

	/* A bucket just became available */
	if (is_available_bucket(new) && !is_available_bucket(old))
		bch2_wake_allocator(ca);
}
2019-03-29 22:22:45 -04:00
void bch2_dev_usage_from_buckets ( struct bch_fs * c )
2018-11-22 22:50:35 -05:00
{
2019-03-29 22:22:45 -04:00
struct bch_dev * ca ;
2018-11-22 22:50:35 -05:00
struct bucket_mark old = { . v . counter = 0 } ;
struct bch_fs_usage * fs_usage ;
struct bucket_array * buckets ;
struct bucket * g ;
2019-03-29 22:22:45 -04:00
unsigned i ;
int cpu ;
percpu_u64_set ( & c - > usage [ 0 ] - > hidden , 0 ) ;
2018-11-22 22:50:35 -05:00
2022-12-24 22:44:56 -05:00
/*
* This is only called during startup , before there ' s any multithreaded
* access to c - > usage :
*/
preempt_disable ( ) ;
2018-11-22 22:50:35 -05:00
fs_usage = this_cpu_ptr ( c - > usage [ 0 ] ) ;
2022-12-24 22:44:56 -05:00
preempt_enable ( ) ;
2019-03-29 22:22:45 -04:00
for_each_member_device ( ca , c , i ) {
for_each_possible_cpu ( cpu )
memset ( per_cpu_ptr ( ca - > usage [ 0 ] , cpu ) , 0 ,
sizeof ( * ca - > usage [ 0 ] ) ) ;
buckets = bucket_array ( ca ) ;
2018-11-22 22:50:35 -05:00
2019-03-29 22:22:45 -04:00
for_each_bucket ( g , buckets )
bch2_dev_usage_update ( c , ca , fs_usage ,
old , g - > mark , false ) ;
}
2018-11-22 22:50:35 -05:00
}
/*
 * Apply @expr to bucket @g's mark through bucket_cmpxchg(), with @new naming
 * the mark variable that @expr modifies, then feed the resulting old -> new
 * transition to bch2_dev_usage_update(). Evaluates to the old mark.
 *
 * Note: this macro uses a variable named `gc` from the calling scope.
 */
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)		\
({									\
	struct bucket_mark _prev = bucket_cmpxchg(g, new, expr);	\
									\
	bch2_dev_usage_update(c, ca, fs_usage, _prev, new, gc);		\
	_prev;								\
})
2019-01-21 15:32:13 -05:00
/*
 * Add @sectors (which may be negative, but never zero) to the usage counter
 * of replicas entry @r and to the matching summary counter (btree, data or
 * cached) in @fs_usage. Entries of any other data type update only the
 * per-entry counter.
 *
 * BUGs if @r is not found by bch2_replicas_entry_idx() or if @sectors is 0.
 */
static inline void update_replicas(struct bch_fs *c,
				   struct bch_fs_usage *fs_usage,
				   struct bch_replicas_entry *r,
				   s64 sectors)
{
	int idx = bch2_replicas_entry_idx(c, r);

	BUG_ON(idx < 0);
	BUG_ON(!sectors);

	if (r->data_type == BCH_DATA_BTREE)
		fs_usage->btree		+= sectors;
	else if (r->data_type == BCH_DATA_USER)
		fs_usage->data		+= sectors;
	else if (r->data_type == BCH_DATA_CACHED)
		fs_usage->cached	+= sectors;

	fs_usage->replicas[idx] += sectors;
}
/*
 * Account @sectors of cached data on device @dev: build the replicas entry
 * for cached data on that device with bch2_replicas_entry_cached() and pass
 * it to update_replicas().
 */
static inline void update_cached_sectors(struct bch_fs *c,
					 struct bch_fs_usage *fs_usage,
					 unsigned dev, s64 sectors)
{
	struct bch_replicas_padded cached;

	bch2_replicas_entry_cached(&cached.e, dev);

	update_replicas(c, fs_usage, &cached.e, sectors);
}
2019-02-12 15:03:47 -05:00
/*
 * Invoke marking function @fn as fn(c, <extra args>, gc) for the bucket
 * state copies that need updating, with gc = 0 and/or gc = 1:
 *
 *  - gc == 0 is run when @flags does not contain BCH_BUCKET_MARK_GC;
 *  - gc == 1 is run when @flags contains BCH_BUCKET_MARK_GC, or when
 *    gc_visited(c, pos) is true.
 *
 * Iteration stops at the first nonzero return value from @fn; the macro
 * evaluates to that value, or to 0 if every call returned 0.
 *
 * Asserts that c->mark_lock is held. The @c and @flags arguments are
 * parenthesized where they are combined with operators so that compound
 * expressions can be passed safely.
 */
#define do_mark_fn(fn, c, pos, flags, ...)				\
({									\
	int gc, ret = 0;						\
									\
	percpu_rwsem_assert_held(&(c)->mark_lock);			\
									\
	for (gc = 0; gc < 2 && !ret; gc++)				\
		if (!gc == !((flags) & BCH_BUCKET_MARK_GC) ||		\
		    (gc && gc_visited(c, pos)))				\
			ret = fn(c, __VA_ARGS__, gc);			\
	ret;								\
})
/*
 * Invalidate bucket @b on device @ca in the bucket state copy selected by
 * @gc: mark it allocator-owned and dirty, clear its data type and sector
 * counts, and bump its generation.
 *
 * The bucket must be available (BUG otherwise). Cached sectors that were
 * present are subtracted from the cached replicas accounting. For the
 * non-gc copy the previous mark is stored in *@ret.
 *
 * Uses this_cpu_ptr(), so it should run with preemption disabled.
 * Always returns 0.
 */
static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
				    size_t b, struct bucket_mark *ret,
				    bool gc)
{
	struct bch_fs_usage *usage = this_cpu_ptr(c->usage[gc]);
	struct bucket *bkt = __bucket(ca, b, gc);
	struct bucket_mark prev, new;

	prev = bucket_data_cmpxchg(c, ca, usage, bkt, new, ({
		BUG_ON(!is_available_bucket(new));

		new.owned_by_allocator	= true;
		new.dirty		= true;
		new.data_type		= 0;
		new.cached_sectors	= 0;
		new.dirty_sectors	= 0;
		new.gen++;
	}));

	if (prev.cached_sectors)
		update_cached_sectors(c, usage, ca->dev_idx,
				      -((s64) prev.cached_sectors));

	if (!gc)
		*ret = prev;

	return 0;
}
/*
 * Invalidate bucket @b on device @ca and return its previous (non-gc) mark
 * in *@old.
 *
 * __bch2_invalidate_bucket() is run through do_mark_fn() at GC position
 * gc_phase(GC_PHASE_START) with no flags, so the gc copy is updated as well
 * when gc_visited() reports that position as visited. If the bucket was not
 * allocator-owned and held cached sectors, an invalidate trace event is
 * emitted.
 *
 * The marking helper uses this_cpu_ptr() and updates per-CPU counters
 * non-atomically, so preemption is disabled around it, as the other marking
 * wrappers in this file (bch2_mark_alloc_bucket(),
 * bch2_mark_metadata_bucket()) already do.
 *
 * c->mark_lock must be held (asserted by do_mark_fn()).
 */
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, struct bucket_mark *old)
{
	preempt_disable();

	do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0,
		   ca, b, old);

	preempt_enable();

	if (!old->owned_by_allocator && old->cached_sectors)
		trace_invalidate(ca, bucket_to_sector(ca, b),
				 old->cached_sectors);
}
2019-02-12 15:03:47 -05:00
/*
 * Set the owned_by_allocator bit of bucket @b on device @ca, in the bucket
 * state copy selected by @gc, to @owned_by_allocator, and update usage
 * accounting for the change.
 *
 * For the non-gc copy it is a BUG to clear the bit on a bucket that did not
 * have it set. Uses this_cpu_ptr(), so it should run with preemption
 * disabled. Always returns 0.
 */
static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
				    size_t b, bool owned_by_allocator,
				    bool gc)
{
	struct bch_fs_usage *usage = this_cpu_ptr(c->usage[gc]);
	struct bucket *bkt = __bucket(ca, b, gc);
	struct bucket_mark prev, new;

	prev = bucket_data_cmpxchg(c, ca, usage, bkt, new, ({
		new.owned_by_allocator = owned_by_allocator;
	}));

	BUG_ON(!gc &&
	       !owned_by_allocator && !prev.owned_by_allocator);

	return 0;
}
/*
 * Mark bucket @b on device @ca as owned / not owned by the allocator.
 *
 * Runs __bch2_mark_alloc_bucket() through do_mark_fn() for the bucket state
 * copies selected by @pos and @flags, with preemption disabled because the
 * helper uses this_cpu_ptr(). c->mark_lock must be held (asserted by
 * do_mark_fn()).
 */
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, bool owned_by_allocator,
			    struct gc_pos pos, unsigned flags)
{
	preempt_disable();

	do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
		   ca, b, owned_by_allocator);

	preempt_enable();
}
2019-02-13 14:46:32 -05:00
/*
 * Apply an alloc key @k to the in-memory bucket it describes.
 *
 * Does nothing for deletions (!@inserting) or when @flags contains
 * BCH_BUCKET_MARK_GC. Otherwise the key is unpacked and its gen, data type
 * and sector counts are copied into the bucket mark; its read/write times
 * and oldest_gen are copied into the bucket, and gen_valid is set. Cached
 * sectors held by the previous mark are subtracted from the cached replicas
 * accounting and an invalidate trace event is emitted.
 *
 * BUGs if the key carries dirty or cached sectors, or if the bucket is not
 * owned by the allocator. @journal_seq is unused. Always returns 0.
 */
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
			   bool inserting,
			   struct bch_fs_usage *fs_usage,
			   unsigned journal_seq, unsigned flags,
			   bool gc)
{
	struct bkey_alloc_unpacked u;
	struct bucket_mark prev, m;
	struct bch_dev *ca;
	struct bucket *bkt;

	if (!inserting)
		return 0;

	/*
	 * alloc btree is read in by bch2_alloc_read, not gc:
	 */
	if (flags & BCH_BUCKET_MARK_GC)
		return 0;

	u	= bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
	ca	= bch_dev_bkey_exists(c, k.k->p.inode);
	bkt	= __bucket(ca, k.k->p.offset, gc);

	/*
	 * this should currently only be getting called from the bucket
	 * invalidate path:
	 */
	BUG_ON(u.dirty_sectors);
	BUG_ON(u.cached_sectors);
	BUG_ON(!bkt->mark.owned_by_allocator);

	prev = bucket_data_cmpxchg(c, ca, fs_usage, bkt, m, ({
		m.gen			= u.gen;
		m.data_type		= u.data_type;
		m.dirty_sectors		= u.dirty_sectors;
		m.cached_sectors	= u.cached_sectors;
	}));

	bkt->io_time[READ]	= u.read_time;
	bkt->io_time[WRITE]	= u.write_time;
	bkt->oldest_gen		= u.oldest_gen;
	bkt->gen_valid		= 1;

	if (!prev.cached_sectors)
		return 0;

	update_cached_sectors(c, fs_usage, ca->dev_idx,
			      -prev.cached_sectors);
	trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
			 prev.cached_sectors);
	return 0;
}
2018-07-22 06:10:52 -04:00
/*
 * Add @b to the 16 bit counter @a, saturating at U16_MAX.
 *
 * The sum is computed as (unsigned) a + b and converted to unsigned, so a
 * negative @b that would take the counter below zero also yields a value
 * above U16_MAX and is handled as overflow. @a is assigned the (possibly
 * clamped) result; the macro evaluates to true if clamping occurred.
 *
 * @a is evaluated twice.
 */
#define checked_add(a, b)						\
({									\
	unsigned _sum = (unsigned) (a) + (b);				\
	bool _overflow = _sum > U16_MAX;				\
									\
	if (_overflow)							\
		_sum = U16_MAX;						\
	(a) = _sum;							\
	_overflow;							\
})
2017-03-16 22:18:50 -08:00
2019-02-12 15:03:47 -05:00
/*
 * Account @sectors of superblock or journal data to bucket @b on device
 * @ca, in the bucket state copy selected by @gc.
 *
 * The bucket mark is set dirty, given data type @type, and its
 * dirty_sectors is increased by @sectors (saturating at U16_MAX, which is
 * reported as a filesystem inconsistency). When @c is non-NULL, usage
 * accounting is updated through this CPU's slice of c->usage[gc].
 *
 * BUGs if @type is neither BCH_DATA_SB nor BCH_DATA_JOURNAL.
 * Always returns 0.
 *
 * NOTE(review): the overflow report is issued before the `if (c)` check and
 * is passed @c, which the caller in this file may supply as NULL; whether
 * bch2_fs_inconsistent_on() tolerates a NULL filesystem is not visible
 * here - confirm.
 */
static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
				       size_t b, enum bch_data_type type,
				       unsigned sectors, bool gc)
{
	struct bucket *bkt = __bucket(ca, b, gc);
	struct bucket_mark prev, new;
	bool overflow;

	BUG_ON(!(type == BCH_DATA_SB ||
		 type == BCH_DATA_JOURNAL));

	prev = bucket_cmpxchg(bkt, new, ({
		new.dirty	= true;
		new.data_type	= type;
		overflow	= checked_add(new.dirty_sectors, sectors);
	}));

	bch2_fs_inconsistent_on(overflow, c,
		" bucket sector count overflow: %u + %u > U16_MAX ",
		prev.dirty_sectors, sectors);

	if (c)
		bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]),
				      prev, new, gc);

	return 0;
}
2017-03-16 22:18:50 -08:00
/*
 * Mark @sectors of superblock or journal data in bucket @b on device @ca.
 *
 * With a filesystem (@c non-NULL), __bch2_mark_metadata_bucket() is run
 * through do_mark_fn() for the bucket state copies selected by @pos and
 * @flags. With @c == NULL it is called once directly for the non-gc copy.
 * Preemption is disabled around either path.
 *
 * BUGs if @type is neither BCH_DATA_SB nor BCH_DATA_JOURNAL.
 */
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
			       size_t b, enum bch_data_type type,
			       unsigned sectors, struct gc_pos pos,
			       unsigned flags)
{
	BUG_ON(!(type == BCH_DATA_SB ||
		 type == BCH_DATA_JOURNAL));

	preempt_disable();

	if (likely(c)) {
		do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
			   ca, b, type, sectors);
	} else {
		/* No filesystem: only the bucket mark itself is updated */
		__bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
	}

	preempt_enable();
}
2018-12-06 11:52:58 -05:00
/*
 * Convert a change of @delta live sectors in the extent that pointer @p
 * belongs to into the change in on-disk sectors for that pointer.
 *
 * A positive @delta is converted directly with __ptr_disk_sectors(). For
 * @delta <= 0 the result is the on-disk size at the reduced live size minus
 * the current on-disk size; it is a BUG for -@delta to exceed the pointer's
 * current live size.
 */
static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
				  s64 delta)
{
	/*
	 * marking a new extent, which _will have size_ @delta
	 *
	 * in the bch2_mark_update -> BCH_EXTENT_OVERLAP_MIDDLE
	 * case, we haven't actually created the key we'll be inserting
	 * yet (for the split) - so we don't want to be using
	 * k->size/crc.live_size here:
	 */
	if (delta > 0)
		return __ptr_disk_sectors(p, delta);

	BUG_ON(-delta > p.crc.live_size);

	return (s64) __ptr_disk_sectors(p, p.crc.live_size + delta) -
		(s64) ptr_disk_sectors(p);
}
/*
* Checking against gc ' s position has to be done here , inside the cmpxchg ( )
* loop , to avoid racing with the start of gc clearing all the marks - GC does
* that with the gc pos seqlock held .
*/
2019-02-14 15:42:41 -05:00
static bool bch2_mark_pointer ( struct bch_fs * c ,
2018-11-01 15:21:48 -04:00
struct extent_ptr_decoded p ,
s64 sectors , enum bch_data_type data_type ,
struct bch_fs_usage * fs_usage ,
2018-11-27 08:23:22 -05:00
unsigned journal_seq , unsigned flags ,
2018-07-23 05:32:01 -04:00
bool gc )
2018-11-01 15:21:48 -04:00
{
struct bucket_mark old , new ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , p . ptr . dev ) ;
2018-07-23 05:32:01 -04:00
size_t b = PTR_BUCKET_NR ( ca , & p . ptr ) ;
struct bucket * g = __bucket ( ca , b , gc ) ;
2019-03-07 16:33:56 -05:00
bool overflow ;
2018-11-01 15:21:48 -04:00
u64 v ;
2018-07-24 16:42:49 -04:00
2017-03-16 22:18:50 -08:00
v = atomic64_read ( & g - > _mark . v ) ;
do {
new . v . counter = old . v . counter = v ;
2018-11-19 01:31:41 -05:00
new . dirty = true ;
2017-03-16 22:18:50 -08:00
/*
* Check this after reading bucket mark to guard against
* the allocator invalidating a bucket after we ' ve already
* checked the gen
*/
2018-09-27 21:08:39 -04:00
if ( gen_after ( new . gen , p . ptr . gen ) ) {
2017-03-16 22:18:50 -08:00
BUG_ON ( ! test_bit ( BCH_FS_ALLOC_READ_DONE , & c - > flags ) ) ;
2018-09-27 21:08:39 -04:00
EBUG_ON ( ! p . ptr . cached & &
2017-03-16 22:18:50 -08:00
test_bit ( JOURNAL_REPLAY_DONE , & c - > journal . flags ) ) ;
2019-02-14 15:42:41 -05:00
return true ;
2017-03-16 22:18:50 -08:00
}
2018-09-27 21:08:39 -04:00
if ( ! p . ptr . cached )
2019-03-07 16:33:56 -05:00
overflow = checked_add ( new . dirty_sectors , sectors ) ;
2017-03-16 22:18:50 -08:00
else
2019-03-07 16:33:56 -05:00
overflow = checked_add ( new . cached_sectors , sectors ) ;
2017-03-16 22:18:50 -08:00
if ( ! new . dirty_sectors & &
! new . cached_sectors ) {
new . data_type = 0 ;
if ( journal_seq ) {
new . journal_seq_valid = 1 ;
new . journal_seq = journal_seq ;
}
} else {
new . data_type = data_type ;
}
if ( flags & BCH_BUCKET_MARK_NOATOMIC ) {
g - > _mark = new ;
break ;
}
} while ( ( v = atomic64_cmpxchg ( & g - > _mark . v ,
old . v . counter ,
new . v . counter ) ) ! = old . v . counter ) ;
2019-03-07 16:33:56 -05:00
bch2_fs_inconsistent_on ( overflow , c ,
" bucket sector count overflow: %u + %lli > U16_MAX " ,
! p . ptr . cached
? old . dirty_sectors
: old . cached_sectors , sectors ) ;
2018-07-23 05:32:01 -04:00
bch2_dev_usage_update ( c , ca , fs_usage , old , new , gc ) ;
2017-03-16 22:18:50 -08:00
2018-07-23 05:32:01 -04:00
BUG_ON ( ! gc & & bucket_became_unavailable ( old , new ) ) ;
2019-02-14 15:42:41 -05:00
return false ;
2017-03-16 22:18:50 -08:00
}
2018-11-24 17:09:44 -05:00
static int bch2_mark_stripe_ptr ( struct bch_fs * c ,
struct bch_extent_stripe_ptr p ,
2019-01-21 15:32:13 -05:00
enum bch_data_type data_type ,
struct bch_fs_usage * fs_usage ,
2018-11-24 17:09:44 -05:00
s64 sectors , unsigned flags ,
bool gc )
2018-11-01 15:13:19 -04:00
{
2018-11-24 17:09:44 -05:00
struct stripe * m ;
2018-11-01 15:13:19 -04:00
unsigned old , new , nr_data ;
int blocks_nonempty_delta ;
s64 parity_sectors ;
2018-11-25 20:53:51 -05:00
BUG_ON ( ! sectors ) ;
2018-11-24 17:09:44 -05:00
m = genradix_ptr ( & c - > stripes [ gc ] , p . idx ) ;
2018-11-01 15:13:19 -04:00
2018-11-25 20:53:51 -05:00
spin_lock ( & c - > ec_stripes_heap_lock ) ;
2018-11-24 17:09:44 -05:00
if ( ! m | | ! m - > alive ) {
2018-11-25 20:53:51 -05:00
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-24 17:09:44 -05:00
bch_err_ratelimited ( c , " pointer to nonexistent stripe %llu " ,
( u64 ) p . idx ) ;
return - 1 ;
}
2018-11-01 15:13:19 -04:00
2019-01-21 15:32:13 -05:00
BUG_ON ( m - > r . e . data_type ! = data_type ) ;
2018-11-01 15:13:19 -04:00
nr_data = m - > nr_blocks - m - > nr_redundant ;
parity_sectors = DIV_ROUND_UP ( abs ( sectors ) * m - > nr_redundant , nr_data ) ;
if ( sectors < 0 )
parity_sectors = - parity_sectors ;
2019-01-21 15:32:13 -05:00
sectors + = parity_sectors ;
2018-11-01 15:13:19 -04:00
2018-11-25 20:53:51 -05:00
old = m - > block_sectors [ p . block ] ;
m - > block_sectors [ p . block ] + = sectors ;
new = m - > block_sectors [ p . block ] ;
2018-11-01 15:13:19 -04:00
blocks_nonempty_delta = ( int ) ! ! new - ( int ) ! ! old ;
2018-11-25 20:53:51 -05:00
if ( blocks_nonempty_delta ) {
m - > blocks_nonempty + = blocks_nonempty_delta ;
2018-11-01 15:13:19 -04:00
2018-11-25 20:53:51 -05:00
if ( ! gc )
bch2_stripes_heap_update ( c , m , p . idx ) ;
}
2018-11-01 15:13:19 -04:00
2018-11-25 20:53:51 -05:00
m - > dirty = true ;
2018-11-01 15:13:19 -04:00
2018-11-25 20:53:51 -05:00
spin_unlock ( & c - > ec_stripes_heap_lock ) ;
2018-11-24 17:09:44 -05:00
2019-01-21 15:32:13 -05:00
update_replicas ( c , fs_usage , & m - > r . e , sectors ) ;
2018-11-24 17:09:44 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
}
2018-11-24 17:09:44 -05:00
static int bch2_mark_extent ( struct bch_fs * c , struct bkey_s_c k ,
s64 sectors , enum bch_data_type data_type ,
2018-11-27 08:23:22 -05:00
struct bch_fs_usage * fs_usage ,
unsigned journal_seq , unsigned flags ,
2018-11-24 17:09:44 -05:00
bool gc )
2017-03-16 22:18:50 -08:00
{
2018-11-01 15:10:01 -04:00
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
const union bch_extent_entry * entry ;
struct extent_ptr_decoded p ;
2019-01-21 15:32:13 -05:00
struct bch_replicas_padded r ;
s64 dirty_sectors = 0 ;
2018-11-01 15:10:01 -04:00
unsigned i ;
int ret ;
2019-01-21 15:32:13 -05:00
r . e . data_type = data_type ;
r . e . nr_devs = 0 ;
r . e . nr_required = 1 ;
2018-11-01 15:21:48 -04:00
BUG_ON ( ! sectors ) ;
2018-11-01 15:10:01 -04:00
bkey_for_each_ptr_decode ( k . k , ptrs , p , entry ) {
2018-12-06 11:52:58 -05:00
s64 disk_sectors = data_type = = BCH_DATA_BTREE
? sectors
: ptr_disk_sectors_delta ( p , sectors ) ;
2019-02-14 15:42:41 -05:00
bool stale = bch2_mark_pointer ( c , p , disk_sectors , data_type ,
fs_usage , journal_seq , flags , gc ) ;
2018-11-05 02:31:48 -05:00
2019-01-21 15:32:13 -05:00
if ( p . ptr . cached ) {
2019-02-14 15:42:41 -05:00
if ( disk_sectors & & ! stale )
update_cached_sectors ( c , fs_usage , p . ptr . dev ,
disk_sectors ) ;
2019-01-21 15:32:13 -05:00
} else if ( ! p . ec_nr ) {
dirty_sectors + = disk_sectors ;
r . e . devs [ r . e . nr_devs + + ] = p . ptr . dev ;
} else {
2018-11-01 15:10:01 -04:00
for ( i = 0 ; i < p . ec_nr ; i + + ) {
ret = bch2_mark_stripe_ptr ( c , p . ec [ i ] ,
2019-01-21 15:32:13 -05:00
data_type , fs_usage ,
disk_sectors , flags , gc ) ;
2018-11-01 15:10:01 -04:00
if ( ret )
return ret ;
}
2019-01-21 15:32:13 -05:00
r . e . nr_required = 0 ;
}
2018-11-01 15:21:48 -04:00
}
2018-11-05 02:31:48 -05:00
2019-01-21 15:32:13 -05:00
if ( dirty_sectors )
update_replicas ( c , fs_usage , & r . e , dirty_sectors ) ;
2018-11-24 17:09:44 -05:00
return 0 ;
2018-11-01 15:21:48 -04:00
}
2018-07-24 14:54:39 -04:00
2018-11-01 15:13:19 -04:00
static void bucket_set_stripe ( struct bch_fs * c ,
const struct bch_stripe * v ,
bool enabled ,
struct bch_fs_usage * fs_usage ,
2018-07-23 05:32:01 -04:00
u64 journal_seq ,
bool gc )
2018-11-01 15:13:19 -04:00
{
unsigned i ;
for ( i = 0 ; i < v - > nr_blocks ; i + + ) {
const struct bch_extent_ptr * ptr = v - > ptrs + i ;
struct bch_dev * ca = bch_dev_bkey_exists ( c , ptr - > dev ) ;
2018-07-23 05:32:01 -04:00
size_t b = PTR_BUCKET_NR ( ca , ptr ) ;
struct bucket * g = __bucket ( ca , b , gc ) ;
2018-11-01 15:13:19 -04:00
struct bucket_mark new , old ;
BUG_ON ( ptr_stale ( ca , ptr ) ) ;
2018-11-24 17:09:44 -05:00
old = bucket_data_cmpxchg ( c , ca , fs_usage , g , new , ( {
2018-11-19 01:31:41 -05:00
new . dirty = true ;
2018-11-01 15:13:19 -04:00
new . stripe = enabled ;
if ( journal_seq ) {
new . journal_seq_valid = 1 ;
new . journal_seq = journal_seq ;
}
} ) ) ;
}
}
2018-11-24 17:09:44 -05:00
/*
 * Apply the insertion or removal of a stripe key to the in-memory stripe
 * table (c->stripes[gc]) and to the marks of the buckets it covers.
 *
 * The in-memory entry is looked up by the key's offset, removed from the
 * stripes heap if it was alive, zeroed, and - when @inserting - refilled from
 * the key's value. Finally bucket_set_stripe() is called for the stripe's
 * buckets.
 *
 * Returns 0 on success, or -1 if there is no in-memory entry for the stripe
 * (or a stripe being removed was not alive).
 *
 * NOTE(review): @journal_seq is not forwarded; bucket_set_stripe() is called
 * with a journal sequence of 0. Confirm this is intentional.
 */
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
			    bool inserting,
			    struct bch_fs_usage *fs_usage,
			    u64 journal_seq, unsigned flags,
			    bool gc)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_stripe *v = s.v;
	size_t idx = s.k->p.offset;
	struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
	unsigned i;

	spin_lock(&c->ec_stripes_heap_lock);

	if (!m || (!inserting && !m->alive)) {
		spin_unlock(&c->ec_stripes_heap_lock);
		bch_err_ratelimited(c, " error marking nonexistent stripe %zu ",
				    idx);
		return -1;
	}

	/* Drop the previous state of this slot before rebuilding it */
	if (m->alive)
		bch2_stripes_heap_del(c, m, idx);

	memset(m, 0, sizeof(*m));

	if (inserting) {
		m->sectors	= le16_to_cpu(v->sectors);
		m->algorithm	= v->algorithm;
		m->nr_blocks	= v->nr_blocks;
		m->nr_redundant	= v->nr_redundant;

		/* Replicas entry describing the devices this stripe spans */
		memset(&m->r, 0, sizeof(m->r));

		m->r.e.data_type	= BCH_DATA_USER;
		m->r.e.nr_devs		= v->nr_blocks;
		m->r.e.nr_required	= v->nr_blocks - v->nr_redundant;

		for (i = 0; i < v->nr_blocks; i++)
			m->r.e.devs[i] = v->ptrs[i].dev;

		/*
		 * XXX: account for stripes somehow here
		 */
#if 0
		update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
#endif

		/* gc recalculates these fields: */
		if (!(flags & BCH_BUCKET_MARK_GC)) {
			for (i = 0; i < v->nr_blocks; i++) {
				m->block_sectors[i] =
					stripe_blockcount_get(v, i);
				m->blocks_nonempty += !!m->block_sectors[i];
			}
		}

		/* The gc copy is only flagged alive, not put on the heap */
		if (gc)
			m->alive = true;
		else
			bch2_stripes_heap_insert(c, m, idx);
	}

	spin_unlock(&c->ec_stripes_heap_lock);

	bucket_set_stripe(c, v, inserting, fs_usage, 0, gc);
	return 0;
}
2019-03-29 19:49:17 -04:00
/*
 * Account for key @k being inserted or removed, dispatching on the key type.
 *
 * @inserting: true when the key is being added, false when it is going away
 * @sectors:   signed sector delta, used by the extent and reservation cases
 * @fs_usage:  counters to update; if NULL, or if BCH_BUCKET_MARK_GC is set
 *             in @flags, this CPU's c->usage[gc] is used instead
 * @flags:     BCH_BUCKET_MARK_* flags; BCH_BUCKET_MARK_GC selects gc mode
 *
 * Runs with preemption disabled for the duration of the update. This variant
 * does not take c->mark_lock itself; bch2_mark_key() is the wrapper that
 * does.
 *
 * Returns the result of the per-type helper, or 0 for key types handled
 * inline and for key types that are not handled at all.
 */
int bch2_mark_key_locked(struct bch_fs *c,
			 struct bkey_s_c k,
			 bool inserting, s64 sectors,
			 struct bch_fs_usage *fs_usage,
			 u64 journal_seq, unsigned flags)
{
	bool gc = flags & BCH_BUCKET_MARK_GC;
	int ret = 0;

	preempt_disable();

	if (!fs_usage || gc)
		fs_usage = this_cpu_ptr(c->usage[gc]);

	switch (k.k->type) {
	case KEY_TYPE_alloc: {
		ret = bch2_mark_alloc(c, k, inserting,
				      fs_usage, journal_seq, flags, gc);
		break;
	}
	case KEY_TYPE_btree_ptr: {
		/* Btree nodes always account for one full node's worth */
		ret = bch2_mark_extent(c, k, inserting
				       ?  c->opts.btree_node_size
				       : -c->opts.btree_node_size,
				       BCH_DATA_BTREE,
				       fs_usage, journal_seq, flags, gc);
		break;
	}
	case KEY_TYPE_extent: {
		ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
				       fs_usage, journal_seq, flags, gc);
		break;
	}
	case KEY_TYPE_stripe: {
		ret = bch2_mark_stripe(c, k, inserting,
				       fs_usage, journal_seq, flags, gc);
		break;
	}
	case KEY_TYPE_inode: {
		if (inserting)
			fs_usage->nr_inodes++;
		else
			fs_usage->nr_inodes--;
		break;
	}
	case KEY_TYPE_reservation: {
		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;

		/* Scale by the unclamped replica count ... */
		sectors *= replicas;

		/* ... but clamp it before using it as an array index */
		replicas = clamp_t(unsigned, replicas, 1,
				   ARRAY_SIZE(fs_usage->persistent_reserved));

		fs_usage->reserved				+= sectors;
		fs_usage->persistent_reserved[replicas - 1]	+= sectors;
		break;
	}
	}

	preempt_enable();

	return ret;
}
2018-11-01 15:10:01 -04:00
int bch2_mark_key ( struct bch_fs * c , struct bkey_s_c k ,
2018-11-22 22:50:35 -05:00
bool inserting , s64 sectors ,
2018-11-27 08:23:22 -05:00
struct bch_fs_usage * fs_usage ,
2018-11-22 22:50:35 -05:00
u64 journal_seq , unsigned flags )
{
int ret ;
2018-11-26 00:13:33 -05:00
percpu_down_read ( & c - > mark_lock ) ;
2018-11-01 15:10:01 -04:00
ret = bch2_mark_key_locked ( c , k , inserting , sectors ,
2019-03-29 19:49:17 -04:00
fs_usage , journal_seq , flags ) ;
2018-11-26 00:13:33 -05:00
percpu_up_read ( & c - > mark_lock ) ;
2018-11-24 17:09:44 -05:00
return ret ;
2017-03-16 22:18:50 -08:00
}
2019-04-15 14:58:00 -04:00
inline bool bch2_mark_overwrite ( struct btree_trans * trans ,
struct btree_iter * iter ,
struct bkey_s_c old ,
struct bkey_i * new ,
struct bch_fs_usage * fs_usage ,
unsigned flags )
{
struct bch_fs * c = trans - > c ;
struct btree * b = iter - > l [ 0 ] . b ;
s64 sectors = 0 ;
if ( btree_node_is_extents ( b )
? bkey_cmp ( new - > k . p , bkey_start_pos ( old . k ) ) < = 0
: bkey_cmp ( new - > k . p , old . k - > p ) )
return false ;
if ( btree_node_is_extents ( b ) ) {
switch ( bch2_extent_overlap ( & new - > k , old . k ) ) {
case BCH_EXTENT_OVERLAP_ALL :
sectors = - ( ( s64 ) old . k - > size ) ;
break ;
case BCH_EXTENT_OVERLAP_BACK :
sectors = bkey_start_offset ( & new - > k ) -
old . k - > p . offset ;
break ;
case BCH_EXTENT_OVERLAP_FRONT :
sectors = bkey_start_offset ( old . k ) -
new - > k . p . offset ;
break ;
case BCH_EXTENT_OVERLAP_MIDDLE :
sectors = old . k - > p . offset - new - > k . p . offset ;
BUG_ON ( sectors < = 0 ) ;
bch2_mark_key_locked ( c , old , true , sectors ,
fs_usage , trans - > journal_res . seq ,
flags ) ;
sectors = bkey_start_offset ( & new - > k ) -
old . k - > p . offset ;
break ;
}
BUG_ON ( sectors > = 0 ) ;
}
bch2_mark_key_locked ( c , old , false , sectors ,
fs_usage , trans - > journal_res . seq , flags ) ;
return true ;
}
2019-03-13 22:44:04 -04:00
void bch2_mark_update ( struct btree_trans * trans ,
2019-03-15 18:20:46 -04:00
struct btree_insert_entry * insert ,
2019-03-29 19:49:17 -04:00
struct bch_fs_usage * fs_usage ,
unsigned flags )
2018-11-05 02:31:48 -05:00
{
struct bch_fs * c = trans - > c ;
struct btree_iter * iter = insert - > iter ;
struct btree * b = iter - > l [ 0 ] . b ;
struct btree_node_iter node_iter = iter - > l [ 0 ] . iter ;
struct bkey_packed * _k ;
2018-11-01 15:10:01 -04:00
if ( ! btree_node_type_needs_gc ( iter - > btree_id ) )
2018-11-22 22:50:35 -05:00
return ;
2019-04-15 14:58:00 -04:00
bch2_mark_key_locked ( c , bkey_i_to_s_c ( insert - > k ) , true ,
bpos_min ( insert - > k - > k . p , b - > key . k . p ) . offset -
bkey_start_offset ( & insert - > k - > k ) ,
fs_usage , trans - > journal_res . seq , flags ) ;
if ( unlikely ( trans - > flags & BTREE_INSERT_NOMARK_OVERWRITES ) )
return ;
2018-11-05 02:31:48 -05:00
2019-04-17 15:49:45 -04:00
/*
* For non extents , we only mark the new key , not the key being
* overwritten - unless we ' re actually deleting :
*/
if ( ( iter - > btree_id = = BTREE_ID_ALLOC | |
iter - > btree_id = = BTREE_ID_EC ) & &
! bkey_deleted ( & insert - > k - > k ) )
return ;
2018-11-05 02:31:48 -05:00
while ( ( _k = bch2_btree_node_iter_peek_filter ( & node_iter , b ,
2018-11-01 15:10:01 -04:00
KEY_TYPE_discard ) ) ) {
2018-11-05 02:31:48 -05:00
struct bkey unpacked ;
2019-04-15 14:58:00 -04:00
struct bkey_s_c k = bkey_disassemble ( b , _k , & unpacked ) ;
2018-11-05 02:31:48 -05:00
2019-04-15 14:58:00 -04:00
if ( ! bch2_mark_overwrite ( trans , iter , k , insert - > k ,
fs_usage , flags ) )
2018-11-05 02:31:48 -05:00
break ;
bch2_btree_node_iter_advance ( & node_iter , b ) ;
}
2019-03-15 18:20:46 -04:00
}
2018-11-05 02:31:48 -05:00
2019-03-15 18:20:46 -04:00
void bch2_trans_fs_usage_apply ( struct btree_trans * trans ,
struct bch_fs_usage * fs_usage )
{
struct bch_fs * c = trans - > c ;
struct btree_insert_entry * i ;
static int warned_disk_usage = 0 ;
u64 disk_res_sectors = trans - > disk_res ? trans - > disk_res - > sectors : 0 ;
char buf [ 200 ] ;
if ( ! bch2_fs_usage_apply ( c , fs_usage , trans - > disk_res ) | |
warned_disk_usage | |
xchg ( & warned_disk_usage , 1 ) )
return ;
2019-01-21 15:32:13 -05:00
2019-03-15 18:20:46 -04:00
pr_err ( " disk usage increased more than %llu sectors reserved " , disk_res_sectors ) ;
trans_for_each_update_iter ( trans , i ) {
struct btree_iter * iter = i - > iter ;
struct btree * b = iter - > l [ 0 ] . b ;
struct btree_node_iter node_iter = iter - > l [ 0 ] . iter ;
struct bkey_packed * _k ;
2019-01-21 15:32:13 -05:00
pr_err ( " while inserting " ) ;
2019-03-15 18:20:46 -04:00
bch2_bkey_val_to_text ( & PBUF ( buf ) , c , bkey_i_to_s_c ( i - > k ) ) ;
2019-01-21 15:32:13 -05:00
pr_err ( " %s " , buf ) ;
pr_err ( " overlapping with " ) ;
node_iter = iter - > l [ 0 ] . iter ;
while ( ( _k = bch2_btree_node_iter_peek_filter ( & node_iter , b ,
KEY_TYPE_discard ) ) ) {
struct bkey unpacked ;
struct bkey_s_c k ;
k = bkey_disassemble ( b , _k , & unpacked ) ;
2018-11-22 22:50:35 -05:00
2019-01-21 15:32:13 -05:00
if ( btree_node_is_extents ( b )
2019-03-15 18:20:46 -04:00
? bkey_cmp ( i - > k - > k . p , bkey_start_pos ( k . k ) ) < = 0
: bkey_cmp ( i - > k - > k . p , k . k - > p ) )
2019-01-21 15:32:13 -05:00
break ;
bch2_bkey_val_to_text ( & PBUF ( buf ) , c , k ) ;
pr_err ( " %s " , buf ) ;
bch2_btree_node_iter_advance ( & node_iter , b ) ;
}
}
2018-11-05 02:31:48 -05:00
}
2017-03-16 22:18:50 -08:00
/* Disk reservations: */
2018-07-23 05:32:01 -04:00
static u64 bch2_recalc_sectors_available ( struct bch_fs * c )
2017-03-16 22:18:50 -08:00
{
2019-02-06 11:42:13 -05:00
percpu_u64_set ( & c - > pcpu - > sectors_available , 0 ) ;
2017-03-16 22:18:50 -08:00
2019-02-14 18:38:52 -05:00
return avail_factor ( __bch2_fs_usage_read_short ( c ) . free ) ;
2017-03-16 22:18:50 -08:00
}
void __bch2_disk_reservation_put ( struct bch_fs * c , struct disk_reservation * res )
{
2018-11-26 00:13:33 -05:00
percpu_down_read ( & c - > mark_lock ) ;
2019-02-14 18:38:52 -05:00
this_cpu_sub ( c - > usage [ 0 ] - > online_reserved , res - > sectors ) ;
2018-11-26 00:13:33 -05:00
percpu_up_read ( & c - > mark_lock ) ;
2017-03-16 22:18:50 -08:00
res - > sectors = 0 ;
}
# define SECTORS_CACHE 1024
int bch2_disk_reservation_add ( struct bch_fs * c , struct disk_reservation * res ,
unsigned sectors , int flags )
{
2018-11-27 08:23:22 -05:00
struct bch_fs_pcpu * pcpu ;
2017-03-16 22:18:50 -08:00
u64 old , v , get ;
s64 sectors_available ;
int ret ;
2018-11-26 00:13:33 -05:00
percpu_down_read ( & c - > mark_lock ) ;
2017-03-16 22:18:50 -08:00
preempt_disable ( ) ;
2018-11-27 08:23:22 -05:00
pcpu = this_cpu_ptr ( c - > pcpu ) ;
2017-03-16 22:18:50 -08:00
2018-11-27 08:23:22 -05:00
if ( sectors < = pcpu - > sectors_available )
2017-03-16 22:18:50 -08:00
goto out ;
v = atomic64_read ( & c - > sectors_available ) ;
do {
old = v ;
get = min ( ( u64 ) sectors + SECTORS_CACHE , old ) ;
if ( get < sectors ) {
preempt_enable ( ) ;
2018-11-26 00:13:33 -05:00
percpu_up_read ( & c - > mark_lock ) ;
2017-03-16 22:18:50 -08:00
goto recalculate ;
}
} while ( ( v = atomic64_cmpxchg ( & c - > sectors_available ,
old , old - get ) ) ! = old ) ;
2018-11-27 08:23:22 -05:00
pcpu - > sectors_available + = get ;
2017-03-16 22:18:50 -08:00
out :
2018-11-27 08:23:22 -05:00
pcpu - > sectors_available - = sectors ;
2019-02-14 18:38:52 -05:00
this_cpu_add ( c - > usage [ 0 ] - > online_reserved , sectors ) ;
2018-11-27 08:23:22 -05:00
res - > sectors + = sectors ;
2017-03-16 22:18:50 -08:00
preempt_enable ( ) ;
2018-11-26 00:13:33 -05:00
percpu_up_read ( & c - > mark_lock ) ;
2017-03-16 22:18:50 -08:00
return 0 ;
recalculate :
2018-11-26 00:13:33 -05:00
percpu_down_write ( & c - > mark_lock ) ;
2019-02-11 22:08:09 -05:00
2018-07-23 05:32:01 -04:00
sectors_available = bch2_recalc_sectors_available ( c ) ;
2017-03-16 22:18:50 -08:00
if ( sectors < = sectors_available | |
( flags & BCH_DISK_RESERVATION_NOFAIL ) ) {
atomic64_set ( & c - > sectors_available ,
max_t ( s64 , 0 , sectors_available - sectors ) ) ;
2019-02-14 18:38:52 -05:00
this_cpu_add ( c - > usage [ 0 ] - > online_reserved , sectors ) ;
2018-11-27 08:23:22 -05:00
res - > sectors + = sectors ;
2017-03-16 22:18:50 -08:00
ret = 0 ;
} else {
atomic64_set ( & c - > sectors_available , sectors_available ) ;
ret = - ENOSPC ;
}
2018-11-26 00:13:33 -05:00
percpu_up_write ( & c - > mark_lock ) ;
2017-03-16 22:18:50 -08:00
return ret ;
}
/* Startup/shutdown: */
static void buckets_free_rcu ( struct rcu_head * rcu )
{
struct bucket_array * buckets =
container_of ( rcu , struct bucket_array , rcu ) ;
kvpfree ( buckets ,
sizeof ( struct bucket_array ) +
buckets - > nbuckets * sizeof ( struct bucket ) ) ;
}
/*
 * Allocate (first call) or resize (later calls) the per-device bucket array,
 * the buckets_nouse / buckets_written bitmaps, the free lists, free_inc and
 * the alloc / copygc heaps of @ca so that they are sized for @nbuckets.
 *
 * Everything new is allocated up front. Only if all allocations succeed are
 * the new structures swapped into @ca; the ones they replace are released
 * afterwards (the old bucket array via RCU). When resizing, the existing
 * bucket and bitmap contents are copied over under gc_lock, bucket_lock and
 * mark_lock. The copygc thread is stopped for the duration and restarted if
 * it was running.
 *
 * Returns 0 on success or -ENOMEM if any allocation failed, in which case
 * @ca is left untouched.
 */
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
	struct bucket_array *buckets = NULL, *old_buckets = NULL;
	unsigned long *buckets_nouse = NULL;
	unsigned long *buckets_written = NULL;
	alloc_fifo	free[RESERVE_NR];
	alloc_fifo	free_inc;
	alloc_heap	alloc_heap;
	copygc_heap	copygc_heap;

	size_t btree_reserve	= DIV_ROUND_UP(BTREE_NODE_RESERVE,
			     ca->mi.bucket_size / c->opts.btree_node_size);
	/* XXX: these should be tunable */
	size_t reserve_none	= max_t(size_t, 1, nbuckets >> 9);
	size_t copygc_reserve	= max_t(size_t, 2, nbuckets >> 7);
	size_t free_inc_nr	= max(max_t(size_t, 1, nbuckets >> 12),
				      btree_reserve * 2);
	bool resize = ca->buckets[0] != NULL,
	     start_copygc = ca->copygc_thread != NULL;
	int ret = -ENOMEM;
	unsigned i;

	/* Zeroed so the cleanup path can free them unconditionally */
	memset(&free,		0, sizeof(free));
	memset(&free_inc,	0, sizeof(free_inc));
	memset(&alloc_heap,	0, sizeof(alloc_heap));
	memset(&copygc_heap,	0, sizeof(copygc_heap));

	if (!(buckets		= kvpmalloc(sizeof(struct bucket_array) +
					    nbuckets * sizeof(struct bucket),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !(buckets_nouse	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
					    sizeof(unsigned long),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !(buckets_written	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
					    sizeof(unsigned long),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
	    !init_fifo(&free[RESERVE_MOVINGGC],
		       copygc_reserve, GFP_KERNEL) ||
	    !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
	    !init_fifo(&free_inc,	free_inc_nr, GFP_KERNEL) ||
	    !init_heap(&alloc_heap,	ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL) ||
	    !init_heap(&copygc_heap,	copygc_reserve, GFP_KERNEL))
		goto err;

	buckets->first_bucket	= ca->mi.first_bucket;
	buckets->nbuckets	= nbuckets;

	bch2_copygc_stop(ca);

	if (resize) {
		down_write(&c->gc_lock);
		down_write(&ca->bucket_lock);
		percpu_down_write(&c->mark_lock);
	}

	old_buckets = bucket_array(ca);

	if (resize) {
		/* Carry over as much existing state as fits in the new size */
		size_t n = min(buckets->nbuckets, old_buckets->nbuckets);

		memcpy(buckets->b,
		       old_buckets->b,
		       n * sizeof(struct bucket));
		memcpy(buckets_nouse,
		       ca->buckets_nouse,
		       BITS_TO_LONGS(n) * sizeof(unsigned long));
		memcpy(buckets_written,
		       ca->buckets_written,
		       BITS_TO_LONGS(n) * sizeof(unsigned long));
	}

	/* Publish the new array; from here on the locals hold the old state */
	rcu_assign_pointer(ca->buckets[0], buckets);
	buckets = old_buckets;

	swap(ca->buckets_nouse, buckets_nouse);
	swap(ca->buckets_written, buckets_written);

	if (resize)
		percpu_up_write(&c->mark_lock);

	spin_lock(&c->freelist_lock);
	for (i = 0; i < RESERVE_NR; i++) {
		fifo_move(&free[i], &ca->free[i]);
		swap(ca->free[i], free[i]);
	}
	fifo_move(&free_inc, &ca->free_inc);
	swap(ca->free_inc, free_inc);
	spin_unlock(&c->freelist_lock);

	/* with gc lock held, alloc_heap can't be in use: */
	swap(ca->alloc_heap, alloc_heap);

	/* and we shut down copygc: */
	swap(ca->copygc_heap, copygc_heap);

	/*
	 * The bitmaps now held in the locals are the old ones; they are freed
	 * below using ca->mi.nbuckets as their size.
	 */
	nbuckets = ca->mi.nbuckets;

	if (resize) {
		up_write(&ca->bucket_lock);
		up_write(&c->gc_lock);
	}

	if (start_copygc &&
	    bch2_copygc_start(c, ca))
		bch_err(ca, " error restarting copygc thread ");

	ret = 0;
err:
	free_heap(&copygc_heap);
	free_heap(&alloc_heap);
	free_fifo(&free_inc);
	for (i = 0; i < RESERVE_NR; i++)
		free_fifo(&free[i]);
	kvpfree(buckets_nouse,
		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
	kvpfree(buckets_written,
		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
	if (buckets) {
		if (ret) {
			/*
			 * Allocation failure: @buckets is the new array, which
			 * was never published (old_buckets is still NULL and
			 * buckets->nbuckets was never filled in). Free it
			 * directly with the size it was allocated with.
			 */
			kvpfree(buckets,
				sizeof(struct bucket_array) +
				nbuckets * sizeof(struct bucket));
		} else {
			/*
			 * Success: @buckets is the array that was replaced;
			 * free it through RCU.
			 */
			call_rcu(&buckets->rcu, buckets_free_rcu);
		}
	}

	return ret;
}
void bch2_dev_buckets_free ( struct bch_dev * ca )
{
unsigned i ;
free_heap ( & ca - > copygc_heap ) ;
free_heap ( & ca - > alloc_heap ) ;
free_fifo ( & ca - > free_inc ) ;
for ( i = 0 ; i < RESERVE_NR ; i + + )
free_fifo ( & ca - > free [ i ] ) ;
2018-11-18 23:20:21 -05:00
kvpfree ( ca - > buckets_written ,
BITS_TO_LONGS ( ca - > mi . nbuckets ) * sizeof ( unsigned long ) ) ;
2018-11-19 01:16:07 -05:00
kvpfree ( ca - > buckets_nouse ,
2017-03-16 22:18:50 -08:00
BITS_TO_LONGS ( ca - > mi . nbuckets ) * sizeof ( unsigned long ) ) ;
2018-07-23 05:32:01 -04:00
kvpfree ( rcu_dereference_protected ( ca - > buckets [ 0 ] , 1 ) ,
2017-03-16 22:18:50 -08:00
sizeof ( struct bucket_array ) +
ca - > mi . nbuckets * sizeof ( struct bucket ) ) ;
2018-07-23 05:32:01 -04:00
free_percpu ( ca - > usage [ 0 ] ) ;
2017-03-16 22:18:50 -08:00
}
int bch2_dev_buckets_alloc ( struct bch_fs * c , struct bch_dev * ca )
{
2018-07-23 05:32:01 -04:00
if ( ! ( ca - > usage [ 0 ] = alloc_percpu ( struct bch_dev_usage ) ) )
2017-03-16 22:18:50 -08:00
return - ENOMEM ;
return bch2_dev_buckets_resize ( c , ca , ca - > mi . nbuckets ) ; ;
}