2018-10-06 00:46:55 -04:00
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2012 Google, Inc.
 *
 * Foreground allocator code: allocate buckets from freelist, and allocate in
 * sector granularity from writepoints.
 *
 * bch2_bucket_alloc() allocates a single bucket from a specific device.
 *
 * bch2_bucket_alloc_set() allocates one or more buckets from different devices
 * in a given filesystem.
 */
# include "bcachefs.h"
# include "alloc_background.h"
# include "alloc_foreground.h"
2022-03-17 20:51:27 -04:00
# include "backpointers.h"
2022-01-09 20:48:31 -05:00
# include "btree_iter.h"
# include "btree_update.h"
2018-10-06 00:46:55 -04:00
# include "btree_gc.h"
# include "buckets.h"
2022-01-09 20:48:31 -05:00
# include "buckets_waiting_for_journal.h"
2018-10-06 00:46:55 -04:00
# include "clock.h"
# include "debug.h"
# include "disk_groups.h"
2018-11-01 15:13:19 -04:00
# include "ec.h"
2022-01-09 20:48:31 -05:00
# include "error.h"
2018-10-06 00:46:55 -04:00
# include "io.h"
2022-01-09 20:48:31 -05:00
# include "journal.h"
2022-07-17 21:40:39 -04:00
# include "movinggc.h"
2022-12-14 20:52:11 -05:00
# include "nocow_locking.h"
2018-10-06 00:46:55 -04:00
# include "trace.h"
# include <linux/math64.h>
# include <linux/rculist.h>
# include <linux/rcupdate.h>
2022-03-13 19:27:55 -04:00
/*
 * Names of the allocation reserves, one string per entry of
 * BCH_ALLOC_RESERVES(), terminated by a NULL sentinel.
 */
const char * const bch2_alloc_reserves[] = {
#define x(t) #t,
	BCH_ALLOC_RESERVES()
#undef x
	NULL
};
2018-10-06 00:46:55 -04:00
/*
 * Open buckets represent a bucket that's currently being allocated from. They
 * serve two purposes:
 *
 *  - They track buckets that have been partially allocated, allowing for
 *    sub-bucket sized allocations - they're used by the sector allocator below
 *
 *  - They provide a reference to the buckets they own that mark and sweep GC
 *    can find, until the new allocation has a pointer to it inserted into the
 *    btree
 *
 * When allocating some space with the sector allocator, the allocation comes
 * with a reference to an open bucket - the caller is required to put that
 * reference _after_ doing the index update that makes its allocation reachable.
 */
2022-01-09 20:48:31 -05:00
void bch2_reset_alloc_cursors ( struct bch_fs * c )
{
struct bch_dev * ca ;
unsigned i ;
rcu_read_lock ( ) ;
for_each_member_device_rcu ( ca , c , i , NULL )
ca - > alloc_cursor = 0 ;
rcu_read_unlock ( ) ;
}
2021-12-25 21:43:29 -05:00
static void bch2_open_bucket_hash_add ( struct bch_fs * c , struct open_bucket * ob )
{
open_bucket_idx_t idx = ob - c - > open_buckets ;
open_bucket_idx_t * slot = open_bucket_hashslot ( c , ob - > dev , ob - > bucket ) ;
ob - > hash = * slot ;
* slot = idx ;
}
static void bch2_open_bucket_hash_remove ( struct bch_fs * c , struct open_bucket * ob )
{
open_bucket_idx_t idx = ob - c - > open_buckets ;
open_bucket_idx_t * slot = open_bucket_hashslot ( c , ob - > dev , ob - > bucket ) ;
while ( * slot ! = idx ) {
BUG_ON ( ! * slot ) ;
slot = & c - > open_buckets [ * slot ] . hash ;
}
* slot = ob - > hash ;
ob - > hash = 0 ;
}
2018-10-06 00:46:55 -04:00
void __bch2_open_bucket_put ( struct bch_fs * c , struct open_bucket * ob )
{
2021-12-25 21:21:46 -05:00
struct bch_dev * ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
2018-10-06 00:46:55 -04:00
2018-11-01 15:13:19 -04:00
if ( ob - > ec ) {
bch2_ec_bucket_written ( c , ob ) ;
return ;
}
2018-11-26 00:13:33 -05:00
percpu_down_read ( & c - > mark_lock ) ;
2018-10-06 00:46:55 -04:00
spin_lock ( & ob - > lock ) ;
ob - > valid = false ;
2021-12-25 21:21:46 -05:00
ob - > data_type = 0 ;
2018-10-06 00:46:55 -04:00
spin_unlock ( & ob - > lock ) ;
2018-11-26 00:13:33 -05:00
percpu_up_read ( & c - > mark_lock ) ;
2018-10-06 00:46:55 -04:00
spin_lock ( & c - > freelist_lock ) ;
2021-12-25 21:43:29 -05:00
bch2_open_bucket_hash_remove ( c , ob ) ;
2018-10-06 00:46:55 -04:00
ob - > freelist = c - > open_buckets_freelist ;
c - > open_buckets_freelist = ob - c - > open_buckets ;
2021-04-13 09:49:23 -04:00
2018-10-06 00:46:55 -04:00
c - > open_buckets_nr_free + + ;
2021-04-13 09:49:23 -04:00
ca - > nr_open_buckets - - ;
2018-10-06 00:46:55 -04:00
spin_unlock ( & c - > freelist_lock ) ;
closure_wake_up ( & c - > open_buckets_wait ) ;
}
2018-11-01 15:13:19 -04:00
void bch2_open_bucket_write_error ( struct bch_fs * c ,
struct open_buckets * obs ,
unsigned dev )
{
struct open_bucket * ob ;
unsigned i ;
open_bucket_for_each ( c , obs , ob , i )
2021-12-25 21:21:46 -05:00
if ( ob - > dev = = dev & & ob - > ec )
2018-11-01 15:13:19 -04:00
bch2_ec_bucket_cancel ( c , ob ) ;
}
2018-10-06 00:46:55 -04:00
static struct open_bucket * bch2_open_bucket_alloc ( struct bch_fs * c )
{
struct open_bucket * ob ;
BUG_ON ( ! c - > open_buckets_freelist | | ! c - > open_buckets_nr_free ) ;
ob = c - > open_buckets + c - > open_buckets_freelist ;
c - > open_buckets_freelist = ob - > freelist ;
atomic_set ( & ob - > pin , 1 ) ;
2021-12-25 21:21:46 -05:00
ob - > data_type = 0 ;
2018-10-06 00:46:55 -04:00
c - > open_buckets_nr_free - - ;
return ob ;
}
2018-10-06 04:12:42 -04:00
static void open_bucket_free_unused ( struct bch_fs * c ,
2020-07-21 17:12:39 -04:00
struct write_point * wp ,
struct open_bucket * ob )
2018-10-06 04:12:42 -04:00
{
2021-12-25 21:21:46 -05:00
struct bch_dev * ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
bool may_realloc = wp - > data_type = = BCH_DATA_user ;
2018-10-06 04:12:42 -04:00
2020-07-21 17:12:39 -04:00
BUG_ON ( ca - > open_buckets_partial_nr >
2018-10-06 04:12:42 -04:00
ARRAY_SIZE ( ca - > open_buckets_partial ) ) ;
2018-11-01 15:13:19 -04:00
if ( ca - > open_buckets_partial_nr <
ARRAY_SIZE ( ca - > open_buckets_partial ) & &
may_realloc ) {
2018-10-06 04:12:42 -04:00
spin_lock ( & c - > freelist_lock ) ;
ob - > on_partial_list = true ;
ca - > open_buckets_partial [ ca - > open_buckets_partial_nr + + ] =
ob - c - > open_buckets ;
spin_unlock ( & c - > freelist_lock ) ;
closure_wake_up ( & c - > open_buckets_wait ) ;
closure_wake_up ( & c - > freelist_wait ) ;
} else {
bch2_open_bucket_put ( c , ob ) ;
}
}
2018-10-06 00:46:55 -04:00
/* _only_ for allocating the journal on a new device: */
long bch2_bucket_alloc_new_fs ( struct bch_dev * ca )
{
2021-12-24 04:22:20 -05:00
while ( ca - > new_fs_bucket_idx < ca - > mi . nbuckets ) {
u64 b = ca - > new_fs_bucket_idx + + ;
if ( ! is_superblock_bucket ( ca , b ) & &
( ! ca - > buckets_nouse | | ! test_bit ( b , ca - > buckets_nouse ) ) )
return b ;
}
return - 1 ;
2018-10-06 00:46:55 -04:00
}
static inline unsigned open_buckets_reserved ( enum alloc_reserve reserve )
{
switch ( reserve ) {
2022-03-13 19:27:55 -04:00
case RESERVE_btree :
case RESERVE_btree_movinggc :
2018-10-06 00:46:55 -04:00
return 0 ;
2022-03-13 19:27:55 -04:00
case RESERVE_movinggc :
2021-01-07 17:18:14 -05:00
return OPEN_BUCKETS_COUNT / 4 ;
2018-10-06 00:46:55 -04:00
default :
2020-06-09 15:46:22 -04:00
return OPEN_BUCKETS_COUNT / 2 ;
2018-10-06 00:46:55 -04:00
}
}
2022-01-09 20:48:31 -05:00
/*
 * Try to turn @bucket on device @ca into an open bucket.
 *
 * Returns:
 *  - NULL if the bucket has to be skipped; the matching skipped_* counter in
 *    @s is bumped so the caller can see why
 *  - ERR_PTR(-BCH_ERR_open_buckets_empty) if too few open buckets are free
 *    for @reserve; if @cl is non-NULL it is added to c->open_buckets_wait
 *  - the newly initialised open bucket otherwise
 */
static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
					      u64 bucket,
					      enum alloc_reserve reserve,
					      const struct bch_alloc_v4 *a,
					      struct bucket_alloc_state *s,
					      struct closure *cl)
{
	struct open_bucket *ob;

	if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
		s->skipped_nouse++;
		return NULL;
	}

	if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
		s->skipped_open++;
		return NULL;
	}

	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
					     c->journal.flushed_seq_ondisk,
					     ca->dev_idx, bucket)) {
		s->skipped_need_journal_commit++;
		return NULL;
	}

	/*
	 * Nocow: the allocator needs to check for in-place writes in flight
	 * to a given bucket before giving it out.
	 */
	if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
		s->skipped_nocow++;
		return NULL;
	}

	spin_lock(&c->freelist_lock);

	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
		if (cl)
			closure_wait(&c->open_buckets_wait, cl);

		/* Remember when we first blocked, for the time stats below. */
		if (!c->blocked_allocate_open_bucket)
			c->blocked_allocate_open_bucket = local_clock();

		spin_unlock(&c->freelist_lock);
		return ERR_PTR(-BCH_ERR_open_buckets_empty);
	}

	/* Recheck under lock: */
	if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
		spin_unlock(&c->freelist_lock);
		s->skipped_open++;
		return NULL;
	}

	ob = bch2_open_bucket_alloc(c);

	spin_lock(&ob->lock);

	ob->valid		= true;
	ob->sectors_free	= ca->mi.bucket_size;
	ob->alloc_reserve	= reserve;
	ob->dev			= ca->dev_idx;
	ob->gen			= a->gen;
	ob->bucket		= bucket;

	spin_unlock(&ob->lock);

	ca->nr_open_buckets++;
	bch2_open_bucket_hash_add(c, ob);

	/* An allocation succeeded: account for any time spent blocked. */
	if (c->blocked_allocate_open_bucket) {
		bch2_time_stats_update(
			&c->times[BCH_TIME_blocked_allocate_open_bucket],
			c->blocked_allocate_open_bucket);
		c->blocked_allocate_open_bucket = 0;
	}

	if (c->blocked_allocate) {
		bch2_time_stats_update(
			&c->times[BCH_TIME_blocked_allocate],
			c->blocked_allocate);
		c->blocked_allocate = 0;
	}

	spin_unlock(&c->freelist_lock);

	return ob;
}
/*
 * Try to allocate the bucket described by a freespace btree entry.
 *
 * @free_entry packs the bucket number in its low 56 bits and the generation
 * bits in its top 8 bits. The matching alloc key is looked up and checked
 * against the freespace entry before the bucket is handed to
 * __try_alloc_bucket().
 *
 * Returns:
 *  - an open bucket on success
 *  - NULL if the bucket should be skipped (not free or may still hold data
 *    while fsck passes are incomplete, or skipped by __try_alloc_bucket())
 *  - ERR_PTR(-EIO) if the freespace btree is inconsistent with the alloc
 *    btree, or another ERR_PTR() propagated from the btree lookups
 */
static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
					    enum alloc_reserve reserve, u64 free_entry,
					    struct bucket_alloc_state *s,
					    struct bkey_s_c freespace_k,
					    struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct bkey_s_c k;
	struct open_bucket *ob;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	u64 b = free_entry & ~(~0ULL << 56);
	unsigned genbits = free_entry >> 56;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
		prt_printf(&buf, " freespace btree has bucket outside allowed range %u-%llu \n "
			   " freespace key ",
			   ca->mi.first_bucket, ca->mi.nbuckets);
		bch2_bkey_val_to_text(&buf, c, freespace_k);
		bch2_trans_inconsistent(trans, " %s ", buf.buf);
		ob = ERR_PTR(-EIO);
		goto err;
	}

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret) {
		ob = ERR_PTR(ret);
		goto err;
	}

	a = bch2_alloc_to_v4(k, &a_convert);

	if (a->data_type != BCH_DATA_free) {
		/* Until the alloc check has run this is not an error: just skip. */
		if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
			ob = NULL;
			goto err;
		}

		prt_printf(&buf, " non free bucket in freespace btree \n "
			   " freespace key ");
		bch2_bkey_val_to_text(&buf, c, freespace_k);
		prt_printf(&buf, " \n ");
		bch2_bkey_val_to_text(&buf, c, k);
		bch2_trans_inconsistent(trans, " %s ", buf.buf);
		ob = ERR_PTR(-EIO);
		goto err;
	}

	if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
	    test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
		prt_printf(&buf, " bucket in freespace btree with wrong genbits (got %u should be %llu) \n "
			   " freespace key ",
			   genbits, alloc_freespace_genbits(*a) >> 56);
		bch2_bkey_val_to_text(&buf, c, freespace_k);
		prt_printf(&buf, " \n ");
		bch2_bkey_val_to_text(&buf, c, k);
		bch2_trans_inconsistent(trans, " %s ", buf.buf);
		ob = ERR_PTR(-EIO);
		goto err;
	}

	if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
		struct bch_backpointer bp;
		u64 bp_offset = 0;

		ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
						&bp_offset, &bp,
						BTREE_ITER_NOPRESERVE);
		if (ret) {
			ob = ERR_PTR(ret);
			goto err;
		}

		if (bp_offset != U64_MAX) {
			/*
			 * Bucket may have data in it - we don't call
			 * bch2_trans_inconsistent() because fsck hasn't
			 * finished yet
			 */
			ob = NULL;
			goto err;
		}
	}

	ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl);
	if (!ob)
		iter.path->preserve = false;
err:
	/*
	 * The out-of-range check above jumps here before the iterator has been
	 * initialised, leaving it zero-filled; only hand it to
	 * set_btree_iter_dontneed() once it actually has a path.
	 */
	if (iter.path)
		set_btree_iter_dontneed(&iter);
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ob;
}
static struct open_bucket * try_alloc_partial_bucket ( struct bch_fs * c , struct bch_dev * ca ,
enum alloc_reserve reserve )
{
struct open_bucket * ob ;
int i ;
spin_lock ( & c - > freelist_lock ) ;
for ( i = ca - > open_buckets_partial_nr - 1 ; i > = 0 ; - - i ) {
ob = c - > open_buckets + ca - > open_buckets_partial [ i ] ;
if ( reserve < = ob - > alloc_reserve ) {
array_remove_item ( ca - > open_buckets_partial ,
ca - > open_buckets_partial_nr ,
i ) ;
ob - > on_partial_list = false ;
ob - > alloc_reserve = reserve ;
spin_unlock ( & c - > freelist_lock ) ;
return ob ;
}
}
spin_unlock ( & c - > freelist_lock ) ;
return NULL ;
}
/*
* This path is for before the freespace btree is initialized :
*
* If ca - > new_fs_bucket_idx is nonzero , we haven ' t yet marked superblock &
* journal buckets - journal buckets will be < ca - > new_fs_bucket_idx
*/
static noinline struct open_bucket *
bch2_bucket_alloc_early ( struct btree_trans * trans ,
struct bch_dev * ca ,
enum alloc_reserve reserve ,
2022-11-04 16:06:55 -04:00
struct bucket_alloc_state * s ,
2022-01-09 20:48:31 -05:00
struct closure * cl )
{
struct btree_iter iter ;
struct bkey_s_c k ;
struct open_bucket * ob = NULL ;
u64 alloc_start = max_t ( u64 , ca - > mi . first_bucket , ca - > new_fs_bucket_idx ) ;
u64 alloc_cursor = max ( alloc_start , READ_ONCE ( ca - > alloc_cursor ) ) ;
int ret ;
again :
2022-07-17 02:46:46 -04:00
for_each_btree_key_norestart ( trans , iter , BTREE_ID_alloc , POS ( ca - > dev_idx , alloc_cursor ) ,
2022-01-09 20:48:31 -05:00
BTREE_ITER_SLOTS , k , ret ) {
2023-01-30 20:58:43 -05:00
struct bch_alloc_v4 a_convert ;
const struct bch_alloc_v4 * a ;
2022-01-09 20:48:31 -05:00
2022-11-24 03:12:22 -05:00
if ( bkey_ge ( k . k - > p , POS ( ca - > dev_idx , ca - > mi . nbuckets ) ) )
2022-01-09 20:48:31 -05:00
break ;
if ( ca - > new_fs_bucket_idx & &
is_superblock_bucket ( ca , k . k - > p . offset ) )
continue ;
2023-01-30 20:58:43 -05:00
a = bch2_alloc_to_v4 ( k , & a_convert ) ;
2022-01-09 20:48:31 -05:00
2023-01-30 20:58:43 -05:00
if ( a - > data_type ! = BCH_DATA_free )
2022-01-09 20:48:31 -05:00
continue ;
2022-11-04 16:06:55 -04:00
s - > buckets_seen + + ;
2022-01-09 20:48:31 -05:00
2023-01-30 20:58:43 -05:00
ob = __try_alloc_bucket ( trans - > c , ca , k . k - > p . offset , reserve , a , s , cl ) ;
2022-01-09 20:48:31 -05:00
if ( ob )
break ;
}
bch2_trans_iter_exit ( trans , & iter ) ;
ca - > alloc_cursor = alloc_cursor ;
2023-01-23 20:28:59 -05:00
if ( ! ob & & ret )
ob = ERR_PTR ( ret ) ;
2022-01-09 20:48:31 -05:00
if ( ! ob & & alloc_cursor > alloc_start ) {
alloc_cursor = alloc_start ;
goto again ;
}
2023-01-23 20:28:59 -05:00
return ob ;
2022-01-09 20:48:31 -05:00
}
static struct open_bucket * bch2_bucket_alloc_freelist ( struct btree_trans * trans ,
struct bch_dev * ca ,
enum alloc_reserve reserve ,
2022-11-04 16:06:55 -04:00
struct bucket_alloc_state * s ,
2022-01-09 20:48:31 -05:00
struct closure * cl )
{
struct btree_iter iter ;
struct bkey_s_c k ;
struct open_bucket * ob = NULL ;
u64 alloc_start = max_t ( u64 , ca - > mi . first_bucket , READ_ONCE ( ca - > alloc_cursor ) ) ;
u64 alloc_cursor = alloc_start ;
int ret ;
BUG_ON ( ca - > new_fs_bucket_idx ) ;
again :
for_each_btree_key_norestart ( trans , iter , BTREE_ID_freespace ,
POS ( ca - > dev_idx , alloc_cursor ) , 0 , k , ret ) {
if ( k . k - > p . inode ! = ca - > dev_idx )
break ;
for ( alloc_cursor = max ( alloc_cursor , bkey_start_offset ( k . k ) ) ;
alloc_cursor < k . k - > p . offset ;
alloc_cursor + + ) {
2022-07-17 23:06:38 -04:00
ret = btree_trans_too_many_iters ( trans ) ;
if ( ret ) {
ob = ERR_PTR ( ret ) ;
2022-01-09 20:48:31 -05:00
break ;
}
2022-11-04 16:06:55 -04:00
s - > buckets_seen + + ;
2022-01-09 20:48:31 -05:00
ob = try_alloc_bucket ( trans , ca , reserve ,
2022-11-04 16:06:55 -04:00
alloc_cursor , s , k , cl ) ;
2022-01-09 20:48:31 -05:00
if ( ob ) {
iter . path - > preserve = false ;
break ;
}
}
2022-07-17 23:06:38 -04:00
if ( ob | | ret )
2022-01-09 20:48:31 -05:00
break ;
}
bch2_trans_iter_exit ( trans , & iter ) ;
ca - > alloc_cursor = alloc_cursor ;
if ( ! ob & & ret )
ob = ERR_PTR ( ret ) ;
if ( ! ob & & alloc_start > ca - > mi . first_bucket ) {
alloc_cursor = alloc_start = ca - > mi . first_bucket ;
goto again ;
}
return ob ;
}
/**
* bch_bucket_alloc - allocate a single bucket from a specific device
*
* Returns index of bucket on success , 0 on failure
2022-10-19 18:31:33 -04:00
*/
2022-01-09 20:48:31 -05:00
static struct open_bucket * bch2_bucket_alloc_trans ( struct btree_trans * trans ,
struct bch_dev * ca ,
enum alloc_reserve reserve ,
bool may_alloc_partial ,
2022-10-21 14:01:19 -04:00
struct closure * cl ,
struct bch_dev_usage * usage )
2022-01-09 20:48:31 -05:00
{
struct bch_fs * c = trans - > c ;
struct open_bucket * ob = NULL ;
2022-12-11 19:14:30 -05:00
bool freespace = READ_ONCE ( ca - > mi . freespace_initialized ) ;
2022-04-01 01:29:59 -04:00
u64 avail ;
2022-11-04 16:06:55 -04:00
struct bucket_alloc_state s = { 0 } ;
2022-04-01 01:29:59 -04:00
bool waiting = false ;
2022-01-09 20:48:31 -05:00
again :
2022-10-21 14:01:19 -04:00
bch2_dev_usage_read_fast ( ca , usage ) ;
avail = dev_buckets_free ( ca , * usage , reserve ) ;
2022-04-01 01:29:59 -04:00
2022-10-21 14:01:19 -04:00
if ( usage - > d [ BCH_DATA_need_discard ] . buckets > avail )
2022-04-01 01:29:59 -04:00
bch2_do_discards ( c ) ;
2022-10-21 14:01:19 -04:00
if ( usage - > d [ BCH_DATA_need_gc_gens ] . buckets > avail )
2022-04-01 01:29:59 -04:00
bch2_do_gc_gens ( c ) ;
2022-10-21 14:01:19 -04:00
if ( should_invalidate_buckets ( ca , * usage ) )
2022-04-01 01:29:59 -04:00
bch2_do_invalidates ( c ) ;
2022-01-09 20:48:31 -05:00
if ( ! avail ) {
2022-04-01 01:29:59 -04:00
if ( cl & & ! waiting ) {
2022-01-09 20:48:31 -05:00
closure_wait ( & c - > freelist_wait , cl ) ;
2022-04-01 01:29:59 -04:00
waiting = true ;
goto again ;
2022-01-09 20:48:31 -05:00
}
if ( ! c - > blocked_allocate )
c - > blocked_allocate = local_clock ( ) ;
2022-07-17 22:31:21 -04:00
ob = ERR_PTR ( - BCH_ERR_freelist_empty ) ;
2022-01-09 20:48:31 -05:00
goto err ;
}
2022-04-01 01:29:59 -04:00
if ( waiting )
closure_wake_up ( & c - > freelist_wait ) ;
if ( may_alloc_partial ) {
ob = try_alloc_partial_bucket ( c , ca , reserve ) ;
if ( ob )
return ob ;
}
2022-12-11 19:14:30 -05:00
alloc :
ob = likely ( freespace )
2022-11-04 16:06:55 -04:00
? bch2_bucket_alloc_freelist ( trans , ca , reserve , & s , cl )
: bch2_bucket_alloc_early ( trans , ca , reserve , & s , cl ) ;
if ( s . skipped_need_journal_commit * 2 > avail )
2022-01-09 20:48:31 -05:00
bch2_journal_flush_async ( & c - > journal , NULL ) ;
2022-12-11 19:14:30 -05:00
if ( ! ob & & freespace & & ! test_bit ( BCH_FS_CHECK_ALLOC_DONE , & c - > flags ) ) {
freespace = false ;
goto alloc ;
}
2022-01-09 20:48:31 -05:00
err :
if ( ! ob )
2022-07-17 22:31:21 -04:00
ob = ERR_PTR ( - BCH_ERR_no_buckets_found ) ;
2022-01-09 20:48:31 -05:00
2022-08-27 12:48:36 -04:00
if ( ! IS_ERR ( ob ) )
trace_and_count ( c , bucket_alloc , ca ,
bch2_alloc_reserves [ reserve ] ,
2022-09-26 18:18:00 -04:00
may_alloc_partial ,
ob - > bucket ,
2022-10-21 14:01:19 -04:00
usage - > d [ BCH_DATA_free ] . buckets ,
2022-08-27 12:48:36 -04:00
avail ,
bch2_copygc_wait_amount ( c ) ,
c - > copygc_wait - atomic64_read ( & c - > io_clock [ WRITE ] . now ) ,
2022-11-04 16:06:55 -04:00
& s ,
2022-08-27 12:48:36 -04:00
cl = = NULL ,
" " ) ;
2022-10-17 07:07:28 -04:00
else if ( ! bch2_err_matches ( PTR_ERR ( ob ) , BCH_ERR_transaction_restart ) )
2022-08-27 12:48:36 -04:00
trace_and_count ( c , bucket_alloc_fail , ca ,
bch2_alloc_reserves [ reserve ] ,
2022-09-26 18:18:00 -04:00
may_alloc_partial ,
0 ,
2022-10-21 14:01:19 -04:00
usage - > d [ BCH_DATA_free ] . buckets ,
2022-08-27 12:48:36 -04:00
avail ,
bch2_copygc_wait_amount ( c ) ,
c - > copygc_wait - atomic64_read ( & c - > io_clock [ WRITE ] . now ) ,
2022-11-04 16:06:55 -04:00
& s ,
2022-08-27 12:48:36 -04:00
cl = = NULL ,
bch2_err_str ( PTR_ERR ( ob ) ) ) ;
2022-01-09 20:48:31 -05:00
return ob ;
}
2018-10-06 00:46:55 -04:00
2022-01-09 20:48:31 -05:00
/*
 * Allocate a single bucket from device @ca.
 *
 * Wrapper that runs bch2_bucket_alloc_trans() through bch2_trans_do().
 * Returns whatever the last run of bch2_bucket_alloc_trans() returned: an
 * open bucket or an ERR_PTR().
 */
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
				      enum alloc_reserve reserve,
				      bool may_alloc_partial,
				      struct closure *cl)
{
	struct bch_dev_usage usage;
	struct open_bucket *ob;

	bch2_trans_do(c, NULL, NULL, 0,
		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
								   may_alloc_partial,
								   cl, &usage)));

	return ob;
}
2018-11-01 15:13:19 -04:00
static int __dev_stripe_cmp ( struct dev_stripe_state * stripe ,
unsigned l , unsigned r )
2018-10-06 00:46:55 -04:00
{
2018-11-01 15:13:19 -04:00
return ( ( stripe - > next_alloc [ l ] > stripe - > next_alloc [ r ] ) -
( stripe - > next_alloc [ l ] < stripe - > next_alloc [ r ] ) ) ;
2018-10-06 00:46:55 -04:00
}
2018-11-01 15:13:19 -04:00
# define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)
2018-10-06 00:46:55 -04:00
2018-11-01 15:13:19 -04:00
struct dev_alloc_list bch2_dev_alloc_list ( struct bch_fs * c ,
struct dev_stripe_state * stripe ,
struct bch_devs_mask * devs )
2018-10-06 00:46:55 -04:00
{
struct dev_alloc_list ret = { . nr = 0 } ;
unsigned i ;
2020-07-06 22:33:54 -04:00
for_each_set_bit ( i , devs - > d , BCH_SB_MEMBERS_MAX )
2018-10-06 00:46:55 -04:00
ret . devs [ ret . nr + + ] = i ;
2018-11-01 15:13:19 -04:00
bubble_sort ( ret . devs , ret . nr , dev_stripe_cmp ) ;
2018-10-06 00:46:55 -04:00
return ret ;
}
2022-10-21 14:01:19 -04:00
static inline void bch2_dev_stripe_increment_inlined ( struct bch_dev * ca ,
struct dev_stripe_state * stripe ,
struct bch_dev_usage * usage )
2018-10-06 00:46:55 -04:00
{
2018-11-01 15:13:19 -04:00
u64 * v = stripe - > next_alloc + ca - > dev_idx ;
2022-01-09 20:48:31 -05:00
u64 free_space = dev_buckets_available ( ca , RESERVE_none ) ;
2018-10-06 00:46:55 -04:00
u64 free_space_inv = free_space
? div64_u64 ( 1ULL < < 48 , free_space )
: 1ULL < < 48 ;
u64 scale = * v / 4 ;
if ( * v + free_space_inv > = * v )
* v + = free_space_inv ;
else
* v = U64_MAX ;
2018-11-01 15:13:19 -04:00
for ( v = stripe - > next_alloc ;
v < stripe - > next_alloc + ARRAY_SIZE ( stripe - > next_alloc ) ; v + + )
2018-10-06 00:46:55 -04:00
* v = * v < scale ? 0 : * v - scale ;
}
2022-10-21 14:01:19 -04:00
void bch2_dev_stripe_increment ( struct bch_dev * ca ,
struct dev_stripe_state * stripe )
{
struct bch_dev_usage usage ;
bch2_dev_usage_read_fast ( ca , & usage ) ;
bch2_dev_stripe_increment_inlined ( ca , stripe , & usage ) ;
}
2018-11-01 15:13:19 -04:00
/* Flags for the bucket allocation helpers below: */

/* Allow reusing a bucket from a device's partial list */
#define BUCKET_MAY_ALLOC_PARTIAL	(1 << 0)
/* Count a new bucket as its device's durability instead of as 1 replica */
#define BUCKET_ALLOC_USE_DURABILITY	(1 << 1)
2019-06-10 11:31:07 -04:00
/*
 * Record that @ob has been picked for the allocation being assembled in @ptrs.
 *
 * The bucket's device is removed from @devs_may_alloc, *@nr_effective grows by
 * the device's durability (or by 1 without BUCKET_ALLOC_USE_DURABILITY), and
 * *@have_cache is set if the device has zero durability.
 */
static void add_new_bucket(struct bch_fs *c,
			   struct open_buckets *ptrs,
			   struct bch_devs_mask *devs_may_alloc,
			   unsigned *nr_effective,
			   bool *have_cache,
			   unsigned flags,
			   struct open_bucket *ob)
{
	unsigned durability = bch_dev_bkey_exists(c, ob->dev)->mi.durability;

	__clear_bit(ob->dev, devs_may_alloc->d);

	if (flags & BUCKET_ALLOC_USE_DURABILITY)
		*nr_effective += durability;
	else
		*nr_effective += 1;

	if (!durability)
		*have_cache = true;

	ob_push(c, ptrs, ob);
}
2023-02-17 20:50:55 -05:00
/*
 * Allocate buckets from the devices in @devs_may_alloc, in striping order,
 * until *@nr_effective reaches @nr_replicas.
 *
 * Each allocated bucket is added to @ptrs via add_new_bucket(). Zero
 * durability devices are skipped once *@have_cache is set.
 *
 * Returns 0 once enough replicas were allocated. Otherwise returns the error
 * of the last failed device allocation, or -BCH_ERR_insufficient_devices if
 * no allocation failed but the devices ran out. A transaction restart, or any
 * failure while @cl is non-NULL, ends the loop immediately.
 */
int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
		      struct open_buckets *ptrs,
		      struct dev_stripe_state *stripe,
		      struct bch_devs_mask *devs_may_alloc,
		      unsigned nr_replicas,
		      unsigned *nr_effective,
		      bool *have_cache,
		      enum alloc_reserve reserve,
		      unsigned flags,
		      struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct dev_alloc_list devs_sorted =
		bch2_dev_alloc_list(c, stripe, devs_may_alloc);
	int ret = -BCH_ERR_insufficient_devices;
	unsigned i;

	BUG_ON(*nr_effective >= nr_replicas);

	for (i = 0; i < devs_sorted.nr; i++) {
		struct bch_dev_usage usage;
		struct open_bucket *ob;
		struct bch_dev *ca;
		unsigned dev = devs_sorted.devs[i];

		/* Pin the device so it can't go away while we allocate. */
		rcu_read_lock();
		ca = rcu_dereference(c->devs[dev]);
		if (ca)
			percpu_ref_get(&ca->ref);
		rcu_read_unlock();

		if (!ca)
			continue;

		if (!ca->mi.durability && *have_cache) {
			percpu_ref_put(&ca->ref);
			continue;
		}

		ob = bch2_bucket_alloc_trans(trans, ca, reserve,
				flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
		if (!IS_ERR(ob))
			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
		percpu_ref_put(&ca->ref);

		if (IS_ERR(ob)) {
			ret = PTR_ERR(ob);
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
				break;
			continue;
		}

		add_new_bucket(c, ptrs, devs_may_alloc,
			       nr_effective, have_cache, flags, ob);

		if (*nr_effective >= nr_replicas) {
			ret = 0;
			break;
		}
	}

	return ret;
}
2018-11-01 15:13:19 -04:00
/* Allocate from stripes: */
/*
* if we can ' t allocate a new stripe because there are already too many
* partially filled stripes , force allocating from an existing stripe even when
* it ' s to a device we don ' t want :
*/
2023-02-17 20:50:55 -05:00
static int bucket_alloc_from_stripe ( struct btree_trans * trans ,
2020-12-15 12:38:17 -05:00
struct open_buckets * ptrs ,
struct write_point * wp ,
struct bch_devs_mask * devs_may_alloc ,
u16 target ,
unsigned erasure_code ,
unsigned nr_replicas ,
unsigned * nr_effective ,
bool * have_cache ,
unsigned flags ,
struct closure * cl )
2018-11-01 15:13:19 -04:00
{
2023-02-17 20:50:55 -05:00
struct bch_fs * c = trans - > c ;
2018-11-01 15:13:19 -04:00
struct dev_alloc_list devs_sorted ;
struct ec_stripe_head * h ;
struct open_bucket * ob ;
struct bch_dev * ca ;
unsigned i , ec_idx ;
if ( ! erasure_code )
2020-12-15 12:38:17 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
if ( nr_replicas < 2 )
2020-12-15 12:38:17 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
if ( ec_open_bucket ( c , ptrs ) )
2020-12-15 12:38:17 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
2023-02-17 20:50:55 -05:00
h = bch2_ec_stripe_head_get ( trans , target , 0 , nr_replicas - 1 ,
2020-12-15 12:53:30 -05:00
wp = = & c - > copygc_write_point ,
cl ) ;
2020-12-15 12:38:17 -05:00
if ( IS_ERR ( h ) )
2023-02-17 20:50:55 -05:00
return PTR_ERR ( h ) ;
2018-11-01 15:13:19 -04:00
if ( ! h )
2020-12-15 12:38:17 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
devs_sorted = bch2_dev_alloc_list ( c , & wp - > stripe , devs_may_alloc ) ;
for ( i = 0 ; i < devs_sorted . nr ; i + + )
2021-01-18 23:26:42 -05:00
for ( ec_idx = 0 ; ec_idx < h - > s - > nr_data ; ec_idx + + ) {
if ( ! h - > s - > blocks [ ec_idx ] )
continue ;
ob = c - > open_buckets + h - > s - > blocks [ ec_idx ] ;
2021-12-25 21:21:46 -05:00
if ( ob - > dev = = devs_sorted . devs [ i ] & &
2021-01-18 23:26:42 -05:00
! test_and_set_bit ( ec_idx , h - > s - > blocks_allocated ) )
2018-11-01 15:13:19 -04:00
goto got_bucket ;
2021-01-18 23:26:42 -05:00
}
2018-11-01 15:13:19 -04:00
goto out_put_head ;
got_bucket :
2021-12-25 21:21:46 -05:00
ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
2018-11-01 15:13:19 -04:00
2021-01-18 23:26:42 -05:00
ob - > ec_idx = ec_idx ;
2018-11-01 15:13:19 -04:00
ob - > ec = h - > s ;
2019-06-10 11:31:07 -04:00
add_new_bucket ( c , ptrs , devs_may_alloc ,
nr_effective , have_cache , flags , ob ) ;
2018-11-01 15:13:19 -04:00
atomic_inc ( & h - > s - > pin ) ;
out_put_head :
2020-07-06 20:59:46 -04:00
bch2_ec_stripe_head_put ( c , h ) ;
2020-12-15 12:38:17 -05:00
return 0 ;
2018-11-01 15:13:19 -04:00
}
2018-10-06 00:46:55 -04:00
/* Sector allocator */
2018-11-01 15:13:19 -04:00
/*
 * Move the open buckets of write point @wp that are usable for the current
 * allocation into @ptrs; the rest stay on the write point.
 *
 * A bucket is usable while more replicas are still needed, its device is in
 * @devs_may_alloc, the device is durable (or this is user data and no cache
 * device has been picked yet), and it is an EC bucket whenever @need_ec is
 * set.
 */
static void get_buckets_from_writepoint(struct bch_fs *c,
					struct open_buckets *ptrs,
					struct write_point *wp,
					struct bch_devs_mask *devs_may_alloc,
					unsigned nr_replicas,
					unsigned *nr_effective,
					bool *have_cache,
					unsigned flags,
					bool need_ec)
{
	struct open_buckets ptrs_skip = { .nr = 0 };
	struct open_bucket *ob;
	unsigned i;

	open_bucket_for_each(c, &wp->ptrs, ob, i) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
		bool usable =
			*nr_effective < nr_replicas &&
			test_bit(ob->dev, devs_may_alloc->d) &&
			(ca->mi.durability ||
			 (wp->data_type == BCH_DATA_user && !*have_cache)) &&
			(ob->ec || !need_ec);

		if (usable)
			add_new_bucket(c, ptrs, devs_may_alloc,
				       nr_effective, have_cache,
				       flags, ob);
		else
			ob_push(c, &ptrs_skip, ob);
	}

	/* Whatever was not taken remains on the write point. */
	wp->ptrs = ptrs_skip;
}
2022-01-09 20:48:31 -05:00
/*
 * Add open buckets to @ptrs until *@nr_effective reaches @nr_replicas.
 *
 * The candidate device mask starts as target_rw_devs() for @target and the
 * write point's data type; devices listed in @devs_have and devices of the
 * buckets already in @ptrs are removed from it.
 *
 * Sources are tried in this order, returning 0 as soon as enough replicas
 * have been gathered:
 *
 *  1. if @erasure_code is set and @ptrs has no erasure coded bucket yet:
 *     erasure coded buckets already on @wp, then bucket_alloc_from_stripe(),
 *  2. any suitable bucket already on @wp,
 *  3. bch2_bucket_alloc_set_trans().
 *
 * Returns 0 on success or a negative error code.
 */
static int open_bucket_add_buckets(struct btree_trans *trans,
			struct open_buckets *ptrs,
			struct write_point *wp,
			struct bch_devs_list *devs_have,
			u16 target,
			unsigned erasure_code,
			unsigned nr_replicas,
			unsigned *nr_effective,
			bool *have_cache,
			enum alloc_reserve reserve,
			unsigned flags,
			struct closure *_cl)
{
	struct bch_fs *c = trans->c;
	struct bch_devs_mask may_alloc;
	struct open_bucket *ob;
	struct closure *wait_cl = NULL;
	unsigned idx;
	bool again;
	int ret;

	rcu_read_lock();
	may_alloc = target_rw_devs(c, wp->data_type, target);
	rcu_read_unlock();

	/* Never allocate twice from a device we already point to: */
	for (idx = 0; idx < devs_have->nr; idx++)
		__clear_bit(devs_have->devs[idx], may_alloc.d);

	open_bucket_for_each(c, ptrs, ob, idx)
		__clear_bit(ob->dev, may_alloc.d);

	if (erasure_code) {
		if (!ec_open_bucket(c, ptrs)) {
			get_buckets_from_writepoint(c, ptrs, wp, &may_alloc,
						    nr_replicas, nr_effective,
						    have_cache, flags, true);
			if (*nr_effective >= nr_replicas)
				return 0;
		}

		/* Still no erasure coded bucket: ask the stripe code for one */
		if (!ec_open_bucket(c, ptrs)) {
			ret = bucket_alloc_from_stripe(trans, ptrs, wp, &may_alloc,
						 target, erasure_code,
						 nr_replicas, nr_effective,
						 have_cache, flags, _cl);
			/* Only these errors are propagated from the stripe path: */
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
			    bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
			    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
				return ret;
			if (*nr_effective >= nr_replicas)
				return 0;
		}
	}

	get_buckets_from_writepoint(c, ptrs, wp, &may_alloc,
				    nr_replicas, nr_effective,
				    have_cache, flags, false);
	if (*nr_effective >= nr_replicas)
		return 0;

	/*
	 * The first pass runs without a closure (nonblocking), so that if one
	 * device is full we try the other devices.  If that fails with
	 * anything other than a transaction restart or insufficient devices,
	 * and the caller supplied @_cl, one more pass is made with @_cl.
	 */
	do {
		ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &may_alloc,
					nr_replicas, nr_effective, have_cache,
					reserve, flags, wait_cl);

		again = ret &&
			!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
			!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
			!wait_cl && _cl;
		if (again)
			wait_cl = _cl;
	} while (again);

	return ret;
}
2018-11-01 15:13:19 -04:00
void bch2_open_buckets_stop_dev ( struct bch_fs * c , struct bch_dev * ca ,
2019-09-20 16:17:46 -04:00
struct open_buckets * obs )
2018-10-06 00:46:55 -04:00
{
2018-10-06 04:12:42 -04:00
struct open_buckets ptrs = { . nr = 0 } ;
2018-11-01 15:13:19 -04:00
struct open_bucket * ob , * ob2 ;
unsigned i , j ;
2018-10-06 00:46:55 -04:00
2018-11-01 15:13:19 -04:00
open_bucket_for_each ( c , obs , ob , i ) {
2021-12-25 21:21:46 -05:00
bool drop = ! ca | | ob - > dev = = ca - > dev_idx ;
2018-11-01 15:13:19 -04:00
if ( ! drop & & ob - > ec ) {
mutex_lock ( & ob - > ec - > lock ) ;
2021-01-18 23:26:42 -05:00
for ( j = 0 ; j < ob - > ec - > new_stripe . key . v . nr_blocks ; j + + ) {
if ( ! ob - > ec - > blocks [ j ] )
continue ;
ob2 = c - > open_buckets + ob - > ec - > blocks [ j ] ;
2021-12-25 21:21:46 -05:00
drop | = ob2 - > dev = = ca - > dev_idx ;
2021-01-18 23:26:42 -05:00
}
2018-11-01 15:13:19 -04:00
mutex_unlock ( & ob - > ec - > lock ) ;
}
if ( drop )
bch2_open_bucket_put ( c , ob ) ;
2018-10-06 04:12:42 -04:00
else
ob_push ( c , & ptrs , ob ) ;
2018-11-01 15:13:19 -04:00
}
2018-10-06 04:12:42 -04:00
2018-11-01 15:13:19 -04:00
* obs = ptrs ;
}
void bch2_writepoint_stop ( struct bch_fs * c , struct bch_dev * ca ,
struct write_point * wp )
{
mutex_lock ( & wp - > lock ) ;
2019-09-20 16:17:46 -04:00
bch2_open_buckets_stop_dev ( c , ca , & wp - > ptrs ) ;
2018-10-06 00:46:55 -04:00
mutex_unlock ( & wp - > lock ) ;
}
2018-11-04 21:55:35 -05:00
static inline struct hlist_head * writepoint_hash ( struct bch_fs * c ,
unsigned long write_point )
{
unsigned hash =
hash_long ( write_point , ilog2 ( ARRAY_SIZE ( c - > write_points_hash ) ) ) ;
return & c - > write_points_hash [ hash ] ;
}
2018-10-06 00:46:55 -04:00
static struct write_point * __writepoint_find ( struct hlist_head * head ,
unsigned long write_point )
{
struct write_point * wp ;
2021-04-15 18:31:58 -04:00
rcu_read_lock ( ) ;
2018-10-06 00:46:55 -04:00
hlist_for_each_entry_rcu ( wp , head , node )
if ( wp - > write_point = = write_point )
2021-04-15 18:31:58 -04:00
goto out ;
wp = NULL ;
out :
rcu_read_unlock ( ) ;
return wp ;
2018-10-06 00:46:55 -04:00
}
2018-11-04 21:55:35 -05:00
static inline bool too_many_writepoints ( struct bch_fs * c , unsigned factor )
{
u64 stranded = c - > write_points_nr * c - > bucket_size_max ;
2019-02-14 18:38:52 -05:00
u64 free = bch2_fs_usage_read_short ( c ) . free ;
2018-11-04 21:55:35 -05:00
return stranded * factor > free ;
}
static bool try_increase_writepoints ( struct bch_fs * c )
{
struct write_point * wp ;
if ( c - > write_points_nr = = ARRAY_SIZE ( c - > write_points ) | |
too_many_writepoints ( c , 32 ) )
return false ;
wp = c - > write_points + c - > write_points_nr + + ;
hlist_add_head_rcu ( & wp - > node , writepoint_hash ( c , wp - > write_point ) ) ;
return true ;
}
static bool try_decrease_writepoints ( struct bch_fs * c ,
unsigned old_nr )
{
struct write_point * wp ;
mutex_lock ( & c - > write_points_hash_lock ) ;
if ( c - > write_points_nr < old_nr ) {
mutex_unlock ( & c - > write_points_hash_lock ) ;
return true ;
}
if ( c - > write_points_nr = = 1 | |
! too_many_writepoints ( c , 8 ) ) {
mutex_unlock ( & c - > write_points_hash_lock ) ;
return false ;
}
wp = c - > write_points + - - c - > write_points_nr ;
hlist_del_rcu ( & wp - > node ) ;
mutex_unlock ( & c - > write_points_hash_lock ) ;
bch2_writepoint_stop ( c , NULL , wp ) ;
return true ;
}
2022-01-09 20:48:31 -05:00
/*
 * Acquire @lock on behalf of @trans.
 *
 * The uncontended case is a plain trylock.  If the mutex is contended,
 * bch2_trans_unlock() is called on @trans before blocking in mutex_lock()
 * (presumably so btree locks are not held while sleeping -- confirm).
 */
static void bch2_trans_mutex_lock(struct btree_trans *trans,
				  struct mutex *lock)
{
	if (mutex_trylock(lock))
		return;

	bch2_trans_unlock(trans);
	mutex_lock(lock);
}
static struct write_point * writepoint_find ( struct btree_trans * trans ,
2018-10-06 00:46:55 -04:00
unsigned long write_point )
{
2022-01-09 20:48:31 -05:00
struct bch_fs * c = trans - > c ;
2018-10-06 00:46:55 -04:00
struct write_point * wp , * oldest ;
struct hlist_head * head ;
if ( ! ( write_point & 1UL ) ) {
wp = ( struct write_point * ) write_point ;
2022-01-09 20:48:31 -05:00
bch2_trans_mutex_lock ( trans , & wp - > lock ) ;
2018-10-06 00:46:55 -04:00
return wp ;
}
head = writepoint_hash ( c , write_point ) ;
restart_find :
wp = __writepoint_find ( head , write_point ) ;
if ( wp ) {
lock_wp :
2022-01-09 20:48:31 -05:00
bch2_trans_mutex_lock ( trans , & wp - > lock ) ;
2018-10-06 00:46:55 -04:00
if ( wp - > write_point = = write_point )
goto out ;
mutex_unlock ( & wp - > lock ) ;
goto restart_find ;
}
2018-11-04 21:55:35 -05:00
restart_find_oldest :
2018-10-06 00:46:55 -04:00
oldest = NULL ;
for ( wp = c - > write_points ;
2018-11-04 21:55:35 -05:00
wp < c - > write_points + c - > write_points_nr ; wp + + )
2018-10-06 00:46:55 -04:00
if ( ! oldest | | time_before64 ( wp - > last_used , oldest - > last_used ) )
oldest = wp ;
2022-01-09 20:48:31 -05:00
bch2_trans_mutex_lock ( trans , & oldest - > lock ) ;
bch2_trans_mutex_lock ( trans , & c - > write_points_hash_lock ) ;
2018-11-04 21:55:35 -05:00
if ( oldest > = c - > write_points + c - > write_points_nr | |
try_increase_writepoints ( c ) ) {
mutex_unlock ( & c - > write_points_hash_lock ) ;
mutex_unlock ( & oldest - > lock ) ;
goto restart_find_oldest ;
}
2018-10-06 00:46:55 -04:00
wp = __writepoint_find ( head , write_point ) ;
if ( wp & & wp ! = oldest ) {
mutex_unlock ( & c - > write_points_hash_lock ) ;
mutex_unlock ( & oldest - > lock ) ;
goto lock_wp ;
}
wp = oldest ;
hlist_del_rcu ( & wp - > node ) ;
wp - > write_point = write_point ;
hlist_add_head_rcu ( & wp - > node , head ) ;
mutex_unlock ( & c - > write_points_hash_lock ) ;
out :
2022-10-17 07:07:28 -04:00
wp - > last_used = local_clock ( ) ;
2018-10-06 00:46:55 -04:00
return wp ;
}
/*
* Get us an open_bucket we can allocate from , return with it locked :
*/
2022-01-09 20:48:31 -05:00
int bch2_alloc_sectors_start_trans ( struct btree_trans * trans ,
2022-10-31 16:13:05 -04:00
unsigned target ,
unsigned erasure_code ,
struct write_point_specifier write_point ,
struct bch_devs_list * devs_have ,
unsigned nr_replicas ,
unsigned nr_replicas_required ,
enum alloc_reserve reserve ,
unsigned flags ,
struct closure * cl ,
struct write_point * * wp_ret )
2018-10-06 00:46:55 -04:00
{
2022-01-09 20:48:31 -05:00
struct bch_fs * c = trans - > c ;
2018-10-06 00:46:55 -04:00
struct write_point * wp ;
struct open_bucket * ob ;
2019-01-18 18:58:51 -05:00
struct open_buckets ptrs ;
unsigned nr_effective , write_points_nr ;
2019-06-10 11:31:07 -04:00
unsigned ob_flags = 0 ;
2019-01-18 18:58:51 -05:00
bool have_cache ;
2021-11-28 13:42:05 -05:00
int ret ;
2020-07-11 18:52:14 -04:00
int i ;
2018-10-06 00:46:55 -04:00
2019-06-10 11:31:07 -04:00
if ( ! ( flags & BCH_WRITE_ONLY_SPECIFIED_DEVS ) )
ob_flags | = BUCKET_ALLOC_USE_DURABILITY ;
2018-10-06 00:46:55 -04:00
BUG_ON ( ! nr_replicas | | ! nr_replicas_required ) ;
2018-11-04 21:55:35 -05:00
retry :
2019-01-18 18:58:51 -05:00
ptrs . nr = 0 ;
nr_effective = 0 ;
2018-11-04 21:55:35 -05:00
write_points_nr = c - > write_points_nr ;
2019-01-18 18:58:51 -05:00
have_cache = false ;
2018-11-01 15:13:19 -04:00
2022-01-09 20:48:31 -05:00
* wp_ret = wp = writepoint_find ( trans , write_point . v ) ;
2018-10-06 00:46:55 -04:00
2021-12-25 21:21:46 -05:00
if ( wp - > data_type = = BCH_DATA_user )
2019-06-10 11:31:07 -04:00
ob_flags | = BUCKET_MAY_ALLOC_PARTIAL ;
2018-11-01 15:13:19 -04:00
/* metadata may not allocate on cache devices: */
2021-12-25 21:21:46 -05:00
if ( wp - > data_type ! = BCH_DATA_user )
2018-11-01 15:13:19 -04:00
have_cache = true ;
2018-10-06 04:12:42 -04:00
if ( ! target | | ( flags & BCH_WRITE_ONLY_SPECIFIED_DEVS ) ) {
2022-01-09 20:48:31 -05:00
ret = open_bucket_add_buckets ( trans , & ptrs , wp , devs_have ,
2018-11-01 15:13:19 -04:00
target , erasure_code ,
2018-10-06 04:12:42 -04:00
nr_replicas , & nr_effective ,
2019-06-10 11:31:07 -04:00
& have_cache , reserve ,
ob_flags , cl ) ;
2018-10-06 00:46:55 -04:00
} else {
2022-01-09 20:48:31 -05:00
ret = open_bucket_add_buckets ( trans , & ptrs , wp , devs_have ,
2018-11-01 15:13:19 -04:00
target , erasure_code ,
2018-10-06 04:12:42 -04:00
nr_replicas , & nr_effective ,
2019-06-10 11:31:07 -04:00
& have_cache , reserve ,
ob_flags , NULL ) ;
2022-07-17 23:06:38 -04:00
if ( ! ret | |
bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2018-10-06 00:46:55 -04:00
goto alloc_done ;
2022-01-09 20:48:31 -05:00
ret = open_bucket_add_buckets ( trans , & ptrs , wp , devs_have ,
2018-11-01 15:13:19 -04:00
0 , erasure_code ,
2018-10-06 04:12:42 -04:00
nr_replicas , & nr_effective ,
2019-06-10 11:31:07 -04:00
& have_cache , reserve ,
ob_flags , cl ) ;
2018-10-06 00:46:55 -04:00
}
alloc_done :
2018-10-06 04:12:42 -04:00
BUG_ON ( ! ret & & nr_effective < nr_replicas ) ;
2018-10-06 00:46:55 -04:00
2018-11-01 15:13:19 -04:00
if ( erasure_code & & ! ec_open_bucket ( c , & ptrs ) )
pr_debug ( " failed to get ec bucket: ret %u " , ret ) ;
2022-07-17 22:31:21 -04:00
if ( ret = = - BCH_ERR_insufficient_devices & &
2018-10-06 04:12:42 -04:00
nr_effective > = nr_replicas_required )
2018-10-06 00:46:55 -04:00
ret = 0 ;
if ( ret )
goto err ;
2018-10-06 04:12:42 -04:00
/* Free buckets we didn't use: */
open_bucket_for_each ( c , & wp - > ptrs , ob , i )
2020-07-21 17:12:39 -04:00
open_bucket_free_unused ( c , wp , ob ) ;
2018-10-06 00:46:55 -04:00
2018-10-06 04:12:42 -04:00
wp - > ptrs = ptrs ;
2018-10-06 00:46:55 -04:00
wp - > sectors_free = UINT_MAX ;
2018-10-06 04:12:42 -04:00
open_bucket_for_each ( c , & wp - > ptrs , ob , i )
2018-10-06 00:46:55 -04:00
wp - > sectors_free = min ( wp - > sectors_free , ob - > sectors_free ) ;
BUG_ON ( ! wp - > sectors_free | | wp - > sectors_free = = UINT_MAX ) ;
2022-10-31 16:13:05 -04:00
return 0 ;
2018-10-06 00:46:55 -04:00
err :
2018-10-06 04:12:42 -04:00
open_bucket_for_each ( c , & wp - > ptrs , ob , i )
if ( ptrs . nr < ARRAY_SIZE ( ptrs . v ) )
ob_push ( c , & ptrs , ob ) ;
else
2020-07-21 17:12:39 -04:00
open_bucket_free_unused ( c , wp , ob ) ;
2018-10-06 04:12:42 -04:00
wp - > ptrs = ptrs ;
2018-10-06 00:46:55 -04:00
mutex_unlock ( & wp - > lock ) ;
2018-11-04 21:55:35 -05:00
2022-07-17 22:31:21 -04:00
if ( bch2_err_matches ( ret , BCH_ERR_freelist_empty ) & &
2018-11-04 21:55:35 -05:00
try_decrease_writepoints ( c , write_points_nr ) )
goto retry ;
2022-07-17 22:31:21 -04:00
if ( bch2_err_matches ( ret , BCH_ERR_open_buckets_empty ) | |
bch2_err_matches ( ret , BCH_ERR_freelist_empty ) )
2022-09-18 17:10:33 -04:00
return cl
2022-12-13 15:17:40 -05:00
? - BCH_ERR_bucket_alloc_blocked
2022-09-18 17:10:33 -04:00
: - BCH_ERR_ENOSPC_bucket_alloc ;
2022-07-17 22:31:21 -04:00
return ret ;
2018-10-06 00:46:55 -04:00
}
2021-12-25 21:21:46 -05:00
struct bch_extent_ptr bch2_ob_ptr ( struct bch_fs * c , struct open_bucket * ob )
{
struct bch_dev * ca = bch_dev_bkey_exists ( c , ob - > dev ) ;
return ( struct bch_extent_ptr ) {
. type = 1 < < BCH_EXTENT_ENTRY_ptr ,
. gen = ob - > gen ,
. dev = ob - > dev ,
. offset = bucket_to_sector ( ca , ob - > bucket ) +
ca - > mi . bucket_size -
ob - > sectors_free ,
} ;
}
2018-10-06 00:46:55 -04:00
/*
 * Out-of-line entry point for bch2_alloc_sectors_append_ptrs_inlined(): all
 * arguments (@c, @wp, key @k, @sectors, @cached) are forwarded unchanged.
 */
void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
				    struct bkey_i *k, unsigned sectors,
				    bool cached)
{
	bch2_alloc_sectors_append_ptrs_inlined(c, wp,
					       k, sectors,
					       cached);
}
/*
 * Out-of-line entry point for bch2_alloc_sectors_done_inlined(): @c and @wp
 * are forwarded unchanged.
 *
 * NOTE(review): presumably this finishes an allocation started with
 * bch2_alloc_sectors_start_trans() and releases the write point -- confirm
 * against the inlined helper.
 */
void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
{
	bch2_alloc_sectors_done_inlined(c,
					wp);
}
2018-11-04 21:55:35 -05:00
2020-07-21 17:12:39 -04:00
static inline void writepoint_init ( struct write_point * wp ,
enum bch_data_type type )
{
mutex_init ( & wp - > lock ) ;
2021-12-25 21:21:46 -05:00
wp - > data_type = type ;
2022-10-31 16:13:05 -04:00
INIT_WORK ( & wp - > index_update_work , bch2_write_point_do_index_updates ) ;
INIT_LIST_HEAD ( & wp - > writes ) ;
spin_lock_init ( & wp - > writes_lock ) ;
2020-07-21 17:12:39 -04:00
}
2018-11-04 21:55:35 -05:00
void bch2_fs_allocator_foreground_init ( struct bch_fs * c )
{
struct open_bucket * ob ;
struct write_point * wp ;
mutex_init ( & c - > write_points_hash_lock ) ;
c - > write_points_nr = ARRAY_SIZE ( c - > write_points ) ;
/* open bucket 0 is a sentinal NULL: */
spin_lock_init ( & c - > open_buckets [ 0 ] . lock ) ;
for ( ob = c - > open_buckets + 1 ;
ob < c - > open_buckets + ARRAY_SIZE ( c - > open_buckets ) ; ob + + ) {
spin_lock_init ( & ob - > lock ) ;
c - > open_buckets_nr_free + + ;
ob - > freelist = c - > open_buckets_freelist ;
c - > open_buckets_freelist = ob - c - > open_buckets ;
}
2020-07-11 16:28:54 -04:00
writepoint_init ( & c - > btree_write_point , BCH_DATA_btree ) ;
writepoint_init ( & c - > rebalance_write_point , BCH_DATA_user ) ;
writepoint_init ( & c - > copygc_write_point , BCH_DATA_user ) ;
2018-11-04 21:55:35 -05:00
for ( wp = c - > write_points ;
wp < c - > write_points + c - > write_points_nr ; wp + + ) {
2020-07-09 18:28:11 -04:00
writepoint_init ( wp , BCH_DATA_user ) ;
2018-11-04 21:55:35 -05:00
2022-10-17 07:07:28 -04:00
wp - > last_used = local_clock ( ) ;
2018-11-04 21:55:35 -05:00
wp - > write_point = ( unsigned long ) wp ;
hlist_add_head_rcu ( & wp - > node ,
writepoint_hash ( c , wp - > write_point ) ) ;
}
}
2021-12-25 21:21:46 -05:00
void bch2_open_buckets_to_text ( struct printbuf * out , struct bch_fs * c )
{
struct open_bucket * ob ;
for ( ob = c - > open_buckets ;
ob < c - > open_buckets + ARRAY_SIZE ( c - > open_buckets ) ;
ob + + ) {
spin_lock ( & ob - > lock ) ;
if ( ob - > valid & & ! ob - > on_partial_list ) {
2023-02-03 21:01:40 -05:00
prt_printf ( out , " %zu ref %u type %s %u:%llu:%u \n " ,
2021-12-25 21:21:46 -05:00
ob - c - > open_buckets ,
atomic_read ( & ob - > pin ) ,
2022-04-10 18:04:24 -04:00
bch2_data_types [ ob - > data_type ] ,
ob - > dev , ob - > bucket , ob - > gen ) ;
2021-12-25 21:21:46 -05:00
}
spin_unlock ( & ob - > lock ) ;
}
}
2022-10-31 16:13:05 -04:00
/*
 * Printable names of the write point states: one string per entry of
 * WRITE_POINT_STATES(), followed by a terminating NULL.
 */
static const char * const bch2_write_point_states[] = {
#define x(state)	#state,
	WRITE_POINT_STATES()
#undef x
	NULL
};
void bch2_write_points_to_text ( struct printbuf * out , struct bch_fs * c )
{
struct write_point * wp ;
unsigned i ;
for ( wp = c - > write_points ;
wp < c - > write_points + ARRAY_SIZE ( c - > write_points ) ;
wp + + ) {
2023-02-03 21:01:40 -05:00
prt_printf ( out , " %lu: " , wp - > write_point ) ;
prt_human_readable_u64 ( out , wp - > sectors_allocated ) ;
2022-10-31 16:13:05 -04:00
2023-02-03 21:01:40 -05:00
prt_printf ( out , " last wrote: " ) ;
2022-10-31 16:13:05 -04:00
bch2_pr_time_units ( out , sched_clock ( ) - wp - > last_used ) ;
for ( i = 0 ; i < WRITE_POINT_STATE_NR ; i + + ) {
2023-02-03 21:01:40 -05:00
prt_printf ( out , " %s: " , bch2_write_point_states [ i ] ) ;
2022-10-31 16:13:05 -04:00
bch2_pr_time_units ( out , wp - > time [ i ] ) ;
}
2023-02-03 21:01:40 -05:00
prt_newline ( out ) ;
2022-10-31 16:13:05 -04:00
}
}