// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2012 Google, Inc.
 *
 * Foreground allocator code: allocate buckets from freelist, and allocate in
 * sector granularity from writepoints.
 *
 * bch2_bucket_alloc() allocates a single bucket from a specific device.
 *
 * bch2_bucket_alloc_set() allocates one or more buckets from different devices
 * in a given filesystem.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
#include "io.h"
#include "trace.h"

#include <linux/math64.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

const char * const bch2_alloc_reserves[] = {
#define x(t) #t,
	BCH_ALLOC_RESERVES()
#undef x
	NULL
};

/*
 * Open buckets represent a bucket that's currently being allocated from. They
 * serve two purposes:
 *
 *  - They track buckets that have been partially allocated, allowing for
 *    sub-bucket sized allocations - they're used by the sector allocator below
 *
 *  - They provide a reference to the buckets they own that mark and sweep GC
 *    can find, until the new allocation has a pointer to it inserted into the
 *    btree
 *
 * When allocating some space with the sector allocator, the allocation comes
 * with a reference to an open bucket - the caller is required to put that
 * reference _after_ doing the index update that makes its allocation reachable.
 */
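
/*
 * A minimal sketch of that protocol, assuming a hypothetical caller
 * (write_to_bucket() and insert_index_entry() stand in for the real I/O and
 * btree update paths):
 *
 *	struct open_bucket *ob = bch2_bucket_alloc(c, ca, RESERVE_none,
 *						   false, cl);
 *	if (IS_ERR(ob))
 *		return PTR_ERR(ob);
 *
 *	write_to_bucket(ob);		// hypothetical: write the data
 *	insert_index_entry(ob);		// make the allocation reachable
 *	bch2_open_bucket_put(c, ob);	// drop the ref only after the update
 */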
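
/*
 * Open buckets are hashed by (dev, bucket) in an open-coded chained hash
 * table: each slot holds the index of the first open bucket in the chain,
 * chains are linked through the ->hash field, and index 0 (the sentinel
 * open bucket) terminates a chain.
 */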

static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
{
	open_bucket_idx_t idx = ob - c->open_buckets;
	open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);

	ob->hash = *slot;
	*slot = idx;
}

static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob)
{
	open_bucket_idx_t idx = ob - c->open_buckets;
	open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);

	while (*slot != idx) {
		BUG_ON(!*slot);
		slot = &c->open_buckets[*slot].hash;
	}

	*slot = ob->hash;
	ob->hash = 0;
}

void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);

	if (ob->ec) {
		bch2_ec_bucket_written(c, ob);
		return;
	}

	percpu_down_read(&c->mark_lock);
	spin_lock(&ob->lock);

	bch2_mark_alloc_bucket(c, ca, ob->bucket, false);
	ob->valid = false;
	ob->data_type = 0;

	spin_unlock(&ob->lock);
	percpu_up_read(&c->mark_lock);

	spin_lock(&c->freelist_lock);

	bch2_open_bucket_hash_remove(c, ob);

	ob->freelist = c->open_buckets_freelist;
	c->open_buckets_freelist = ob - c->open_buckets;

	c->open_buckets_nr_free++;
	ca->nr_open_buckets--;

	spin_unlock(&c->freelist_lock);

	closure_wake_up(&c->open_buckets_wait);
}

void bch2_open_bucket_write_error(struct bch_fs *c,
				  struct open_buckets *obs,
				  unsigned dev)
{
	struct open_bucket *ob;
	unsigned i;

	open_bucket_for_each(c, obs, ob, i)
		if (ob->dev == dev && ob->ec)
			bch2_ec_bucket_cancel(c, ob);
}

static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
{
	struct open_bucket *ob;

	BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);

	ob = c->open_buckets + c->open_buckets_freelist;
	c->open_buckets_freelist = ob->freelist;
	atomic_set(&ob->pin, 1);
	ob->data_type = 0;

	c->open_buckets_nr_free--;
	return ob;
}
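
/*
 * A bucket that still has free sectors goes back on its device's partial
 * list, so a later allocation can finish filling it - but only for user
 * data; metadata writepoints never reallocate from partial buckets:
 */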

static void open_bucket_free_unused(struct bch_fs *c,
				    struct write_point *wp,
				    struct open_bucket *ob)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
	bool may_realloc = wp->data_type == BCH_DATA_user;

	BUG_ON(ca->open_buckets_partial_nr >
	       ARRAY_SIZE(ca->open_buckets_partial));

	if (ca->open_buckets_partial_nr <
	    ARRAY_SIZE(ca->open_buckets_partial) &&
	    may_realloc) {
		spin_lock(&c->freelist_lock);
		ob->on_partial_list = true;
		ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
			ob - c->open_buckets;
		spin_unlock(&c->freelist_lock);

		closure_wake_up(&c->open_buckets_wait);
		closure_wake_up(&c->freelist_wait);
	} else {
		bch2_open_bucket_put(c, ob);
	}
}

/* _only_ for allocating the journal on a new device: */
long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
{
	while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
		u64 b = ca->new_fs_bucket_idx++;

		if (!is_superblock_bucket(ca, b) &&
		    (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
			return b;
	}

	return -1;
}
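
/*
 * A fraction of the open buckets is held back from normal allocations, so
 * that btree node allocations and copygc always have open buckets available
 * and can make forward progress to free up more space:
 */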

static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
{
	switch (reserve) {
	case RESERVE_btree:
	case RESERVE_btree_movinggc:
		return 0;
	case RESERVE_movinggc:
		return OPEN_BUCKETS_COUNT / 4;
	default:
		return OPEN_BUCKETS_COUNT / 2;
	}
}

/**
 * bch2_bucket_alloc - allocate a single bucket from a specific device
 *
 * Returns a pointer to the newly allocated open_bucket on success, or an
 * ERR_PTR() on failure.
 */

struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
				      enum alloc_reserve reserve,
				      bool may_alloc_partial,
				      struct closure *cl)
{
	struct open_bucket *ob;
	long b = 0;

	spin_lock(&c->freelist_lock);

	if (may_alloc_partial) {
		int i;

		for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
			ob = c->open_buckets + ca->open_buckets_partial[i];

			if (reserve <= ob->alloc_reserve) {
				array_remove_item(ca->open_buckets_partial,
						  ca->open_buckets_partial_nr,
						  i);
				ob->on_partial_list = false;
				ob->alloc_reserve = reserve;
				spin_unlock(&c->freelist_lock);
				return ob;
			}
		}
	}

	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
		if (cl)
			closure_wait(&c->open_buckets_wait, cl);

		if (!c->blocked_allocate_open_bucket)
			c->blocked_allocate_open_bucket = local_clock();

		spin_unlock(&c->freelist_lock);
		trace_open_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve]);
		return ERR_PTR(-OPEN_BUCKETS_EMPTY);
	}

	if (likely(fifo_pop(&ca->free[RESERVE_none], b)))
		goto out;

	switch (reserve) {
	case RESERVE_btree_movinggc:
	case RESERVE_movinggc:
		if (fifo_pop(&ca->free[RESERVE_movinggc], b))
			goto out;
		break;
	default:
		break;
	}

	if (cl)
		closure_wait(&c->freelist_wait, cl);

	if (!c->blocked_allocate)
		c->blocked_allocate = local_clock();

	spin_unlock(&c->freelist_lock);

	trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve]);
	return ERR_PTR(-FREELIST_EMPTY);
out:
	verify_not_on_freelist(c, ca, b);

	ob = bch2_open_bucket_alloc(c);

	spin_lock(&ob->lock);

	ob->valid	= true;
	ob->sectors_free = ca->mi.bucket_size;
	ob->alloc_reserve = reserve;
	ob->dev		= ca->dev_idx;
	ob->gen		= *bucket_gen(ca, b);
	ob->bucket	= b;
	spin_unlock(&ob->lock);

	ca->nr_open_buckets++;
	bch2_open_bucket_hash_add(c, ob);

	if (c->blocked_allocate_open_bucket) {
		bch2_time_stats_update(
			&c->times[BCH_TIME_blocked_allocate_open_bucket],
			c->blocked_allocate_open_bucket);
		c->blocked_allocate_open_bucket = 0;
	}

	if (c->blocked_allocate) {
		bch2_time_stats_update(
			&c->times[BCH_TIME_blocked_allocate],
			c->blocked_allocate);
		c->blocked_allocate = 0;
	}

	spin_unlock(&c->freelist_lock);

	bch2_wake_allocator(ca);

	trace_bucket_alloc(ca, bch2_alloc_reserves[reserve]);
	return ob;
}

static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
			    unsigned l, unsigned r)
{
	return ((stripe->next_alloc[l] > stripe->next_alloc[r]) -
		(stripe->next_alloc[l] < stripe->next_alloc[r]));
}

#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)

struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
					  struct dev_stripe_state *stripe,
					  struct bch_devs_mask *devs)
{
	struct dev_alloc_list ret = { .nr = 0 };
	unsigned i;

	for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
		ret.devs[ret.nr++] = i;

	bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
	return ret;
}
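
/*
 * Stripe state maintains a per-device weight, next_alloc[]: allocation order
 * in bch2_dev_alloc_list() is lowest weight first, and each allocation from a
 * device adds roughly 2^48 / free_space to that device's weight, so devices
 * with more free space are picked proportionally more often.
 *
 * Worked example (hypothetical numbers): if device A has 2^20 free buckets
 * and device B has 2^18, an allocation from A costs 2^48 / 2^20 = 2^28 while
 * one from B costs 2^30 - A's weight grows four times more slowly, so A
 * receives roughly four allocations for each one that goes to B. Rescaling
 * every weight down by a quarter of the allocating device's (pre-bump) weight
 * keeps the counters bounded, and saturating at U64_MAX avoids overflow.
 */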

void bch2_dev_stripe_increment(struct bch_dev *ca,
			       struct dev_stripe_state *stripe)
{
	u64 *v = stripe->next_alloc + ca->dev_idx;
	u64 free_space = dev_buckets_available(ca);
	u64 free_space_inv = free_space
		? div64_u64(1ULL << 48, free_space)
		: 1ULL << 48;
	u64 scale = *v / 4;

	if (*v + free_space_inv >= *v)
		*v += free_space_inv;
	else
		*v = U64_MAX;

	for (v = stripe->next_alloc;
	     v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++)
		*v = *v < scale ? 0 : *v - scale;
}

#define BUCKET_MAY_ALLOC_PARTIAL	(1 << 0)
#define BUCKET_ALLOC_USE_DURABILITY	(1 << 1)

static void add_new_bucket(struct bch_fs *c,
			   struct open_buckets *ptrs,
			   struct bch_devs_mask *devs_may_alloc,
			   unsigned *nr_effective,
			   bool *have_cache,
			   unsigned flags,
			   struct open_bucket *ob)
{
	unsigned durability =
		bch_dev_bkey_exists(c, ob->dev)->mi.durability;

	__clear_bit(ob->dev, devs_may_alloc->d);
	*nr_effective	+= (flags & BUCKET_ALLOC_USE_DURABILITY)
		? durability : 1;
	*have_cache	|= !durability;

	ob_push(c, ptrs, ob);
}
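
/*
 * Allocate one bucket from each of several devices, trying devices in the
 * order the stripe state prefers them; returns 0 once *nr_effective reaches
 * @nr_replicas, or -INSUFFICIENT_DEVICES if we ran out of devices to try:
 */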

int bch2_bucket_alloc_set(struct bch_fs *c,
			  struct open_buckets *ptrs,
			  struct dev_stripe_state *stripe,
			  struct bch_devs_mask *devs_may_alloc,
			  unsigned nr_replicas,
			  unsigned *nr_effective,
			  bool *have_cache,
			  enum alloc_reserve reserve,
			  unsigned flags,
			  struct closure *cl)
{
	struct dev_alloc_list devs_sorted =
		bch2_dev_alloc_list(c, stripe, devs_may_alloc);
	struct bch_dev *ca;
	int ret = -INSUFFICIENT_DEVICES;
	unsigned i;

	BUG_ON(*nr_effective >= nr_replicas);

	for (i = 0; i < devs_sorted.nr; i++) {
		struct open_bucket *ob;

		ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
		if (!ca)
			continue;

		if (!ca->mi.durability && *have_cache)
			continue;

		ob = bch2_bucket_alloc(c, ca, reserve,
				       flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
		if (IS_ERR(ob)) {
			ret = PTR_ERR(ob);

			if (cl)
				return ret;
			continue;
		}

		add_new_bucket(c, ptrs, devs_may_alloc,
			       nr_effective, have_cache, flags, ob);

		bch2_dev_stripe_increment(ca, stripe);

		if (*nr_effective >= nr_replicas)
			return 0;
	}

	return ret;
}

/* Allocate from stripes: */

/*
 * if we can't allocate a new stripe because there are already too many
 * partially filled stripes, force allocating from an existing stripe even when
 * it's to a device we don't want:
 */
static int bucket_alloc_from_stripe(struct bch_fs *c,
				    struct open_buckets *ptrs,
				    struct write_point *wp,
				    struct bch_devs_mask *devs_may_alloc,
				    u16 target,
				    unsigned erasure_code,
				    unsigned nr_replicas,
				    unsigned *nr_effective,
				    bool *have_cache,
				    unsigned flags,
				    struct closure *cl)
{
	struct dev_alloc_list devs_sorted;
	struct ec_stripe_head *h;
	struct open_bucket *ob;
	struct bch_dev *ca;
	unsigned i, ec_idx;

	if (!erasure_code)
		return 0;

	if (nr_replicas < 2)
		return 0;

	if (ec_open_bucket(c, ptrs))
		return 0;

	h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1,
				    wp == &c->copygc_write_point,
				    cl);
	if (IS_ERR(h))
		return PTR_ERR(h);
	if (!h)
		return 0;

	devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);

	for (i = 0; i < devs_sorted.nr; i++)
		for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
			if (!h->s->blocks[ec_idx])
				continue;

			ob = c->open_buckets + h->s->blocks[ec_idx];
			if (ob->dev == devs_sorted.devs[i] &&
			    !test_and_set_bit(ec_idx, h->s->blocks_allocated))
				goto got_bucket;
		}
	goto out_put_head;
got_bucket:
	ca = bch_dev_bkey_exists(c, ob->dev);

	ob->ec_idx	= ec_idx;
	ob->ec		= h->s;

	add_new_bucket(c, ptrs, devs_may_alloc,
		       nr_effective, have_cache, flags, ob);
	atomic_inc(&h->s->pin);
out_put_head:
	bch2_ec_stripe_head_put(c, h);
	return 0;
}

/* Sector allocator */

static void get_buckets_from_writepoint(struct bch_fs *c,
					struct open_buckets *ptrs,
					struct write_point *wp,
					struct bch_devs_mask *devs_may_alloc,
					unsigned nr_replicas,
					unsigned *nr_effective,
					bool *have_cache,
					unsigned flags,
					bool need_ec)
{
	struct open_buckets ptrs_skip = { .nr = 0 };
	struct open_bucket *ob;
	unsigned i;

	open_bucket_for_each(c, &wp->ptrs, ob, i) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);

		if (*nr_effective < nr_replicas &&
		    test_bit(ob->dev, devs_may_alloc->d) &&
		    (ca->mi.durability ||
		     (wp->data_type == BCH_DATA_user && !*have_cache)) &&
		    (ob->ec || !need_ec)) {
			add_new_bucket(c, ptrs, devs_may_alloc,
				       nr_effective, have_cache,
				       flags, ob);
		} else {
			ob_push(c, &ptrs_skip, ob);
		}
	}
	wp->ptrs = ptrs_skip;
}

static int open_bucket_add_buckets(struct bch_fs *c,
				   struct open_buckets *ptrs,
				   struct write_point *wp,
				   struct bch_devs_list *devs_have,
				   u16 target,
				   unsigned erasure_code,
				   unsigned nr_replicas,
				   unsigned *nr_effective,
				   bool *have_cache,
				   enum alloc_reserve reserve,
				   unsigned flags,
				   struct closure *_cl)
{
	struct bch_devs_mask devs;
	struct open_bucket *ob;
	struct closure *cl = NULL;
	int ret;
	unsigned i;

	rcu_read_lock();
	devs = target_rw_devs(c, wp->data_type, target);
	rcu_read_unlock();

	/* Don't allocate from devices we already have pointers to: */
	for (i = 0; i < devs_have->nr; i++)
		__clear_bit(devs_have->devs[i], devs.d);

	open_bucket_for_each(c, ptrs, ob, i)
		__clear_bit(ob->dev, devs.d);

	if (erasure_code) {
		if (!ec_open_bucket(c, ptrs)) {
			get_buckets_from_writepoint(c, ptrs, wp, &devs,
						    nr_replicas, nr_effective,
						    have_cache, flags, true);
			if (*nr_effective >= nr_replicas)
				return 0;
		}

		if (!ec_open_bucket(c, ptrs)) {
			ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs,
						       target, erasure_code,
						       nr_replicas, nr_effective,
						       have_cache, flags, _cl);
			if (ret == -FREELIST_EMPTY ||
			    ret == -OPEN_BUCKETS_EMPTY)
				return ret;
			if (*nr_effective >= nr_replicas)
				return 0;
		}
	}

	get_buckets_from_writepoint(c, ptrs, wp, &devs,
				    nr_replicas, nr_effective,
				    have_cache, flags, false);
	if (*nr_effective >= nr_replicas)
		return 0;

	percpu_down_read(&c->mark_lock);
	rcu_read_lock();

retry_blocking:
	/*
	 * Try nonblocking first, so that if one device is full we'll try from
	 * other devices:
	 */
	ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
				    nr_replicas, nr_effective, have_cache,
				    reserve, flags, cl);
	if (ret && ret != -INSUFFICIENT_DEVICES && !cl && _cl) {
		cl = _cl;
		goto retry_blocking;
	}

	rcu_read_unlock();
	percpu_up_read(&c->mark_lock);

	return ret;
}

void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
				struct open_buckets *obs)
{
	struct open_buckets ptrs = { .nr = 0 };
	struct open_bucket *ob, *ob2;
	unsigned i, j;

	open_bucket_for_each(c, obs, ob, i) {
		bool drop = !ca || ob->dev == ca->dev_idx;

		if (!drop && ob->ec) {
			mutex_lock(&ob->ec->lock);
			for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
				if (!ob->ec->blocks[j])
					continue;

				ob2 = c->open_buckets + ob->ec->blocks[j];
				drop |= ob2->dev == ca->dev_idx;
			}
			mutex_unlock(&ob->ec->lock);
		}

		if (drop)
			bch2_open_bucket_put(c, ob);
		else
			ob_push(c, &ptrs, ob);
	}

	*obs = ptrs;
}

void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
			  struct write_point *wp)
{
	mutex_lock(&wp->lock);
	bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
	mutex_unlock(&wp->lock);
}

static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
						 unsigned long write_point)
{
	unsigned hash =
		hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));

	return &c->write_points_hash[hash];
}

static struct write_point *__writepoint_find(struct hlist_head *head,
					     unsigned long write_point)
{
	struct write_point *wp;

	rcu_read_lock();
	hlist_for_each_entry_rcu(wp, head, node)
		if (wp->write_point == write_point)
			goto out;
	wp = NULL;
out:
	rcu_read_unlock();
	return wp;
}
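
/*
 * Each write point can strand up to bucket_size_max worth of partially
 * allocated space; consider the current writepoint count excessive when that
 * worst case, scaled by @factor, exceeds the free space we have left:
 */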

static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
{
	u64 stranded	= c->write_points_nr * c->bucket_size_max;
	u64 free	= bch2_fs_usage_read_short(c).free;

	return stranded * factor > free;
}

static bool try_increase_writepoints(struct bch_fs *c)
{
	struct write_point *wp;

	if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
	    too_many_writepoints(c, 32))
		return false;

	wp = c->write_points + c->write_points_nr++;
	hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
	return true;
}

static bool try_decrease_writepoints(struct bch_fs *c,
				     unsigned old_nr)
{
	struct write_point *wp;

	mutex_lock(&c->write_points_hash_lock);
	if (c->write_points_nr < old_nr) {
		mutex_unlock(&c->write_points_hash_lock);
		return true;
	}

	if (c->write_points_nr == 1 ||
	    !too_many_writepoints(c, 8)) {
		mutex_unlock(&c->write_points_hash_lock);
		return false;
	}

	wp = c->write_points + --c->write_points_nr;

	hlist_del_rcu(&wp->node);
	mutex_unlock(&c->write_points_hash_lock);

	bch2_writepoint_stop(c, NULL, wp);
	return true;
}
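
/*
 * A write_point_specifier is either a pointer to a caller-owned write point
 * (low bit clear - pointers are at least word aligned) or a hashed identifier
 * (low bit set), which is looked up in the hash table, falling back to
 * stealing the least recently used write point:
 */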

static struct write_point *writepoint_find(struct bch_fs *c,
					   unsigned long write_point)
{
	struct write_point *wp, *oldest;
	struct hlist_head *head;

	if (!(write_point & 1UL)) {
		wp = (struct write_point *) write_point;
		mutex_lock(&wp->lock);
		return wp;
	}

	head = writepoint_hash(c, write_point);
restart_find:
	wp = __writepoint_find(head, write_point);
	if (wp) {
lock_wp:
		mutex_lock(&wp->lock);
		if (wp->write_point == write_point)
			goto out;
		mutex_unlock(&wp->lock);
		goto restart_find;
	}
restart_find_oldest:
	oldest = NULL;
	for (wp = c->write_points;
	     wp < c->write_points + c->write_points_nr; wp++)
		if (!oldest || time_before64(wp->last_used, oldest->last_used))
			oldest = wp;

	mutex_lock(&oldest->lock);
	mutex_lock(&c->write_points_hash_lock);
	if (oldest >= c->write_points + c->write_points_nr ||
	    try_increase_writepoints(c)) {
		mutex_unlock(&c->write_points_hash_lock);
		mutex_unlock(&oldest->lock);
		goto restart_find_oldest;
	}

	wp = __writepoint_find(head, write_point);
	if (wp && wp != oldest) {
		mutex_unlock(&c->write_points_hash_lock);
		mutex_unlock(&oldest->lock);
		goto lock_wp;
	}

	wp = oldest;
	hlist_del_rcu(&wp->node);
	wp->write_point = write_point;
	hlist_add_head_rcu(&wp->node, head);
	mutex_unlock(&c->write_points_hash_lock);
out:
	wp->last_used = sched_clock();
	return wp;
}

/*
 * Get us an open_bucket we can allocate from, return with it locked:
 */
struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
				unsigned target,
				unsigned erasure_code,
				struct write_point_specifier write_point,
				struct bch_devs_list *devs_have,
				unsigned nr_replicas,
				unsigned nr_replicas_required,
				enum alloc_reserve reserve,
				unsigned flags,
				struct closure *cl)
{
	struct write_point *wp;
	struct open_bucket *ob;
	struct open_buckets ptrs;
	unsigned nr_effective, write_points_nr;
	unsigned ob_flags = 0;
	bool have_cache;
	int ret;
	int i;

	if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
		ob_flags |= BUCKET_ALLOC_USE_DURABILITY;

	BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
	ptrs.nr		= 0;
	nr_effective	= 0;
	write_points_nr = c->write_points_nr;
	have_cache	= false;

	wp = writepoint_find(c, write_point.v);

	if (wp->data_type == BCH_DATA_user)
		ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;

	/* metadata may not allocate on cache devices: */
	if (wp->data_type != BCH_DATA_user)
		have_cache = true;

	if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
		ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
					      target, erasure_code,
					      nr_replicas, &nr_effective,
					      &have_cache, reserve,
					      ob_flags, cl);
	} else {
		ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
					      target, erasure_code,
					      nr_replicas, &nr_effective,
					      &have_cache, reserve,
					      ob_flags, NULL);
		if (!ret)
			goto alloc_done;

		ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
					      0, erasure_code,
					      nr_replicas, &nr_effective,
					      &have_cache, reserve,
					      ob_flags, cl);
	}
alloc_done:
	BUG_ON(!ret && nr_effective < nr_replicas);

	if (erasure_code && !ec_open_bucket(c, &ptrs))
		pr_debug("failed to get ec bucket: ret %i", ret);

	if (ret == -INSUFFICIENT_DEVICES &&
	    nr_effective >= nr_replicas_required)
		ret = 0;

	if (ret)
		goto err;

	/* Free buckets we didn't use: */
	open_bucket_for_each(c, &wp->ptrs, ob, i)
		open_bucket_free_unused(c, wp, ob);

	wp->ptrs = ptrs;

	wp->sectors_free = UINT_MAX;

	open_bucket_for_each(c, &wp->ptrs, ob, i)
		wp->sectors_free = min(wp->sectors_free, ob->sectors_free);

	BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);

	return wp;
err:
	open_bucket_for_each(c, &wp->ptrs, ob, i)
		if (ptrs.nr < ARRAY_SIZE(ptrs.v))
			ob_push(c, &ptrs, ob);
		else
			open_bucket_free_unused(c, wp, ob);
	wp->ptrs = ptrs;

	mutex_unlock(&wp->lock);

	if (ret == -FREELIST_EMPTY &&
	    try_decrease_writepoints(c, write_points_nr))
		goto retry;

	switch (ret) {
	case -OPEN_BUCKETS_EMPTY:
	case -FREELIST_EMPTY:
		return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
	case -INSUFFICIENT_DEVICES:
		return ERR_PTR(-EROFS);
	default:
		BUG();
	}
}
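
/*
 * Build an extent pointer for the space remaining in @ob: the offset points
 * at the first sector of the bucket that hasn't been handed out yet.
 */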

struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);

	return (struct bch_extent_ptr) {
		.type	= 1 << BCH_EXTENT_ENTRY_ptr,
		.gen	= ob->gen,
		.dev	= ob->dev,
		.offset	= bucket_to_sector(ca, ob->bucket) +
			ca->mi.bucket_size -
			ob->sectors_free,
	};
}

/*
 * Append pointers to the space we just allocated to @k, and mark @sectors space
 * as allocated out of @ob
 */
void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
				    struct bkey_i *k, unsigned sectors,
				    bool cached)
{
	struct open_bucket *ob;
	unsigned i;

	BUG_ON(sectors > wp->sectors_free);
	wp->sectors_free -= sectors;

	open_bucket_for_each(c, &wp->ptrs, ob, i) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
		struct bch_extent_ptr ptr = bch2_ob_ptr(c, ob);

		ptr.cached = cached ||
			(!ca->mi.durability &&
			 wp->data_type == BCH_DATA_user);

		bch2_bkey_append_ptr(k, ptr);

		BUG_ON(sectors > ob->sectors_free);
		ob->sectors_free -= sectors;
	}
}

/*
 * Release the write point: unlock it, and put our references to any open
 * buckets that are now full
 */
void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
{
	struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
	struct open_bucket *ob;
	unsigned i;

	open_bucket_for_each(c, &wp->ptrs, ob, i)
		ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
	wp->ptrs = keep;

	mutex_unlock(&wp->lock);

	bch2_open_buckets_put(c, &ptrs);
}
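
/*
 * A minimal sketch of the full sector-allocator cycle, assuming a
 * hypothetical caller that already has a bkey @k, a devs_have list and a
 * closure @cl (error handling trimmed):
 *
 *	struct write_point *wp =
 *		bch2_alloc_sectors_start(c, target, 0,
 *				writepoint_ptr(&c->rebalance_write_point),
 *				&devs_have, 2, 1, RESERVE_none, 0, cl);
 *	if (IS_ERR(wp))
 *		return PTR_ERR(wp);
 *
 *	sectors = min(sectors, wp->sectors_free);
 *	bch2_alloc_sectors_append_ptrs(c, wp, k, sectors, false);
 *	bch2_alloc_sectors_done(c, wp);	// unlock, put now-full buckets
 *
 *	// ...submit the write and do the index update, then put the
 *	// remaining open bucket references (see __bch2_open_bucket_put())
 */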

static inline void writepoint_init(struct write_point *wp,
				   enum bch_data_type type)
{
	mutex_init(&wp->lock);
	wp->data_type = type;
}

void bch2_fs_allocator_foreground_init(struct bch_fs *c)
{
	struct open_bucket *ob;
	struct write_point *wp;

	mutex_init(&c->write_points_hash_lock);
	c->write_points_nr = ARRAY_SIZE(c->write_points);

	/* open bucket 0 is a sentinel NULL: */
	spin_lock_init(&c->open_buckets[0].lock);

	for (ob = c->open_buckets + 1;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
		spin_lock_init(&ob->lock);
		c->open_buckets_nr_free++;

		ob->freelist = c->open_buckets_freelist;
		c->open_buckets_freelist = ob - c->open_buckets;
	}

	writepoint_init(&c->btree_write_point,		BCH_DATA_btree);
	writepoint_init(&c->rebalance_write_point,	BCH_DATA_user);
	writepoint_init(&c->copygc_write_point,		BCH_DATA_user);

	for (wp = c->write_points;
	     wp < c->write_points + c->write_points_nr; wp++) {
		writepoint_init(wp, BCH_DATA_user);

		wp->last_used	= sched_clock();
		wp->write_point	= (unsigned long) wp;
		hlist_add_head_rcu(&wp->node,
				   writepoint_hash(c, wp->write_point));
	}
}

void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct open_bucket *ob;

	for (ob = c->open_buckets;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
	     ob++) {
		spin_lock(&ob->lock);
		if (ob->valid && !ob->on_partial_list) {
			pr_buf(out, "%zu ref %u type %s\n",
			       ob - c->open_buckets,
			       atomic_read(&ob->pin),
			       bch2_data_types[ob->data_type]);
		}
		spin_unlock(&ob->lock);
	}
}