bcachefs: Scale down number of writepoints when low on space
This means we don't have to reserve space for them when calculating filesystem capacity.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
72644db153
commit
b092dadd55
@ -975,6 +975,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
u64 capacity = 0, reserved_sectors = 0, gc_reserve;
|
||||
unsigned bucket_size_max = 0;
|
||||
unsigned long ra_pages = 0;
|
||||
unsigned i, j;
|
||||
|
||||
@ -1012,12 +1013,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
|
||||
dev_reserve += ca->free_inc.size;
|
||||
|
||||
dev_reserve += ARRAY_SIZE(c->write_points);
|
||||
|
||||
dev_reserve += 1; /* btree write point */
|
||||
dev_reserve += 1; /* copygc write point */
|
||||
dev_reserve += 1; /* rebalance write point */
|
||||
dev_reserve += WRITE_POINT_COUNT;
|
||||
|
||||
dev_reserve *= ca->mi.bucket_size;
|
||||
|
||||
@ -1027,6 +1025,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
ca->mi.first_bucket);
|
||||
|
||||
reserved_sectors += dev_reserve * 2;
|
||||
|
||||
bucket_size_max = max_t(unsigned, bucket_size_max,
|
||||
ca->mi.bucket_size);
|
||||
}
|
||||
|
||||
gc_reserve = c->opts.gc_reserve_bytes
|
||||
@ -1039,6 +1040,8 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
|
||||
c->capacity = capacity - reserved_sectors;
|
||||
|
||||
c->bucket_size_max = bucket_size_max;
|
||||
|
||||
if (c->capacity) {
|
||||
bch2_io_timer_add(&c->io_clock[READ],
|
||||
&c->bucket_clock[READ].rescale);
|
||||
@ -1330,8 +1333,6 @@ not_enough:
|
||||
* invalidated on disk:
|
||||
*/
|
||||
if (invalidating_data) {
|
||||
BUG();
|
||||
pr_info("holding writes");
|
||||
pr_debug("invalidating existing data");
|
||||
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
} else {
|
||||
@ -1391,40 +1392,12 @@ int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
return bch2_alloc_write(c);
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_init(struct bch_fs *c)
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *c)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
struct write_point *wp;
|
||||
|
||||
mutex_init(&c->write_points_hash_lock);
|
||||
spin_lock_init(&c->freelist_lock);
|
||||
bch2_bucket_clock_init(c, READ);
|
||||
bch2_bucket_clock_init(c, WRITE);
|
||||
|
||||
/* open bucket 0 is a sentinal NULL: */
|
||||
spin_lock_init(&c->open_buckets[0].lock);
|
||||
|
||||
for (ob = c->open_buckets + 1;
|
||||
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
|
||||
spin_lock_init(&ob->lock);
|
||||
c->open_buckets_nr_free++;
|
||||
|
||||
ob->freelist = c->open_buckets_freelist;
|
||||
c->open_buckets_freelist = ob - c->open_buckets;
|
||||
}
|
||||
|
||||
writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
|
||||
writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
|
||||
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
|
||||
writepoint_init(wp, BCH_DATA_USER);
|
||||
|
||||
wp->last_used = sched_clock();
|
||||
wp->write_point = (unsigned long) wp;
|
||||
hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
|
||||
}
|
||||
|
||||
c->pd_controllers_update_seconds = 5;
|
||||
INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
|
||||
}
|
||||
|
@ -57,6 +57,6 @@ int bch2_dev_allocator_start(struct bch_dev *);
|
||||
|
||||
int bch2_alloc_write(struct bch_fs *);
|
||||
int bch2_fs_allocator_start(struct bch_fs *);
|
||||
void bch2_fs_allocator_init(struct bch_fs *);
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
|
||||
|
@ -492,7 +492,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
mutex_lock(&wp->lock);
|
||||
open_bucket_for_each(c, &wp->ptrs, ob, i)
|
||||
if (ob->ptr.dev == ca->dev_idx)
|
||||
if (!ca || ob->ptr.dev == ca->dev_idx)
|
||||
open_bucket_free_unused(c, wp, ob);
|
||||
else
|
||||
ob_push(c, &ptrs, ob);
|
||||
@ -501,6 +501,15 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
mutex_unlock(&wp->lock);
|
||||
}
|
||||
|
||||
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
|
||||
unsigned long write_point)
|
||||
{
|
||||
unsigned hash =
|
||||
hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
|
||||
|
||||
return &c->write_points_hash[hash];
|
||||
}
|
||||
|
||||
static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
unsigned long write_point)
|
||||
{
|
||||
@ -513,6 +522,53 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
|
||||
{
|
||||
u64 stranded = c->write_points_nr * c->bucket_size_max;
|
||||
u64 free = bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
|
||||
|
||||
return stranded * factor > free;
|
||||
}
|
||||
|
||||
static bool try_increase_writepoints(struct bch_fs *c)
|
||||
{
|
||||
struct write_point *wp;
|
||||
|
||||
if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
|
||||
too_many_writepoints(c, 32))
|
||||
return false;
|
||||
|
||||
wp = c->write_points + c->write_points_nr++;
|
||||
hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool try_decrease_writepoints(struct bch_fs *c,
|
||||
unsigned old_nr)
|
||||
{
|
||||
struct write_point *wp;
|
||||
|
||||
mutex_lock(&c->write_points_hash_lock);
|
||||
if (c->write_points_nr < old_nr) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (c->write_points_nr == 1 ||
|
||||
!too_many_writepoints(c, 8)) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
wp = c->write_points + --c->write_points_nr;
|
||||
|
||||
hlist_del_rcu(&wp->node);
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
|
||||
bch2_writepoint_stop(c, NULL, wp);
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct write_point *writepoint_find(struct bch_fs *c,
|
||||
unsigned long write_point)
|
||||
{
|
||||
@ -536,16 +592,22 @@ lock_wp:
|
||||
mutex_unlock(&wp->lock);
|
||||
goto restart_find;
|
||||
}
|
||||
|
||||
restart_find_oldest:
|
||||
oldest = NULL;
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + ARRAY_SIZE(c->write_points);
|
||||
wp++)
|
||||
wp < c->write_points + c->write_points_nr; wp++)
|
||||
if (!oldest || time_before64(wp->last_used, oldest->last_used))
|
||||
oldest = wp;
|
||||
|
||||
mutex_lock(&oldest->lock);
|
||||
mutex_lock(&c->write_points_hash_lock);
|
||||
if (oldest >= c->write_points + c->write_points_nr ||
|
||||
try_increase_writepoints(c)) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
mutex_unlock(&oldest->lock);
|
||||
goto restart_find_oldest;
|
||||
}
|
||||
|
||||
wp = __writepoint_find(head, write_point);
|
||||
if (wp && wp != oldest) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
@ -581,10 +643,12 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
|
||||
unsigned nr_effective = 0;
|
||||
struct open_buckets ptrs = { .nr = 0 };
|
||||
bool have_cache = false;
|
||||
unsigned write_points_nr;
|
||||
int ret = 0, i;
|
||||
|
||||
BUG_ON(!nr_replicas || !nr_replicas_required);
|
||||
|
||||
retry:
|
||||
write_points_nr = c->write_points_nr;
|
||||
wp = writepoint_find(c, write_point.v);
|
||||
|
||||
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
|
||||
@ -637,6 +701,11 @@ err:
|
||||
wp->ptrs = ptrs;
|
||||
|
||||
mutex_unlock(&wp->lock);
|
||||
|
||||
if (ret == -ENOSPC &&
|
||||
try_decrease_writepoints(c, write_points_nr))
|
||||
goto retry;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -688,3 +757,37 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
|
||||
|
||||
bch2_open_buckets_put(c, &ptrs);
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_foreground_init(struct bch_fs *c)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
struct write_point *wp;
|
||||
|
||||
mutex_init(&c->write_points_hash_lock);
|
||||
c->write_points_nr = ARRAY_SIZE(c->write_points);
|
||||
|
||||
/* open bucket 0 is a sentinal NULL: */
|
||||
spin_lock_init(&c->open_buckets[0].lock);
|
||||
|
||||
for (ob = c->open_buckets + 1;
|
||||
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
|
||||
spin_lock_init(&ob->lock);
|
||||
c->open_buckets_nr_free++;
|
||||
|
||||
ob->freelist = c->open_buckets_freelist;
|
||||
c->open_buckets_freelist = ob - c->open_buckets;
|
||||
}
|
||||
|
||||
writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
|
||||
writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
|
||||
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + c->write_points_nr; wp++) {
|
||||
writepoint_init(wp, BCH_DATA_USER);
|
||||
|
||||
wp->last_used = sched_clock();
|
||||
wp->write_point = (unsigned long) wp;
|
||||
hlist_add_head_rcu(&wp->node,
|
||||
writepoint_hash(c, wp->write_point));
|
||||
}
|
||||
}
|
||||
|
@ -91,15 +91,6 @@ void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
|
||||
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
|
||||
struct write_point *);
|
||||
|
||||
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
|
||||
unsigned long write_point)
|
||||
{
|
||||
unsigned hash =
|
||||
hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
|
||||
|
||||
return &c->write_points_hash[hash];
|
||||
}
|
||||
|
||||
static inline struct write_point_specifier writepoint_hashed(unsigned long v)
|
||||
{
|
||||
return (struct write_point_specifier) { .v = v | 1 };
|
||||
@ -117,4 +108,6 @@ static inline void writepoint_init(struct write_point *wp,
|
||||
wp->type = type;
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_foreground_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
|
||||
|
@ -46,7 +46,9 @@ typedef FIFO(long) alloc_fifo;
|
||||
|
||||
/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
|
||||
#define OPEN_BUCKETS_COUNT 256
|
||||
#define WRITE_POINT_COUNT 32
|
||||
|
||||
#define WRITE_POINT_HASH_NR 32
|
||||
#define WRITE_POINT_MAX 32
|
||||
|
||||
struct open_bucket {
|
||||
spinlock_t lock;
|
||||
|
@ -601,6 +601,7 @@ struct bch_fs {
|
||||
* and forces them to be revalidated
|
||||
*/
|
||||
u32 capacity_gen;
|
||||
unsigned bucket_size_max;
|
||||
|
||||
atomic64_t sectors_available;
|
||||
|
||||
@ -630,9 +631,10 @@ struct bch_fs {
|
||||
struct write_point btree_write_point;
|
||||
struct write_point rebalance_write_point;
|
||||
|
||||
struct write_point write_points[WRITE_POINT_COUNT];
|
||||
struct hlist_head write_points_hash[WRITE_POINT_COUNT];
|
||||
struct write_point write_points[WRITE_POINT_MAX];
|
||||
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
|
||||
struct mutex write_points_hash_lock;
|
||||
unsigned write_points_nr;
|
||||
|
||||
/* GARBAGE COLLECTION */
|
||||
struct task_struct *gc_thread;
|
||||
|
@ -300,11 +300,6 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
return min(c->capacity, __bch2_fs_sectors_used(c, stats));
|
||||
}
|
||||
|
||||
static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
{
|
||||
return c->capacity - bch2_fs_sectors_used(c, stats);
|
||||
}
|
||||
|
||||
static inline int is_unavailable_bucket(struct bucket_mark m)
|
||||
{
|
||||
return !is_available_bucket(m);
|
||||
|
@ -175,6 +175,12 @@ void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
|
||||
|
||||
static inline u64 bch2_fs_sectors_free(struct bch_fs *c,
|
||||
struct bch_fs_usage stats)
|
||||
{
|
||||
return c->capacity - bch2_fs_sectors_used(c, stats);
|
||||
}
|
||||
|
||||
static inline bool is_available_bucket(struct bucket_mark mark)
|
||||
{
|
||||
return (!mark.owned_by_allocator &&
|
||||
|
@ -524,7 +524,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
for (i = 0; i < BCH_TIME_STAT_NR; i++)
|
||||
bch2_time_stats_init(&c->times[i]);
|
||||
|
||||
bch2_fs_allocator_init(c);
|
||||
bch2_fs_allocator_background_init(c);
|
||||
bch2_fs_allocator_foreground_init(c);
|
||||
bch2_fs_rebalance_init(c);
|
||||
bch2_fs_quota_init(c);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user