bcachefs: New erasure coding shutdown path
This implements a new shutdown path for erasure coding, which is needed for the upcoming BCH_WRITE_WAIT_FOR_EC write path.

The process is:
 - Cancel new stripes being built up
 - Close out/cancel open buckets on write points or the partial list that are for stripes
 - Shut down rebalance/copygc
 - Then wait for in-flight new stripes to finish

With BCH_WRITE_WAIT_FOR_EC, move ops will be waiting on stripes to fill up before they complete; the new ec shutdown path is needed for shutting down copygc/rebalance without deadlocking.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
b9fa375bab
commit
b40901b0f7
@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
|
||||
*/
|
||||
bch2_recalc_capacity(c);
|
||||
|
||||
/* Next, close write points that point to this device... */
|
||||
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
|
||||
bch2_writepoint_stop(c, ca, &c->write_points[i]);
|
||||
|
||||
bch2_writepoint_stop(c, ca, &c->copygc_write_point);
|
||||
bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
|
||||
bch2_writepoint_stop(c, ca, &c->btree_write_point);
|
||||
|
||||
mutex_lock(&c->btree_reserve_cache_lock);
|
||||
while (c->btree_reserve_cache_nr) {
|
||||
struct btree_alloc *a =
|
||||
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
|
||||
|
||||
bch2_open_buckets_put(c, &a->ob);
|
||||
}
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
i = 0;
|
||||
while (i < c->open_buckets_partial_nr) {
|
||||
struct open_bucket *ob =
|
||||
c->open_buckets + c->open_buckets_partial[i];
|
||||
|
||||
if (ob->dev == ca->dev_idx) {
|
||||
--c->open_buckets_partial_nr;
|
||||
swap(c->open_buckets_partial[i],
|
||||
c->open_buckets_partial[c->open_buckets_partial_nr]);
|
||||
ob->on_partial_list = false;
|
||||
spin_unlock(&c->freelist_lock);
|
||||
bch2_open_bucket_put(c, ob);
|
||||
spin_lock(&c->freelist_lock);
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
bch2_ec_stop_dev(c, ca);
|
||||
bch2_open_buckets_stop(c, ca, false);
|
||||
|
||||
/*
|
||||
* Wake up threads that were blocked on allocation, so they can notice
|
||||
|
@ -1023,43 +1023,94 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
|
||||
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct open_buckets *obs)
|
||||
static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
|
||||
struct bch_dev *ca, bool ec)
|
||||
{
|
||||
struct open_buckets ptrs = { .nr = 0 };
|
||||
struct open_bucket *ob, *ob2;
|
||||
unsigned i, j;
|
||||
|
||||
open_bucket_for_each(c, obs, ob, i) {
|
||||
bool drop = !ca || ob->dev == ca->dev_idx;
|
||||
if (ec) {
|
||||
return ob->ec != NULL;
|
||||
} else if (ca) {
|
||||
bool drop = ob->dev == ca->dev_idx;
|
||||
struct open_bucket *ob2;
|
||||
unsigned i;
|
||||
|
||||
if (!drop && ob->ec) {
|
||||
mutex_lock(&ob->ec->lock);
|
||||
for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
|
||||
if (!ob->ec->blocks[j])
|
||||
for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) {
|
||||
if (!ob->ec->blocks[i])
|
||||
continue;
|
||||
|
||||
ob2 = c->open_buckets + ob->ec->blocks[j];
|
||||
ob2 = c->open_buckets + ob->ec->blocks[i];
|
||||
drop |= ob2->dev == ca->dev_idx;
|
||||
}
|
||||
mutex_unlock(&ob->ec->lock);
|
||||
}
|
||||
|
||||
if (drop)
|
||||
return drop;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
bool ec, struct write_point *wp)
|
||||
{
|
||||
struct open_buckets ptrs = { .nr = 0 };
|
||||
struct open_bucket *ob;
|
||||
unsigned i;
|
||||
|
||||
mutex_lock(&wp->lock);
|
||||
open_bucket_for_each(c, &wp->ptrs, ob, i)
|
||||
if (should_drop_bucket(ob, c, ca, ec))
|
||||
bch2_open_bucket_put(c, ob);
|
||||
else
|
||||
ob_push(c, &ptrs, ob);
|
||||
}
|
||||
|
||||
*obs = ptrs;
|
||||
wp->ptrs = ptrs;
|
||||
mutex_unlock(&wp->lock);
|
||||
}
|
||||
|
||||
void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct write_point *wp)
|
||||
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
bool ec)
|
||||
{
|
||||
mutex_lock(&wp->lock);
|
||||
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
|
||||
mutex_unlock(&wp->lock);
|
||||
unsigned i;
|
||||
|
||||
/* Next, close write points that point to this device... */
|
||||
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
|
||||
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
|
||||
|
||||
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
|
||||
|
||||
mutex_lock(&c->btree_reserve_cache_lock);
|
||||
while (c->btree_reserve_cache_nr) {
|
||||
struct btree_alloc *a =
|
||||
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
|
||||
|
||||
bch2_open_buckets_put(c, &a->ob);
|
||||
}
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
i = 0;
|
||||
while (i < c->open_buckets_partial_nr) {
|
||||
struct open_bucket *ob =
|
||||
c->open_buckets + c->open_buckets_partial[i];
|
||||
|
||||
if (should_drop_bucket(ob, c, ca, ec)) {
|
||||
--c->open_buckets_partial_nr;
|
||||
swap(c->open_buckets_partial[i],
|
||||
c->open_buckets_partial[c->open_buckets_partial_nr]);
|
||||
ob->on_partial_list = false;
|
||||
spin_unlock(&c->freelist_lock);
|
||||
bch2_open_bucket_put(c, ob);
|
||||
spin_lock(&c->freelist_lock);
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
bch2_ec_stop_dev(c, ca);
|
||||
}
|
||||
|
||||
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
|
||||
@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool try_decrease_writepoints(struct bch_fs *c,
|
||||
unsigned old_nr)
|
||||
static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr)
|
||||
{
|
||||
struct write_point *wp;
|
||||
|
||||
@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
|
||||
hlist_del_rcu(&wp->node);
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
|
||||
bch2_writepoint_stop(c, NULL, wp);
|
||||
bch2_writepoint_stop(c, NULL, false, wp);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
|
||||
struct bkey_i *, unsigned, bool);
|
||||
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
|
||||
|
||||
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
|
||||
struct open_buckets *);
|
||||
|
||||
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
|
||||
struct write_point *);
|
||||
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
|
||||
|
||||
static inline struct write_point_specifier writepoint_hashed(unsigned long v)
|
||||
{
|
||||
|
@ -655,7 +655,6 @@ typedef struct {
|
||||
x(fallocate) \
|
||||
x(discard) \
|
||||
x(invalidate) \
|
||||
x(move) \
|
||||
x(delete_dead_snapshots) \
|
||||
x(snapshot_delete_pagecache) \
|
||||
x(sysfs)
|
||||
@ -958,14 +957,14 @@ struct bch_fs {
|
||||
|
||||
struct list_head ec_stripe_new_list;
|
||||
struct mutex ec_stripe_new_lock;
|
||||
wait_queue_head_t ec_stripe_new_wait;
|
||||
|
||||
struct work_struct ec_stripe_create_work;
|
||||
u64 ec_stripe_hint;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
|
||||
struct work_struct ec_stripe_delete_work;
|
||||
struct llist_head ec_stripe_delete_list;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
|
||||
/* REFLINK */
|
||||
u64 reflink_hint;
|
||||
|
@ -252,6 +252,7 @@ restart_drop_extra_replicas:
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, &op->res,
|
||||
NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
m->data_opts.btree_insert_flags);
|
||||
if (!ret) {
|
||||
|
@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
|
||||
while (1) {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
|
||||
s, &bp_offset));
|
||||
@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = bch2_trans_do(c, &s->res, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
ec_stripe_key_update(&trans, &s->new_stripe.key,
|
||||
!s->have_existing_stripe));
|
||||
if (ret) {
|
||||
@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
|
||||
h = ERR_PTR(-EROFS);
|
||||
goto found;
|
||||
}
|
||||
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list)
|
||||
if (h->target == target &&
|
||||
h->algo == algo &&
|
||||
@ -1753,7 +1761,7 @@ err:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct ec_stripe_head *h;
|
||||
struct open_bucket *ob;
|
||||
@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
||||
mutex_lock(&c->ec_stripe_head_lock);
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
|
||||
|
||||
mutex_lock(&h->lock);
|
||||
if (!h->s)
|
||||
goto unlock;
|
||||
|
||||
if (!ca)
|
||||
goto found;
|
||||
|
||||
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
|
||||
if (!h->s->blocks[i])
|
||||
continue;
|
||||
@ -1784,6 +1794,32 @@ unlock:
|
||||
mutex_unlock(&c->ec_stripe_head_lock);
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
__bch2_ec_stop(c, ca);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_stop(struct bch_fs *c)
|
||||
{
|
||||
__bch2_ec_stop(c, NULL);
|
||||
}
|
||||
|
||||
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
ret = list_empty(&c->ec_stripe_new_list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_fs_ec_flush(struct bch_fs *c)
|
||||
{
|
||||
wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
|
||||
}
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_ec_init_early(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
mutex_init(&c->ec_stripes_heap_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_new_list);
|
||||
mutex_init(&c->ec_stripe_new_lock);
|
||||
init_waitqueue_head(&c->ec_stripe_new_wait);
|
||||
|
||||
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
|
||||
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
|
||||
}
|
||||
|
||||
int bch2_fs_ec_init(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
|
||||
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
|
@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
void bch2_ec_flush_new_stripes(struct bch_fs *);
|
||||
void bch2_fs_ec_stop(struct bch_fs *);
|
||||
void bch2_fs_ec_flush(struct bch_fs *);
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *);
|
||||
|
||||
|
@ -705,7 +705,8 @@ static void bch2_write_done(struct closure *cl)
|
||||
struct bch_fs *c = op->c;
|
||||
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_write);
|
||||
if (!(op->flags & BCH_WRITE_MOVE))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_write);
|
||||
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
|
||||
@ -1842,7 +1843,12 @@ void bch2_write(struct closure *cl)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (c->opts.nochanges ||
|
||||
if (c->opts.nochanges) {
|
||||
op->error = -BCH_ERR_erofs_no_writes;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!(op->flags & BCH_WRITE_MOVE) &&
|
||||
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
|
||||
op->error = -BCH_ERR_erofs_no_writes;
|
||||
goto err;
|
||||
|
@ -59,7 +59,6 @@ struct moving_io {
|
||||
static void move_free(struct moving_io *io)
|
||||
{
|
||||
struct moving_context *ctxt = io->write.ctxt;
|
||||
struct bch_fs *c = ctxt->c;
|
||||
|
||||
if (io->b)
|
||||
atomic_dec(&io->b->count);
|
||||
@ -71,7 +70,6 @@ static void move_free(struct moving_io *io)
|
||||
wake_up(&ctxt->wait);
|
||||
mutex_unlock(&ctxt->lock);
|
||||
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_move);
|
||||
kfree(io);
|
||||
}
|
||||
|
||||
@ -280,9 +278,6 @@ static int bch2_move_extent(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
|
||||
return -BCH_ERR_erofs_no_writes;
|
||||
|
||||
/*
|
||||
* Before memory allocations & taking nocow locks in
|
||||
* bch2_data_update_init():
|
||||
@ -378,7 +373,6 @@ err_free_pages:
|
||||
err_free:
|
||||
kfree(io);
|
||||
err:
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_move);
|
||||
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
|
||||
return ret;
|
||||
}
|
||||
|
@ -205,9 +205,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
unsigned i, clean_passes = 0;
|
||||
u64 seq = 0;
|
||||
|
||||
bch2_fs_ec_stop(c);
|
||||
bch2_open_buckets_stop(c, NULL, true);
|
||||
bch2_rebalance_stop(c);
|
||||
bch2_copygc_stop(c);
|
||||
bch2_gc_thread_stop(c);
|
||||
bch2_fs_ec_flush(c);
|
||||
|
||||
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
|
||||
journal_cur_seq(&c->journal));
|
||||
@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
INIT_LIST_HEAD(&c->fsck_errors);
|
||||
mutex_init(&c->fsck_error_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_new_list);
|
||||
mutex_init(&c->ec_stripe_new_lock);
|
||||
|
||||
|
||||
mutex_init(&c->ec_stripes_heap_lock);
|
||||
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
|
||||
seqcount_init(&c->usage_lock);
|
||||
|
Loading…
Reference in New Issue
Block a user