bcachefs: Persist alloc info on clean shutdown
- Does not yet persist alloc info for stripes
- Also does not yet include filesystem block/sector counts from struct fs_usage
- Not made use of just yet

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
commit 430735cd1a
parent 5e5d9bdbb8
@@ -250,6 +250,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 		bch2_alloc_read_key(c, bkey_i_to_s_c(k));
 	}
 
+	for_each_member_device(ca, c, i)
+		bch2_dev_usage_from_buckets(c, ca);
+
 	mutex_lock(&c->bucket_clock[READ].lock);
 	for_each_member_device(ca, c, i) {
 		down_read(&ca->bucket_lock);
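The new bch2_dev_usage_from_buckets() call recomputes per-device usage from the in-memory bucket marks once the alloc btree has been read, rather than trusting counters carried over from before. A minimal standalone sketch of that idea, with simplified, hypothetical types (not the kernel implementation):

#include <stddef.h>

/* hypothetical, cut-down stand-ins for struct bucket_mark / bch_dev_usage */
struct mark {
	unsigned dirty_sectors;
	unsigned cached_sectors;
};

struct dev_usage {
	unsigned long buckets_used;
	unsigned long sectors_dirty;
	unsigned long sectors_cached;
};

/* Fold over every bucket's mark to rebuild the device's usage counters. */
static struct dev_usage usage_from_buckets(const struct mark *marks, size_t nbuckets)
{
	struct dev_usage u = { 0 };

	for (size_t b = 0; b < nbuckets; b++) {
		u.sectors_dirty  += marks[b].dirty_sectors;
		u.sectors_cached += marks[b].cached_sectors;
		u.buckets_used   += marks[b].dirty_sectors != 0;
	}
	return u;
}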
@@ -281,35 +284,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 #endif
 	struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
 	struct bucket *g;
-	struct bucket_mark m;
+	struct bucket_mark m, new;
 	int ret;
 
 	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
 	a->k.p = POS(ca->dev_idx, b);
 
+	bch2_btree_iter_set_pos(iter, a->k.p);
+
+	ret = bch2_btree_iter_traverse(iter);
+	if (ret)
+		return ret;
+
 	percpu_down_read(&c->mark_lock);
 	g = bucket(ca, b);
-	m = bucket_cmpxchg(g, m, m.dirty = false);
+	m = READ_ONCE(g->mark);
+
+	if (!m.dirty) {
+		percpu_up_read(&c->mark_lock);
+		return 0;
+	}
 
 	__alloc_write_key(a, g, m);
 	percpu_up_read(&c->mark_lock);
 
 	bch2_btree_iter_cond_resched(iter);
 
-	bch2_btree_iter_set_pos(iter, a->k.p);
-
 	ret = bch2_btree_insert_at(c, NULL, journal_seq,
+				   BTREE_INSERT_NOCHECK_RW|
 				   BTREE_INSERT_NOFAIL|
 				   BTREE_INSERT_USE_RESERVE|
 				   BTREE_INSERT_USE_ALLOC_RESERVE|
 				   flags,
 				   BTREE_INSERT_ENTRY(iter, &a->k_i));
+	if (ret)
+		return ret;
 
-	if (!ret && ca->buckets_written)
-		set_bit(b, ca->buckets_written);
+	new = m;
+	new.dirty = false;
+	atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
 
-	return ret;
+	if (ca->buckets_written)
+		set_bit(b, ca->buckets_written);
+
+	return 0;
 }
 
 int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
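__bch2_alloc_write_key() now skips buckets whose mark isn't dirty, and after a successful btree insert clears the dirty bit with a compare-and-exchange, so the clear only takes effect if the mark is still exactly the value that was persisted — a concurrent update that re-dirties the bucket is never lost. A minimal standalone sketch of that pattern, with a simplified, assumed mark layout:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* assumed simplified mark: one dirty bit packed into a 64-bit word */
union mark {
	struct {
		uint64_t dirty:1;
		uint64_t rest:63;
	};
	uint64_t counter;
};

/* persist() stands in for building the alloc key and doing the btree insert */
static int write_one_bucket(_Atomic uint64_t *v, int (*persist)(union mark))
{
	union mark m = { .counter = atomic_load(v) }, clean;
	int ret;

	if (!m.dirty)
		return 0;		/* nothing changed since the last write */

	ret = persist(m);
	if (ret)
		return ret;

	clean = m;
	clean.dirty = false;

	/* clear dirty only if nobody re-marked the bucket in the meantime */
	atomic_compare_exchange_strong(v, &m.counter, clean.counter);
	return 0;
}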
@@ -899,10 +918,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
 	for (i = 0; i < RESERVE_NR; i++)
 		if (fifo_push(&ca->free[i], bucket)) {
 			fifo_pop(&ca->free_inc, bucket);
+
 			closure_wake_up(&c->freelist_wait);
+			ca->allocator_blocked_full = false;
+
 			spin_unlock(&c->freelist_lock);
 			goto out;
 		}
+
+	if (!ca->allocator_blocked_full) {
+		ca->allocator_blocked_full = true;
+		closure_wake_up(&c->freelist_wait);
+	}
+
 	spin_unlock(&c->freelist_lock);
 
 	if ((current->flags & PF_KTHREAD) &&
@@ -1227,6 +1255,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 		set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
+{
+	closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+}
+
 /* stop allocator thread: */
 void bch2_dev_allocator_stop(struct bch_dev *ca)
 {
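bch2_dev_allocator_quiesce() waits until the device's allocator has filled its freelists (allocator_blocked_full, set in push_invalidated_bucket() above) — i.e. until the allocator has gone idle of its own accord rather than being forcibly stopped. A hedged, excerpt-style usage sketch (not standalone; it mirrors the ordering the read-only path later in this commit uses):

	do {
		ret = bch2_alloc_write(c, false, &wrote);	/* write out dirty alloc info */
		if (ret)
			break;

		for_each_member_device(ca, c, i)
			bch2_dev_allocator_quiesce(c, ca);	/* wait for freelists to fill */
	} while (wrote);

	for_each_member_device(ca, c, i)
		bch2_dev_allocator_stop(ca);			/* only now stop the threads */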
@@ -52,6 +52,7 @@ void bch2_recalc_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
 
+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
@@ -431,7 +431,13 @@ struct bch_dev {
 	size_t			inc_gen_needs_gc;
 	size_t			inc_gen_really_needs_gc;
 
+	/*
+	 * XXX: this should be an enum for allocator state, so as to include
+	 * error state
+	 */
 	bool			allocator_blocked;
+	bool			allocator_blocked_full;
 
 	alloc_heap		alloc_heap;
 
@@ -78,6 +78,7 @@ enum {
 	__BTREE_INSERT_ATOMIC,
 	__BTREE_INSERT_NOUNLOCK,
 	__BTREE_INSERT_NOFAIL,
+	__BTREE_INSERT_NOCHECK_RW,
 	__BTREE_INSERT_USE_RESERVE,
 	__BTREE_INSERT_USE_ALLOC_RESERVE,
 	__BTREE_INSERT_JOURNAL_REPLAY,
@@ -101,6 +102,8 @@ enum {
 /* Don't check for -ENOSPC: */
 #define BTREE_INSERT_NOFAIL		(1 << __BTREE_INSERT_NOFAIL)
 
+#define BTREE_INSERT_NOCHECK_RW		(1 << __BTREE_INSERT_NOCHECK_RW)
+
 /* for copygc, or when merging btree nodes */
 #define BTREE_INSERT_USE_RESERVE	(1 << __BTREE_INSERT_USE_RESERVE)
 #define BTREE_INSERT_USE_ALLOC_RESERVE	(1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
@@ -1172,6 +1172,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
 	mutex_lock(&c->btree_interior_update_lock);
 	percpu_down_read(&c->mark_lock);
+	preempt_disable();
 	fs_usage = bch2_fs_usage_get_scratch(c);
 
 	bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
@@ -1194,6 +1195,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
 			    gc_pos_btree_node(b));
 
+	preempt_enable();
 	percpu_up_read(&c->mark_lock);
 	mutex_unlock(&c->btree_interior_update_lock);
 
@@ -629,7 +629,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
 	trans_for_each_entry(trans, i)
 		btree_insert_entry_checks(c, i);
 
-	if (unlikely(!percpu_ref_tryget(&c->writes)))
+	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+		     !percpu_ref_tryget(&c->writes)))
 		return -EROFS;
 retry:
 	trans_for_each_iter(trans, i) {
@@ -659,6 +660,7 @@ retry:
 	trans_for_each_iter(trans, i)
 		bch2_btree_iter_downgrade(i->iter);
 out:
-	percpu_ref_put(&c->writes);
+	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+		percpu_ref_put(&c->writes);
 
 	/* make sure we didn't drop or screw up locks: */
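BTREE_INSERT_NOCHECK_RW exists so the alloc-info inserts issued while going read-only can bypass the c->writes ref, which is already being torn down at that point: the flag skips both the tryget and the matching put. A simplified userspace sketch of the pattern — the flag name, struct, and atomics below are stand-ins, not the kernel's percpu_ref API:

#include <stdatomic.h>
#include <stdbool.h>

#define INSERT_NOCHECK_RW	(1 << 0)	/* hypothetical stand-in flag */

struct fs {
	_Atomic long writes;	/* stand-in for the percpu_ref c->writes */
	_Atomic bool going_ro;
};

static bool writes_tryget(struct fs *c)
{
	atomic_fetch_add(&c->writes, 1);
	if (atomic_load(&c->going_ro)) {
		/* raced with shutdown; a real percpu_ref handles this internally */
		atomic_fetch_sub(&c->writes, 1);
		return false;
	}
	return true;
}

static int insert(struct fs *c, unsigned flags)
{
	if (!(flags & INSERT_NOCHECK_RW) && !writes_tryget(c))
		return -1;			/* -EROFS in the kernel */

	/* ... perform the btree update ... */

	if (!(flags & INSERT_NOCHECK_RW))
		atomic_fetch_sub(&c->writes, 1);
	return 0;
}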
@@ -387,7 +387,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 	*old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
 		BUG_ON(!is_available_bucket(new));
 
-		new.owned_by_allocator	= 1;
+		new.owned_by_allocator	= true;
+		new.dirty		= true;
 		new.data_type		= 0;
 		new.cached_sectors	= 0;
 		new.dirty_sectors	= 0;
@@ -460,6 +461,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	       type != BCH_DATA_JOURNAL);
 
 	bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+		new.dirty	= true;
 		new.data_type	= type;
 		checked_add(new.dirty_sectors, sectors);
 	}));
@@ -487,12 +489,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 					  true);
 	} else {
 		struct bucket *g;
-		struct bucket_mark old, new;
+		struct bucket_mark new;
 
 		rcu_read_lock();
 
 		g = bucket(ca, b);
-		old = bucket_cmpxchg(g, new, ({
+		bucket_cmpxchg(g, new, ({
+			new.dirty	= true;
 			new.data_type	= type;
 			checked_add(new.dirty_sectors, sectors);
 		}));
@@ -546,6 +549,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
 	do {
 		new.v.counter = old.v.counter = v;
 
+		new.dirty = true;
+
 		/*
 		 * Check this after reading bucket mark to guard against
 		 * the allocator invalidating a bucket after we've already
@@ -709,6 +714,7 @@ static void bucket_set_stripe(struct bch_fs *c,
 		BUG_ON(ptr_stale(ca, ptr));
 
 		old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+			new.dirty			= true;
 			new.stripe			= enabled;
 			if (journal_seq) {
 				new.journal_seq_valid	= 1;
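Every bucket-mark update site above now sets new.dirty inside its cmpxchg loop, so the alloc write pass can find buckets whose alloc info changed since it was last persisted. A standalone sketch of such a loop in the style of bucket_cmpxchg(), again with a simplified, assumed mark layout rather than the kernel's:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* assumed simplified mark layout packed into one 64-bit word */
union mark {
	struct {
		uint64_t dirty:1;
		uint64_t data_type:3;
		uint64_t dirty_sectors:32;
	};
	uint64_t counter;
};

/* Update a mark: retry until the whole word swaps cleanly. */
static union mark mark_sectors(_Atomic uint64_t *v, unsigned type, uint32_t sectors)
{
	union mark old, new;
	uint64_t cur = atomic_load(v);

	do {
		old.counter = new.counter = cur;
		new.dirty = true;		/* flag the bucket for the write pass */
		new.data_type = type;
		new.dirty_sectors += sectors;
	} while (!atomic_compare_exchange_weak(v, &cur, new.counter));

	return old;	/* previous mark, like bucket_cmpxchg() returns */
}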
@@ -182,6 +182,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
 
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
 
+void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
 					  struct bch_dev_usage stats)
 {
@@ -174,7 +174,9 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
 	struct bch_dev *ca;
+	bool wrote;
 	unsigned i;
+	int ret;
 
 	bch2_rebalance_stop(c);
@@ -189,8 +191,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	 */
 	bch2_journal_flush_all_pins(&c->journal);
 
-	for_each_member_device(ca, c, i)
-		bch2_dev_allocator_stop(ca);
+	do {
+		ret = bch2_alloc_write(c, false, &wrote);
+		if (ret) {
+			bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+			break;
+		}
+
+		for_each_member_device(ca, c, i)
+			bch2_dev_allocator_quiesce(c, ca);
 
-	bch2_journal_flush_all_pins(&c->journal);
+		bch2_journal_flush_all_pins(&c->journal);
 
@@ -203,9 +212,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	 */
 	closure_wait_event(&c->btree_interior_update_wait,
 			   !bch2_btree_interior_updates_nr_pending(c));
+	} while (wrote);
+
+	for_each_member_device(ca, c, i)
+		bch2_dev_allocator_stop(ca);
 
 	bch2_fs_journal_stop(&c->journal);
 
+	/* XXX: mark super that alloc info is persistent */
+
 	/*
 	 * the journal kicks off btree writes via reclaim - wait for in flight
 	 * writes after stopping journal: