From 430735cd1a0304195a080f8ee239016444a02715 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Nov 2018 01:31:41 -0500 Subject: [PATCH] bcachefs: Persist alloc info on clean shutdown - Does not persist alloc info for stripes yet - Also does not yet include filesystem block/sector counts yet, from struct fs_usage - Not made use of just yet Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 45 +++++++++++++++++++++++++---- fs/bcachefs/alloc_background.h | 1 + fs/bcachefs/bcachefs.h | 6 ++++ fs/bcachefs/btree_update.h | 3 ++ fs/bcachefs/btree_update_interior.c | 2 ++ fs/bcachefs/btree_update_leaf.c | 6 ++-- fs/bcachefs/buckets.c | 14 ++++++--- fs/bcachefs/buckets.h | 2 ++ fs/bcachefs/super.c | 39 +++++++++++++++++-------- 9 files changed, 94 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b79d5b059621..686287d12d14 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -250,6 +250,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) bch2_alloc_read_key(c, bkey_i_to_s_c(k)); } + for_each_member_device(ca, c, i) + bch2_dev_usage_from_buckets(c, ca); + mutex_lock(&c->bucket_clock[READ].lock); for_each_member_device(ca, c, i) { down_read(&ca->bucket_lock); @@ -281,35 +284,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca, #endif struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k); struct bucket *g; - struct bucket_mark m; + struct bucket_mark m, new; int ret; BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); a->k.p = POS(ca->dev_idx, b); + bch2_btree_iter_set_pos(iter, a->k.p); + + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; + percpu_down_read(&c->mark_lock); g = bucket(ca, b); - m = bucket_cmpxchg(g, m, m.dirty = false); + m = READ_ONCE(g->mark); + + if (!m.dirty) { + percpu_up_read(&c->mark_lock); + return 0; + } __alloc_write_key(a, g, m); percpu_up_read(&c->mark_lock); 
bch2_btree_iter_cond_resched(iter); - bch2_btree_iter_set_pos(iter, a->k.p); - ret = bch2_btree_insert_at(c, NULL, journal_seq, + BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_ALLOC_RESERVE| flags, BTREE_INSERT_ENTRY(iter, &a->k_i)); + if (ret) + return ret; - if (!ret && ca->buckets_written) + new = m; + new.dirty = false; + atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter); + + if (ca->buckets_written) set_bit(b, ca->buckets_written); - return ret; + return 0; } int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) @@ -899,10 +918,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t for (i = 0; i < RESERVE_NR; i++) if (fifo_push(&ca->free[i], bucket)) { fifo_pop(&ca->free_inc, bucket); + closure_wake_up(&c->freelist_wait); + ca->allocator_blocked_full = false; + spin_unlock(&c->freelist_lock); goto out; } + + if (!ca->allocator_blocked_full) { + ca->allocator_blocked_full = true; + closure_wake_up(&c->freelist_wait); + } + spin_unlock(&c->freelist_lock); if ((current->flags & PF_KTHREAD) && @@ -1227,6 +1255,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) set_bit(ca->dev_idx, c->rw_devs[i].d); } +void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca) +{ + closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full); +} + /* stop allocator thread: */ void bch2_dev_allocator_stop(struct bch_dev *ca) { diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index ef5ec659b05d..04f1e9152494 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -52,6 +52,7 @@ void bch2_recalc_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); +void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_stop(struct bch_dev *); int bch2_dev_allocator_start(struct 
bch_dev *); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 17eb0dd657a8..2d67c9911fbb 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -431,7 +431,13 @@ struct bch_dev { size_t inc_gen_needs_gc; size_t inc_gen_really_needs_gc; + + /* + * XXX: this should be an enum for allocator state, so as to include + * error state + */ bool allocator_blocked; + bool allocator_blocked_full; alloc_heap alloc_heap; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 824fb0d1b7f0..9bcab29bd033 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -78,6 +78,7 @@ enum { __BTREE_INSERT_ATOMIC, __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, + __BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_USE_RESERVE, __BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_JOURNAL_REPLAY, @@ -101,6 +102,8 @@ enum { /* Don't check for -ENOSPC: */ #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) +#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) + /* for copygc, or when merging btree nodes */ #define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) #define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d55778696bcd..4bc7be9b5298 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1172,6 +1172,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->mark_lock); + preempt_disable(); fs_usage = bch2_fs_usage_get_scratch(c); bch2_mark_key_locked(c, bkey_i_to_s_c(insert), @@ -1194,6 +1195,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, gc_pos_btree_node(b)); + preempt_enable(); percpu_up_read(&c->mark_lock); mutex_unlock(&c->btree_interior_update_lock); diff 
--git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 12fd7fba3e9a..e052a3debadb 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -629,7 +629,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans) trans_for_each_entry(trans, i) btree_insert_entry_checks(c, i); - if (unlikely(!percpu_ref_tryget(&c->writes))) + if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && + !percpu_ref_tryget(&c->writes))) return -EROFS; retry: trans_for_each_iter(trans, i) { @@ -659,7 +660,8 @@ retry: trans_for_each_iter(trans, i) bch2_btree_iter_downgrade(i->iter); out: - percpu_ref_put(&c->writes); + if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) + percpu_ref_put(&c->writes); /* make sure we didn't drop or screw up locks: */ trans_for_each_iter(trans, i) { diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index cbebc712a1da..3e92a1f6d7fc 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -387,7 +387,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, *old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ BUG_ON(!is_available_bucket(new)); - new.owned_by_allocator = 1; + new.owned_by_allocator = true; + new.dirty = true; new.data_type = 0; new.cached_sectors = 0; new.dirty_sectors = 0; @@ -460,6 +461,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, type != BCH_DATA_JOURNAL); bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ + new.dirty = true; new.data_type = type; checked_add(new.dirty_sectors, sectors); })); @@ -487,13 +489,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, true); } else { struct bucket *g; - struct bucket_mark old, new; + struct bucket_mark new; rcu_read_lock(); g = bucket(ca, b); - old = bucket_cmpxchg(g, new, ({ - new.data_type = type; + bucket_cmpxchg(g, new, ({ + new.dirty = true; + new.data_type = type; checked_add(new.dirty_sectors, sectors); })); @@ -546,6 +549,8 @@ static 
void bch2_mark_pointer(struct bch_fs *c, do { new.v.counter = old.v.counter = v; + new.dirty = true; + /* * Check this after reading bucket mark to guard against * the allocator invalidating a bucket after we've already @@ -709,6 +714,7 @@ static void bucket_set_stripe(struct bch_fs *c, BUG_ON(ptr_stale(ca, ptr)); old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ + new.dirty = true; new.stripe = enabled; if (journal_seq) { new.journal_seq_valid = 1; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 107cb48e3929..ee8c9e9a1f23 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -182,6 +182,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m, struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *); +void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *); + static inline u64 __dev_buckets_available(struct bch_dev *ca, struct bch_dev_usage stats) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 9a862b19ce22..0ad624294052 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -174,7 +174,9 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) static void __bch2_fs_read_only(struct bch_fs *c) { struct bch_dev *ca; + bool wrote; unsigned i; + int ret; bch2_rebalance_stop(c); @@ -189,23 +191,36 @@ static void __bch2_fs_read_only(struct bch_fs *c) */ bch2_journal_flush_all_pins(&c->journal); + do { + ret = bch2_alloc_write(c, false, &wrote); + if (ret) { + bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); + break; + } + + for_each_member_device(ca, c, i) + bch2_dev_allocator_quiesce(c, ca); + + bch2_journal_flush_all_pins(&c->journal); + + /* + * We need to explicitly wait on btree interior updates to complete + * before stopping the journal, flushing all journal pins isn't + * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree + * interior updates have to drop their journal pin before they're + * fully complete: + */ + 
closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); + } while (wrote); + for_each_member_device(ca, c, i) bch2_dev_allocator_stop(ca); - bch2_journal_flush_all_pins(&c->journal); - - /* - * We need to explicitly wait on btree interior updates to complete - * before stopping the journal, flushing all journal pins isn't - * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree - * interior updates have to drop their journal pin before they're - * fully complete: - */ - closure_wait_event(&c->btree_interior_update_wait, - !bch2_btree_interior_updates_nr_pending(c)); - bch2_fs_journal_stop(&c->journal); + /* XXX: mark super that alloc info is persistent */ + /* * the journal kicks off btree writes via reclaim - wait for in flight * writes after stopping journal: