From 5735608c14e791c10ebcb6a20fab1c8fa4cf3123 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 10 Feb 2022 19:26:55 -0500 Subject: [PATCH] bcachefs: Kill main in-memory bucket array All code using the in-memory bucket array, excluding GC, has now been converted to use the alloc btree directly - so we can finally delete it. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 26 +----------- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/btree_gc.c | 52 +++++++++++++++++++---- fs/bcachefs/buckets.c | 78 +++++++++++----------------------- fs/bcachefs/buckets.h | 20 ++------- fs/bcachefs/buckets_types.h | 1 - fs/bcachefs/recovery.c | 2 +- 8 files changed, 76 insertions(+), 107 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e8de96e4adf3..b0f49044ea24 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -400,14 +400,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c pr_buf(out, " write_time %llu", a.io_time[WRITE]); } -int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) +int bch2_alloc_read(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; struct bch_alloc_v4 a; struct bch_dev *ca; - struct bucket *g; int ret; bch2_trans_init(&trans, c, 0, 0); @@ -415,30 +414,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { ca = bch_dev_bkey_exists(c, k.k->p.inode); - g = __bucket(ca, k.k->p.offset, gc); bch2_alloc_to_v4(k, &a); - if (!gc) - *bucket_gen(ca, k.k->p.offset) = a.gen; - - g->_mark.gen = a.gen; - g->io_time[READ] = a.io_time[READ]; - g->io_time[WRITE] = a.io_time[WRITE]; - g->gen_valid = 1; - - if (!gc || - (metadata_only && - (a.data_type == BCH_DATA_user || - a.data_type == BCH_DATA_cached || - a.data_type == BCH_DATA_parity))) { - g->_mark.data_type = a.data_type; - g->_mark.dirty_sectors = a.dirty_sectors; - g->_mark.cached_sectors = a.cached_sectors; - g->_mark.stripe = a.stripe != 0; - g->stripe = a.stripe; - g->stripe_redundancy = a.stripe_redundancy; - } - + *bucket_gen(ca, k.k->p.offset) = a.gen; } bch2_trans_iter_exit(&trans, &iter); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index d82e80218b8e..3b49abf1bbc0 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -109,7 +109,7 @@ static inline bool bkey_is_alloc(const struct bkey *k) k->type == KEY_TYPE_alloc_v3; } -int bch2_alloc_read(struct bch_fs *, bool, bool); +int bch2_alloc_read(struct bch_fs *); int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 66d9c209252e..c06837612bdf 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -450,7 +450,7 @@ struct bch_dev { * gc_lock, for device resize - holding any is sufficient for access: * Or rcu_read_lock(), but only for ptr_stale(): */ - struct bucket_array __rcu *buckets[2]; + struct bucket_array __rcu *buckets_gc; struct bucket_gens __rcu *bucket_gens; u8 *oldest_gen; unsigned long *buckets_nouse; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7078b277e23b..f66b2ef03c3a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1160,10 +1160,10 @@ static void bch2_gc_free(struct bch_fs *c) genradix_free(&c->gc_stripes); for_each_member_device(ca, c, i) { - kvpfree(rcu_dereference_protected(ca->buckets[1], 1), + kvpfree(rcu_dereference_protected(ca->buckets_gc, 1), sizeof(struct bucket_array) + ca->mi.nbuckets * sizeof(struct bucket)); - ca->buckets[1] = NULL; + ca->buckets_gc = NULL; free_percpu(ca->usage_gc); ca->usage_gc = NULL; @@ -1292,7 +1292,7 @@ static int bch2_gc_start(struct bch_fs *c, } for_each_member_device(ca, c, i) { - BUG_ON(ca->buckets[1]); + BUG_ON(ca->buckets_gc); BUG_ON(ca->usage_gc); ca->usage_gc = alloc_percpu(struct bch_dev_usage); @@ -1346,8 +1346,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, .data_type = g->mark.data_type, .dirty_sectors = g->mark.dirty_sectors, .cached_sectors = g->mark.cached_sectors, - .io_time[READ] = g->io_time[READ], - .io_time[WRITE] = g->io_time[WRITE], .stripe = g->stripe, .stripe_redundancy = g->stripe_redundancy, }; @@ -1437,7 +1435,13 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) { struct bch_dev *ca; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + struct bucket *g; + struct bch_alloc_v4 a; unsigned i; + int ret; for_each_member_device(ca, c, i) { struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) + @@ -1445,17 +1449,47 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) GFP_KERNEL|__GFP_ZERO); if (!buckets) { percpu_ref_put(&ca->ref); - percpu_up_write(&c->mark_lock); bch_err(c, "error allocating ca->buckets[gc]"); return -ENOMEM; } buckets->first_bucket = ca->mi.first_bucket; buckets->nbuckets = ca->mi.nbuckets; - rcu_assign_pointer(ca->buckets[1], buckets); + rcu_assign_pointer(ca->buckets_gc, buckets); }; - return bch2_alloc_read(c, true, metadata_only); + bch2_trans_init(&trans, c, 0, 0); + + for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + ca = bch_dev_bkey_exists(c, k.k->p.inode); + g = gc_bucket(ca, k.k->p.offset); + + bch2_alloc_to_v4(k, &a); + + g->_mark.gen = a.gen; + g->gen_valid = 1; + + if (metadata_only && + (a.data_type == BCH_DATA_user || + a.data_type == BCH_DATA_cached || + a.data_type == BCH_DATA_parity)) { + g->_mark.data_type = a.data_type; + g->_mark.dirty_sectors = a.dirty_sectors; + g->_mark.cached_sectors = a.cached_sectors; + g->_mark.stripe = a.stripe != 0; + g->stripe = a.stripe; + g->stripe_redundancy = a.stripe_redundancy; + } + } + bch2_trans_iter_exit(&trans, &iter); + + bch2_trans_exit(&trans); + + if (ret) + bch_err(c, "error reading alloc info at gc start: %i", ret); + + return ret; } static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) @@ -1464,7 +1498,7 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) unsigned i; for_each_member_device(ca, c, i) { - struct bucket_array *buckets = __bucket_array(ca, true); + struct bucket_array *buckets = gc_bucket_array(ca); struct bucket *g; for_each_bucket(g, buckets) { diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 60ad873da54f..572d56676c69 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -512,8 +512,6 @@ int bch2_mark_alloc(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_alloc_v4 old_a, new_a; struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode); - struct bucket *g; - struct bucket_mark old_m, m; int ret = 0; if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket || @@ -587,21 +585,22 @@ int bch2_mark_alloc(struct btree_trans *trans, bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc); - g = __bucket(ca, new.k->p.offset, gc); + if (gc) { + struct bucket_mark old_m, m; + struct bucket *g = gc_bucket(ca, new.k->p.offset); - old_m = bucket_cmpxchg(g, m, ({ - m.gen = new_a.gen; - m.data_type = new_a.data_type; - m.dirty_sectors = new_a.dirty_sectors; - m.cached_sectors = new_a.cached_sectors; - m.stripe = new_a.stripe != 0; - })); + old_m = bucket_cmpxchg(g, m, ({ + m.gen = new_a.gen; + m.data_type = new_a.data_type; + m.dirty_sectors = new_a.dirty_sectors; + m.cached_sectors = new_a.cached_sectors; + m.stripe = new_a.stripe != 0; + })); - g->io_time[READ] = new_a.io_time[READ]; - g->io_time[WRITE] = new_a.io_time[WRITE]; - g->gen_valid = 1; - g->stripe = new_a.stripe; - g->stripe_redundancy = new_a.stripe_redundancy; + g->gen_valid = 1; + g->stripe = new_a.stripe; + g->stripe_redundancy = new_a.stripe_redundancy; + } percpu_up_read(&c->mark_lock); /* @@ -610,9 +609,9 @@ int bch2_mark_alloc(struct btree_trans *trans, */ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && - old_m.cached_sectors) { + old_a.cached_sectors) { ret = update_cached_sectors(c, new, ca->dev_idx, - -old_m.cached_sectors, + -old_a.cached_sectors, journal_seq, gc); if (ret) { bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors"); @@ -620,7 +619,7 @@ int bch2_mark_alloc(struct btree_trans *trans, } trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset), - old_m.cached_sectors); + old_a.cached_sectors); } return 0; @@ -2039,16 +2038,6 @@ recalculate: /* Startup/shutdown: */ -static void buckets_free_rcu(struct rcu_head *rcu) -{ - struct bucket_array *buckets = - container_of(rcu, struct bucket_array, rcu); - - kvpfree(buckets, - sizeof(*buckets) + - buckets->nbuckets * sizeof(struct bucket)); -} - static void bucket_gens_free_rcu(struct rcu_head *rcu) { struct bucket_gens *buckets = @@ -2059,16 +2048,12 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu) int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { - struct bucket_array *buckets = NULL, *old_buckets = NULL; struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL; unsigned long *buckets_nouse = NULL; - bool resize = ca->buckets[0] != NULL; + bool resize = ca->bucket_gens != NULL; int ret = -ENOMEM; - if (!(buckets = kvpmalloc(sizeof(struct bucket_array) + - nbuckets * sizeof(struct bucket), - GFP_KERNEL|__GFP_ZERO)) || - !(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets, + if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets, GFP_KERNEL|__GFP_ZERO)) || (c->opts.buckets_nouse && !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * @@ -2076,8 +2061,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) GFP_KERNEL|__GFP_ZERO)))) goto err; - buckets->first_bucket = ca->mi.first_bucket; - buckets->nbuckets = nbuckets; bucket_gens->first_bucket = ca->mi.first_bucket; bucket_gens->nbuckets = nbuckets; @@ -2089,15 +2072,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) percpu_down_write(&c->mark_lock); } - old_buckets = bucket_array(ca); old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - size_t n = min(buckets->nbuckets, old_buckets->nbuckets); + size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); - memcpy(buckets->b, - old_buckets->b, - n * sizeof(struct bucket)); memcpy(bucket_gens->b, old_bucket_gens->b, n); @@ -2107,22 +2086,18 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) BITS_TO_LONGS(n) * sizeof(unsigned long)); } - rcu_assign_pointer(ca->buckets[0], buckets); rcu_assign_pointer(ca->bucket_gens, bucket_gens); - buckets = old_buckets; bucket_gens = old_bucket_gens; swap(ca->buckets_nouse, buckets_nouse); - if (resize) { - percpu_up_write(&c->mark_lock); - up_write(&c->gc_lock); - } - nbuckets = ca->mi.nbuckets; - if (resize) + if (resize) { + percpu_up_write(&c->mark_lock); up_write(&ca->bucket_lock); + up_write(&c->gc_lock); + } ret = 0; err: @@ -2130,8 +2105,6 @@ err: BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); if (bucket_gens) call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu); - if (buckets) - call_rcu(&buckets->rcu, buckets_free_rcu); return ret; } @@ -2144,9 +2117,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca) BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); kvpfree(rcu_dereference_protected(ca->bucket_gens, 1), sizeof(struct bucket_gens) + ca->mi.nbuckets); - kvpfree(rcu_dereference_protected(ca->buckets[0], 1), - sizeof(struct bucket_array) + - ca->mi.nbuckets * sizeof(struct bucket)); for (i = 0; i < ARRAY_SIZE(ca->usage); i++) free_percpu(ca->usage[i]); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 9cc6c16bcc64..7ae1feadf4c0 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -30,34 +30,23 @@ _old; \ }) -static inline struct bucket_array *__bucket_array(struct bch_dev *ca, - bool gc) +static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca) { - return rcu_dereference_check(ca->buckets[gc], + return rcu_dereference_check(ca->buckets_gc, !ca->fs || percpu_rwsem_is_held(&ca->fs->mark_lock) || lockdep_is_held(&ca->fs->gc_lock) || lockdep_is_held(&ca->bucket_lock)); } -static inline struct bucket_array *bucket_array(struct bch_dev *ca) +static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) { - return __bucket_array(ca, false); -} - -static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc) -{ - struct bucket_array *buckets = __bucket_array(ca, gc); + struct bucket_array *buckets = gc_bucket_array(ca); BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); return buckets->b + b; } -static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) -{ - return __bucket(ca, b, true); -} - static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) { return rcu_dereference_check(ca->bucket_gens, @@ -65,7 +54,6 @@ static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) percpu_rwsem_is_held(&ca->fs->mark_lock) || lockdep_is_held(&ca->fs->gc_lock) || lockdep_is_held(&ca->bucket_lock)); - } static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 6ddbea4da7d1..f7bf5c1d732f 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -27,7 +27,6 @@ struct bucket { const struct bucket_mark mark; }; - u64 io_time[2]; unsigned gen_valid:1; u8 stripe_redundancy; u32 stripe; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 14edc0bf5112..8291e58089fd 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1139,7 +1139,7 @@ use_clean: err = "error reading allocation information"; down_read(&c->gc_lock); - ret = bch2_alloc_read(c, false, false); + ret = bch2_alloc_read(c); up_read(&c->gc_lock); if (ret)