From 12ce5b7df1e0e432bcac22079e4493cab5cd8b23 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Wed, 12 Jan 2022 01:14:47 -0500
Subject: [PATCH] bcachefs: Btree key cache coherency

 - Updates to non key cache iterators will now be transparently
   redirected to the key cache for cached btrees.
 - Except when creating new keys: then the update goes to the
   underlying btree.

For iterating over a cached btree to work, we need to ensure that if a
key exists in the key cache, it also exists in the btree - otherwise
the iterator code will skip past it and not check the key cache.
Otherwise, for consistency, all updates should go to the same place -
the key cache.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/alloc_background.c  |   3 +-
 fs/bcachefs/btree_iter.c        |   2 +
 fs/bcachefs/btree_key_cache.c   |  13 +--
 fs/bcachefs/btree_key_cache.h   |  10 +--
 fs/bcachefs/btree_types.h       |   6 ++
 fs/bcachefs/btree_update.h      |   2 -
 fs/bcachefs/btree_update_leaf.c | 136 ++++++++++++++++++++++++--------
 fs/bcachefs/trace.h             |   6 ++
 8 files changed, 128 insertions(+), 50 deletions(-)

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 1353e72bbfb0..55af41a63ff7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -271,7 +271,8 @@ int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
 		return PTR_ERR(a);
 
 	bch2_alloc_pack(trans->c, a, *u);
-	return bch2_trans_update(trans, iter, &a->k, trigger_flags);
+	return bch2_trans_update(trans, iter, &a->k, trigger_flags|
+				 BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
 }
 
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index ff98024e76fc..c7ba6ce27007 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2188,6 +2188,8 @@ struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
 			break;
 		if (bpos_cmp(i->k->k.p, iter->path->pos) < 0)
 			continue;
+		if (i->key_cache_already_flushed)
+			continue;
 		if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
 			ret = i->k;
 	}
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 29d46d0aa5d3..72a54b9d1335 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -414,6 +414,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 	 *
 	 */
 	ret   = bch2_btree_iter_traverse(&b_iter) ?:
 		bch2_trans_update(trans, &b_iter, ck->k,
+				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
 				  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
 				  BTREE_TRIGGER_NORUN) ?:
 		bch2_trans_commit(trans, NULL, NULL,
@@ -555,13 +556,15 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	return true;
 }
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
-				       enum btree_id id, struct bpos pos)
+void bch2_btree_key_cache_drop(struct btree_trans *trans,
+			       struct btree_path *path)
 {
-	BUG_ON(bch2_btree_key_cache_find(trans->c, id, pos));
+	struct bkey_cached *ck = (void *) path->l[0].b;
+
+	ck->valid = false;
+
+	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
 }
-#endif
 
 static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 					       struct shrink_control *sc)
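The btree_key_cache.c hunks above are what keep key cache reclaim from
fighting the new redirection logic: flushing a cached key back to the btree
is itself a btree update, and without an opt-out flag it would be redirected
straight back into the key cache. A minimal sketch of that round trip, not
part of the patch - bch2_demo_flush() is a hypothetical condensation of
btree_key_cache_flush_pos():

static int bch2_demo_flush(struct btree_trans *trans,
			   struct btree_iter *b_iter,
			   struct bkey_cached *ck)
{
	/*
	 * Write the cached key back to the underlying btree.
	 * BTREE_UPDATE_KEY_CACHE_RECLAIM tells bch2_trans_update() not to
	 * redirect this update into the key cache again; BTREE_TRIGGER_NORUN
	 * is set because triggers already ran when the key was cached:
	 */
	return  bch2_btree_iter_traverse(b_iter) ?:
		bch2_trans_update(trans, b_iter, ck->k,
				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
				  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
				  BTREE_TRIGGER_NORUN);
}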
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index b3d241b13453..670746e72dab 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -32,14 +32,8 @@ bool bch2_btree_insert_key_cached(struct btree_trans *,
 			struct btree_path *, struct bkey_i *);
 int bch2_btree_key_cache_flush(struct btree_trans *,
 			       enum btree_id, struct bpos);
-#ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_btree_key_cache_verify_clean(struct btree_trans *,
-				enum btree_id, struct bpos);
-#else
-static inline void
-bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
-				enum btree_id id, struct bpos pos) {}
-#endif
+void bch2_btree_key_cache_drop(struct btree_trans *,
+			       struct btree_path *);
 
 void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
 void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 6db2ac49ee3f..0afade4f61f4 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -344,6 +344,7 @@ struct btree_insert_entry {
 	bool			cached:1;
 	bool			insert_trigger_run:1;
 	bool			overwrite_trigger_run:1;
+	bool			key_cache_already_flushed:1;
 	/*
 	 * @old_k may be a key from the journal; @old_btree_u64s always refers
 	 * to the size of the key being overwritten in the btree:
@@ -645,6 +646,8 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
 enum btree_update_flags {
 	__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
 	__BTREE_UPDATE_NOJOURNAL,
+	__BTREE_UPDATE_KEY_CACHE_RECLAIM,
+	__BTREE_UPDATE_NO_KEY_CACHE_COHERENCY,
 
 	__BTREE_TRIGGER_NORUN,		/* Don't run triggers at all */
 
@@ -658,6 +661,9 @@ enum btree_update_flags {
 
 #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
 #define BTREE_UPDATE_NOJOURNAL		(1U << __BTREE_UPDATE_NOJOURNAL)
+#define BTREE_UPDATE_KEY_CACHE_RECLAIM	(1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
+#define BTREE_UPDATE_NO_KEY_CACHE_COHERENCY		\
+	(1U << __BTREE_UPDATE_NO_KEY_CACHE_COHERENCY)
 
 #define BTREE_TRIGGER_NORUN		(1U << __BTREE_TRIGGER_NORUN)
 
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 5e5a1b5e750e..d9a406a28f47 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -76,8 +76,6 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *,
 int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
 			     struct bkey_i *, enum btree_update_flags);
 
-int __must_check bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
-					   struct bkey_i *, enum btree_update_flags);
 int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
 				   struct bkey_i *, enum btree_update_flags);
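The next file is where the bulk of the change lives. One detail worth
calling out first: btree_insert_entry_cmp() gains a cmp_int(l->cached,
r->cached) term, because a transaction can now legitimately carry two
updates at the same position - one bound for the key cache and one for the
underlying btree - and they must sort as distinct entries rather than be
deduplicated. A standalone illustration, not part of the patch; the demo
types and cmp_int() definition here are hypothetical stand-ins (compiles as
userspace C with gcc, which supports the ?: chaining used in the kernel
source):

#include <stdio.h>
#include <stdbool.h>

#define cmp_int(l, r)	((l) > (r) ? 1 : (l) < (r) ? -1 : 0)

struct demo_entry {
	unsigned	btree_id;
	bool		cached;
	unsigned	pos;		/* stand-in for struct bpos */
};

static int demo_entry_cmp(const struct demo_entry *l,
			  const struct demo_entry *r)
{
	return  cmp_int(l->btree_id,	r->btree_id) ?:
		cmp_int(l->cached,	r->cached) ?:
		cmp_int(l->pos,		r->pos);
}

int main(void)
{
	struct demo_entry btree_update = { .btree_id = 1, .cached = false, .pos = 42 };
	struct demo_entry cache_update = { .btree_id = 1, .cached = true,  .pos = 42 };

	/* Prints nonzero: same btree and position, but distinct entries: */
	printf("%d\n", demo_entry_cmp(&btree_update, &cache_update));
	return 0;
}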
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 2af2d75a06c5..dc033991a4ec 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -23,10 +23,15 @@
 #include <linux/prefetch.h>
 #include <linux/sort.h>
 
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
+			  struct bkey_i *, enum btree_update_flags);
+
 static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
 					 const struct btree_insert_entry *r)
 {
 	return   cmp_int(l->btree_id,	r->btree_id) ?:
+		 cmp_int(l->cached,	r->cached) ?:
 		 -cmp_int(l->level,	r->level) ?:
 		 bpos_cmp(l->k->k.p,	r->k->k.p);
 }
@@ -378,9 +383,14 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
 
 	i->k->k.needs_whiteout = false;
 
-	did_work = !i->cached
-		? btree_insert_key_leaf(trans, i)
-		: bch2_btree_insert_key_cached(trans, i->path, i->k);
+	if (!i->cached)
+		did_work = btree_insert_key_leaf(trans, i);
+	else if (!i->key_cache_already_flushed)
+		did_work = bch2_btree_insert_key_cached(trans, i->path, i->k);
+	else {
+		bch2_btree_key_cache_drop(trans, i->path);
+		did_work = false;
+	}
 
 	if (!did_work)
 		return;
@@ -987,18 +997,6 @@ int __bch2_trans_commit(struct btree_trans *trans)
 		goto out_reset;
 	}
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-	/*
-	 * if BTREE_TRIGGER_NORUN is set, it means we're probably being called
-	 * from the key cache flush code:
-	 */
-	trans_for_each_update(trans, i)
-		if (!i->cached &&
-		    !(i->flags & BTREE_TRIGGER_NORUN))
-			bch2_btree_key_cache_verify_clean(trans,
-					i->btree_id, i->k->k.p);
-#endif
-
 	ret = bch2_trans_commit_run_triggers(trans);
 	if (ret)
 		goto out;
@@ -1369,11 +1367,14 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 	return ret;
 }
 
-int __must_check bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
-					   struct bkey_i *k, enum btree_update_flags flags)
+static int __must_check
+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
+				struct bkey_i *k, enum btree_update_flags flags,
+				unsigned long ip)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i, n;
+	int ret = 0;
 
 	BUG_ON(!path->should_be_locked);
 
@@ -1388,7 +1389,7 @@ int __must_check bch2_trans_update_by_path(struct btree_trans *trans, struct btr
 		.cached		= path->cached,
 		.path		= path,
 		.k		= k,
-		.ip_allocated	= _RET_IP_,
+		.ip_allocated	= ip,
 	};
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -1409,17 +1410,6 @@ int __must_check bch2_trans_update_by_path(struct btree_trans *trans, struct btr
 	    !btree_insert_entry_cmp(&n, i)) {
 		BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);
 
-		/*
-		 * This is a hack to ensure that inode creates update the btree,
-		 * not the key cache, which helps with cache coherency issues in
-		 * other areas:
-		 */
-		if (n.cached && !i->cached) {
-			i->k = n.k;
-			i->flags = n.flags;
-			return 0;
-		}
-
 		bch2_path_put(trans, i->path, true);
 		i->flags	= n.flags;
 		i->cached	= n.cached;
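The hunk that follows implements the coherency rule from the commit message.
A compressed sketch of the resulting flow, not part of the patch (names as
in the code below; the indented trace is illustrative):

/*
 * When bch2_trans_update_by_path_trace() is handed a key cache update
 * for a position with no existing key (bkey_deleted(&i->old_k)), it
 * recurses once to queue the same key to the underlying btree, and
 * marks the key cache entry key_cache_already_flushed and
 * BTREE_TRIGGER_NORUN, so that at commit time do_btree_insert_one()
 * drops it instead of writing the key twice:
 *
 *	update(key cache path, k)
 *	  -> entry A: cached, key_cache_already_flushed, triggers skipped
 *	  -> update(btree path, k)
 *	       -> entry B: btree, triggers run normally
 */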
@@ -1444,19 +1434,60 @@ int __must_check bch2_trans_update_by_path(struct btree_trans *trans, struct btr
 		}
 	}
 
-	__btree_path_get(n.path, true);
-	return 0;
+	__btree_path_get(i->path, true);
+
+	/*
+	 * If a key is present in the key cache, it must also exist in the
+	 * btree - this is necessary for cache coherency. When iterating over
+	 * a btree that's cached in the key cache, the btree iter code checks
+	 * the key cache - but the key has to exist in the btree for that to
+	 * work:
+	 */
+	if (path->cached &&
+	    bkey_deleted(&i->old_k) &&
+	    !(flags & BTREE_UPDATE_NO_KEY_CACHE_COHERENCY)) {
+		struct btree_path *btree_path;
+
+		i->key_cache_already_flushed = true;
+		i->flags |= BTREE_TRIGGER_NORUN;
+
+		btree_path = bch2_path_get(trans, path->btree_id, path->pos,
+					   1, 0, BTREE_ITER_INTENT);
+
+		ret = bch2_btree_path_traverse(trans, btree_path, 0);
+		if (ret)
+			goto err;
+
+		btree_path->should_be_locked = true;
+		ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
+err:
+		bch2_path_put(trans, btree_path, true);
+	}
+
+	return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
+			  struct bkey_i *k, enum btree_update_flags flags)
+{
+	return bch2_trans_update_by_path_trace(trans, path, k, flags, _RET_IP_);
 }
 
 int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 				   struct bkey_i *k, enum btree_update_flags flags)
 {
+	struct btree_path *path = iter->update_path ?: iter->path;
+	struct bkey_cached *ck;
+	int ret;
+
 	if (iter->flags & BTREE_ITER_IS_EXTENTS)
 		return bch2_trans_update_extent(trans, iter, k, flags);
 
 	if (bkey_deleted(&k->k) &&
+	    !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
 	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
-		int ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
+		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
 		if (unlikely(ret < 0))
 			return ret;
 
@@ -1464,8 +1495,45 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
 			k->k.type = KEY_TYPE_whiteout;
 	}
 
-	return bch2_trans_update_by_path(trans, iter->update_path ?: iter->path,
-					 k, flags);
+	/*
+	 * Ensure that updates to cached btrees go to the key cache:
+	 */
+	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
+	    !path->cached &&
+	    !path->level &&
+	    btree_id_cached(trans->c, path->btree_id)) {
+		if (!iter->key_cache_path ||
+		    !iter->key_cache_path->should_be_locked ||
+		    bpos_cmp(iter->key_cache_path->pos, k->k.p)) {
+			if (!iter->key_cache_path)
+				iter->key_cache_path =
+					bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+						      BTREE_ITER_INTENT|BTREE_ITER_CACHED);
+
+			iter->key_cache_path =
+				bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
+							iter->flags & BTREE_ITER_INTENT);
+
+			ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
+						       BTREE_ITER_CACHED);
+			if (unlikely(ret))
+				return ret;
+
+			ck = (void *) iter->key_cache_path->l[0].b;
+
+			if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+				trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_);
+				btree_trans_restart(trans);
+				return -EINTR;
+			}
+
+			iter->key_cache_path->should_be_locked = true;
+		}
+
+		path = iter->key_cache_path;
+	}
+
+	return bch2_trans_update_by_path(trans, path, k, flags);
 }
 
 void bch2_trans_commit_hook(struct btree_trans *trans,
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index d432c90a1491..5e78c396e24c 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -658,6 +658,12 @@ DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas,
 	TP_ARGS(trans_fn, caller_ip)
 );
 
+DEFINE_EVENT(transaction_restart, trans_restart_key_cache_raced,
+	TP_PROTO(const char *trans_fn,
+		 unsigned long caller_ip),
+	TP_ARGS(trans_fn, caller_ip)
+);
+
 DECLARE_EVENT_CLASS(transaction_restart_iter,
 	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
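For context on how this is used: with this patch, a caller updating a
cached btree through a plain (uncached) iterator gets key cache semantics
transparently, and can pass BTREE_UPDATE_NO_KEY_CACHE_COHERENCY to opt out,
as bch2_alloc_write() now does. A minimal caller sketch, not part of the
patch - demo_update() is hypothetical, and assumes a btree id for which
btree_id_cached() returns true (such as BTREE_ID_alloc):

static int demo_update(struct btree_trans *trans, struct bpos pos,
		       struct bkey_i *k)
{
	struct btree_iter iter;
	int ret;

	/* Plain (uncached) iterator on a cached btree: */
	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos,
			     BTREE_ITER_INTENT);

	/*
	 * bch2_trans_update() now redirects this update to the key cache
	 * via iter->key_cache_path, unless the caller passes
	 * BTREE_UPDATE_NO_KEY_CACHE_COHERENCY:
	 */
	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_trans_update(trans, &iter, k, 0);

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}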