bcachefs: Don't deadlock when btree node reuse changes lock ordering
Btree node lock ordering is based on the logical key. However, 'struct btree' may be reused for a different btree node under memory pressure. This patch uses the new six lock callback to check if a btree node is no longer the node we wanted to lock before blocking. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
515282ac7d
commit
bd2bb273a0
@ -678,6 +678,14 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
return b;
|
||||
}
|
||||
|
||||
static int lock_node_check_fn(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct btree *b = container_of(lock, struct btree, c.lock);
|
||||
const struct bkey_i *k = p;
|
||||
|
||||
return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
|
||||
* in from disk if necessary.
|
||||
@ -750,8 +758,12 @@ lock_node:
|
||||
if (btree_node_read_locked(iter, level + 1))
|
||||
btree_node_unlock(iter, level + 1);
|
||||
|
||||
if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
|
||||
if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
|
||||
lock_node_check_fn, (void *) k)) {
|
||||
if (b->hash_val != btree_ptr_hash_val(k))
|
||||
goto retry;
|
||||
return ERR_PTR(-EINTR);
|
||||
}
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.level != level ||
|
||||
@ -803,6 +815,7 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
struct bset_tree *t;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
@ -823,7 +836,9 @@ retry:
|
||||
return b;
|
||||
} else {
|
||||
lock_node:
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
|
||||
if (ret)
|
||||
goto retry;
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.btree_id != btree_id ||
|
||||
|
@ -194,10 +194,13 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
|
||||
/* Slowpath: */
|
||||
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
unsigned level, struct btree_iter *iter,
|
||||
enum six_lock_type type)
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn,
|
||||
void *p)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree_iter *linked;
|
||||
u64 start_time = local_clock();
|
||||
bool ret = true;
|
||||
|
||||
/* Check if it's safe to block: */
|
||||
@ -275,7 +278,14 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
return false;
|
||||
}
|
||||
|
||||
__btree_node_lock_type(iter->trans->c, b, type);
|
||||
if (six_trylock_type(&b->c.lock, type))
|
||||
return true;
|
||||
|
||||
if (six_lock_type(&b->c.lock, type, should_sleep_fn, p))
|
||||
return false;
|
||||
|
||||
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
|
||||
start_time);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -286,6 +296,11 @@ static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
if (!(iter->trans->iters_linked & (1ULL << iter->idx))) {
|
||||
BUG_ON(iter->nodes_locked);
|
||||
return;
|
||||
}
|
||||
|
||||
for (l = 0; btree_iter_node(iter, l); l++) {
|
||||
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
|
||||
!btree_node_locked(iter, l))
|
||||
@ -300,7 +315,7 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
|
||||
trans_for_each_iter(trans, iter)
|
||||
trans_for_each_iter_all(trans, iter)
|
||||
bch2_btree_iter_verify_locks(iter);
|
||||
}
|
||||
#else
|
||||
@ -892,18 +907,26 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
|
||||
__btree_iter_init(linked, b->c.level);
|
||||
}
|
||||
|
||||
static int lock_root_check_fn(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct btree *b = container_of(lock, struct btree, c.lock);
|
||||
struct btree **rootp = p;
|
||||
|
||||
return b == *rootp ? 0 : -1;
|
||||
}
|
||||
|
||||
static inline int btree_iter_lock_root(struct btree_iter *iter,
|
||||
unsigned depth_want)
|
||||
{
|
||||
struct bch_fs *c = iter->trans->c;
|
||||
struct btree *b;
|
||||
struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
|
||||
enum six_lock_type lock_type;
|
||||
unsigned i;
|
||||
|
||||
EBUG_ON(iter->nodes_locked);
|
||||
|
||||
while (1) {
|
||||
b = READ_ONCE(c->btree_roots[iter->btree_id].b);
|
||||
b = READ_ONCE(*rootp);
|
||||
iter->level = READ_ONCE(b->c.level);
|
||||
|
||||
if (unlikely(iter->level < depth_want)) {
|
||||
@ -921,10 +944,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
|
||||
|
||||
lock_type = __btree_lock_want(iter, iter->level);
|
||||
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
|
||||
iter, lock_type)))
|
||||
iter, lock_type,
|
||||
lock_root_check_fn, rootp)))
|
||||
return -EINTR;
|
||||
|
||||
if (likely(b == c->btree_roots[iter->btree_id].b &&
|
||||
if (likely(b == READ_ONCE(*rootp) &&
|
||||
b->c.level == iter->level &&
|
||||
!race_fault())) {
|
||||
for (i = 0; i < iter->level; i++)
|
||||
|
@ -174,17 +174,21 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
|
||||
struct btree_iter *, enum six_lock_type);
|
||||
struct btree_iter *, enum six_lock_type,
|
||||
six_lock_should_sleep_fn, void *);
|
||||
|
||||
static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
unsigned level,
|
||||
struct btree_iter *iter,
|
||||
enum six_lock_type type)
|
||||
static inline bool btree_node_lock(struct btree *b,
|
||||
struct bpos pos, unsigned level,
|
||||
struct btree_iter *iter,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
bool ret;
|
||||
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->locking = b;
|
||||
trans->locking_iter_idx = iter->idx;
|
||||
@ -194,7 +198,8 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
#endif
|
||||
ret = likely(six_trylock_type(&b->c.lock, type)) ||
|
||||
btree_node_lock_increment(trans, b, level, type) ||
|
||||
__bch2_btree_node_lock(b, pos, level, iter, type);
|
||||
__bch2_btree_node_lock(b, pos, level, iter, type,
|
||||
should_sleep_fn, p);
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->locking = NULL;
|
||||
|
@ -135,6 +135,8 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
|
||||
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
|
||||
six_lock_wakeup_all(&b->c.lock);
|
||||
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
list_move(&b->list, &c->btree_cache.freeable);
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
|
Loading…
Reference in New Issue
Block a user