bcachefs: Fix race leading to btree node write getting stuck
Checking btree_node_may_write() isn't atomic with the other btree flags, dirty and need_write in particular. There was a rare race where we'd unblock a node from writing while __btree_node_flush() was setting need_write, and no thread would notice that the node was now both able to write and needed to be written. Fix this by adding btree node flags for will_make_reachable and write_blocked that can be checked in the cmpxchg loop in __bch2_btree_node_write. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
6f5f747c31
commit
bf3efff5e4
@ -223,10 +223,9 @@ wait_on_io:
|
||||
goto wait_on_io;
|
||||
}
|
||||
|
||||
if (btree_node_noevict(b))
|
||||
goto out_unlock;
|
||||
|
||||
if (!btree_node_may_write(b))
|
||||
if (btree_node_noevict(b) ||
|
||||
btree_node_write_blocked(b) ||
|
||||
btree_node_will_make_reachable(b))
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b)) {
|
||||
|
@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
if ((old & (1U << BTREE_NODE_dirty)) &&
|
||||
(old & (1U << BTREE_NODE_need_write)) &&
|
||||
!(old & (1U << BTREE_NODE_never_write)) &&
|
||||
btree_node_may_write(b)) {
|
||||
!(old & (1U << BTREE_NODE_write_blocked)) &&
|
||||
!(old & (1U << BTREE_NODE_will_make_reachable))) {
|
||||
new &= ~(1U << BTREE_NODE_dirty);
|
||||
new &= ~(1U << BTREE_NODE_need_write);
|
||||
new |= (1U << BTREE_NODE_write_in_flight);
|
||||
@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
||||
!(old & (1 << BTREE_NODE_need_write)))
|
||||
return;
|
||||
|
||||
if (!btree_node_may_write(b))
|
||||
if (old &
|
||||
((1 << BTREE_NODE_never_write)|
|
||||
(1 << BTREE_NODE_write_blocked)))
|
||||
return;
|
||||
|
||||
if (old & (1 << BTREE_NODE_never_write))
|
||||
if (b->written &&
|
||||
(old & (1 << BTREE_NODE_will_make_reachable)))
|
||||
return;
|
||||
|
||||
if (old & (1 << BTREE_NODE_write_in_flight))
|
||||
|
@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
|
||||
void bch2_btree_node_wait_on_read(struct btree *);
|
||||
void bch2_btree_node_wait_on_write(struct btree *);
|
||||
|
||||
static inline bool btree_node_may_write(struct btree *b)
|
||||
{
|
||||
return list_empty_careful(&b->write_blocked) &&
|
||||
(!b->written || !b->will_make_reachable);
|
||||
}
|
||||
|
||||
enum compact_mode {
|
||||
COMPACT_LAZY,
|
||||
COMPACT_ALL,
|
||||
|
@ -434,6 +434,8 @@ struct btree_trans {
|
||||
x(read_error) \
|
||||
x(dirty) \
|
||||
x(need_write) \
|
||||
x(write_blocked) \
|
||||
x(will_make_reachable) \
|
||||
x(noevict) \
|
||||
x(write_idx) \
|
||||
x(accessed) \
|
||||
|
@ -606,6 +606,8 @@ err:
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
if (list_empty(&b->write_blocked))
|
||||
clear_btree_node_write_blocked(b);
|
||||
|
||||
/*
|
||||
* Node might have been freed, recheck under
|
||||
@ -650,6 +652,7 @@ err:
|
||||
|
||||
BUG_ON(b->will_make_reachable != (unsigned long) as);
|
||||
b->will_make_reachable = 0;
|
||||
clear_btree_node_will_make_reachable(b);
|
||||
}
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
|
||||
|
||||
as->mode = BTREE_INTERIOR_UPDATING_NODE;
|
||||
as->b = b;
|
||||
|
||||
set_btree_node_write_blocked(b);
|
||||
list_add(&as->write_blocked_list, &b->write_blocked);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
|
||||
|
||||
as->new_nodes[as->nr_new_nodes++] = b;
|
||||
b->will_make_reachable = 1UL|(unsigned long) as;
|
||||
set_btree_node_will_make_reachable(b);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
|
||||
* xchg() is for synchronization with bch2_btree_complete_write:
|
||||
*/
|
||||
v = xchg(&b->will_make_reachable, 0);
|
||||
clear_btree_node_will_make_reachable(b);
|
||||
as = (struct btree_update *) (v & ~1UL);
|
||||
|
||||
if (!as) {
|
||||
|
Loading…
Reference in New Issue
Block a user