bcachefs: Don't block on journal reservation with btree locks held
Fixes a deadlock between the allocator thread, when it first starts up, and journal replay Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
129550c4d0
commit
f1a79365a7
@ -638,12 +638,12 @@ static void btree_update_wait_on_journal(struct closure *cl)
|
||||
int ret;
|
||||
|
||||
ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (!ret) {
|
||||
if (ret == -EAGAIN) {
|
||||
continue_at(cl, btree_update_wait_on_journal, system_wq);
|
||||
return;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl);
|
||||
err:
|
||||
|
@ -344,19 +344,35 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
trans_for_each_entry(trans, i)
|
||||
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i)
|
||||
u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
|
||||
|
||||
ret = !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)
|
||||
? bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res,
|
||||
u64s, u64s)
|
||||
: 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i)
|
||||
u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
while ((ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
|
||||
struct btree_iter *iter = trans->entries[0].iter;
|
||||
|
||||
bch2_btree_iter_unlock(iter);
|
||||
|
||||
ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_CHECK);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!bch2_btree_iter_relock(iter)) {
|
||||
trans_restart(" (iter relock after journal res get blocked)");
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
multi_lock_write(c, trans);
|
||||
|
||||
|
@ -335,15 +335,14 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
|
||||
}
|
||||
|
||||
static int __journal_res_get(struct journal *j, struct journal_res *res,
|
||||
unsigned u64s_min, unsigned u64s_max)
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *buf;
|
||||
int ret;
|
||||
retry:
|
||||
ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (journal_res_get_fast(j, res, flags))
|
||||
return 0;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/*
|
||||
@ -351,10 +350,9 @@ retry:
|
||||
* that just did journal_entry_open() and call journal_entry_close()
|
||||
* unnecessarily
|
||||
*/
|
||||
ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
|
||||
if (ret) {
|
||||
if (journal_res_get_fast(j, res, flags)) {
|
||||
spin_unlock(&j->lock);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -377,7 +375,12 @@ retry:
|
||||
spin_unlock(&j->lock);
|
||||
return -EROFS;
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
/* haven't finished writing out the previous one: */
|
||||
/*
|
||||
* The current journal entry is still open, but we failed to get
|
||||
* a journal reservation because there's not enough space in it,
|
||||
* and we can't close it and start another because we haven't
|
||||
* finished writing out the previous entry:
|
||||
*/
|
||||
spin_unlock(&j->lock);
|
||||
trace_journal_entry_full(c);
|
||||
goto blocked;
|
||||
@ -408,7 +411,7 @@ retry:
|
||||
blocked:
|
||||
if (!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
return 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -422,14 +425,14 @@ blocked:
|
||||
* btree node write locks.
|
||||
*/
|
||||
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
||||
unsigned u64s_min, unsigned u64s_max)
|
||||
unsigned flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
wait_event(j->wait,
|
||||
(ret = __journal_res_get(j, res, u64s_min,
|
||||
u64s_max)));
|
||||
return ret < 0 ? ret : 0;
|
||||
(ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
|
||||
(flags & JOURNAL_RES_GET_NONBLOCK));
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 bch2_journal_last_unwritten_seq(struct journal *j)
|
||||
@ -453,28 +456,55 @@ u64 bch2_journal_last_unwritten_seq(struct journal *j)
|
||||
* btree root - every journal entry contains the roots of all the btrees, so it
|
||||
* doesn't need to bother with getting a journal reservation
|
||||
*/
|
||||
int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *parent)
|
||||
int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
|
||||
{
|
||||
int ret;
|
||||
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool need_reclaim = false;
|
||||
retry:
|
||||
spin_lock(&j->lock);
|
||||
BUG_ON(seq > journal_cur_seq(j));
|
||||
|
||||
if (seq < journal_cur_seq(j) ||
|
||||
journal_entry_is_open(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (journal_cur_seq(j) < seq) {
|
||||
switch (journal_buf_switch(j, false)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
spin_unlock(&j->lock);
|
||||
return -EROFS;
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
/* haven't finished writing out the previous one: */
|
||||
trace_journal_entry_full(c);
|
||||
goto blocked;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
break;
|
||||
case JOURNAL_UNLOCKED:
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
BUG_ON(journal_cur_seq(j) < seq);
|
||||
|
||||
if (!journal_entry_open(j)) {
|
||||
need_reclaim = true;
|
||||
goto blocked;
|
||||
}
|
||||
|
||||
ret = journal_entry_open(j);
|
||||
if (!ret)
|
||||
closure_wait(&j->async_wait, parent);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (!ret)
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
return 0;
|
||||
blocked:
|
||||
if (!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
|
||||
return ret;
|
||||
closure_wait(&j->async_wait, cl);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (need_reclaim)
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
static int journal_seq_error(struct journal *j, u64 seq)
|
||||
@ -594,11 +624,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
|
||||
void bch2_journal_meta_async(struct journal *j, struct closure *parent)
|
||||
{
|
||||
struct journal_res res;
|
||||
unsigned u64s = jset_u64s(0);
|
||||
|
||||
memset(&res, 0, sizeof(res));
|
||||
|
||||
bch2_journal_res_get(j, &res, u64s, u64s);
|
||||
bch2_journal_res_get(j, &res, jset_u64s(0), 0);
|
||||
bch2_journal_res_put(j, &res);
|
||||
|
||||
bch2_journal_flush_seq_async(j, res.seq, parent);
|
||||
@ -607,12 +636,11 @@ void bch2_journal_meta_async(struct journal *j, struct closure *parent)
|
||||
int bch2_journal_meta(struct journal *j)
|
||||
{
|
||||
struct journal_res res;
|
||||
unsigned u64s = jset_u64s(0);
|
||||
int ret;
|
||||
|
||||
memset(&res, 0, sizeof(res));
|
||||
|
||||
ret = bch2_journal_res_get(j, &res, u64s, u64s);
|
||||
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -272,12 +272,14 @@ static inline void bch2_journal_res_put(struct journal *j,
|
||||
}
|
||||
|
||||
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
|
||||
unsigned, unsigned);
|
||||
unsigned);
|
||||
|
||||
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
|
||||
#define JOURNAL_RES_GET_CHECK (1 << 1)
|
||||
|
||||
static inline int journal_res_get_fast(struct journal *j,
|
||||
struct journal_res *res,
|
||||
unsigned u64s_min,
|
||||
unsigned u64s_max)
|
||||
unsigned flags)
|
||||
{
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
@ -289,42 +291,45 @@ static inline int journal_res_get_fast(struct journal *j,
|
||||
* Check if there is still room in the current journal
|
||||
* entry:
|
||||
*/
|
||||
if (old.cur_entry_offset + u64s_min > j->cur_entry_u64s)
|
||||
if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
|
||||
return 0;
|
||||
|
||||
res->offset = old.cur_entry_offset;
|
||||
res->u64s = min(u64s_max, j->cur_entry_u64s -
|
||||
old.cur_entry_offset);
|
||||
if (flags & JOURNAL_RES_GET_CHECK)
|
||||
return 1;
|
||||
|
||||
journal_state_inc(&new);
|
||||
new.cur_entry_offset += res->u64s;
|
||||
journal_state_inc(&new);
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
res->ref = true;
|
||||
res->idx = new.idx;
|
||||
res->seq = le64_to_cpu(j->buf[res->idx].data->seq);
|
||||
res->ref = true;
|
||||
res->idx = old.idx;
|
||||
res->offset = old.cur_entry_offset;
|
||||
res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
|
||||
unsigned u64s_min, unsigned u64s_max)
|
||||
unsigned u64s, unsigned flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
EBUG_ON(res->ref);
|
||||
EBUG_ON(u64s_max < u64s_min);
|
||||
EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
|
||||
|
||||
if (journal_res_get_fast(j, res, u64s_min, u64s_max))
|
||||
res->u64s = u64s;
|
||||
|
||||
if (journal_res_get_fast(j, res, flags))
|
||||
goto out;
|
||||
|
||||
ret = bch2_journal_res_get_slowpath(j, res, u64s_min, u64s_max);
|
||||
ret = bch2_journal_res_get_slowpath(j, res, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
out:
|
||||
lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
|
||||
EBUG_ON(!res->ref);
|
||||
if (!(flags & JOURNAL_RES_GET_CHECK)) {
|
||||
lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
|
||||
EBUG_ON(!res->ref);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user