From ed0e24c0992dffe494bdd0ea6ddf3b816c438524 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 18 Nov 2020 13:21:59 -0500 Subject: [PATCH] bcachefs: Be more precise with journal error reporting We were incorrectly detecting a journal deadlock - the journal filling up - when only the journal pin fifo had filled up; if the journal pin fifo is full that just means we need to wait on reclaim. This plumbs through better error reporting so we can better discriminate in the journal_res_get path what's going on. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 80 ++++++++++++++++++----------------- fs/bcachefs/journal_reclaim.c | 6 +-- fs/bcachefs/journal_types.h | 8 +++- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index b2a5e9db404e..bb4353e673e7 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -234,7 +234,7 @@ static int journal_entry_open(struct journal *j) BUG_ON(journal_entry_is_open(j)); if (j->blocked) - return -EAGAIN; + return cur_entry_blocked; if (j->cur_entry_error) return j->cur_entry_error; @@ -250,7 +250,7 @@ static int journal_entry_open(struct journal *j) u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); if (u64s <= le32_to_cpu(buf->data->u64s)) - return -ENOSPC; + return cur_entry_journal_full; /* * Must be set before marking the journal entry as open: @@ -262,7 +262,7 @@ static int journal_entry_open(struct journal *j) old.v = new.v = v; if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) - return -EROFS; + return cur_entry_insufficient_devices; /* Handle any already added entries */ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); @@ -375,7 +375,7 @@ retry: * Don't want to close current journal entry, just need to * invoke reclaim: */ - ret = -ENOSPC; + ret = cur_entry_journal_full; goto unlock; } @@ -398,14 +398,16 @@ retry: * there's still a previous one in flight: */ trace_journal_entry_full(c); - ret = -EAGAIN; + ret = cur_entry_blocked; } else { ret = journal_entry_open(j); } unlock: - if ((ret == -EAGAIN || ret == -ENOSPC) && - !j->res_get_blocked_start) + if ((ret && ret != cur_entry_insufficient_devices) && + !j->res_get_blocked_start) { j->res_get_blocked_start = local_clock() ?: 1; + trace_journal_full(c); + } can_discard = j->can_discard; spin_unlock(&j->lock); @@ -413,41 +415,39 @@ unlock: if (!ret) goto retry; - if (ret == -ENOSPC) { - if (WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), - "JOURNAL_RES_GET_RESERVED set but journal full")) { - char *buf; + if (WARN_ONCE(ret == cur_entry_journal_full && + !can_discard && + (flags & JOURNAL_RES_GET_RESERVED), + "JOURNAL_RES_GET_RESERVED set but journal full")) { + char *buf; - buf = kmalloc(4096, GFP_NOFS); - if (buf) { - bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); - pr_err("\n%s", buf); - kfree(buf); - } + buf = kmalloc(4096, GFP_NOFS); + if (buf) { + bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); + pr_err("\n%s", buf); + kfree(buf); } - - /* - * Journal is full - can't rely on reclaim from work item due to - * freezing: - */ - trace_journal_full(c); - - if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { - if (can_discard) { - bch2_journal_do_discards(j); - goto retry; - } - - if (mutex_trylock(&j->reclaim_lock)) { - bch2_journal_reclaim(j); - mutex_unlock(&j->reclaim_lock); - } - } - - ret = -EAGAIN; } - return ret; + /* + * Journal is full - can't rely on reclaim from work item due to + * freezing: + */ + if ((ret == cur_entry_journal_full || + ret == cur_entry_journal_pin_full) && + !(flags & JOURNAL_RES_GET_NONBLOCK)) { + if (can_discard) { + bch2_journal_do_discards(j); + goto retry; + } + + if (mutex_trylock(&j->reclaim_lock)) { + bch2_journal_reclaim(j); + mutex_unlock(&j->reclaim_lock); + } + } + + return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN; } /* @@ -1072,6 +1072,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "last_seq_ondisk:\t%llu\n" "prereserved:\t\t%u/%u\n" "current entry sectors:\t%u\n" + "current entry error:\t%u\n" "current entry:\t\t", fifo_used(&j->pin), journal_cur_seq(j), @@ -1079,7 +1080,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) j->last_seq_ondisk, j->prereserved.reserved, j->prereserved.remaining, - j->cur_entry_sectors); + j->cur_entry_sectors, + j->cur_entry_error); switch (s.cur_entry_offset) { case JOURNAL_ENTRY_ERROR_VAL: diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index f9e0160074db..1cd9c11a37f0 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -164,12 +164,12 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < c->opts.metadata_replicas_required) { - ret = -EROFS; + ret = cur_entry_insufficient_devices; goto out; } if (!fifo_free(&j->pin)) { - ret = -ENOSPC; + ret = cur_entry_journal_pin_full; goto out; } @@ -180,7 +180,7 @@ void bch2_journal_space_available(struct journal *j) clean = __journal_space_available(j, nr_devs_want, journal_space_clean); if (!discarded.next_entry) - ret = -ENOSPC; + ret = cur_entry_journal_full; overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * journal_entry_overhead(j); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 22ff7f8081c6..5f20653b8eb5 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -146,7 +146,13 @@ struct journal { * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if * insufficient devices: */ - int cur_entry_error; + enum { + cur_entry_ok, + cur_entry_blocked, + cur_entry_journal_full, + cur_entry_journal_pin_full, + cur_entry_insufficient_devices, + } cur_entry_error; union journal_preres_state prereserved;