bcachefs: Be more careful about JOURNAL_RES_GET_RESERVED
JOURNAL_RES_GET_RESERVED should only be used for updates that need to be done to free up space in the journal. In particular, when we're flushing keys from the key cache, if we're flushing them out of order we shouldn't be using it, since we're using up our remaining space in the journal without dropping a pin that will let us make forward progress. With this patch, BTREE_INSERT_JOURNAL_RECLAIM without BTREE_INSERT_JOURNAL_RESERVED may return -EAGAIN - we can't wait on journal reclaim if we're already in journal reclaim. This means we need to propagate these errors up to journal reclaim, indicating that flushing a journal pin should be retried in the future. This is prep work for a patch to change the way journal reclaim works: it will split flushing key cache keys because the btree key cache is too dirty apart from journal reclaim that runs because we need space in the journal. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
6167f7c8ff
commit
2940295c97
@ -353,6 +353,7 @@ err:
|
||||
static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
struct bkey_cached_key key,
|
||||
u64 journal_seq,
|
||||
unsigned commit_flags,
|
||||
bool evict)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -391,12 +392,17 @@ retry:
|
||||
BTREE_INSERT_NOUNLOCK|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RESERVED|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM);
|
||||
(ck->journal.seq == journal_last_seq(j)
|
||||
? BTREE_INSERT_JOURNAL_RESERVED
|
||||
: 0)|
|
||||
commit_flags);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
if (ret == -EAGAIN)
|
||||
goto out;
|
||||
|
||||
if (ret) {
|
||||
bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
|
||||
"error flushing key cache: %i", ret);
|
||||
@ -439,15 +445,16 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btree_key_cache_journal_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
static int btree_key_cache_journal_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bkey_cached *ck =
|
||||
container_of(pin, struct bkey_cached, journal);
|
||||
struct bkey_cached_key key;
|
||||
struct btree_trans trans;
|
||||
int ret = 0;
|
||||
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
|
||||
@ -462,10 +469,13 @@ static void btree_key_cache_journal_flush(struct journal *j,
|
||||
six_unlock_read(&ck->c.lock);
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
btree_key_cache_flush_pos(&trans, key, seq, false);
|
||||
ret = btree_key_cache_flush_pos(&trans, key, seq,
|
||||
BTREE_INSERT_JOURNAL_RECLAIM, false);
|
||||
bch2_trans_exit(&trans);
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -481,7 +491,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
|
||||
if (!bch2_btree_key_cache_find(c, id, pos))
|
||||
return 0;
|
||||
|
||||
return btree_key_cache_flush_pos(trans, key, 0, true);
|
||||
return btree_key_cache_flush_pos(trans, key, 0, 0, true);
|
||||
}
|
||||
|
||||
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
|
||||
|
@ -916,10 +916,12 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level,
|
||||
struct closure cl;
|
||||
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
|
||||
? BCH_DISK_RESERVATION_NOFAIL : 0;
|
||||
int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
? JOURNAL_RES_GET_RECLAIM : 0;
|
||||
int journal_flags = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
journal_flags |= JOURNAL_RES_GET_RESERVED;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
retry:
|
||||
/*
|
||||
@ -982,6 +984,9 @@ retry:
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
goto err;
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags);
|
||||
|
@ -134,7 +134,7 @@ fix_iter:
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
unsigned i, u64 seq)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
@ -145,14 +145,15 @@ static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
bch2_btree_node_write_cond(c, b,
|
||||
(btree_current_write(b) == w && w->journal.seq == seq));
|
||||
six_unlock_read(&b->c.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
{
|
||||
return __btree_node_flush(j, pin, 0, seq);
|
||||
}
|
||||
|
||||
static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
{
|
||||
return __btree_node_flush(j, pin, 1, seq);
|
||||
}
|
||||
@ -563,8 +564,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
ret = bch2_journal_preres_get(&c->journal,
|
||||
&trans->journal_preres, trans->journal_preres_u64s,
|
||||
JOURNAL_RES_GET_NONBLOCK|
|
||||
((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
? JOURNAL_RES_GET_RECLAIM : 0));
|
||||
((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
? JOURNAL_RES_GET_RESERVED : 0));
|
||||
if (unlikely(ret == -EAGAIN))
|
||||
ret = bch2_trans_journal_preres_get_cold(trans,
|
||||
trans->journal_preres_u64s);
|
||||
@ -721,6 +722,10 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
case BTREE_INSERT_NEED_JOURNAL_RES:
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
|
||||
!(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
|
||||
return -EAGAIN;
|
||||
|
||||
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "btree_gc.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "error.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
@ -449,6 +450,27 @@ unlock:
|
||||
if (!ret)
|
||||
goto retry;
|
||||
|
||||
if ((ret == cur_entry_journal_full ||
|
||||
ret == cur_entry_journal_pin_full) &&
|
||||
!can_discard &&
|
||||
j->reservations.idx == j->reservations.unwritten_idx &&
|
||||
(flags & JOURNAL_RES_GET_RESERVED)) {
|
||||
char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
|
||||
|
||||
bch_err(c, "Journal stuck!");
|
||||
if (journal_debug_buf) {
|
||||
bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
|
||||
bch_err(c, "%s", journal_debug_buf);
|
||||
|
||||
bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j);
|
||||
bch_err(c, "Journal pins:\n%s", journal_debug_buf);
|
||||
kfree(journal_debug_buf);
|
||||
}
|
||||
|
||||
bch2_fatal_error(c);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Journal is full - can't rely on reclaim from work item due to
|
||||
* freezing:
|
||||
@ -1169,6 +1191,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
"last_seq_ondisk:\t%llu\n"
|
||||
"flushed_seq_ondisk:\t%llu\n"
|
||||
"prereserved:\t\t%u/%u\n"
|
||||
"each entry reserved:\t%u\n"
|
||||
"nr flush writes:\t%llu\n"
|
||||
"nr noflush writes:\t%llu\n"
|
||||
"nr direct reclaim:\t%llu\n"
|
||||
@ -1183,6 +1206,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
j->flushed_seq_ondisk,
|
||||
j->prereserved.reserved,
|
||||
j->prereserved.remaining,
|
||||
j->entry_u64s_reserved,
|
||||
j->nr_flush_writes,
|
||||
j->nr_noflush_writes,
|
||||
j->nr_direct_reclaim,
|
||||
|
@ -308,7 +308,6 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
|
||||
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
|
||||
#define JOURNAL_RES_GET_CHECK (1 << 1)
|
||||
#define JOURNAL_RES_GET_RESERVED (1 << 2)
|
||||
#define JOURNAL_RES_GET_RECLAIM (1 << 3)
|
||||
|
||||
static inline int journal_res_get_fast(struct journal *j,
|
||||
struct journal_res *res,
|
||||
@ -446,7 +445,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j,
|
||||
* into the reclaim path and deadlock:
|
||||
*/
|
||||
|
||||
if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
|
||||
if (!(flags & JOURNAL_RES_GET_RESERVED) &&
|
||||
new.reserved > new.remaining)
|
||||
return 0;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
|
@ -239,7 +239,7 @@ void bch2_journal_space_available(struct journal *j)
|
||||
u64s_remaining = (u64) clean << 6;
|
||||
u64s_remaining -= (u64) total << 3;
|
||||
u64s_remaining = max(0LL, u64s_remaining);
|
||||
u64s_remaining /= 2;
|
||||
u64s_remaining /= 4;
|
||||
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||
@ -353,6 +353,9 @@ static inline void __journal_pin_drop(struct journal *j,
|
||||
if (!journal_pin_active(pin))
|
||||
return;
|
||||
|
||||
if (j->flush_in_progress == pin)
|
||||
j->flush_in_progress_dropped = true;
|
||||
|
||||
pin_list = journal_seq_pin(j, pin->seq);
|
||||
pin->seq = 0;
|
||||
list_del_init(&pin->list);
|
||||
@ -439,34 +442,27 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret = NULL;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return NULL;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
|
||||
if (*seq > max_seq ||
|
||||
(ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list)))
|
||||
break;
|
||||
|
||||
if (ret) {
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = ret;
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* returns true if we did work */
|
||||
static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_nr)
|
||||
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_nr)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
u64 seq, ret = 0;
|
||||
size_t nr_flushed = 0;
|
||||
journal_pin_flush_fn flush_fn;
|
||||
u64 seq;
|
||||
int err;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return 0;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
@ -475,23 +471,42 @@ static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
|
||||
j->last_flushed = jiffies;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
pin = journal_get_next_pin(j, min_nr
|
||||
? U64_MAX : seq_to_flush, &seq);
|
||||
if (pin) {
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = pin;
|
||||
j->flush_in_progress_dropped = false;
|
||||
flush_fn = pin->flush;
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (!pin)
|
||||
break;
|
||||
|
||||
if (min_nr)
|
||||
min_nr--;
|
||||
|
||||
pin->flush(j, pin, seq);
|
||||
err = flush_fn(j, pin, seq);
|
||||
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
spin_lock(&j->lock);
|
||||
/* Pin might have been dropped or rearmed: */
|
||||
if (likely(!err && !j->flush_in_progress_dropped))
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
|
||||
j->flush_in_progress = NULL;
|
||||
j->flush_in_progress_dropped = false;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
wake_up(&j->pin_flush_wait);
|
||||
ret++;
|
||||
|
||||
if (err)
|
||||
break;
|
||||
|
||||
nr_flushed++;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return nr_flushed;
|
||||
}
|
||||
|
||||
static u64 journal_seq_to_flush(struct journal *j)
|
||||
@ -556,8 +571,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
u64 seq_to_flush, nr_flushed = 0;
|
||||
size_t min_nr;
|
||||
u64 seq_to_flush;
|
||||
size_t min_nr, nr_flushed;
|
||||
unsigned flags;
|
||||
int ret = 0;
|
||||
|
||||
|
@ -50,7 +50,7 @@ struct journal_entry_pin_list {
|
||||
|
||||
struct journal;
|
||||
struct journal_entry_pin;
|
||||
typedef void (*journal_pin_flush_fn)(struct journal *j,
|
||||
typedef int (*journal_pin_flush_fn)(struct journal *j,
|
||||
struct journal_entry_pin *, u64);
|
||||
|
||||
struct journal_entry_pin {
|
||||
@ -251,6 +251,7 @@ struct journal {
|
||||
|
||||
unsigned long last_flushed;
|
||||
struct journal_entry_pin *flush_in_progress;
|
||||
bool flush_in_progress_dropped;
|
||||
wait_queue_head_t pin_flush_wait;
|
||||
|
||||
/* protects advancing ja->discard_idx: */
|
||||
|
Loading…
Reference in New Issue
Block a user