bcachefs: flush key cache journal pins separately from other pins

Summary of what this patch changes (derived from the hunks below):

 * btree_iter.c: bch2_trans_relock() now emits the new
   trans_restart_relock tracepoint (added in trace.h) before returning
   false.

 * btree_key_cache.[ch]: the key cache journal flush callback is renamed
   to bch2_btree_key_cache_journal_flush() and made non-static so that
   journal_reclaim.c can compare pin->flush against it.
   bch2_nr_btree_keys_want_flush() is removed.

 * btree_update_interior.c: trace_trans_restart_journal_preres_get() is
   emitted on the two journal prereservation failure paths, and the
   BTREE_INSERT_JOURNAL_RECLAIM case frees the update and returns
   ERR_PTR(ret) directly instead of jumping to err.

 * journal.c / journal_types.h: struct journal_entry_pin_list gains a
   key_cache_list; pin list initialisation is factored out into
   journal_pin_list_init(), and journal_pin_new_entry() loses its count
   argument (both callers passed 1).

 * journal_reclaim.c: bch2_journal_pin_set() files key cache pins on
   key_cache_list.  journal_get_next_pin() and journal_flush_pins() take
   separate "any pin" and "key cache pin" minimums.
   __bch2_journal_reclaim() drops the btree cache dirty heuristic, passes
   the key cache minimum capped at 128, and no longer loops when called
   for direct reclaim.

Review notes:

 * The key cache minimum uses min_t(size_t, ..., 128) rather than
   min(..., 128UL): bch2_nr_btree_keys_need_flush() returns size_t, which
   is not the same type as unsigned long on 32-bit builds.

 * The line structure of this patch was restored from a whitespace-
   collapsed copy.  Hunk line counts were checked against every @@
   header, but tabs/alignment inside lines are reconstructed; apply with
   whitespace-insensitive matching (e.g. git apply --ignore-whitespace)
   if a strict apply is rejected.

diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 203c9adb0623..8f5318a38d9b 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -473,8 +473,10 @@ bool bch2_trans_relock(struct btree_trans *trans)
 
 	trans_for_each_iter(trans, iter)
 		if (btree_iter_keep(trans, iter) &&
-		    !bch2_btree_iter_relock(iter, true))
+		    !bch2_btree_iter_relock(iter, true)) {
+			trace_trans_restart_relock(trans->ip);
 			return false;
+		}
 	return true;
 }
 
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 0af46335bd00..ac844f47b8dd 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -445,9 +445,8 @@ out:
 	return ret;
 }
 
-static int btree_key_cache_journal_flush(struct journal *j,
-					 struct journal_entry_pin *pin,
-					 u64 seq)
+int bch2_btree_key_cache_journal_flush(struct journal *j,
+				struct journal_entry_pin *pin, u64 seq)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bkey_cached *ck =
@@ -528,7 +527,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	}
 
 	bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
-				&ck->journal, btree_key_cache_journal_flush);
+				&ck->journal, bch2_btree_key_cache_journal_flush);
 
 	if (kick_reclaim)
 		journal_reclaim_kick(&c->journal);
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index 4e1e5a9c7656..7e2b0a08f745 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -1,15 +1,6 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_H
 #define _BCACHEFS_BTREE_KEY_CACHE_H
 
-static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
-{
-	size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
-	size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
-	size_t max_dirty = nr_keys / 4;
-
-	return max_t(ssize_t, 0, nr_dirty - max_dirty);
-}
-
 static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 {
 	size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
@@ -29,6 +20,9 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
 		test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
 }
 
+int bch2_btree_key_cache_journal_flush(struct journal *,
+				struct journal_entry_pin *, u64);
+
 struct bkey_cached *
 bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
 
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index e965c8bbddce..b3137525f9c1 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -974,20 +974,25 @@ retry:
 		 * closure argument
 		 */
 		if (flags & BTREE_INSERT_NOUNLOCK) {
+			trace_trans_restart_journal_preres_get(trans->ip);
 			ret = -EINTR;
 			goto err;
 		}
 
 		bch2_trans_unlock(trans);
 
-		if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
-			goto err;
+		if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
+			bch2_btree_update_free(as);
+			return ERR_PTR(ret);
+		}
 
 		ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
 				BTREE_UPDATE_JOURNAL_RES,
 				journal_flags);
-		if (ret)
+		if (ret) {
+			trace_trans_restart_journal_preres_get(trans->ip);
 			goto err;
+		}
 
 		if (!bch2_trans_relock(trans)) {
 			ret = -EINTR;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 35a48629b63b..af2f8528ac65 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -59,21 +59,23 @@ journal_seq_to_buf(struct journal *j, u64 seq)
 	return buf;
 }
 
-static void journal_pin_new_entry(struct journal *j, int count)
+static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
 {
-	struct journal_entry_pin_list *p;
+	INIT_LIST_HEAD(&p->list);
+	INIT_LIST_HEAD(&p->key_cache_list);
+	INIT_LIST_HEAD(&p->flushed);
+	atomic_set(&p->count, count);
+	p->devs.nr = 0;
+}
 
+static void journal_pin_new_entry(struct journal *j)
+{
 	/*
 	 * The fifo_push() needs to happen at the same time as j->seq is
 	 * incremented for journal_last_seq() to be calculated correctly
 	 */
 	atomic64_inc(&j->seq);
-	p = fifo_push_ref(&j->pin);
-
-	INIT_LIST_HEAD(&p->list);
-	INIT_LIST_HEAD(&p->flushed);
-	atomic_set(&p->count, count);
-	p->devs.nr = 0;
+	journal_pin_list_init(fifo_push_ref(&j->pin), 1);
 }
 
 static void bch2_journal_buf_init(struct journal *j)
@@ -192,7 +194,7 @@ static bool __journal_entry_close(struct journal *j)
 	__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
 
 	/* Initialize new buffer: */
-	journal_pin_new_entry(j, 1);
+	journal_pin_new_entry(j);
 
 	bch2_journal_buf_init(j);
 
@@ -1030,12 +1032,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	j->pin.back	= cur_seq;
 	atomic64_set(&j->seq, cur_seq - 1);
 
-	fifo_for_each_entry_ptr(p, &j->pin, seq) {
-		INIT_LIST_HEAD(&p->list);
-		INIT_LIST_HEAD(&p->flushed);
-		atomic_set(&p->count, 1);
-		p->devs.nr = 0;
-	}
+	fifo_for_each_entry_ptr(p, &j->pin, seq)
+		journal_pin_list_init(p, 1);
 
 	list_for_each_entry(i, journal_entries, list) {
 		unsigned ptr;
@@ -1058,7 +1056,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	set_bit(JOURNAL_STARTED, &j->flags);
 	j->last_flush_write = jiffies;
 
-	journal_pin_new_entry(j, 1);
+	journal_pin_new_entry(j);
 
 	j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
 
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 42ed7a3525b1..0d7fe1f99dbf 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -407,7 +407,12 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
 	pin->seq	= seq;
 	pin->flush	= flush_fn;
 
-	list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
+	if (flush_fn == bch2_btree_key_cache_journal_flush)
+		list_add(&pin->list, &pin_list->key_cache_list);
+	else if (flush_fn)
+		list_add(&pin->list, &pin_list->list);
+	else
+		list_add(&pin->list, &pin_list->flushed);
 	spin_unlock(&j->lock);
 
 	/*
@@ -437,23 +442,40 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
  */
 
 static struct journal_entry_pin *
-journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
+journal_get_next_pin(struct journal *j,
+		     bool get_any,
+		     bool get_key_cache,
+		     u64 max_seq, u64 *seq)
 {
 	struct journal_entry_pin_list *pin_list;
 	struct journal_entry_pin *ret = NULL;
 
-	fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
-		if (*seq > max_seq ||
-		    (ret = list_first_entry_or_null(&pin_list->list,
-				struct journal_entry_pin, list)))
+	fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
+		if (*seq > max_seq && !get_any && !get_key_cache)
 			break;
 
-	return ret;
+		if (*seq <= max_seq || get_any) {
+			ret = list_first_entry_or_null(&pin_list->list,
+				struct journal_entry_pin, list);
+			if (ret)
+				return ret;
+		}
+
+		if (*seq <= max_seq || get_any || get_key_cache) {
+			ret = list_first_entry_or_null(&pin_list->key_cache_list,
+				struct journal_entry_pin, list);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return NULL;
 }
 
 /* returns true if we did work */
 static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
-				 unsigned min_nr)
+				 unsigned min_any,
+				 unsigned min_key_cache)
 {
 	struct journal_entry_pin *pin;
 	size_t nr_flushed = 0;
@@ -472,8 +494,10 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
 		j->last_flushed = jiffies;
 
 		spin_lock(&j->lock);
-		pin = journal_get_next_pin(j, min_nr
-				? U64_MAX : seq_to_flush, &seq);
+		pin = journal_get_next_pin(j,
+					   min_any != 0,
+					   min_key_cache != 0,
+					   seq_to_flush, &seq);
 		if (pin) {
 			BUG_ON(j->flush_in_progress);
 			j->flush_in_progress = pin;
@@ -485,8 +509,11 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
 		if (!pin)
 			break;
 
-		if (min_nr)
-			min_nr--;
+		if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
+			min_key_cache--;
+
+		if (min_any)
+			min_any--;
 
 		err = flush_fn(j, pin, seq);
 
@@ -610,18 +637,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
 		if (j->prereserved.reserved * 2 > j->prereserved.remaining)
 			min_nr = 1;
 
-		if (atomic_read(&c->btree_cache.dirty) * 4 >
-		    c->btree_cache.used * 3)
-			min_nr = 1;
-
 		if (fifo_free(&j->pin) <= 32)
 			min_nr = 1;
 
-		min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
-
-		/* Don't do too many without delivering wakeup: */
-		min_nr = min(min_nr, 128UL);
-
 		trace_journal_reclaim_start(c,
 				min_nr,
 				j->prereserved.reserved,
@@ -631,7 +649,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
 				atomic_long_read(&c->btree_key_cache.nr_dirty),
 				atomic_long_read(&c->btree_key_cache.nr_keys));
 
-		nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
+		nr_flushed = journal_flush_pins(j, seq_to_flush,
+					min_nr,
+					min_t(size_t, bch2_nr_btree_keys_need_flush(c), 128));
 
 		if (direct)
 			j->nr_direct_reclaim += nr_flushed;
@@ -641,7 +661,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
 
 		if (nr_flushed)
 			wake_up(&j->reclaim_wait);
-	} while (min_nr && nr_flushed);
+	} while (min_nr && nr_flushed && !direct);
 
 	memalloc_noreclaim_restore(flags);
 
@@ -734,7 +754,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
 
 	mutex_lock(&j->reclaim_lock);
 
-	*did_work = journal_flush_pins(j, seq_to_flush, 0) != 0;
+	*did_work = journal_flush_pins(j, seq_to_flush, 0, 0) != 0;
 
 	spin_lock(&j->lock);
 	/*
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 97d764370b89..f597eb78e66e 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -43,6 +43,7 @@ struct journal_buf {
 
 struct journal_entry_pin_list {
 	struct list_head		list;
+	struct list_head		key_cache_list;
 	struct list_head		flushed;
 	atomic_t			count;
 	struct bch_devs_list		devs;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index babb07e3acc4..387c1c49f696 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -716,6 +716,11 @@ DEFINE_EVENT(transaction_restart,	trans_restart_iter_upgrade,
 	TP_ARGS(ip)
 );
 
+DEFINE_EVENT(transaction_restart,	trans_restart_relock,
+	TP_PROTO(unsigned long ip),
+	TP_ARGS(ip)
+);
+
 DEFINE_EVENT(transaction_restart,	trans_restart_traverse,
 	TP_PROTO(unsigned long ip),
 	TP_ARGS(ip)