bcachefs: Ensure journal reclaim runs when btree key cache is too dirty
Ensuring the key cache isn't too dirty is critical for ensuring that the
shrinker can reclaim memory.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 125907203c
commit 8a92e54559
@@ -461,6 +461,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_cached *ck = (void *) iter->l[0].b;
+	bool kick_reclaim = false;

 	BUG_ON(insert->u64s > ck->u64s);

@@ -485,11 +486,18 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,

 		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
 		c->btree_key_cache.nr_dirty++;
+
+		if (bch2_nr_btree_keys_need_flush(c))
+			kick_reclaim = true;
+
 		mutex_unlock(&c->btree_key_cache.lock);
 	}

 	bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
 				&ck->journal, btree_key_cache_journal_flush);
+
+	if (kick_reclaim)
+		mod_delayed_work(c->journal_reclaim_wq, &c->journal.reclaim_work, 0);
 	return true;
 }

@@ -1,6 +1,15 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_H
 #define _BCACHEFS_BTREE_KEY_CACHE_H

+static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
+{
+	size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty);
+	size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys);
+	size_t max_dirty = 1024 + (nr_keys * 3) / 4;
+
+	return max_t(ssize_t, 0, nr_dirty - max_dirty);
+}
+
 struct bkey_cached *
 bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);

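For intuition, here is a minimal standalone sketch of the threshold arithmetic
that bch2_nr_btree_keys_need_flush() computes (a userspace model, not the
kernel code; the kernel helper reads the counters with READ_ONCE() and clamps
via max_t()):

#include <stdio.h>
#include <stddef.h>

/* Flushing is needed once more than 1024 + 3/4 of all cached keys are
 * dirty; the return value is how far past the threshold we are. */
static size_t nr_btree_keys_need_flush(size_t nr_dirty, size_t nr_keys)
{
	size_t max_dirty = 1024 + (nr_keys * 3) / 4;

	return nr_dirty > max_dirty ? nr_dirty - max_dirty : 0;
}

int main(void)
{
	/* With 100000 cached keys, max_dirty = 1024 + 75000 = 76024: */
	printf("%zu\n", nr_btree_keys_need_flush(50000, 100000)); /* 0 */
	printf("%zu\n", nr_btree_keys_need_flush(80000, 100000)); /* 3976 */
	return 0;
}
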
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0

 #include "bcachefs.h"
+#include "btree_key_cache.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "replicas.h"
 #include "super.h"
+#include "trace.h"

 /* Free space calculations: */

@@ -432,7 +434,6 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
 		list_move(&ret->list, &pin_list->flushed);
 		BUG_ON(j->flush_in_progress);
 		j->flush_in_progress = ret;
-		j->last_flushed = jiffies;
 	}

 	spin_unlock(&j->lock);
@@ -441,17 +442,24 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
 }

 /* returns true if we did work */
-static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
-			       unsigned min_nr)
+static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
+			      unsigned min_nr)
 {
 	struct journal_entry_pin *pin;
-	bool ret = false;
-	u64 seq;
+	u64 seq, ret = 0;

 	lockdep_assert_held(&j->reclaim_lock);

-	while ((pin = journal_get_next_pin(j, min_nr
-				? U64_MAX : seq_to_flush, &seq))) {
+	while (1) {
+		cond_resched();
+
+		j->last_flushed = jiffies;
+
+		pin = journal_get_next_pin(j, min_nr
+				? U64_MAX : seq_to_flush, &seq);
+		if (!pin)
+			break;
+
 		if (min_nr)
 			min_nr--;

@@ -460,7 +468,7 @@ static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
 		BUG_ON(j->flush_in_progress != pin);
 		j->flush_in_progress = NULL;
 		wake_up(&j->pin_flush_wait);
-		ret = true;
+		ret++;
 	}

 	return ret;
@@ -527,8 +535,8 @@ static u64 journal_seq_to_flush(struct journal *j)
 void bch2_journal_reclaim(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	unsigned min_nr = 0;
-	u64 seq_to_flush = 0;
+	u64 seq_to_flush, nr_flushed = 0;
+	size_t min_nr;

 	lockdep_assert_held(&j->reclaim_lock);

@@ -549,12 +557,25 @@ void bch2_journal_reclaim(struct journal *j)
 	if (j->prereserved.reserved * 2 > j->prereserved.remaining)
 		min_nr = 1;

-	if ((atomic_read(&c->btree_cache.dirty) * 4 >
-	     c->btree_cache.used * 3) ||
-	    (c->btree_key_cache.nr_dirty * 4 >
-	     c->btree_key_cache.nr_keys))
+	if (atomic_read(&c->btree_cache.dirty) * 4 >
+	    c->btree_cache.used * 3)
 		min_nr = 1;
-	} while (journal_flush_pins(j, seq_to_flush, min_nr));
+
+	min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
+
+	trace_journal_reclaim_start(c,
+			min_nr,
+			j->prereserved.reserved,
+			j->prereserved.remaining,
+			atomic_read(&c->btree_cache.dirty),
+			c->btree_cache.used,
+			c->btree_key_cache.nr_dirty,
+			c->btree_key_cache.nr_keys);
+
+	nr_flushed += journal_flush_pins(j, seq_to_flush, min_nr);
+	} while (min_nr);
+
+	trace_journal_reclaim_finish(c, nr_flushed);

 	if (!bch2_journal_error(j))
 		queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
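The shape of the reworked reclaim loop can be modeled in userspace (a toy
sketch; keys_need_flush() and flush_pins() below are hypothetical stand-ins
for bch2_nr_btree_keys_need_flush() and journal_flush_pins()): reclaim now
recomputes how much must be flushed on every pass and repeats until the
answer is zero, accumulating a count for the finish tracepoint. It also shows
why journal_flush_pins() returning a count still works for callers that only
want a did-work boolean, as in the hunk below.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for the state bch2_journal_reclaim() consults. */
static size_t nr_dirty = 80000, nr_keys = 100000;

static size_t keys_need_flush(void)
{
	size_t max_dirty = 1024 + (nr_keys * 3) / 4;

	return nr_dirty > max_dirty ? nr_dirty - max_dirty : 0;
}

/* Model of journal_flush_pins(): flush up to min_nr pins and return how
 * many were actually flushed (a count, no longer a bool). */
static size_t flush_pins(size_t min_nr)
{
	size_t flushed = min_nr < nr_dirty ? min_nr : nr_dirty;

	nr_dirty -= flushed;
	return flushed;
}

int main(void)
{
	size_t min_nr, nr_flushed = 0;

	/* Recompute the flush target each pass; stop once it hits zero: */
	do {
		min_nr = keys_need_flush();
		nr_flushed += flush_pins(min_nr);
	} while (min_nr);

	/* Prints "flushed 3976, 76024 still dirty" for the values above. */
	printf("flushed %zu, %zu still dirty\n", nr_flushed, nr_dirty);
	return 0;
}
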
@@ -582,7 +603,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,

 	mutex_lock(&j->reclaim_lock);

-	*did_work = journal_flush_pins(j, seq_to_flush, 0);
+	*did_work = journal_flush_pins(j, seq_to_flush, 0) != 0;

 	spin_lock(&j->lock);
 	/*
@@ -121,6 +121,65 @@ DEFINE_EVENT(bio, journal_write,
 	TP_ARGS(bio)
 );

+TRACE_EVENT(journal_reclaim_start,
+	TP_PROTO(struct bch_fs *c, u64 min_nr,
+		 u64 prereserved, u64 prereserved_total,
+		 u64 btree_cache_dirty, u64 btree_cache_total,
+		 u64 btree_key_cache_dirty, u64 btree_key_cache_total),
+	TP_ARGS(c, min_nr, prereserved, prereserved_total,
+		btree_cache_dirty, btree_cache_total,
+		btree_key_cache_dirty, btree_key_cache_total),
+
+	TP_STRUCT__entry(
+		__array(char,	uuid,	16			)
+		__field(u64,	min_nr				)
+		__field(u64,	prereserved			)
+		__field(u64,	prereserved_total		)
+		__field(u64,	btree_cache_dirty		)
+		__field(u64,	btree_cache_total		)
+		__field(u64,	btree_key_cache_dirty		)
+		__field(u64,	btree_key_cache_total		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+		__entry->min_nr			= min_nr;
+		__entry->prereserved		= prereserved;
+		__entry->prereserved_total	= prereserved_total;
+		__entry->btree_cache_dirty	= btree_cache_dirty;
+		__entry->btree_cache_total	= btree_cache_total;
+		__entry->btree_key_cache_dirty	= btree_key_cache_dirty;
+		__entry->btree_key_cache_total	= btree_key_cache_total;
+	),
+
+	TP_printk("%pU min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+		  __entry->uuid,
+		  __entry->min_nr,
+		  __entry->prereserved,
+		  __entry->prereserved_total,
+		  __entry->btree_cache_dirty,
+		  __entry->btree_cache_total,
+		  __entry->btree_key_cache_dirty,
+		  __entry->btree_key_cache_total)
+);
+
+TRACE_EVENT(journal_reclaim_finish,
+	TP_PROTO(struct bch_fs *c, u64 nr_flushed),
+	TP_ARGS(c, nr_flushed),
+
+	TP_STRUCT__entry(
+		__array(char,	uuid,	16		)
+		__field(u64,	nr_flushed		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+		__entry->nr_flushed	= nr_flushed;
+	),
+
+	TP_printk("%pU flushed %llu", __entry->uuid, __entry->nr_flushed)
+);
+
 /* bset.c: */

 DEFINE_EVENT(bpos, bkey_pack_pos_fail,