bcachefs: Add a pre-reserve mechanism for the journal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
9ace606e93
commit
68ef94a63c
@ -343,6 +343,16 @@ retry:
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(flags & JOURNAL_RES_GET_RESERVED) &&
|
||||
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
|
||||
/*
|
||||
* Don't want to close current journal entry, just need to
|
||||
* invoke reclaim:
|
||||
*/
|
||||
ret = -ENOSPC;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we couldn't get a reservation because the current buf filled up,
|
||||
* and we had room for a bigger entry on disk, signal that we want to
|
||||
@ -366,7 +376,7 @@ retry:
|
||||
} else {
|
||||
ret = journal_entry_open(j);
|
||||
}
|
||||
|
||||
unlock:
|
||||
if ((ret == -EAGAIN || ret == -ENOSPC) &&
|
||||
!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
@ -378,6 +388,8 @@ retry:
|
||||
goto retry;
|
||||
|
||||
if (ret == -ENOSPC) {
|
||||
BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
|
||||
|
||||
/*
|
||||
* Journal is full - can't rely on reclaim from work item due to
|
||||
* freezing:
|
||||
@ -423,6 +435,32 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* journal_preres: */
|
||||
|
||||
static bool journal_preres_available(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s)
|
||||
{
|
||||
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s);
|
||||
|
||||
if (!ret)
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __bch2_journal_preres_get(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s)
|
||||
{
|
||||
int ret;
|
||||
|
||||
closure_wait_event(&j->preres_wait,
|
||||
(ret = bch2_journal_error(j)) ||
|
||||
journal_preres_available(j, res, new_u64s));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* journal_entry_res: */
|
||||
|
||||
void bch2_journal_entry_res_resize(struct journal *j,
|
||||
@ -1110,11 +1148,16 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
||||
"seq:\t\t\t%llu\n"
|
||||
"last_seq:\t\t%llu\n"
|
||||
"last_seq_ondisk:\t%llu\n"
|
||||
"prereserved:\t\t%u/%u\n"
|
||||
"current entry sectors:\t%u\n"
|
||||
"current entry:\t\t",
|
||||
fifo_used(&j->pin),
|
||||
journal_cur_seq(j),
|
||||
journal_last_seq(j),
|
||||
j->last_seq_ondisk);
|
||||
j->last_seq_ondisk,
|
||||
j->prereserved.reserved,
|
||||
j->prereserved.remaining,
|
||||
j->cur_entry_sectors);
|
||||
|
||||
switch (s.cur_entry_offset) {
|
||||
case JOURNAL_ENTRY_ERROR_VAL:
|
||||
@ -1136,8 +1179,9 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
||||
journal_state_count(s, s.idx));
|
||||
|
||||
if (s.prev_buf_unwritten)
|
||||
pr_buf(&out, "yes, ref %u\n",
|
||||
journal_state_count(s, !s.idx));
|
||||
pr_buf(&out, "yes, ref %u sectors %u\n",
|
||||
journal_state_count(s, !s.idx),
|
||||
journal_prev_buf(j)->sectors);
|
||||
else
|
||||
pr_buf(&out, "no\n");
|
||||
|
||||
|
@ -119,6 +119,7 @@ static inline void journal_wake(struct journal *j)
|
||||
{
|
||||
wake_up(&j->wait);
|
||||
closure_wake_up(&j->async_wait);
|
||||
closure_wake_up(&j->preres_wait);
|
||||
}
|
||||
|
||||
static inline struct journal_buf *journal_cur_buf(struct journal *j)
|
||||
@ -274,6 +275,7 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
|
||||
|
||||
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
|
||||
#define JOURNAL_RES_GET_CHECK (1 << 1)
|
||||
#define JOURNAL_RES_GET_RESERVED (1 << 2)
|
||||
|
||||
static inline int journal_res_get_fast(struct journal *j,
|
||||
struct journal_res *res,
|
||||
@ -294,6 +296,10 @@ static inline int journal_res_get_fast(struct journal *j,
|
||||
|
||||
EBUG_ON(!journal_state_count(new, new.idx));
|
||||
|
||||
if (!(flags & JOURNAL_RES_GET_RESERVED) &&
|
||||
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
|
||||
return 0;
|
||||
|
||||
if (flags & JOURNAL_RES_GET_CHECK)
|
||||
return 1;
|
||||
|
||||
@ -333,6 +339,89 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* journal_preres: */
|
||||
|
||||
static inline bool journal_check_may_get_unreserved(struct journal *j)
|
||||
{
|
||||
union journal_preres_state s = READ_ONCE(j->prereserved);
|
||||
bool ret = s.reserved <= s.remaining &&
|
||||
fifo_free(&j->pin) > 8;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
|
||||
if (ret) {
|
||||
set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
|
||||
journal_wake(j);
|
||||
} else {
|
||||
clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void bch2_journal_preres_put(struct journal *j,
|
||||
struct journal_preres *res)
|
||||
{
|
||||
union journal_preres_state s = { .reserved = res->u64s };
|
||||
|
||||
if (!res->u64s)
|
||||
return;
|
||||
|
||||
s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
|
||||
res->u64s = 0;
|
||||
closure_wake_up(&j->preres_wait);
|
||||
|
||||
if (s.reserved <= s.remaining &&
|
||||
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
|
||||
spin_lock(&j->lock);
|
||||
journal_check_may_get_unreserved(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
}
|
||||
|
||||
int __bch2_journal_preres_get(struct journal *,
|
||||
struct journal_preres *, unsigned);
|
||||
|
||||
static inline int bch2_journal_preres_get_fast(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s)
|
||||
{
|
||||
int d = new_u64s - res->u64s;
|
||||
union journal_preres_state old, new;
|
||||
u64 v = atomic64_read(&j->prereserved.counter);
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
|
||||
new.reserved += d;
|
||||
|
||||
if (new.reserved > new.remaining)
|
||||
return 0;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
res->u64s += d;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int bch2_journal_preres_get(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
{
|
||||
if (new_u64s <= res->u64s)
|
||||
return 0;
|
||||
|
||||
if (bch2_journal_preres_get_fast(j, res, new_u64s))
|
||||
return 0;
|
||||
|
||||
if (flags & JOURNAL_RES_GET_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
|
||||
return __bch2_journal_preres_get(j, res, new_u64s);
|
||||
}
|
||||
|
||||
/* journal_entry_res: */
|
||||
|
||||
void bch2_journal_entry_res_resize(struct journal *,
|
||||
|
@ -974,6 +974,12 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
|
||||
journal_space_discarded)) {
|
||||
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
ja->sectors_free = ca->mi.bucket_size;
|
||||
|
||||
/*
|
||||
* ja->bucket_seq[ja->cur_idx] must always have
|
||||
* something sensible:
|
||||
*/
|
||||
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,6 +49,18 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
|
||||
return available;
|
||||
}
|
||||
|
||||
static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
|
||||
{
|
||||
union journal_preres_state old, new;
|
||||
u64 v = atomic64_read(&j->prereserved.counter);
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
new.remaining = u64s_remaining;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
}
|
||||
|
||||
static struct journal_space {
|
||||
unsigned next_entry;
|
||||
unsigned remaining;
|
||||
@ -124,8 +136,9 @@ void bch2_journal_space_available(struct journal *j)
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
struct journal_space discarded, clean_ondisk, clean;
|
||||
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
||||
j->buf[1].buf_size >> 9);
|
||||
unsigned overhead, u64s_remaining = 0;
|
||||
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
||||
j->buf[1].buf_size >> 9);
|
||||
unsigned i, nr_online = 0, nr_devs_want;
|
||||
bool can_discard = false;
|
||||
int ret = 0;
|
||||
@ -176,9 +189,17 @@ void bch2_journal_space_available(struct journal *j)
|
||||
|
||||
if (!discarded.next_entry)
|
||||
ret = -ENOSPC;
|
||||
|
||||
overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
|
||||
journal_entry_overhead(j);
|
||||
u64s_remaining = clean.remaining << 6;
|
||||
u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
|
||||
u64s_remaining /= 4;
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? discarded.next_entry : 0;
|
||||
j->cur_entry_error = ret;
|
||||
journal_set_remaining(j, u64s_remaining);
|
||||
journal_check_may_get_unreserved(j);
|
||||
|
||||
if (!ret)
|
||||
journal_wake(j);
|
||||
@ -454,7 +475,7 @@ void bch2_journal_reclaim(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned iter, bucket_to_flush, min_nr = 0;
|
||||
unsigned iter, min_nr = 0;
|
||||
u64 seq_to_flush = 0;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
@ -465,13 +486,22 @@ void bch2_journal_reclaim(struct journal *j)
|
||||
|
||||
for_each_rw_member(ca, c, iter) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
unsigned nr_buckets, bucket_to_flush;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
|
||||
/* Try to keep the journal at most half full: */
|
||||
bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
|
||||
nr_buckets = ja->nr / 2;
|
||||
|
||||
/* And include pre-reservations: */
|
||||
nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
|
||||
(ca->mi.bucket_size << 6) -
|
||||
journal_entry_overhead(j));
|
||||
|
||||
nr_buckets = min(nr_buckets, ja->nr);
|
||||
|
||||
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
|
||||
seq_to_flush = max_t(u64, seq_to_flush,
|
||||
ja->bucket_seq[bucket_to_flush]);
|
||||
}
|
||||
@ -490,6 +520,9 @@ void bch2_journal_reclaim(struct journal *j)
|
||||
msecs_to_jiffies(j->reclaim_delay_ms)))
|
||||
min_nr = 1;
|
||||
|
||||
if (j->prereserved.reserved * 2 > j->prereserved.remaining)
|
||||
min_nr = 1;
|
||||
|
||||
journal_flush_pins(j, seq_to_flush, min_nr);
|
||||
|
||||
if (!test_bit(BCH_FS_RO, &c->flags))
|
||||
|
@ -80,6 +80,14 @@ struct journal_res {
|
||||
u64 seq;
|
||||
};
|
||||
|
||||
/*
 * Handle for a journal pre-reservation: space set aside in the journal as a
 * whole, taken before a reservation on a particular journal entry is
 * obtained.
 *
 * @u64s is the amount currently held by this handle; zero means nothing is
 * held.
 */
struct journal_preres {
	unsigned		u64s;
};
|
||||
|
||||
union journal_res_state {
|
||||
struct {
|
||||
atomic64_t counter;
|
||||
@ -98,6 +106,21 @@ union journal_res_state {
|
||||
};
|
||||
};
|
||||
|
||||
union journal_preres_state {
|
||||
struct {
|
||||
atomic64_t counter;
|
||||
};
|
||||
|
||||
struct {
|
||||
u64 v;
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 reserved;
|
||||
u32 remaining;
|
||||
};
|
||||
};
|
||||
|
||||
/* bytes: */
|
||||
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
|
||||
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
|
||||
@ -122,6 +145,7 @@ enum {
|
||||
JOURNAL_STARTED,
|
||||
JOURNAL_NEED_WRITE,
|
||||
JOURNAL_NOT_EMPTY,
|
||||
JOURNAL_MAY_GET_UNRESERVED,
|
||||
};
|
||||
|
||||
/* Embedded in struct bch_fs */
|
||||
@ -142,6 +166,8 @@ struct journal {
|
||||
*/
|
||||
int cur_entry_error;
|
||||
|
||||
union journal_preres_state prereserved;
|
||||
|
||||
/* Reserved space in journal entry to be used just prior to write */
|
||||
unsigned entry_u64s_reserved;
|
||||
|
||||
@ -161,6 +187,7 @@ struct journal {
|
||||
/* Used when waiting because the journal was full */
|
||||
wait_queue_head_t wait;
|
||||
struct closure_waitlist async_wait;
|
||||
struct closure_waitlist preres_wait;
|
||||
|
||||
struct closure io;
|
||||
struct delayed_work write_work;
|
||||
|
Loading…
x
Reference in New Issue
Block a user