bcachefs: Increase journal pipelining
This patch increases the maximum number of journal buffers in flight from 2 to 4; this will be particularly helpful when, in the future, we stop requiring flush+fua for every journal write.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
commit ebb84d0941
parent 5db43418d5
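The patch turns the journal's two-buffer ping-pong (a one-bit idx plus a prev_buf_unwritten flag) into a small ring of JOURNAL_BUF_NR buffers tracked by two indices, idx and unwritten_idx, packed into the shared reservation word. Below is a minimal standalone sketch of the ring arithmetic the diff relies on; it is not taken from the kernel sources, and the struct and helper names (ring_state, nr_unwritten, close_entry) are illustrative only.

#include <assert.h>

#define JOURNAL_BUF_BITS 2
#define JOURNAL_BUF_NR   (1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)

/* Illustrative stand-in for the idx/unwritten_idx pair in journal_res_state. */
struct ring_state {
        unsigned idx;           /* buffer currently open for reservations */
        unsigned unwritten_idx; /* oldest buffer not yet written to disk */
};

/* Number of closed-but-unwritten buffers; the subtraction wraps correctly
 * because it is taken modulo JOURNAL_BUF_NR. */
static unsigned nr_unwritten(struct ring_state s)
{
        return (s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK;
}

/* Closing the current entry advances idx; it must refuse to advance when
 * every buffer is already in flight (idx would collide with unwritten_idx),
 * mirroring the new check in __journal_entry_close(). */
static int close_entry(struct ring_state *s)
{
        unsigned next = (s->idx + 1) & JOURNAL_BUF_MASK;

        if (next == s->unwritten_idx)
                return 0;       /* pipeline full, caller must wait */
        s->idx = next;
        return 1;
}

int main(void)
{
        struct ring_state s = { .idx = 0, .unwritten_idx = 0 };

        assert(nr_unwritten(s) == 0);
        assert(close_entry(&s) && close_entry(&s) && close_entry(&s));
        assert(nr_unwritten(s) == 3);   /* three writes can now be in flight */
        assert(!close_entry(&s));       /* the fourth close has to wait */
        return 0;
}

Under these assumptions, up to three closed entries can be queued behind the one currently open, which is one way to read the "from 2 to 4" in the commit message. The same wraparound subtraction appears in last_unwritten_seq() and bch2_journal_writing_to_device() in the diff below.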
@@ -23,7 +23,7 @@ static u64 last_unwritten_seq(struct journal *j)
 
        lockdep_assert_held(&j->lock);
 
-       return journal_cur_seq(j) - s.prev_buf_unwritten;
+       return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK);
 }
 
 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
@@ -51,7 +51,7 @@ journal_seq_to_buf(struct journal *j, u64 seq)
                j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
 
        if (journal_seq_unwritten(j, seq)) {
-               buf = j->buf + (seq & 1);
+               buf = j->buf + (seq & JOURNAL_BUF_MASK);
                EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
        }
        return buf;
@@ -108,15 +108,8 @@ void bch2_journal_halt(struct journal *j)
 
 /* journal entry close/open: */
 
-void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+void __bch2_journal_buf_put(struct journal *j)
 {
-       if (!need_write_just_set &&
-           test_bit(JOURNAL_NEED_WRITE, &j->flags))
-               bch2_time_stats_update(j->delay_time,
-                                      j->need_write_time);
-
-       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
-
        closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
 }
 
@@ -129,7 +122,6 @@ static bool __journal_entry_close(struct journal *j)
        struct journal_buf *buf = journal_cur_buf(j);
        union journal_res_state old, new;
        u64 v = atomic64_read(&j->reservations.counter);
-       bool set_need_write = false;
        unsigned sectors;
 
        lockdep_assert_held(&j->lock);
@@ -148,15 +140,13 @@ static bool __journal_entry_close(struct journal *j)
                if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
                        set_bit(JOURNAL_NEED_WRITE, &j->flags);
                        j->need_write_time = local_clock();
-                       set_need_write = true;
                }
 
-               if (new.prev_buf_unwritten)
-                       return false;
-
                new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
                new.idx++;
-               new.prev_buf_unwritten = 1;
+
+               if (new.idx == new.unwritten_idx)
+                       return false;
 
                BUG_ON(journal_state_count(new, new.idx));
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
@@ -190,24 +180,44 @@ static bool __journal_entry_close(struct journal *j)
         */
        buf->data->last_seq = cpu_to_le64(journal_last_seq(j));
 
+       __bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
+
        journal_pin_new_entry(j, 1);
 
        bch2_journal_buf_init(j);
 
        cancel_delayed_work(&j->write_work);
+       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
 
        bch2_journal_space_available(j);
 
-       bch2_journal_buf_put(j, old.idx, set_need_write);
+       bch2_journal_buf_put(j, old.idx);
        return true;
 }
 
+static bool journal_entry_want_write(struct journal *j)
+{
+       union journal_res_state s = READ_ONCE(j->reservations);
+       bool ret = false;
+
+       /*
+        * Don't close it yet if we already have a write in flight, but do set
+        * NEED_WRITE:
+        */
+       if (s.idx != s.unwritten_idx)
+               set_bit(JOURNAL_NEED_WRITE, &j->flags);
+       else
+               ret = __journal_entry_close(j);
+
+       return ret;
+}
+
 static bool journal_entry_close(struct journal *j)
 {
        bool ret;
 
        spin_lock(&j->lock);
-       ret = __journal_entry_close(j);
+       ret = journal_entry_want_write(j);
        spin_unlock(&j->lock);
 
        return ret;
@@ -289,8 +299,8 @@ static int journal_entry_open(struct journal *j)
 
 static bool journal_quiesced(struct journal *j)
 {
-       union journal_res_state state = READ_ONCE(j->reservations);
-       bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
+       union journal_res_state s = READ_ONCE(j->reservations);
+       bool ret = s.idx == s.unwritten_idx && !__journal_entry_is_open(s);
 
        if (!ret)
                journal_entry_close(j);
@@ -317,17 +327,29 @@ static void journal_write_work(struct work_struct *work)
 u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
 {
        size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
-       u64 seq = 0;
-
-       if (!test_bit(h, j->buf[0].has_inode) &&
-           !test_bit(h, j->buf[1].has_inode))
-               return 0;
+       union journal_res_state s;
+       unsigned i;
+       u64 seq;
 
        spin_lock(&j->lock);
-       if (test_bit(h, journal_cur_buf(j)->has_inode))
-               seq = journal_cur_seq(j);
-       else if (test_bit(h, journal_prev_buf(j)->has_inode))
-               seq = journal_cur_seq(j) - 1;
+       seq = journal_cur_seq(j);
+       s = READ_ONCE(j->reservations);
+       i = s.idx;
+
+       while (1) {
+               if (test_bit(h, j->buf[i].has_inode))
+                       goto out;
 
+               if (i == s.unwritten_idx)
+                       break;
+
+               i = (i - 1) & JOURNAL_BUF_MASK;
+               seq--;
+       }
+
+       seq = 0;
+out:
        spin_unlock(&j->lock);
 
        return seq;
@@ -574,7 +596,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
                BUG();
 
        if (seq == journal_cur_seq(j))
-               __journal_entry_close(j);
+               journal_entry_want_write(j);
 out:
        spin_unlock(&j->lock);
        return ret;
@@ -863,15 +885,18 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
 static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
 {
        union journal_res_state state;
-       struct journal_buf *w;
-       bool ret;
+       bool ret = false;
+       unsigned i;
 
        spin_lock(&j->lock);
        state = READ_ONCE(j->reservations);
-       w = j->buf + !state.idx;
+       i = state.idx;
 
-       ret = state.prev_buf_unwritten &&
-               bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx);
+       while (i != state.unwritten_idx) {
+               i = (i - 1) & JOURNAL_BUF_MASK;
+               if (bch2_bkey_has_device(bkey_i_to_s_c(&j->buf[i].key), dev_idx))
+                       ret = true;
+       }
        spin_unlock(&j->lock);
 
        return ret;
@@ -957,7 +982,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 
        journal_pin_new_entry(j, 1);
 
-       j->reservations.idx = journal_cur_seq(j);
+       j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
 
        bch2_journal_buf_init(j);
 
@@ -1015,8 +1040,10 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-       kvpfree(j->buf[1].data, j->buf[1].buf_size);
-       kvpfree(j->buf[0].data, j->buf[0].buf_size);
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(j->buf); i++)
+               kvpfree(j->buf[i].data, j->buf[i].buf_size);
        free_fifo(&j->pin);
 }
 
@@ -1024,6 +1051,7 @@ int bch2_fs_journal_init(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        static struct lock_class_key res_key;
+       unsigned i;
        int ret = 0;
 
        pr_verbose_init(c->opts, "");
@@ -1038,8 +1066,6 @@ int bch2_fs_journal_init(struct journal *j)
 
        lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-       j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN;
-       j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN;
        j->write_delay_ms = 1000;
        j->reclaim_delay_ms = 100;
 
@@ -1051,13 +1077,20 @@ int bch2_fs_journal_init(struct journal *j)
                  ((union journal_res_state)
                   { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
-       if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-           !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
-           !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
+       if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) {
                ret = -ENOMEM;
                goto out;
        }
 
+       for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
+               j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
+               j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
+               if (!j->buf[i].data) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
        j->pin.front = j->pin.back = 1;
 out:
        pr_verbose_init(c->opts, "ret %i", ret);
@@ -1071,7 +1104,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        union journal_res_state s;
        struct bch_dev *ca;
-       unsigned iter;
+       unsigned i;
 
        rcu_read_lock();
        spin_lock(&j->lock);
@@ -1114,16 +1147,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        }
 
        pr_buf(out,
-              "current entry refs:\t%u\n"
-              "prev entry unwritten:\t",
-              journal_state_count(s, s.idx));
+              "current entry:\tidx %u refcount %u\n",
+              s.idx, journal_state_count(s, s.idx));
 
-       if (s.prev_buf_unwritten)
-               pr_buf(out, "yes, ref %u sectors %u\n",
-                      journal_state_count(s, !s.idx),
-                      journal_prev_buf(j)->sectors);
-       else
-               pr_buf(out, "no\n");
+       i = s.idx;
+       while (i != s.unwritten_idx) {
+               i = (i - 1) & JOURNAL_BUF_MASK;
+
+               pr_buf(out, "unwritten entry:\tidx %u refcount %u sectors %u\n",
+                      i, journal_state_count(s, i), j->buf[i].sectors);
+       }
 
        pr_buf(out,
               "need write:\t\t%i\n"
@@ -1131,7 +1164,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               test_bit(JOURNAL_NEED_WRITE, &j->flags),
               test_bit(JOURNAL_REPLAY_DONE, &j->flags));
 
-       for_each_member_device_rcu(ca, c, iter,
+       for_each_member_device_rcu(ca, c, i,
                                   &c->rw_devs[BCH_DATA_journal]) {
                struct journal_device *ja = &ca->journal;
 
@@ -1146,7 +1179,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
                       "\tdirty_idx_ondisk\t%u (seq %llu)\n"
                       "\tdirty_idx\t\t%u (seq %llu)\n"
                       "\tcur_idx\t\t%u (seq %llu)\n",
-                      iter, ja->nr,
+                      i, ja->nr,
                       bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
                       ja->sectors_free,
                       ja->discard_idx,
@@ -127,11 +127,6 @@ static inline struct journal_buf *journal_cur_buf(struct journal *j)
        return j->buf + j->reservations.idx;
 }
 
-static inline struct journal_buf *journal_prev_buf(struct journal *j)
-{
-       return j->buf + !j->reservations.idx;
-}
-
 /* Sequence number of oldest dirty journal entry */
 
 static inline u64 journal_last_seq(struct journal *j)
@@ -151,13 +146,21 @@ void bch2_journal_set_has_inum(struct journal *, u64, u64);
 
 static inline int journal_state_count(union journal_res_state s, int idx)
 {
-       return idx == 0 ? s.buf0_count : s.buf1_count;
+       switch (idx) {
+       case 0: return s.buf0_count;
+       case 1: return s.buf1_count;
+       case 2: return s.buf2_count;
+       case 3: return s.buf3_count;
+       }
+       BUG();
 }
 
 static inline void journal_state_inc(union journal_res_state *s)
 {
        s->buf0_count += s->idx == 0;
        s->buf1_count += s->idx == 1;
+       s->buf2_count += s->idx == 2;
+       s->buf3_count += s->idx == 3;
 }
 
 static inline void bch2_journal_set_has_inode(struct journal *j,
@@ -257,21 +260,24 @@ static inline bool journal_entry_empty(struct jset *j)
        return true;
 }
 
-void __bch2_journal_buf_put(struct journal *, bool);
+void __bch2_journal_buf_put(struct journal *);
 
-static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
-                                       bool need_write_just_set)
+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
 {
        union journal_res_state s;
 
        s.v = atomic64_sub_return(((union journal_res_state) {
                      .buf0_count = idx == 0,
                      .buf1_count = idx == 1,
+                     .buf2_count = idx == 2,
+                     .buf3_count = idx == 3,
                      }).v, &j->reservations.counter);
-       if (!journal_state_count(s, idx)) {
-               EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
-               __bch2_journal_buf_put(j, need_write_just_set);
-       }
+
+       EBUG_ON(((s.idx - idx) & 3) >
+               ((s.idx - s.unwritten_idx) & 3));
+
+       if (!journal_state_count(s, idx) && idx == s.unwritten_idx)
+               __bch2_journal_buf_put(j);
 }
 
 /*
@@ -291,7 +297,7 @@ static inline void bch2_journal_res_put(struct journal *j,
                                       BCH_JSET_ENTRY_btree_keys,
                                       0, 0, NULL, 0);
 
-       bch2_journal_buf_put(j, res->idx, false);
+       bch2_journal_buf_put(j, res->idx);
 
        res->ref = 0;
 }
@@ -327,11 +333,18 @@ static inline int journal_res_get_fast(struct journal *j,
                    !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
                        return 0;
 
-               if (flags & JOURNAL_RES_GET_CHECK)
-                       return 1;
-
                new.cur_entry_offset += res->u64s;
                journal_state_inc(&new);
+
+               /*
+                * If the refcount would overflow, we have to wait:
+                * XXX - tracepoint this:
+                */
+               if (!journal_state_count(new, new.idx))
+                       return 0;
+
+               if (flags & JOURNAL_RES_GET_CHECK)
+                       return 1;
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
@@ -950,16 +950,23 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        buf->buf_size = new_size;
 }
 
+static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
+{
+       return j->buf + j->reservations.unwritten_idx;
+}
+
 static void journal_write_done(struct closure *cl)
 {
        struct journal *j = container_of(cl, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct journal_buf *w = journal_prev_buf(j);
+       struct journal_buf *w = journal_last_unwritten_buf(j);
        struct bch_devs_list devs =
                bch2_bkey_devs(bkey_i_to_s_c(&w->key));
        struct bch_replicas_padded replicas;
+       union journal_res_state old, new;
        u64 seq = le64_to_cpu(w->data->seq);
        u64 last_seq = le64_to_cpu(w->data->last_seq);
+       u64 v;
        int err = 0;
 
        bch2_time_stats_update(j->write_time, j->write_start_time);
@@ -998,9 +1005,14 @@ static void journal_write_done(struct closure *cl)
        /* also must come before signalling write completion: */
        closure_debug_destroy(cl);
 
-       BUG_ON(!j->reservations.prev_buf_unwritten);
-       atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v,
-                    &j->reservations.counter);
+       v = atomic64_read(&j->reservations.counter);
+       do {
+               old.v = new.v = v;
+               BUG_ON(new.idx == new.unwritten_idx);
+
+               new.unwritten_idx++;
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
 
        closure_wake_up(&w->wait);
        journal_wake(j);
@@ -1008,6 +1020,10 @@ static void journal_write_done(struct closure *cl)
        if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
                mod_delayed_work(system_freezable_wq, &j->write_work, 0);
        spin_unlock(&j->lock);
+
+       if (new.unwritten_idx != new.idx &&
+           !journal_state_count(new, new.unwritten_idx))
+               closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
 }
 
 static void journal_write_endio(struct bio *bio)
@@ -1018,7 +1034,7 @@ static void journal_write_endio(struct bio *bio)
        if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
                               bch2_blk_status_to_str(bio->bi_status)) ||
            bch2_meta_write_fault("journal")) {
-               struct journal_buf *w = journal_prev_buf(j);
+               struct journal_buf *w = journal_last_unwritten_buf(j);
                unsigned long flags;
 
                spin_lock_irqsave(&j->err_lock, flags);
@@ -1035,7 +1051,7 @@ void bch2_journal_write(struct closure *cl)
        struct journal *j = container_of(cl, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
-       struct journal_buf *w = journal_prev_buf(j);
+       struct journal_buf *w = journal_last_unwritten_buf(j);
        struct jset_entry *start, *end;
        struct jset *jset;
        struct bio *bio;
@@ -1046,8 +1062,6 @@ void bch2_journal_write(struct closure *cl)
 
        BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
 
-       bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
-
        journal_buf_realloc(j, w);
        jset = w->data;
 
@@ -58,6 +58,19 @@ static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
                                       old.v, new.v)) != old.v);
 }
 
+static inline unsigned get_unwritten_sectors(struct journal *j, unsigned *idx)
+{
+       unsigned sectors = 0;
+
+       while (!sectors && *idx != j->reservations.idx) {
+               sectors = j->buf[*idx].sectors;
+
+               *idx = (*idx + 1) & JOURNAL_BUF_MASK;
+       }
+
+       return sectors;
+}
+
 static struct journal_space {
        unsigned        next_entry;
        unsigned        remaining;
@@ -69,15 +82,14 @@ static struct journal_space {
        unsigned sectors_next_entry     = UINT_MAX;
        unsigned sectors_total          = UINT_MAX;
        unsigned i, nr_devs = 0;
-       unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
-               ? journal_prev_buf(j)->sectors
-               : 0;
+       unsigned unwritten_sectors;
 
        rcu_read_lock();
        for_each_member_device_rcu(ca, c, i,
                                   &c->rw_devs[BCH_DATA_journal]) {
                struct journal_device *ja = &ca->journal;
                unsigned buckets_this_device, sectors_this_device;
+               unsigned idx = j->reservations.unwritten_idx;
 
                if (!ja->nr)
                        continue;
@@ -89,16 +101,20 @@ static struct journal_space {
                 * We that we don't allocate the space for a journal entry
                 * until we write it out - thus, account for it here:
                 */
-               if (unwritten_sectors >= sectors_this_device) {
-                       if (!buckets_this_device)
-                               continue;
-
-                       buckets_this_device--;
-                       sectors_this_device = ca->mi.bucket_size;
-               }
-
-               sectors_this_device -= unwritten_sectors;
+               while ((unwritten_sectors = get_unwritten_sectors(j, &idx))) {
+                       if (unwritten_sectors >= sectors_this_device) {
+                               if (!buckets_this_device) {
+                                       sectors_this_device = 0;
+                                       break;
+                               }
+
+                               buckets_this_device--;
+                               sectors_this_device = ca->mi.bucket_size;
+                       }
+
+                       sectors_this_device -= unwritten_sectors;
+               }
 
                if (sectors_this_device < ca->mi.bucket_size &&
                    buckets_this_device) {
                        buckets_this_device--;
@@ -277,6 +293,14 @@ static void bch2_journal_reclaim_fast(struct journal *j)
                bch2_journal_space_available(j);
 }
 
+void __bch2_journal_pin_put(struct journal *j, u64 seq)
+{
+       struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+       if (atomic_dec_and_test(&pin_list->count))
+               bch2_journal_reclaim_fast(j);
+}
+
 void bch2_journal_pin_put(struct journal *j, u64 seq)
 {
        struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
@@ -39,6 +39,7 @@ journal_seq_pin(struct journal *j, u64 seq)
        return &j->pin.data[seq & j->pin.mask];
 }
 
+void __bch2_journal_pin_put(struct journal *, u64);
 void bch2_journal_pin_put(struct journal *, u64);
 void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
 
@@ -11,13 +11,13 @@
 
 struct journal_res;
 
-#define JOURNAL_BUF_BITS       1
+#define JOURNAL_BUF_BITS       2
 #define JOURNAL_BUF_NR         (1U << JOURNAL_BUF_BITS)
 #define JOURNAL_BUF_MASK       (JOURNAL_BUF_NR - 1)
 
 /*
- * We put two of these in struct journal; we used them for writes to the
- * journal that are being staged or in flight.
+ * We put JOURNAL_BUF_NR of these in struct journal; we used them for writes to
+ * the journal that are being staged or in flight.
  */
 struct journal_buf {
        struct jset             *data;
@@ -85,10 +85,12 @@ union journal_res_state
 
        struct {
                u64             cur_entry_offset:20,
-                               idx:1,
-                               prev_buf_unwritten:1,
-                               buf0_count:21,
-                               buf1_count:21;
+                               idx:2,
+                               unwritten_idx:2,
+                               buf0_count:10,
+                               buf1_count:10,
+                               buf2_count:10,
+                               buf3_count:10;
        };
 };
 
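Note how the repacked reservation word above still has to fit in 64 bits: 20 + 2 + 2 + 4*10 = 64, so each per-buffer refcount shrinks from 21 to 10 bits, which is why journal_res_get_fast() now bails out when a refcount would overflow. A standalone sketch of that packing constraint follows; the union name is hypothetical and this is not the kernel definition.

#include <stdint.h>

/* Illustrative mirror of the repacked state layout; all fields must share
 * one 64-bit word so the whole state can still be updated with a single
 * atomic cmpxchg. */
union res_state_sketch {
        uint64_t v;
        struct {
                uint64_t cur_entry_offset:20,
                         idx:2,
                         unwritten_idx:2,
                         buf0_count:10,
                         buf1_count:10,
                         buf2_count:10,
                         buf3_count:10;
        };
};

_Static_assert(20 + 2 + 2 + 4 * 10 == 64,
               "bitfields must fill the 64-bit word exactly");
_Static_assert(sizeof(union res_state_sketch) == sizeof(uint64_t),
               "the state must remain a single machine word");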
@@ -169,7 +171,7 @@ struct journal {
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
         */
-       struct journal_buf buf[2];
+       struct journal_buf buf[JOURNAL_BUF_NR];
 
        spinlock_t              lock;
 
@@ -1048,13 +1048,13 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (!c->sb.clean) {
                ret = bch2_journal_seq_blacklist_add(c,
                                                     journal_seq,
-                                                    journal_seq + 4);
+                                                    journal_seq + 8);
                if (ret) {
                        bch_err(c, "error creating new journal seq blacklist entry");
                        goto err;
                }
 
-               journal_seq += 4;
+               journal_seq += 8;
 
                /*
                 * The superblock needs to be written before we do any btree