09caeabe1a
Previously, the transaction commit path would have to add keys to the btree write buffer as a separate operation, requiring additional global synchronization. This patch introduces a new journal entry type, which indicates that the keys need to be copied into the btree write buffer prior to being written out. We switch the journal entry type back to JSET_ENTRY_btree_keys prior to write, so this is not an on disk format change. Flushing the btree write buffer may require pulling keys out of journal entries yet to be written, and quiescing outstanding journal reservations; we previously added journal->buf_lock for synchronization with the journal write path. We also can't put strict bounds on the number of keys in the journal destined for the write buffer, which means we might overflow the size of the preallocated buffer and have to reallocate - this introduces a potentially fatal memory allocation failure. This is something we'll have to watch for; if it becomes an issue in practice we can do additional mitigation. The transaction commit path no longer has to explicitly check if the write buffer is full and wait on flushing; this is another performance optimization. Instead, when the btree write buffer is close to full we change the journal watermark, so that only reservations for journal reclaim are allowed. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
62 lines
1.8 KiB
C
62 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
|
|
#define _BCACHEFS_BTREE_WRITE_BUFFER_H
|
|
|
|
#include "bkey.h"
|
|
|
|
static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
|
|
{
|
|
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
|
|
|
return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
|
|
}
|
|
|
|
static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
|
|
{
|
|
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
|
|
|
return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
|
|
}
|
|
|
|
struct btree_trans;

/* Flush the write buffer, blocking until the flush completes: */
int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
/*
 * NOTE(review): "nocheck_rw" presumably skips the check/ref on the
 * filesystem being read-write — confirm against btree_write_buffer.c
 */
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
/* Nonblocking flush attempt: */
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
|
|
|
|
/*
 * Cursor state for copying keys out of a journal entry into the btree
 * write buffer (see bch2_journal_key_to_wb() below):
 */
struct journal_keys_to_wb {
	struct btree_write_buffer_keys	*wb;	/* destination buffer */
	size_t				room;	/* preallocated slots left before taking the slow path */
	u64				seq;	/* journal seq stamped on each copied key */
};
|
|
|
|
/*
 * Slow path for bch2_journal_key_to_wb(), taken when dst->room is
 * exhausted (may need to grow/switch the destination buffer):
 */
int __bch2_journal_key_to_wb(struct bch_fs *,
			     struct journal_keys_to_wb *,
			     enum btree_id, struct bkey_i *);
|
|
|
|
static inline int bch2_journal_key_to_wb(struct bch_fs *c,
|
|
struct journal_keys_to_wb *dst,
|
|
enum btree_id btree, struct bkey_i *k)
|
|
{
|
|
EBUG_ON(!dst->seq);
|
|
|
|
if (unlikely(!dst->room))
|
|
return __bch2_journal_key_to_wb(c, dst, btree, k);
|
|
|
|
struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
|
|
wb_k->journal_seq = dst->seq;
|
|
wb_k->btree = btree;
|
|
bkey_copy(&wb_k->k, k);
|
|
dst->wb->keys.nr++;
|
|
dst->room--;
|
|
return 0;
|
|
}
|
|
|
|
/* Begin/end a pass copying keys from journal entries at seq @u64 into the write buffer: */
void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);

/* Resize the preallocated write buffer to the given number of keys: */
int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
/* Filesystem init/teardown hooks for the write buffer: */
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);
|
|
|
|
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */
|