bcachefs: BCH_WRITE_SYNC
This adds a new flag for the write path, BCH_WRITE_SYNC, and switches the
O_DIRECT write path to use it when we're not running asynchronously.

With BCH_WRITE_SYNC set, the btree update after the data write runs in the
original thread's context instead of being punted to a kworker, cutting
context switches in half.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent a101957649
commit 1df3e19996
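
At a glance: write completion is split into __bch2_write_done(), which does
the final teardown, and a new bch2_write_done() dispatcher that decides how
to get there based on the new flag. A simplified paraphrase of the dispatcher
added below (reordered for readability; not the literal diff):

	if (!(op->flags & BCH_WRITE_FLUSH) || op->error) {
		/* No journal flush wanted (or the write failed): finish now. */
		__bch2_write_done(&op->cl);
	} else if (op->flags & BCH_WRITE_SYNC) {
		/* Synchronous: flush the journal from this thread, then finish. */
		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
		__bch2_write_done(&op->cl);
	} else {
		/* Asynchronous: flush via the closure; finish from a kworker. */
		bch2_journal_flush_seq_async(&op->c->journal,
					     op->journal_seq, &op->cl);
		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
	}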
@@ -2156,6 +2156,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 	dio->op.subvol		= inode->ei_subvol;
 	dio->op.pos		= POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
 
+	if (sync)
+		dio->op.flags |= BCH_WRITE_SYNC;
 	if ((req->ki_flags & IOCB_DSYNC) &&
 	    !c->opts.journal_flush_disabled)
 		dio->op.flags |= BCH_WRITE_FLUSH;
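
For context: `sync` here is computed by the caller, bch2_direct_write(), from
the kiocb. The sketch below shows roughly how; it is an assumption about
surrounding code that is not part of this diff, and `extending` is an
illustrative local name:

	/* Assumed shape of the caller's logic, not in this diff: a dio can
	 * only complete asynchronously if the iocb has a completion and the
	 * write doesn't extend the file. */
	bool extending = ((u64) req->ki_pos + iter->count) > i_size_read(&inode->v);
	bool sync = is_sync_kiocb(req) || extending;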
@@ -596,7 +596,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
 static void __bch2_write(struct bch_write_op *);
 
-static void bch2_write_done(struct closure *cl)
+static void __bch2_write_done(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_fs *c = op->c;
@@ -612,7 +612,23 @@ static void bch2_write_done(struct closure *cl)
 
 	EBUG_ON(cl->parent);
 	closure_debug_destroy(cl);
-	op->end_io(op);
+	if (op->end_io)
+		op->end_io(op);
 }
 
+static __always_inline void bch2_write_done(struct bch_write_op *op)
+{
+	if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
+		__bch2_write_done(&op->cl);
+	} else if (!(op->flags & BCH_WRITE_SYNC)) {
+		bch2_journal_flush_seq_async(&op->c->journal,
+					     op->journal_seq,
+					     &op->cl);
+		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
+	} else {
+		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
+		__bch2_write_done(&op->cl);
+	}
+}
+
 static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
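
Both branches of the dispatcher rely on closure continuations. A rough model
of the primitives used here, paraphrased rather than quoted from the closure
library:

	/*
	 * continue_at(cl, fn, wq): once cl's outstanding references are
	 * dropped, run fn(cl) from workqueue wq; with a NULL wq, fn runs
	 * directly in whichever thread drops the last reference.
	 *
	 * closure_sync(cl): block the calling thread until all references
	 * on cl have been dropped.
	 */

The BCH_WRITE_SYNC case avoids continue_at() entirely, which is where the
saved context switch comes from: the journal flush and final completion
happen in the submitting thread.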
@@ -699,6 +715,7 @@ out:
 err:
 	keys->top = keys->keys;
 	op->error = ret;
+	op->flags |= BCH_WRITE_DONE;
 	goto out;
 }
 
@@ -778,9 +795,9 @@ unlock:
 			bch2_journal_flush_seq_async(&op->c->journal,
 						     op->journal_seq,
 						     &op->cl);
-			continue_at(&op->cl, bch2_write_done, index_update_wq(op));
+			continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
 		} else {
-			bch2_write_done(&op->cl);
+			__bch2_write_done(&op->cl);
 		}
 	}
 }
@@ -1271,10 +1288,10 @@ again:
 				      ? NULL : &op->cl,
 				      &wp);
 		if (unlikely(ret)) {
-			if (unlikely(ret != -EAGAIN))
-				goto err;
+			if (ret == -EAGAIN)
+				break;
 
-			break;
+			goto err;
 		}
 
 		EBUG_ON(!wp);
@@ -1283,13 +1300,25 @@ again:
 		ret = bch2_write_extent(op, wp, &bio);
 
 		bch2_alloc_sectors_done(c, wp);
-
-		if (ret < 0)
-			goto err;
-
-		if (!ret)
-			op->flags |= BCH_WRITE_DONE;
+err:
+		if (ret <= 0) {
+			if (!(op->flags & BCH_WRITE_SYNC)) {
+				spin_lock(&wp->writes_lock);
+				op->wp = wp;
+				list_add_tail(&op->wp_list, &wp->writes);
+				if (wp->state == WRITE_POINT_stopped)
+					__wp_update_state(wp, WRITE_POINT_waiting_io);
+				spin_unlock(&wp->writes_lock);
+			}
+
+			op->flags |= BCH_WRITE_DONE;
+
+			if (ret < 0) {
+				op->error = ret;
+				break;
+			}
+		}
 
 		bio->bi_end_io	= bch2_write_endio;
 		bio->bi_private	= &op->cl;
 		bio->bi_opf |= REQ_OP_WRITE;
@@ -1302,36 +1331,28 @@ again:
 		bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
 					  key_to_write);
 	} while (ret);
-out:
 	/*
-	 * If the write can't all be submitted at once, we generally want to
-	 * block synchronously as that signals backpressure to the caller.
+	 * Sync or no?
+	 *
+	 * If we're running asynchronously, we may still want to block
+	 * synchronously here if we weren't able to submit all of the IO at
+	 * once, as that signals backpressure to the caller.
 	 */
-	if (!(op->flags & BCH_WRITE_DONE) &&
-	    !(op->flags & BCH_WRITE_IN_WORKER)) {
+	if ((op->flags & BCH_WRITE_SYNC) ||
+	    (!(op->flags & BCH_WRITE_DONE) &&
+	     !(op->flags & BCH_WRITE_IN_WORKER))) {
 		closure_sync(&op->cl);
 		__bch2_write_index(op);
 
 		if (!(op->flags & BCH_WRITE_DONE))
 			goto again;
-		bch2_write_done(&op->cl);
+		bch2_write_done(op);
 	} else {
-		spin_lock(&wp->writes_lock);
-		op->wp = wp;
-		list_add_tail(&op->wp_list, &wp->writes);
-		if (wp->state == WRITE_POINT_stopped)
-			__wp_update_state(wp, WRITE_POINT_waiting_io);
-		spin_unlock(&wp->writes_lock);
-
 		continue_at(&op->cl, bch2_write_index, NULL);
 	}
 
 	memalloc_nofs_restore(nofs_flags);
 	return;
-err:
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-	goto out;
 }
 
 static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
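
Taken together with the previous hunk, the tail of __bch2_write() now has the
shape sketched below (a simplified paraphrase with elisions, not the literal
post-patch code):

	again:
	do {
		ret = bch2_write_extent(op, wp, &bio);	/* > 0 means more to submit */
		if (ret <= 0)
			op->flags |= BCH_WRITE_DONE;	/* no further IO will be issued */
		if (ret < 0) {
			op->error = ret;
			break;
		}
		/* ... submit the bio ... */
	} while (ret);

	if ((op->flags & BCH_WRITE_SYNC) ||
	    (!(op->flags & BCH_WRITE_DONE) &&
	     !(op->flags & BCH_WRITE_IN_WORKER))) {
		closure_sync(&op->cl);		/* wait for the data writes */
		__bch2_write_index(op);		/* btree update in this thread */

		if (!(op->flags & BCH_WRITE_DONE))
			goto again;
		bch2_write_done(op);
	} else {
		continue_at(&op->cl, bch2_write_index, NULL);
	}

Note the second condition: even without BCH_WRITE_SYNC, a write that couldn't
be fully submitted blocks here (unless we're already in a worker), which is
the backpressure the comment describes.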
@@ -1374,7 +1395,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 
 	__bch2_write_index(op);
 err:
-	bch2_write_done(&op->cl);
+	bch2_write_done(op);
 }
 
 /**
@@ -39,6 +39,7 @@ enum bch_write_flags {
 	__BCH_WRITE_WROTE_DATA_INLINE,
 	__BCH_WRITE_FROM_INTERNAL,
 	__BCH_WRITE_CHECK_ENOSPC,
+	__BCH_WRITE_SYNC,
 	__BCH_WRITE_MOVE,
 	__BCH_WRITE_IN_WORKER,
 	__BCH_WRITE_DONE,
@@ -55,6 +56,7 @@ enum bch_write_flags {
 #define BCH_WRITE_WROTE_DATA_INLINE	(1U << __BCH_WRITE_WROTE_DATA_INLINE)
 #define BCH_WRITE_FROM_INTERNAL		(1U << __BCH_WRITE_FROM_INTERNAL)
 #define BCH_WRITE_CHECK_ENOSPC		(1U << __BCH_WRITE_CHECK_ENOSPC)
+#define BCH_WRITE_SYNC			(1U << __BCH_WRITE_SYNC)
 #define BCH_WRITE_MOVE			(1U << __BCH_WRITE_MOVE)
 
 /* Internal: */
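
BCH_WRITE_SYNC sits above the /* Internal: */ divider, i.e. among the flags a
caller may set on the op before submitting it. Usage follows the standard
bitmask pattern; an illustration only, mirroring the dio hunk above:

	/* Opting a write into the synchronous path: */
	dio->op.flags |= BCH_WRITE_SYNC;

	/* Later, the write path tests it like any other flag: */
	if (op->flags & BCH_WRITE_SYNC)
		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);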