btrfs: remove btrfs_end_io_wq
All reads bio that go through btrfs_map_bio need to be completed in user context. And read I/Os are the most common and timing critical in almost any file system workloads. Embed a work_struct into struct btrfs_bio and use it to complete all read bios submitted through btrfs_map, using the REQ_META flag to decide which workqueue they are placed on. This removes the need for a separate 128 byte allocation (typically rounded up to 192 bytes by slab) for all reads with a size increase of 24 bytes for struct btrfs_bio. Future patches will reorganize struct btrfs_bio to make use of this extra space for writes as well. (All sizes are based a on typical 64-bit non-debug build) Reviewed-by: Qu Wenruo <wqu@suse.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
08a6f46434
commit
d7b9416fe5
@ -931,10 +931,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
sums += fs_info->csum_size * nr_sectors;
|
||||
|
||||
ASSERT(comp_bio->bi_iter.bi_size);
|
||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
|
||||
BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
|
@ -850,8 +850,8 @@ struct btrfs_fs_info {
|
||||
struct btrfs_workqueue *hipri_workers;
|
||||
struct btrfs_workqueue *delalloc_workers;
|
||||
struct btrfs_workqueue *flush_workers;
|
||||
struct btrfs_workqueue *endio_workers;
|
||||
struct btrfs_workqueue *endio_meta_workers;
|
||||
struct workqueue_struct *endio_workers;
|
||||
struct workqueue_struct *endio_meta_workers;
|
||||
struct workqueue_struct *endio_raid56_workers;
|
||||
struct workqueue_struct *rmw_workers;
|
||||
struct workqueue_struct *compressed_write_workers;
|
||||
|
@ -51,7 +51,6 @@
|
||||
BTRFS_SUPER_FLAG_METADUMP |\
|
||||
BTRFS_SUPER_FLAG_METADUMP_V2)
|
||||
|
||||
static void end_workqueue_fn(struct btrfs_work *work);
|
||||
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
|
||||
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
|
||||
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/*
|
||||
* btrfs_end_io_wq structs are used to do processing in task context when an IO
|
||||
* is complete. This is used during reads to verify checksums, and it is used
|
||||
* by writes to insert metadata for new file extents after IO is complete.
|
||||
*/
|
||||
struct btrfs_end_io_wq {
|
||||
struct bio *bio;
|
||||
bio_end_io_t *end_io;
|
||||
void *private;
|
||||
struct btrfs_fs_info *info;
|
||||
blk_status_t status;
|
||||
enum btrfs_wq_endio_type metadata;
|
||||
struct btrfs_work work;
|
||||
};
|
||||
|
||||
static struct kmem_cache *btrfs_end_io_wq_cache;
|
||||
|
||||
int __init btrfs_end_io_wq_init(void)
|
||||
{
|
||||
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
|
||||
sizeof(struct btrfs_end_io_wq),
|
||||
0,
|
||||
SLAB_MEM_SPREAD,
|
||||
NULL);
|
||||
if (!btrfs_end_io_wq_cache)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cold btrfs_end_io_wq_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(btrfs_end_io_wq_cache);
|
||||
}
|
||||
|
||||
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (fs_info->csum_shash)
|
||||
@ -740,48 +705,6 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void end_workqueue_bio(struct bio *bio)
|
||||
{
|
||||
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct btrfs_workqueue *wq;
|
||||
|
||||
fs_info = end_io_wq->info;
|
||||
end_io_wq->status = bio->bi_status;
|
||||
|
||||
if (end_io_wq->metadata)
|
||||
wq = fs_info->endio_meta_workers;
|
||||
else
|
||||
wq = fs_info->endio_workers;
|
||||
|
||||
btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
|
||||
btrfs_queue_work(wq, &end_io_wq->work);
|
||||
}
|
||||
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata)
|
||||
{
|
||||
struct btrfs_end_io_wq *end_io_wq;
|
||||
|
||||
if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
|
||||
if (!end_io_wq)
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
end_io_wq->private = bio->bi_private;
|
||||
end_io_wq->end_io = bio->bi_end_io;
|
||||
end_io_wq->info = info;
|
||||
end_io_wq->status = 0;
|
||||
end_io_wq->bio = bio;
|
||||
end_io_wq->metadata = metadata;
|
||||
|
||||
bio->bi_private = end_io_wq;
|
||||
bio->bi_end_io = end_workqueue_bio;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void run_one_async_start(struct btrfs_work *work)
|
||||
{
|
||||
struct async_submit_bio *async;
|
||||
@ -917,14 +840,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
|
||||
bio->bi_opf |= REQ_META;
|
||||
|
||||
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
|
||||
/*
|
||||
* called for a read, do the setup so that checksum validation
|
||||
* can happen in the async kernel threads
|
||||
*/
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
BTRFS_WQ_ENDIO_METADATA);
|
||||
if (!ret)
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (!ret)
|
||||
@ -1947,25 +1863,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
|
||||
return root;
|
||||
}
|
||||
|
||||
/*
|
||||
* called by the kthread helper functions to finally call the bio end_io
|
||||
* functions. This is where read checksum verification actually happens
|
||||
*/
|
||||
static void end_workqueue_fn(struct btrfs_work *work)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct btrfs_end_io_wq *end_io_wq;
|
||||
|
||||
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
|
||||
bio = end_io_wq->bio;
|
||||
|
||||
bio->bi_status = end_io_wq->status;
|
||||
bio->bi_private = end_io_wq->private;
|
||||
bio->bi_end_io = end_io_wq->end_io;
|
||||
bio_endio(bio);
|
||||
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
|
||||
}
|
||||
|
||||
static int cleaner_kthread(void *arg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = arg;
|
||||
@ -2272,7 +2169,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
||||
btrfs_destroy_workqueue(fs_info->delalloc_workers);
|
||||
btrfs_destroy_workqueue(fs_info->hipri_workers);
|
||||
btrfs_destroy_workqueue(fs_info->workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_workers);
|
||||
if (fs_info->endio_workers)
|
||||
destroy_workqueue(fs_info->endio_workers);
|
||||
if (fs_info->endio_raid56_workers)
|
||||
destroy_workqueue(fs_info->endio_raid56_workers);
|
||||
if (fs_info->rmw_workers)
|
||||
@ -2292,7 +2190,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
||||
* the queues used for metadata I/O, since tasks from those other work
|
||||
* queues can do metadata I/O operations.
|
||||
*/
|
||||
btrfs_destroy_workqueue(fs_info->endio_meta_workers);
|
||||
if (fs_info->endio_meta_workers)
|
||||
destroy_workqueue(fs_info->endio_meta_workers);
|
||||
}
|
||||
|
||||
static void free_root_extent_buffers(struct btrfs_root *root)
|
||||
@ -2471,15 +2370,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
||||
fs_info->fixup_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
|
||||
|
||||
/*
|
||||
* endios are largely parallel and should have a very
|
||||
* low idle thresh
|
||||
*/
|
||||
fs_info->endio_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
|
||||
alloc_workqueue("btrfs-endio", flags, max_active);
|
||||
fs_info->endio_meta_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
|
||||
max_active, 4);
|
||||
alloc_workqueue("btrfs-endio-meta", flags, max_active);
|
||||
fs_info->endio_raid56_workers =
|
||||
alloc_workqueue("btrfs-endio-raid56", flags, max_active);
|
||||
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
|
||||
|
@ -17,12 +17,6 @@
|
||||
*/
|
||||
#define BTRFS_BDEV_BLOCKSIZE (4096)
|
||||
|
||||
enum btrfs_wq_endio_type {
|
||||
BTRFS_WQ_ENDIO_DATA,
|
||||
BTRFS_WQ_ENDIO_METADATA,
|
||||
BTRFS_WQ_ENDIO_FREE_SPACE,
|
||||
};
|
||||
|
||||
static inline u64 btrfs_sb_offset(int mirror)
|
||||
{
|
||||
u64 start = SZ_16K;
|
||||
@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key);
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata);
|
||||
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start);
|
||||
@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
|
||||
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
||||
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
|
||||
int btrfs_init_root_free_objectid(struct btrfs_root *root);
|
||||
int __init btrfs_end_io_wq_init(void);
|
||||
void __cold btrfs_end_io_wq_exit(void);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
void btrfs_set_buffer_lockdep_class(u64 objectid,
|
||||
|
@ -2640,12 +2640,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
|
||||
return;
|
||||
}
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
btrfs_is_free_space_inode(BTRFS_I(inode)) ?
|
||||
BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Lookup bio sums does extra checks around whether we need to csum or
|
||||
* not, which is why we ignore skip_sum here.
|
||||
@ -7879,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
|
||||
|
||||
BUG_ON(bio_op(bio) == REQ_OP_WRITE);
|
||||
|
||||
if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
|
||||
return;
|
||||
|
||||
refcount_inc(&dip->refs);
|
||||
if (btrfs_map_bio(fs_info, bio, mirror_num))
|
||||
refcount_dec(&dip->refs);
|
||||
@ -7970,19 +7961,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_dio_private *dip = bio->bi_private;
|
||||
bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
|
||||
blk_status_t ret;
|
||||
|
||||
if (!write) {
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
|
||||
goto map;
|
||||
|
||||
if (write) {
|
||||
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
|
||||
/* Check btrfs_submit_data_write_bio() for async submit rules */
|
||||
if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
|
||||
return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
|
||||
@ -10314,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret) {
|
||||
btrfs_bio_free_csum(bbio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
atomic_inc(&priv->pending);
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
if (ret) {
|
||||
|
@ -1932,8 +1932,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
|
||||
btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
|
||||
@ -2702,13 +2700,9 @@ static int __init init_btrfs_fs(void)
|
||||
if (err)
|
||||
goto free_delayed_ref;
|
||||
|
||||
err = btrfs_end_io_wq_init();
|
||||
if (err)
|
||||
goto free_prelim_ref;
|
||||
|
||||
err = btrfs_interface_init();
|
||||
if (err)
|
||||
goto free_end_io_wq;
|
||||
goto free_prelim_ref;
|
||||
|
||||
btrfs_print_mod_info();
|
||||
|
||||
@ -2724,8 +2718,6 @@ static int __init init_btrfs_fs(void)
|
||||
|
||||
unregister_ioctl:
|
||||
btrfs_interface_exit();
|
||||
free_end_io_wq:
|
||||
btrfs_end_io_wq_exit();
|
||||
free_prelim_ref:
|
||||
btrfs_prelim_ref_exit();
|
||||
free_delayed_ref:
|
||||
@ -2763,7 +2755,6 @@ static void __exit exit_btrfs_fs(void)
|
||||
extent_state_cache_exit();
|
||||
extent_io_exit();
|
||||
btrfs_interface_exit();
|
||||
btrfs_end_io_wq_exit();
|
||||
unregister_filesystem(&btrfs_fs_type);
|
||||
btrfs_exit_sysfs();
|
||||
btrfs_cleanup_fs_uuids();
|
||||
|
@ -6616,11 +6616,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
|
||||
}
|
||||
|
||||
static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
|
||||
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
|
||||
{
|
||||
if (bioc->orig_bio->bi_opf & REQ_META)
|
||||
return bioc->fs_info->endio_meta_workers;
|
||||
return bioc->fs_info->endio_workers;
|
||||
}
|
||||
|
||||
static void btrfs_end_bio_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_bio *bbio =
|
||||
container_of(work, struct btrfs_bio, end_io_work);
|
||||
|
||||
bio_endio(&bbio->bio);
|
||||
}
|
||||
|
||||
static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
|
||||
{
|
||||
struct bio *orig_bio = bioc->orig_bio;
|
||||
struct btrfs_bio *bbio = btrfs_bio(orig_bio);
|
||||
|
||||
btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
|
||||
bbio->mirror_num = bioc->mirror_num;
|
||||
orig_bio->bi_private = bioc->private;
|
||||
orig_bio->bi_end_io = bioc->end_io;
|
||||
|
||||
@ -6632,7 +6648,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
|
||||
orig_bio->bi_status = BLK_STS_IOERR;
|
||||
else
|
||||
orig_bio->bi_status = BLK_STS_OK;
|
||||
bio_endio(orig_bio);
|
||||
|
||||
if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
|
||||
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
|
||||
queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
|
||||
} else {
|
||||
bio_endio(orig_bio);
|
||||
}
|
||||
|
||||
btrfs_put_bioc(bioc);
|
||||
}
|
||||
|
||||
@ -6664,7 +6687,7 @@ static void btrfs_end_bio(struct bio *bio)
|
||||
|
||||
btrfs_bio_counter_dec(bioc->fs_info);
|
||||
if (atomic_dec_and_test(&bioc->stripes_pending))
|
||||
btrfs_end_bioc(bioc);
|
||||
btrfs_end_bioc(bioc, true);
|
||||
}
|
||||
|
||||
static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
|
||||
@ -6762,7 +6785,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
|
||||
atomic_inc(&bioc->error);
|
||||
if (atomic_dec_and_test(&bioc->stripes_pending))
|
||||
btrfs_end_bioc(bioc);
|
||||
btrfs_end_bioc(bioc, false);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -371,6 +371,9 @@ struct btrfs_bio {
|
||||
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
||||
struct bvec_iter iter;
|
||||
|
||||
/* For read end I/O handling */
|
||||
struct work_struct end_io_work;
|
||||
|
||||
/*
|
||||
* This member must come last, bio_alloc_bioset will allocate enough
|
||||
* bytes for entire btrfs_bio but relies on bio being last.
|
||||
|
Loading…
Reference in New Issue
Block a user