for-5.18-rc4-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmJnGGIACgkQxWXV+ddt WDumDw//cE1NcawdnVkEaKr20PetHfzPyFSIIr17nedtnVvWYyOFF/0uJlHNhv8Z CZIfJ7fmH/pO5oWPXN84wKNfumDWNwc36QrvoXC67TrKUSiBN8BzL83HvAjGwYFH G+LfZXGnVbqq8F1iYkIsuH0Oo1x/N/LPM3s6iZy3O4l8s96u+J4GRnc8Tr0AH4MA zgz3fab8Ec378HTG9fvdAQNLxFEe0VatD6WrzILnmM8UgeQK7g73dqH9Ni9gz2DW 2GDlO6aevQ1G6dm2AJ0ItExnbHH7TfOThkG56Gdqrzb/d39GzrVpeob7QiorETus EWS1rXaeikUiD4Bzt/RszUNL80yMN1DjcN3QBkiDf3ShSDFteoHMPw3e6jcQCy1m Dxf5oditQqltuFNLeSiVbZEMw2kXqBP7RoPiirF9rdvrDNLHhAE9wu0kpSGSSvT7 Tyu9JyLw2axU6wGTi1GHAXurlW2ItRRyFAewWWul1lLkuz/6YXI4F/EHm3Mbh6Nh pMIFMNr4Oafdx+3Ful8ZA4PynirXub/xVDefcFBibz/PTGEnHG4ZVzRudmVnowh7 GP2pql1+Y/TFkXdD98V8GWD+E10JAmNCkQSoiggJooNWR28whukmDVX/HY8lGmWg DjxwGkte3SltUBWNOTGnO7546hMwOxOPZHENPh+gffYkeMeIxPI= =xDWz -----END PGP SIGNATURE----- Merge tag 'for-5.18-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: - direct IO fixes: - restore passing file offset to correctly calculate checksums when repairing on read and bio split happens - use correct bio when submitting IO on zoned filesystem - zoned mode fixes: - fix selection of device to correctly calculate device capabilities when allocating a new bio - use a dedicated lock for exclusion during relocation - fix leaked plug after failure syncing log - fix assertion during scrub and relocation * tag 'for-5.18-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: zoned: use dedicated lock for data relocation btrfs: fix assertion failure during scrub due to block group reallocation btrfs: fix direct I/O writes for split bios on zoned devices btrfs: fix direct I/O read repair for split bios btrfs: fix and document the zoned device choice in alloc_new_bio btrfs: fix leaked plug after failure syncing log on zoned filesystems
This commit is contained in:
commit
fd574a2f84
@ -1060,6 +1060,7 @@ struct btrfs_fs_info {
|
||||
*/
|
||||
spinlock_t relocation_bg_lock;
|
||||
u64 data_reloc_bg;
|
||||
struct mutex zoned_data_reloc_io_lock;
|
||||
|
||||
u64 nr_global_roots;
|
||||
|
||||
|
@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
|
||||
/* Commit dev_replace state and reserve 1 item for it. */
|
||||
/*
|
||||
* Commit dev_replace state and reserve 1 item for it.
|
||||
* This is crucial to ensure we won't miss copying extents for new block
|
||||
* groups that are allocated after we started the device replace, and
|
||||
* must be done after setting up the device replace state.
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
|
@ -3157,6 +3157,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
mutex_init(&fs_info->reloc_mutex);
|
||||
mutex_init(&fs_info->delalloc_root_mutex);
|
||||
mutex_init(&fs_info->zoned_meta_io_lock);
|
||||
mutex_init(&fs_info->zoned_data_reloc_io_lock);
|
||||
seqlock_init(&fs_info->profiles_lock);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
|
||||
|
@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||
|
||||
repair_bio = btrfs_bio_alloc(1);
|
||||
repair_bbio = btrfs_bio(repair_bio);
|
||||
repair_bbio->file_offset = start;
|
||||
repair_bio->bi_opf = REQ_OP_READ;
|
||||
repair_bio->bi_end_io = failed_bio->bi_end_io;
|
||||
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
|
||||
@ -3333,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode,
|
||||
ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
if (wbc) {
|
||||
struct block_device *bdev;
|
||||
/*
|
||||
* For Zone append we need the correct block_device that we are
|
||||
* going to write to set in the bio to be able to respect the
|
||||
* hardware limitation. Look it up here:
|
||||
*/
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
struct btrfs_device *dev;
|
||||
|
||||
bdev = fs_info->fs_devices->latest_dev->bdev;
|
||||
bio_set_dev(bio, bdev);
|
||||
wbc_init_bio(wbc, bio);
|
||||
}
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
struct btrfs_device *device;
|
||||
dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
|
||||
fs_info->sectorsize);
|
||||
if (IS_ERR(dev)) {
|
||||
ret = PTR_ERR(dev);
|
||||
goto error;
|
||||
}
|
||||
|
||||
device = btrfs_zoned_get_device(fs_info, disk_bytenr,
|
||||
fs_info->sectorsize);
|
||||
if (IS_ERR(device)) {
|
||||
ret = PTR_ERR(device);
|
||||
goto error;
|
||||
bio_set_dev(bio, dev->bdev);
|
||||
} else {
|
||||
/*
|
||||
* Otherwise pick the last added device to support
|
||||
* cgroup writeback. For multi-device file systems this
|
||||
* means blk-cgroup policies have to always be set on the
|
||||
* last added/replaced device. This is a bit odd but has
|
||||
* been like that for a long time.
|
||||
*/
|
||||
bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
|
||||
}
|
||||
|
||||
btrfs_bio(bio)->device = device;
|
||||
wbc_init_bio(wbc, bio);
|
||||
} else {
|
||||
ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
|
||||
}
|
||||
return 0;
|
||||
error:
|
||||
|
@ -7810,8 +7810,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
|
||||
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
const u64 orig_file_offset = dip->file_offset;
|
||||
u64 start = orig_file_offset;
|
||||
u32 bio_offset = 0;
|
||||
blk_status_t err = BLK_STS_OK;
|
||||
|
||||
@ -7821,6 +7819,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
|
||||
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
|
||||
pgoff = bvec.bv_offset;
|
||||
for (i = 0; i < nr_sectors; i++) {
|
||||
u64 start = bbio->file_offset + bio_offset;
|
||||
|
||||
ASSERT(pgoff < PAGE_SIZE);
|
||||
if (uptodate &&
|
||||
(!csum || !check_data_csum(inode, bbio,
|
||||
@ -7833,17 +7833,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
ASSERT((start - orig_file_offset) < UINT_MAX);
|
||||
ret = btrfs_repair_one_sector(inode,
|
||||
&bbio->bio,
|
||||
start - orig_file_offset,
|
||||
bvec.bv_page, pgoff,
|
||||
ret = btrfs_repair_one_sector(inode, &bbio->bio,
|
||||
bio_offset, bvec.bv_page, pgoff,
|
||||
start, bbio->mirror_num,
|
||||
submit_dio_repair_bio);
|
||||
if (ret)
|
||||
err = errno_to_blk_status(ret);
|
||||
}
|
||||
start += sectorsize;
|
||||
ASSERT(bio_offset + sectorsize > bio_offset);
|
||||
bio_offset += sectorsize;
|
||||
pgoff += sectorsize;
|
||||
@ -7870,6 +7866,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
|
||||
static void btrfs_end_dio_bio(struct bio *bio)
|
||||
{
|
||||
struct btrfs_dio_private *dip = bio->bi_private;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t err = bio->bi_status;
|
||||
|
||||
if (err)
|
||||
@ -7880,12 +7877,12 @@ static void btrfs_end_dio_bio(struct bio *bio)
|
||||
bio->bi_iter.bi_size, err);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ)
|
||||
err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err);
|
||||
err = btrfs_check_read_dio_bio(dip, bbio, !err);
|
||||
|
||||
if (err)
|
||||
dip->dio_bio->bi_status = err;
|
||||
|
||||
btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
|
||||
btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
|
||||
|
||||
bio_put(bio);
|
||||
btrfs_dio_private_put(dip);
|
||||
@ -8046,6 +8043,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
|
||||
bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
|
||||
bio->bi_private = dip;
|
||||
bio->bi_end_io = btrfs_end_dio_bio;
|
||||
btrfs_bio(bio)->file_offset = file_offset;
|
||||
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
status = extract_ordered_extent(BTRFS_I(inode), bio,
|
||||
|
@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||
if (!cache)
|
||||
goto skip;
|
||||
|
||||
ASSERT(cache->start <= chunk_offset);
|
||||
/*
|
||||
* We are using the commit root to search for device extents, so
|
||||
* that means we could have found a device extent item from a
|
||||
* block group that was deleted in the current transaction. The
|
||||
* logical start offset of the deleted block group, stored at
|
||||
* @chunk_offset, might be part of the logical address range of
|
||||
* a new block group (which uses different physical extents).
|
||||
* In this case btrfs_lookup_block_group() has returned the new
|
||||
* block group, and its start address is less than @chunk_offset.
|
||||
*
|
||||
* We skip such new block groups, because it's pointless to
|
||||
* process them, as we won't find their extents because we search
|
||||
* for them using the commit root of the extent tree. For a device
|
||||
* replace it's also fine to skip it, we won't miss copying them
|
||||
* to the target device because we have the write duplication
|
||||
* setup through the regular write path (by btrfs_map_block()),
|
||||
* and we have committed a transaction when we started the device
|
||||
* replace, right after setting up the device replace state.
|
||||
*/
|
||||
if (cache->start < chunk_offset) {
|
||||
btrfs_put_block_group(cache);
|
||||
goto skip;
|
||||
}
|
||||
|
||||
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
|
||||
spin_lock(&cache->lock);
|
||||
if (!cache->to_copy) {
|
||||
@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
up_write(&dev_replace->rwsem);
|
||||
|
||||
ASSERT(cache->start == chunk_offset);
|
||||
ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
|
||||
dev_extent_len);
|
||||
|
||||
|
@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_root->log_mutex);
|
||||
blk_finish_plug(&plug);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -328,6 +328,9 @@ struct btrfs_fs_devices {
|
||||
struct btrfs_bio {
|
||||
unsigned int mirror_num;
|
||||
|
||||
/* for direct I/O */
|
||||
u64 file_offset;
|
||||
|
||||
/* @device is for stripe IO submission. */
|
||||
struct btrfs_device *device;
|
||||
u8 *csum;
|
||||
|
@ -359,7 +359,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
|
||||
struct btrfs_root *root = inode->root;
|
||||
|
||||
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
|
||||
btrfs_inode_lock(&inode->vfs_inode, 0);
|
||||
mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
|
||||
}
|
||||
|
||||
static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
|
||||
@ -367,7 +367,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
|
||||
struct btrfs_root *root = inode->root;
|
||||
|
||||
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
|
||||
btrfs_inode_unlock(&inode->vfs_inode, 0);
|
||||
mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user