ext4: DAX iomap write support
Implement DAX writes using the new iomap infrastructure instead of
overloading the direct IO path.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
47e6935136
commit
776722e85d
@ -169,6 +169,41 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
return iov_iter_count(from);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
static ssize_t
|
||||
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
ssize_t ret;
|
||||
bool overwrite = false;
|
||||
|
||||
inode_lock(inode);
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
ret = file_remove_privs(iocb->ki_filp);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = file_update_time(iocb->ki_filp);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
|
||||
overwrite = true;
|
||||
downgrade_write(&inode->i_rwsem);
|
||||
}
|
||||
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
|
||||
out:
|
||||
if (!overwrite)
|
||||
inode_unlock(inode);
|
||||
else
|
||||
inode_unlock_shared(inode);
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static ssize_t
|
||||
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
@ -178,6 +213,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
int overwrite = 0;
|
||||
ssize_t ret;
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (IS_DAX(inode))
|
||||
return ext4_dax_write_iter(iocb, from);
|
||||
#endif
|
||||
|
||||
inode_lock(inode);
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
|
122
fs/ext4/inode.c
122
fs/ext4/inode.c
@ -3329,18 +3329,79 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
struct ext4_map_blocks map;
|
||||
int ret;
|
||||
|
||||
if (flags & IOMAP_WRITE)
|
||||
return -EIO;
|
||||
|
||||
if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
|
||||
return -ERANGE;
|
||||
|
||||
map.m_lblk = first_block;
|
||||
map.m_len = last_block - first_block + 1;
|
||||
|
||||
if (!(flags & IOMAP_WRITE)) {
|
||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||
if (ret < 0)
|
||||
} else {
|
||||
int dio_credits;
|
||||
handle_t *handle;
|
||||
int retries = 0;
|
||||
|
||||
/* Trim mapping request to maximum we can map at once for DIO */
|
||||
if (map.m_len > DIO_MAX_BLOCKS)
|
||||
map.m_len = DIO_MAX_BLOCKS;
|
||||
dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
|
||||
retry:
|
||||
/*
|
||||
* Either we allocate blocks and then we don't get unwritten
|
||||
* extent so we have reserved enough credits, or the blocks
|
||||
* are already allocated and unwritten and in that case
|
||||
* extent conversion fits in the credits as well.
|
||||
*/
|
||||
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
|
||||
dio_credits);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
ret = ext4_map_blocks(handle, inode, &map,
|
||||
EXT4_GET_BLOCKS_PRE_IO |
|
||||
EXT4_GET_BLOCKS_CREATE_ZERO);
|
||||
if (ret < 0) {
|
||||
ext4_journal_stop(handle);
|
||||
if (ret == -ENOSPC &&
|
||||
ext4_should_retry_alloc(inode->i_sb, &retries))
|
||||
goto retry;
|
||||
return ret;
|
||||
}
|
||||
/* For DAX writes we need to zero out unwritten extents */
|
||||
if (map.m_flags & EXT4_MAP_UNWRITTEN) {
|
||||
/*
|
||||
* We are protected by i_mmap_sem or i_rwsem so we know
|
||||
* block cannot go away from under us even though we
|
||||
* dropped i_data_sem. Convert extent to written and
|
||||
* write zeros there.
|
||||
*/
|
||||
ret = ext4_map_blocks(handle, inode, &map,
|
||||
EXT4_GET_BLOCKS_CONVERT |
|
||||
EXT4_GET_BLOCKS_CREATE_ZERO);
|
||||
if (ret < 0) {
|
||||
ext4_journal_stop(handle);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we added blocks beyond i_size we need to make sure they
|
||||
* will get truncated if we crash before updating i_size in
|
||||
* ext4_iomap_end().
|
||||
*/
|
||||
if (first_block + map.m_len >
|
||||
(inode->i_size + (1 << blkbits) - 1) >> blkbits) {
|
||||
int err;
|
||||
|
||||
err = ext4_orphan_add(handle, inode);
|
||||
if (err < 0) {
|
||||
ext4_journal_stop(handle);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
ext4_journal_stop(handle);
|
||||
}
|
||||
|
||||
iomap->flags = 0;
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
@ -3368,8 +3429,61 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
|
||||
ssize_t written, unsigned flags, struct iomap *iomap)
|
||||
{
|
||||
int ret = 0;
|
||||
handle_t *handle;
|
||||
int blkbits = inode->i_blkbits;
|
||||
bool truncate = false;
|
||||
|
||||
if (!(flags & IOMAP_WRITE))
|
||||
return 0;
|
||||
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
ret = PTR_ERR(handle);
|
||||
goto orphan_del;
|
||||
}
|
||||
if (ext4_update_inode_size(inode, offset + written))
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
/*
|
||||
* We may need to truncate allocated but not written blocks beyond EOF.
|
||||
*/
|
||||
if (iomap->offset + iomap->length >
|
||||
ALIGN(inode->i_size, 1 << blkbits)) {
|
||||
ext4_lblk_t written_blk, end_blk;
|
||||
|
||||
written_blk = (offset + written) >> blkbits;
|
||||
end_blk = (offset + length) >> blkbits;
|
||||
if (written_blk < end_blk && ext4_can_truncate(inode))
|
||||
truncate = true;
|
||||
}
|
||||
/*
|
||||
* Remove inode from orphan list if we were extending a inode and
|
||||
* everything went fine.
|
||||
*/
|
||||
if (!truncate && inode->i_nlink &&
|
||||
!list_empty(&EXT4_I(inode)->i_orphan))
|
||||
ext4_orphan_del(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
if (truncate) {
|
||||
ext4_truncate_failed_write(inode);
|
||||
orphan_del:
|
||||
/*
|
||||
* If truncate failed early the inode might still be on the
|
||||
* orphan list; we need to make sure the inode is removed from
|
||||
* the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct iomap_ops ext4_iomap_ops = {
|
||||
.iomap_begin = ext4_iomap_begin,
|
||||
.iomap_end = ext4_iomap_end,
|
||||
};
|
||||
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user