xfs: split direct I/O and DAX path
So far the DAX code overloaded the direct I/O code path. There is very little in common between the two, and untangling them allows us to clean up both variants.

As a side effect we also get separate trace points for both I/O types.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
fa8d972d05
commit
16d4d43595
@ -305,13 +305,11 @@ xfs_file_dio_aio_read(
|
|||||||
else
|
else
|
||||||
target = ip->i_mount->m_ddev_targp;
|
target = ip->i_mount->m_ddev_targp;
|
||||||
|
|
||||||
if (!IS_DAX(inode)) {
|
/* DIO must be aligned to device logical sector size */
|
||||||
/* DIO must be aligned to device logical sector size */
|
if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
|
||||||
if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
|
if (iocb->ki_pos == isize)
|
||||||
if (iocb->ki_pos == isize)
|
return 0;
|
||||||
return 0;
|
return -EINVAL;
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -360,13 +358,37 @@ xfs_file_dio_aio_read(
|
|||||||
}
|
}
|
||||||
|
|
||||||
data = *to;
|
data = *to;
|
||||||
if (IS_DAX(inode)) {
|
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
|
||||||
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
|
xfs_get_blocks_direct, NULL, NULL, 0);
|
||||||
NULL, 0);
|
if (ret > 0) {
|
||||||
} else {
|
iocb->ki_pos += ret;
|
||||||
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
|
iov_iter_advance(to, ret);
|
||||||
xfs_get_blocks_direct, NULL, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||||
|
|
||||||
|
file_accessed(iocb->ki_filp);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC ssize_t
|
||||||
|
xfs_file_dax_read(
|
||||||
|
struct kiocb *iocb,
|
||||||
|
struct iov_iter *to)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
struct xfs_inode *ip = XFS_I(inode);
|
||||||
|
struct iov_iter data = *to;
|
||||||
|
size_t count = iov_iter_count(to);
|
||||||
|
ssize_t ret = 0;
|
||||||
|
|
||||||
|
trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
|
||||||
|
|
||||||
|
if (!count)
|
||||||
|
return 0; /* skip atime */
|
||||||
|
|
||||||
|
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
|
||||||
|
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
|
||||||
if (ret > 0) {
|
if (ret > 0) {
|
||||||
iocb->ki_pos += ret;
|
iocb->ki_pos += ret;
|
||||||
iov_iter_advance(to, ret);
|
iov_iter_advance(to, ret);
|
||||||
@ -399,7 +421,8 @@ xfs_file_read_iter(
|
|||||||
struct kiocb *iocb,
|
struct kiocb *iocb,
|
||||||
struct iov_iter *to)
|
struct iov_iter *to)
|
||||||
{
|
{
|
||||||
struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
|
struct inode *inode = file_inode(iocb->ki_filp);
|
||||||
|
struct xfs_mount *mp = XFS_I(inode)->i_mount;
|
||||||
ssize_t ret = 0;
|
ssize_t ret = 0;
|
||||||
|
|
||||||
XFS_STATS_INC(mp, xs_read_calls);
|
XFS_STATS_INC(mp, xs_read_calls);
|
||||||
@ -407,7 +430,9 @@ xfs_file_read_iter(
|
|||||||
if (XFS_FORCED_SHUTDOWN(mp))
|
if (XFS_FORCED_SHUTDOWN(mp))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
if (iocb->ki_flags & IOCB_DIRECT)
|
if (IS_DAX(inode))
|
||||||
|
ret = xfs_file_dax_read(iocb, to);
|
||||||
|
else if (iocb->ki_flags & IOCB_DIRECT)
|
||||||
ret = xfs_file_dio_aio_read(iocb, to);
|
ret = xfs_file_dio_aio_read(iocb, to);
|
||||||
else
|
else
|
||||||
ret = xfs_file_buffered_aio_read(iocb, to);
|
ret = xfs_file_buffered_aio_read(iocb, to);
|
||||||
@ -755,8 +780,7 @@ xfs_file_dio_aio_write(
|
|||||||
mp->m_rtdev_targp : mp->m_ddev_targp;
|
mp->m_rtdev_targp : mp->m_ddev_targp;
|
||||||
|
|
||||||
/* DIO must be aligned to device logical sector size */
|
/* DIO must be aligned to device logical sector size */
|
||||||
if (!IS_DAX(inode) &&
|
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
|
||||||
((iocb->ki_pos | count) & target->bt_logical_sectormask))
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* "unaligned" here means not aligned to a filesystem block */
|
/* "unaligned" here means not aligned to a filesystem block */
|
||||||
@ -825,14 +849,9 @@ xfs_file_dio_aio_write(
|
|||||||
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
|
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
|
||||||
|
|
||||||
data = *from;
|
data = *from;
|
||||||
if (IS_DAX(inode)) {
|
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
|
||||||
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
|
xfs_get_blocks_direct, xfs_end_io_direct_write,
|
||||||
xfs_end_io_direct_write, 0);
|
NULL, DIO_ASYNC_EXTEND);
|
||||||
} else {
|
|
||||||
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
|
|
||||||
xfs_get_blocks_direct, xfs_end_io_direct_write,
|
|
||||||
NULL, DIO_ASYNC_EXTEND);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* see generic_file_direct_write() for why this is necessary */
|
/* see generic_file_direct_write() for why this is necessary */
|
||||||
if (mapping->nrpages) {
|
if (mapping->nrpages) {
|
||||||
@ -849,10 +868,70 @@ out:
|
|||||||
xfs_rw_iunlock(ip, iolock);
|
xfs_rw_iunlock(ip, iolock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No fallback to buffered IO on errors for XFS. DAX can result in
|
* No fallback to buffered IO on errors for XFS, direct IO will either
|
||||||
* partial writes, but direct IO will either complete fully or fail.
|
* complete fully or fail.
|
||||||
*/
|
*/
|
||||||
ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
|
ASSERT(ret < 0 || ret == count);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC ssize_t
|
||||||
|
xfs_file_dax_write(
|
||||||
|
struct kiocb *iocb,
|
||||||
|
struct iov_iter *from)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
struct xfs_inode *ip = XFS_I(inode);
|
||||||
|
struct xfs_mount *mp = ip->i_mount;
|
||||||
|
ssize_t ret = 0;
|
||||||
|
int unaligned_io = 0;
|
||||||
|
int iolock;
|
||||||
|
struct iov_iter data;
|
||||||
|
|
||||||
|
/* "unaligned" here means not aligned to a filesystem block */
|
||||||
|
if ((iocb->ki_pos & mp->m_blockmask) ||
|
||||||
|
((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
|
||||||
|
unaligned_io = 1;
|
||||||
|
iolock = XFS_IOLOCK_EXCL;
|
||||||
|
} else if (mapping->nrpages) {
|
||||||
|
iolock = XFS_IOLOCK_EXCL;
|
||||||
|
} else {
|
||||||
|
iolock = XFS_IOLOCK_SHARED;
|
||||||
|
}
|
||||||
|
xfs_rw_ilock(ip, iolock);
|
||||||
|
|
||||||
|
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Yes, even DAX files can have page cache attached to them: A zeroed
|
||||||
|
* page is inserted into the pagecache when we have to serve a write
|
||||||
|
* fault on a hole. It should never be dirtied and can simply be
|
||||||
|
* dropped from the pagecache once we get real data for the page.
|
||||||
|
*/
|
||||||
|
if (mapping->nrpages) {
|
||||||
|
ret = invalidate_inode_pages2(mapping);
|
||||||
|
WARN_ON_ONCE(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
|
||||||
|
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
|
||||||
|
iolock = XFS_IOLOCK_SHARED;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
|
||||||
|
|
||||||
|
data = *from;
|
||||||
|
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
|
||||||
|
xfs_end_io_direct_write, 0);
|
||||||
|
if (ret > 0) {
|
||||||
|
iocb->ki_pos += ret;
|
||||||
|
iov_iter_advance(from, ret);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
xfs_rw_iunlock(ip, iolock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -934,7 +1013,9 @@ xfs_file_write_iter(
|
|||||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
|
if (IS_DAX(inode))
|
||||||
|
ret = xfs_file_dax_write(iocb, from);
|
||||||
|
else if (iocb->ki_flags & IOCB_DIRECT)
|
||||||
ret = xfs_file_dio_aio_write(iocb, from);
|
ret = xfs_file_dio_aio_write(iocb, from);
|
||||||
else
|
else
|
||||||
ret = xfs_file_buffered_aio_write(iocb, from);
|
ret = xfs_file_buffered_aio_write(iocb, from);
|
||||||
|
@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \
|
|||||||
TP_ARGS(ip, count, offset))
|
TP_ARGS(ip, count, offset))
|
||||||
DEFINE_RW_EVENT(xfs_file_buffered_read);
|
DEFINE_RW_EVENT(xfs_file_buffered_read);
|
||||||
DEFINE_RW_EVENT(xfs_file_direct_read);
|
DEFINE_RW_EVENT(xfs_file_direct_read);
|
||||||
|
DEFINE_RW_EVENT(xfs_file_dax_read);
|
||||||
DEFINE_RW_EVENT(xfs_file_buffered_write);
|
DEFINE_RW_EVENT(xfs_file_buffered_write);
|
||||||
DEFINE_RW_EVENT(xfs_file_direct_write);
|
DEFINE_RW_EVENT(xfs_file_direct_write);
|
||||||
|
DEFINE_RW_EVENT(xfs_file_dax_write);
|
||||||
DEFINE_RW_EVENT(xfs_file_splice_read);
|
DEFINE_RW_EVENT(xfs_file_splice_read);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(xfs_page_class,
|
DECLARE_EVENT_CLASS(xfs_page_class,
|
||||||
|
Loading…
Reference in New Issue
Block a user