fs: allow short direct-io reads to be completed via buffered IO
This is similar to what already happens in the write case. If we have a short read while doing O_DIRECT, instead of just returning, fallthrough and try to read the rest via buffered IO. BTRFS needs this because if we encounter a compressed or inline extent during DIO, we need to fallback on buffered. If the extent is compressed we need to read the entire thing into memory and de-compress it into the users pages. I have tested this with fsx and everything works great. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
3fd0a5585e
commit
66f998f611
36
mm/filemap.c
36
mm/filemap.c
@ -1263,7 +1263,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
{
|
||||
struct file *filp = iocb->ki_filp;
|
||||
ssize_t retval;
|
||||
unsigned long seg;
|
||||
unsigned long seg = 0;
|
||||
size_t count;
|
||||
loff_t *ppos = &iocb->ki_pos;
|
||||
|
||||
@ -1290,21 +1290,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
retval = mapping->a_ops->direct_IO(READ, iocb,
|
||||
iov, pos, nr_segs);
|
||||
}
|
||||
if (retval > 0)
|
||||
if (retval > 0) {
|
||||
*ppos = pos + retval;
|
||||
if (retval) {
|
||||
count -= retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Btrfs can have a short DIO read if we encounter
|
||||
* compressed extents, so if there was an error, or if
|
||||
* we've already read everything we wanted to, or if
|
||||
* there was a short read because we hit EOF, go ahead
|
||||
* and return. Otherwise fallthrough to buffered io for
|
||||
* the rest of the read.
|
||||
*/
|
||||
if (retval < 0 || !count || *ppos >= size) {
|
||||
file_accessed(filp);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
count = retval;
|
||||
for (seg = 0; seg < nr_segs; seg++) {
|
||||
read_descriptor_t desc;
|
||||
loff_t offset = 0;
|
||||
|
||||
/*
|
||||
* If we did a short DIO read we need to skip the section of the
|
||||
* iov that we've already read data into.
|
||||
*/
|
||||
if (count) {
|
||||
if (count > iov[seg].iov_len) {
|
||||
count -= iov[seg].iov_len;
|
||||
continue;
|
||||
}
|
||||
offset = count;
|
||||
count = 0;
|
||||
}
|
||||
|
||||
desc.written = 0;
|
||||
desc.arg.buf = iov[seg].iov_base;
|
||||
desc.count = iov[seg].iov_len;
|
||||
desc.arg.buf = iov[seg].iov_base + offset;
|
||||
desc.count = iov[seg].iov_len - offset;
|
||||
if (desc.count == 0)
|
||||
continue;
|
||||
desc.error = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user