fs: allow short direct-io reads to be completed via buffered IO

This is similar to what already happens in the write case.  If we have a short
read while doing O_DIRECT, instead of just returning, fall through and try to
read the rest via buffered IO.  BTRFS needs this because if we encounter a
compressed or inline extent during DIO, we need to fall back to buffered IO.
If the extent is compressed we need to read the entire thing into memory and
decompress it into the user's pages.  I have tested this with fsx and
everything works great.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Josef Bacik 2010-05-23 11:00:54 -04:00 committed by Chris Mason
parent 3fd0a5585e
commit 66f998f611

View File

@ -1263,7 +1263,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
{ {
struct file *filp = iocb->ki_filp; struct file *filp = iocb->ki_filp;
ssize_t retval; ssize_t retval;
unsigned long seg; unsigned long seg = 0;
size_t count; size_t count;
loff_t *ppos = &iocb->ki_pos; loff_t *ppos = &iocb->ki_pos;
@ -1290,21 +1290,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = mapping->a_ops->direct_IO(READ, iocb, retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs); iov, pos, nr_segs);
} }
if (retval > 0) if (retval > 0) {
*ppos = pos + retval; *ppos = pos + retval;
if (retval) { count -= retval;
}
/*
* Btrfs can have a short DIO read if we encounter
* compressed extents, so if there was an error, or if
* we've already read everything we wanted to, or if
* there was a short read because we hit EOF, go ahead
* and return. Otherwise fallthrough to buffered io for
* the rest of the read.
*/
if (retval < 0 || !count || *ppos >= size) {
file_accessed(filp); file_accessed(filp);
goto out; goto out;
} }
} }
} }
count = retval;
for (seg = 0; seg < nr_segs; seg++) { for (seg = 0; seg < nr_segs; seg++) {
read_descriptor_t desc; read_descriptor_t desc;
loff_t offset = 0;
/*
* If we did a short DIO read we need to skip the section of the
* iov that we've already read data into.
*/
if (count) {
if (count > iov[seg].iov_len) {
count -= iov[seg].iov_len;
continue;
}
offset = count;
count = 0;
}
desc.written = 0; desc.written = 0;
desc.arg.buf = iov[seg].iov_base; desc.arg.buf = iov[seg].iov_base + offset;
desc.count = iov[seg].iov_len; desc.count = iov[seg].iov_len - offset;
if (desc.count == 0) if (desc.count == 0)
continue; continue;
desc.error = 0; desc.error = 0;