f4b896c213
The iomap_iter struct provides a convenient way to package up and maintain all the arguments to the various mapping and operation functions. It is operated on using the iomap_iter() function that is called in a loop until the whole range has been processed. Compared to the existing iomap_apply() function this avoids an indirect call for each iteration. For now iomap_iter() calls back into the existing ->iomap_begin and ->iomap_end methods, but in the future this could be further optimized to avoid indirect calls entirely. Based on an earlier patch from Matthew Wilcox <willy@infradead.org>. Signed-off-by: Christoph Hellwig <hch@lst.de> [djwong: add to apply.c to preserve git history of iomap loop control] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
172 lines
5.5 KiB
C
172 lines
5.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2010 Red Hat, Inc.
|
|
* Copyright (c) 2016-2021 Christoph Hellwig.
|
|
*/
|
|
#include <linux/module.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/iomap.h>
|
|
#include "trace.h"
|
|
|
|
/*
|
|
* Execute a iomap write on a segment of the mapping that spans a
|
|
* contiguous range of pages that have identical block mapping state.
|
|
*
|
|
* This avoids the need to map pages individually, do individual allocations
|
|
* for each page and most importantly avoid the need for filesystem specific
|
|
* locking per page. Instead, all the operations are amortised over the entire
|
|
* range of pages. It is assumed that the filesystems will lock whatever
|
|
* resources they require in the iomap_begin call, and release them in the
|
|
* iomap_end call.
|
|
*/
|
|
loff_t
|
|
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
|
|
const struct iomap_ops *ops, void *data, iomap_actor_t actor)
|
|
{
|
|
struct iomap iomap = { .type = IOMAP_HOLE };
|
|
struct iomap srcmap = { .type = IOMAP_HOLE };
|
|
loff_t written = 0, ret;
|
|
u64 end;
|
|
|
|
trace_iomap_apply(inode, pos, length, flags, ops, actor, _RET_IP_);
|
|
|
|
/*
|
|
* Need to map a range from start position for length bytes. This can
|
|
* span multiple pages - it is only guaranteed to return a range of a
|
|
* single type of pages (e.g. all into a hole, all mapped or all
|
|
* unwritten). Failure at this point has nothing to undo.
|
|
*
|
|
* If allocation is required for this range, reserve the space now so
|
|
* that the allocation is guaranteed to succeed later on. Once we copy
|
|
* the data into the page cache pages, then we cannot fail otherwise we
|
|
* expose transient stale data. If the reserve fails, we can safely
|
|
* back out at this point as there is nothing to undo.
|
|
*/
|
|
ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
|
|
if (ret)
|
|
return ret;
|
|
if (WARN_ON(iomap.offset > pos)) {
|
|
written = -EIO;
|
|
goto out;
|
|
}
|
|
if (WARN_ON(iomap.length == 0)) {
|
|
written = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
trace_iomap_apply_dstmap(inode, &iomap);
|
|
if (srcmap.type != IOMAP_HOLE)
|
|
trace_iomap_apply_srcmap(inode, &srcmap);
|
|
|
|
/*
|
|
* Cut down the length to the one actually provided by the filesystem,
|
|
* as it might not be able to give us the whole size that we requested.
|
|
*/
|
|
end = iomap.offset + iomap.length;
|
|
if (srcmap.type != IOMAP_HOLE)
|
|
end = min(end, srcmap.offset + srcmap.length);
|
|
if (pos + length > end)
|
|
length = end - pos;
|
|
|
|
/*
|
|
* Now that we have guaranteed that the space allocation will succeed,
|
|
* we can do the copy-in page by page without having to worry about
|
|
* failures exposing transient data.
|
|
*
|
|
* To support COW operations, we read in data for partially blocks from
|
|
* the srcmap if the file system filled it in. In that case we the
|
|
* length needs to be limited to the earlier of the ends of the iomaps.
|
|
* If the file system did not provide a srcmap we pass in the normal
|
|
* iomap into the actors so that they don't need to have special
|
|
* handling for the two cases.
|
|
*/
|
|
written = actor(inode, pos, length, data, &iomap,
|
|
srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
|
|
|
|
out:
|
|
/*
|
|
* Now the data has been copied, commit the range we've copied. This
|
|
* should not fail unless the filesystem has had a fatal error.
|
|
*/
|
|
if (ops->iomap_end) {
|
|
ret = ops->iomap_end(inode, pos, length,
|
|
written > 0 ? written : 0,
|
|
flags, &iomap);
|
|
}
|
|
|
|
return written ? written : ret;
|
|
}
|
|
|
|
static inline int iomap_iter_advance(struct iomap_iter *iter)
|
|
{
|
|
/* handle the previous iteration (if any) */
|
|
if (iter->iomap.length) {
|
|
if (iter->processed <= 0)
|
|
return iter->processed;
|
|
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
|
|
return -EIO;
|
|
iter->pos += iter->processed;
|
|
iter->len -= iter->processed;
|
|
if (!iter->len)
|
|
return 0;
|
|
}
|
|
|
|
/* clear the state for the next iteration */
|
|
iter->processed = 0;
|
|
memset(&iter->iomap, 0, sizeof(iter->iomap));
|
|
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
|
|
return 1;
|
|
}
|
|
|
|
static inline void iomap_iter_done(struct iomap_iter *iter)
|
|
{
|
|
WARN_ON_ONCE(iter->iomap.offset > iter->pos);
|
|
WARN_ON_ONCE(iter->iomap.length == 0);
|
|
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
|
|
|
|
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
|
|
if (iter->srcmap.type != IOMAP_HOLE)
|
|
trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
|
|
}
|
|
|
|
/**
|
|
* iomap_iter - iterate over a ranges in a file
|
|
* @iter: iteration structue
|
|
* @ops: iomap ops provided by the file system
|
|
*
|
|
* Iterate over filesystem-provided space mappings for the provided file range.
|
|
*
|
|
* This function handles cleanup of resources acquired for iteration when the
|
|
* filesystem indicates there are no more space mappings, which means that this
|
|
* function must be called in a loop that continues as long it returns a
|
|
* positive value. If 0 or a negative value is returned, the caller must not
|
|
* return to the loop body. Within a loop body, there are two ways to break out
|
|
* of the loop body: leave @iter.processed unchanged, or set it to a negative
|
|
* errno.
|
|
*/
|
|
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
|
|
{
|
|
int ret;
|
|
|
|
if (iter->iomap.length && ops->iomap_end) {
|
|
ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
|
|
iter->processed > 0 ? iter->processed : 0,
|
|
iter->flags, &iter->iomap);
|
|
if (ret < 0 && !iter->processed)
|
|
return ret;
|
|
}
|
|
|
|
trace_iomap_iter(iter, ops, _RET_IP_);
|
|
ret = iomap_iter_advance(iter);
|
|
if (ret <= 0)
|
|
return ret;
|
|
|
|
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
|
|
&iter->iomap, &iter->srcmap);
|
|
if (ret < 0)
|
|
return ret;
|
|
iomap_iter_done(iter);
|
|
return 1;
|
|
}
|