Merge branch 'xfs-writepage-rework-4.6' into for-next
This commit is contained in:
commit
3d93ec0364
@ -40,6 +40,17 @@
|
|||||||
#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
|
#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
|
||||||
#define XFS_DIO_FLAG_APPEND (1 << 1)
|
#define XFS_DIO_FLAG_APPEND (1 << 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* structure owned by writepages passed to individual writepage calls
|
||||||
|
*/
|
||||||
|
struct xfs_writepage_ctx {
|
||||||
|
struct xfs_bmbt_irec imap;
|
||||||
|
bool imap_valid;
|
||||||
|
unsigned int io_type;
|
||||||
|
struct xfs_ioend *ioend;
|
||||||
|
sector_t last_block;
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
xfs_count_page_state(
|
xfs_count_page_state(
|
||||||
struct page *page,
|
struct page *page,
|
||||||
@ -271,7 +282,7 @@ xfs_alloc_ioend(
|
|||||||
*/
|
*/
|
||||||
atomic_set(&ioend->io_remaining, 1);
|
atomic_set(&ioend->io_remaining, 1);
|
||||||
ioend->io_error = 0;
|
ioend->io_error = 0;
|
||||||
ioend->io_list = NULL;
|
INIT_LIST_HEAD(&ioend->io_list);
|
||||||
ioend->io_type = type;
|
ioend->io_type = type;
|
||||||
ioend->io_inode = inode;
|
ioend->io_inode = inode;
|
||||||
ioend->io_buffer_head = NULL;
|
ioend->io_buffer_head = NULL;
|
||||||
@ -289,8 +300,7 @@ xfs_map_blocks(
|
|||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
loff_t offset,
|
loff_t offset,
|
||||||
struct xfs_bmbt_irec *imap,
|
struct xfs_bmbt_irec *imap,
|
||||||
int type,
|
int type)
|
||||||
int nonblocking)
|
|
||||||
{
|
{
|
||||||
struct xfs_inode *ip = XFS_I(inode);
|
struct xfs_inode *ip = XFS_I(inode);
|
||||||
struct xfs_mount *mp = ip->i_mount;
|
struct xfs_mount *mp = ip->i_mount;
|
||||||
@ -306,12 +316,7 @@ xfs_map_blocks(
|
|||||||
if (type == XFS_IO_UNWRITTEN)
|
if (type == XFS_IO_UNWRITTEN)
|
||||||
bmapi_flags |= XFS_BMAPI_IGSTATE;
|
bmapi_flags |= XFS_BMAPI_IGSTATE;
|
||||||
|
|
||||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
|
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||||
if (nonblocking)
|
|
||||||
return -EAGAIN;
|
|
||||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||||
(ip->i_df.if_flags & XFS_IFEXTENTS));
|
(ip->i_df.if_flags & XFS_IFEXTENTS));
|
||||||
ASSERT(offset <= mp->m_super->s_maxbytes);
|
ASSERT(offset <= mp->m_super->s_maxbytes);
|
||||||
@ -347,7 +352,7 @@ xfs_map_blocks(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC int
|
STATIC bool
|
||||||
xfs_imap_valid(
|
xfs_imap_valid(
|
||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
struct xfs_bmbt_irec *imap,
|
struct xfs_bmbt_irec *imap,
|
||||||
@ -420,8 +425,7 @@ xfs_start_buffer_writeback(
|
|||||||
STATIC void
|
STATIC void
|
||||||
xfs_start_page_writeback(
|
xfs_start_page_writeback(
|
||||||
struct page *page,
|
struct page *page,
|
||||||
int clear_dirty,
|
int clear_dirty)
|
||||||
int buffers)
|
|
||||||
{
|
{
|
||||||
ASSERT(PageLocked(page));
|
ASSERT(PageLocked(page));
|
||||||
ASSERT(!PageWriteback(page));
|
ASSERT(!PageWriteback(page));
|
||||||
@ -440,10 +444,6 @@ xfs_start_page_writeback(
|
|||||||
set_page_writeback_keepwrite(page);
|
set_page_writeback_keepwrite(page);
|
||||||
|
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
|
|
||||||
/* If no buffers on the page are to be written, finish it here */
|
|
||||||
if (!buffers)
|
|
||||||
end_page_writeback(page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
|
static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
|
||||||
@ -452,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Submit all of the bios for all of the ioends we have saved up, covering the
|
* Submit all of the bios for an ioend. We are only passed a single ioend at a
|
||||||
* initial writepage page and also any probed pages.
|
* time; the caller is responsible for chaining prior to submission.
|
||||||
*
|
|
||||||
* Because we may have multiple ioends spanning a page, we need to start
|
|
||||||
* writeback on all the buffers before we submit them for I/O. If we mark the
|
|
||||||
* buffers as we got, then we can end up with a page that only has buffers
|
|
||||||
* marked async write and I/O completion can occur before we mark the other
|
|
||||||
* buffers async write.
|
|
||||||
*
|
|
||||||
* The end result of this is that we trip a bug in end_page_writeback() because
|
|
||||||
* we call it twice for the one page as the code in end_buffer_async_write()
|
|
||||||
* assumes that all buffers on the page are started at the same time.
|
|
||||||
*
|
|
||||||
* The fix is two passes across the ioend list - one to start writeback on the
|
|
||||||
* buffer_heads, and then submit them for I/O on the second pass.
|
|
||||||
*
|
*
|
||||||
* If @fail is non-zero, it means that we have a situation where some part of
|
* If @status is non-zero, it means that we have a situation where some part of
|
||||||
* the submission process has failed after we have marked pages for writeback
|
* the submission process has failed after we have marked pages for writeback
|
||||||
* and unlocked them. In this situation, we need to fail the ioend chain rather
|
* and unlocked them. In this situation, we need to fail the ioend chain rather
|
||||||
* than submit it to IO. This typically only happens on a filesystem shutdown.
|
* than submit it to IO. This typically only happens on a filesystem shutdown.
|
||||||
*/
|
*/
|
||||||
STATIC void
|
STATIC int
|
||||||
xfs_submit_ioend(
|
xfs_submit_ioend(
|
||||||
struct writeback_control *wbc,
|
struct writeback_control *wbc,
|
||||||
xfs_ioend_t *ioend,
|
xfs_ioend_t *ioend,
|
||||||
int fail)
|
int status)
|
||||||
{
|
{
|
||||||
xfs_ioend_t *head = ioend;
|
|
||||||
xfs_ioend_t *next;
|
|
||||||
struct buffer_head *bh;
|
struct buffer_head *bh;
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
sector_t lastblock = 0;
|
sector_t lastblock = 0;
|
||||||
|
|
||||||
/* Pass 1 - start writeback */
|
/* Reserve log space if we might write beyond the on-disk inode size. */
|
||||||
do {
|
if (!status &&
|
||||||
next = ioend->io_list;
|
ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
|
||||||
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
|
status = xfs_setfilesize_trans_alloc(ioend);
|
||||||
xfs_start_buffer_writeback(bh);
|
/*
|
||||||
} while ((ioend = next) != NULL);
|
* If we are failing the IO now, just mark the ioend with an
|
||||||
|
* error and finish it. This will run IO completion immediately
|
||||||
/* Pass 2 - submit I/O */
|
* as there is only one reference to the ioend at this point in
|
||||||
ioend = head;
|
* time.
|
||||||
do {
|
*/
|
||||||
next = ioend->io_list;
|
if (status) {
|
||||||
bio = NULL;
|
ioend->io_error = status;
|
||||||
|
|
||||||
/*
|
|
||||||
* If we are failing the IO now, just mark the ioend with an
|
|
||||||
* error and finish it. This will run IO completion immediately
|
|
||||||
* as there is only one reference to the ioend at this point in
|
|
||||||
* time.
|
|
||||||
*/
|
|
||||||
if (fail) {
|
|
||||||
ioend->io_error = fail;
|
|
||||||
xfs_finish_ioend(ioend);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
|
|
||||||
|
|
||||||
if (!bio) {
|
|
||||||
retry:
|
|
||||||
bio = xfs_alloc_ioend_bio(bh);
|
|
||||||
} else if (bh->b_blocknr != lastblock + 1) {
|
|
||||||
xfs_submit_ioend_bio(wbc, ioend, bio);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
|
|
||||||
xfs_submit_ioend_bio(wbc, ioend, bio);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
|
|
||||||
lastblock = bh->b_blocknr;
|
|
||||||
}
|
|
||||||
if (bio)
|
|
||||||
xfs_submit_ioend_bio(wbc, ioend, bio);
|
|
||||||
xfs_finish_ioend(ioend);
|
xfs_finish_ioend(ioend);
|
||||||
} while ((ioend = next) != NULL);
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
bio = NULL;
|
||||||
* Cancel submission of all buffer_heads so far in this endio.
|
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
|
||||||
* Toss the endio too. Only ever called for the initial page
|
|
||||||
* in a writepage request, so only ever one page.
|
|
||||||
*/
|
|
||||||
STATIC void
|
|
||||||
xfs_cancel_ioend(
|
|
||||||
xfs_ioend_t *ioend)
|
|
||||||
{
|
|
||||||
xfs_ioend_t *next;
|
|
||||||
struct buffer_head *bh, *next_bh;
|
|
||||||
|
|
||||||
do {
|
if (!bio) {
|
||||||
next = ioend->io_list;
|
retry:
|
||||||
bh = ioend->io_buffer_head;
|
bio = xfs_alloc_ioend_bio(bh);
|
||||||
do {
|
} else if (bh->b_blocknr != lastblock + 1) {
|
||||||
next_bh = bh->b_private;
|
xfs_submit_ioend_bio(wbc, ioend, bio);
|
||||||
clear_buffer_async_write(bh);
|
goto retry;
|
||||||
/*
|
}
|
||||||
* The unwritten flag is cleared when added to the
|
|
||||||
* ioend. We're not submitting for I/O so mark the
|
|
||||||
* buffer unwritten again for next time around.
|
|
||||||
*/
|
|
||||||
if (ioend->io_type == XFS_IO_UNWRITTEN)
|
|
||||||
set_buffer_unwritten(bh);
|
|
||||||
unlock_buffer(bh);
|
|
||||||
} while ((bh = next_bh) != NULL);
|
|
||||||
|
|
||||||
mempool_free(ioend, xfs_ioend_pool);
|
if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
|
||||||
} while ((ioend = next) != NULL);
|
xfs_submit_ioend_bio(wbc, ioend, bio);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
lastblock = bh->b_blocknr;
|
||||||
|
}
|
||||||
|
if (bio)
|
||||||
|
xfs_submit_ioend_bio(wbc, ioend, bio);
|
||||||
|
xfs_finish_ioend(ioend);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test to see if we've been building up a completion structure for
|
* Test to see if we've been building up a completion structure for
|
||||||
* earlier buffers -- if so, we try to append to this ioend if we
|
* earlier buffers -- if so, we try to append to this ioend if we
|
||||||
* can, otherwise we finish off any current ioend and start another.
|
* can, otherwise we finish off any current ioend and start another.
|
||||||
* Return true if we've finished the given ioend.
|
* Return the ioend we finished off so that the caller can submit it
|
||||||
|
* once it has finished processing the dirty page.
|
||||||
*/
|
*/
|
||||||
STATIC void
|
STATIC void
|
||||||
xfs_add_to_ioend(
|
xfs_add_to_ioend(
|
||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
struct buffer_head *bh,
|
struct buffer_head *bh,
|
||||||
xfs_off_t offset,
|
xfs_off_t offset,
|
||||||
unsigned int type,
|
struct xfs_writepage_ctx *wpc,
|
||||||
xfs_ioend_t **result,
|
struct list_head *iolist)
|
||||||
int need_ioend)
|
|
||||||
{
|
{
|
||||||
xfs_ioend_t *ioend = *result;
|
if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
|
||||||
|
bh->b_blocknr != wpc->last_block + 1 ||
|
||||||
|
offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
|
||||||
|
struct xfs_ioend *new;
|
||||||
|
|
||||||
if (!ioend || need_ioend || type != ioend->io_type) {
|
if (wpc->ioend)
|
||||||
xfs_ioend_t *previous = *result;
|
list_add(&wpc->ioend->io_list, iolist);
|
||||||
|
|
||||||
ioend = xfs_alloc_ioend(inode, type);
|
new = xfs_alloc_ioend(inode, wpc->io_type);
|
||||||
ioend->io_offset = offset;
|
new->io_offset = offset;
|
||||||
ioend->io_buffer_head = bh;
|
new->io_buffer_head = bh;
|
||||||
ioend->io_buffer_tail = bh;
|
new->io_buffer_tail = bh;
|
||||||
if (previous)
|
wpc->ioend = new;
|
||||||
previous->io_list = ioend;
|
|
||||||
*result = ioend;
|
|
||||||
} else {
|
} else {
|
||||||
ioend->io_buffer_tail->b_private = bh;
|
wpc->ioend->io_buffer_tail->b_private = bh;
|
||||||
ioend->io_buffer_tail = bh;
|
wpc->ioend->io_buffer_tail = bh;
|
||||||
}
|
}
|
||||||
|
|
||||||
bh->b_private = NULL;
|
bh->b_private = NULL;
|
||||||
ioend->io_size += bh->b_size;
|
wpc->ioend->io_size += bh->b_size;
|
||||||
|
wpc->last_block = bh->b_blocknr;
|
||||||
|
xfs_start_buffer_writeback(bh);
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC void
|
STATIC void
|
||||||
@ -684,183 +632,6 @@ xfs_check_page_type(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate & map buffers for page given the extent map. Write it out.
|
|
||||||
* except for the original page of a writepage, this is called on
|
|
||||||
* delalloc/unwritten pages only, for the original page it is possible
|
|
||||||
* that the page has no mapping at all.
|
|
||||||
*/
|
|
||||||
STATIC int
|
|
||||||
xfs_convert_page(
|
|
||||||
struct inode *inode,
|
|
||||||
struct page *page,
|
|
||||||
loff_t tindex,
|
|
||||||
struct xfs_bmbt_irec *imap,
|
|
||||||
xfs_ioend_t **ioendp,
|
|
||||||
struct writeback_control *wbc)
|
|
||||||
{
|
|
||||||
struct buffer_head *bh, *head;
|
|
||||||
xfs_off_t end_offset;
|
|
||||||
unsigned long p_offset;
|
|
||||||
unsigned int type;
|
|
||||||
int len, page_dirty;
|
|
||||||
int count = 0, done = 0, uptodate = 1;
|
|
||||||
xfs_off_t offset = page_offset(page);
|
|
||||||
|
|
||||||
if (page->index != tindex)
|
|
||||||
goto fail;
|
|
||||||
if (!trylock_page(page))
|
|
||||||
goto fail;
|
|
||||||
if (PageWriteback(page))
|
|
||||||
goto fail_unlock_page;
|
|
||||||
if (page->mapping != inode->i_mapping)
|
|
||||||
goto fail_unlock_page;
|
|
||||||
if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
|
|
||||||
goto fail_unlock_page;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* page_dirty is initially a count of buffers on the page before
|
|
||||||
* EOF and is decremented as we move each into a cleanable state.
|
|
||||||
*
|
|
||||||
* Derivation:
|
|
||||||
*
|
|
||||||
* End offset is the highest offset that this page should represent.
|
|
||||||
* If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
|
|
||||||
* will evaluate non-zero and be less than PAGE_CACHE_SIZE and
|
|
||||||
* hence give us the correct page_dirty count. On any other page,
|
|
||||||
* it will be zero and in that case we need page_dirty to be the
|
|
||||||
* count of buffers on the page.
|
|
||||||
*/
|
|
||||||
end_offset = min_t(unsigned long long,
|
|
||||||
(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
|
|
||||||
i_size_read(inode));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the current map does not span the entire page we are about to try
|
|
||||||
* to write, then give up. The only way we can write a page that spans
|
|
||||||
* multiple mappings in a single writeback iteration is via the
|
|
||||||
* xfs_vm_writepage() function. Data integrity writeback requires the
|
|
||||||
* entire page to be written in a single attempt, otherwise the part of
|
|
||||||
* the page we don't write here doesn't get written as part of the data
|
|
||||||
* integrity sync.
|
|
||||||
*
|
|
||||||
* For normal writeback, we also don't attempt to write partial pages
|
|
||||||
* here as it simply means that write_cache_pages() will see it under
|
|
||||||
* writeback and ignore the page until some point in the future, at
|
|
||||||
* which time this will be the only page in the file that needs
|
|
||||||
* writeback. Hence for more optimal IO patterns, we should always
|
|
||||||
* avoid partial page writeback due to multiple mappings on a page here.
|
|
||||||
*/
|
|
||||||
if (!xfs_imap_valid(inode, imap, end_offset))
|
|
||||||
goto fail_unlock_page;
|
|
||||||
|
|
||||||
len = 1 << inode->i_blkbits;
|
|
||||||
p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
|
|
||||||
PAGE_CACHE_SIZE);
|
|
||||||
p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
|
|
||||||
page_dirty = p_offset / len;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The moment we find a buffer that doesn't match our current type
|
|
||||||
* specification or can't be written, abort the loop and start
|
|
||||||
* writeback. As per the above xfs_imap_valid() check, only
|
|
||||||
* xfs_vm_writepage() can handle partial page writeback fully - we are
|
|
||||||
* limited here to the buffers that are contiguous with the current
|
|
||||||
* ioend, and hence a buffer we can't write breaks that contiguity and
|
|
||||||
* we have to defer the rest of the IO to xfs_vm_writepage().
|
|
||||||
*/
|
|
||||||
bh = head = page_buffers(page);
|
|
||||||
do {
|
|
||||||
if (offset >= end_offset)
|
|
||||||
break;
|
|
||||||
if (!buffer_uptodate(bh))
|
|
||||||
uptodate = 0;
|
|
||||||
if (!(PageUptodate(page) || buffer_uptodate(bh))) {
|
|
||||||
done = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buffer_unwritten(bh) || buffer_delay(bh) ||
|
|
||||||
buffer_mapped(bh)) {
|
|
||||||
if (buffer_unwritten(bh))
|
|
||||||
type = XFS_IO_UNWRITTEN;
|
|
||||||
else if (buffer_delay(bh))
|
|
||||||
type = XFS_IO_DELALLOC;
|
|
||||||
else
|
|
||||||
type = XFS_IO_OVERWRITE;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* imap should always be valid because of the above
|
|
||||||
* partial page end_offset check on the imap.
|
|
||||||
*/
|
|
||||||
ASSERT(xfs_imap_valid(inode, imap, offset));
|
|
||||||
|
|
||||||
lock_buffer(bh);
|
|
||||||
if (type != XFS_IO_OVERWRITE)
|
|
||||||
xfs_map_at_offset(inode, bh, imap, offset);
|
|
||||||
xfs_add_to_ioend(inode, bh, offset, type,
|
|
||||||
ioendp, done);
|
|
||||||
|
|
||||||
page_dirty--;
|
|
||||||
count++;
|
|
||||||
} else {
|
|
||||||
done = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} while (offset += len, (bh = bh->b_this_page) != head);
|
|
||||||
|
|
||||||
if (uptodate && bh == head)
|
|
||||||
SetPageUptodate(page);
|
|
||||||
|
|
||||||
if (count) {
|
|
||||||
if (--wbc->nr_to_write <= 0 &&
|
|
||||||
wbc->sync_mode == WB_SYNC_NONE)
|
|
||||||
done = 1;
|
|
||||||
}
|
|
||||||
xfs_start_page_writeback(page, !page_dirty, count);
|
|
||||||
|
|
||||||
return done;
|
|
||||||
fail_unlock_page:
|
|
||||||
unlock_page(page);
|
|
||||||
fail:
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert & write out a cluster of pages in the same extent as defined
|
|
||||||
* by mp and following the start page.
|
|
||||||
*/
|
|
||||||
STATIC void
|
|
||||||
xfs_cluster_write(
|
|
||||||
struct inode *inode,
|
|
||||||
pgoff_t tindex,
|
|
||||||
struct xfs_bmbt_irec *imap,
|
|
||||||
xfs_ioend_t **ioendp,
|
|
||||||
struct writeback_control *wbc,
|
|
||||||
pgoff_t tlast)
|
|
||||||
{
|
|
||||||
struct pagevec pvec;
|
|
||||||
int done = 0, i;
|
|
||||||
|
|
||||||
pagevec_init(&pvec, 0);
|
|
||||||
while (!done && tindex <= tlast) {
|
|
||||||
unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
|
|
||||||
|
|
||||||
if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
|
|
||||||
break;
|
|
||||||
|
|
||||||
for (i = 0; i < pagevec_count(&pvec); i++) {
|
|
||||||
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
|
|
||||||
imap, ioendp, wbc);
|
|
||||||
if (done)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
pagevec_release(&pvec);
|
|
||||||
cond_resched();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
STATIC void
|
STATIC void
|
||||||
xfs_vm_invalidatepage(
|
xfs_vm_invalidatepage(
|
||||||
struct page *page,
|
struct page *page,
|
||||||
@ -937,6 +708,164 @@ out_invalidate:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We implement an immediate ioend submission policy here to avoid needing to
|
||||||
|
* chain multiple ioends and hence nest mempool allocations which can violate
|
||||||
|
* forward progress guarantees we need to provide. The current ioend we are
|
||||||
|
* adding buffers to is cached on the writepage context, and if the new buffer
|
||||||
|
* does not append to the cached ioend it will create a new ioend and cache that
|
||||||
|
* instead.
|
||||||
|
*
|
||||||
|
* If a new ioend is created and cached, the old ioend is returned and queued
|
||||||
|
* locally for submission once the entire page is processed or an error has been
|
||||||
|
* detected. While ioends are submitted immediately after they are completed,
|
||||||
|
* batching optimisations are provided by higher level block plugging.
|
||||||
|
*
|
||||||
|
* At the end of a writeback pass, there will be a cached ioend remaining on the
|
||||||
|
* writepage context that the caller will need to submit.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
xfs_writepage_map(
|
||||||
|
struct xfs_writepage_ctx *wpc,
|
||||||
|
struct writeback_control *wbc,
|
||||||
|
struct inode *inode,
|
||||||
|
struct page *page,
|
||||||
|
loff_t offset,
|
||||||
|
__uint64_t end_offset)
|
||||||
|
{
|
||||||
|
LIST_HEAD(submit_list);
|
||||||
|
struct xfs_ioend *ioend, *next;
|
||||||
|
struct buffer_head *bh, *head;
|
||||||
|
ssize_t len = 1 << inode->i_blkbits;
|
||||||
|
int error = 0;
|
||||||
|
int count = 0;
|
||||||
|
int uptodate = 1;
|
||||||
|
|
||||||
|
bh = head = page_buffers(page);
|
||||||
|
offset = page_offset(page);
|
||||||
|
do {
|
||||||
|
if (offset >= end_offset)
|
||||||
|
break;
|
||||||
|
if (!buffer_uptodate(bh))
|
||||||
|
uptodate = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set_page_dirty dirties all buffers in a page, independent
|
||||||
|
* of their state. The dirty state however is entirely
|
||||||
|
* meaningless for holes (!mapped && uptodate), so skip
|
||||||
|
* buffers covering holes here.
|
||||||
|
*/
|
||||||
|
if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
|
||||||
|
wpc->imap_valid = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer_unwritten(bh)) {
|
||||||
|
if (wpc->io_type != XFS_IO_UNWRITTEN) {
|
||||||
|
wpc->io_type = XFS_IO_UNWRITTEN;
|
||||||
|
wpc->imap_valid = false;
|
||||||
|
}
|
||||||
|
} else if (buffer_delay(bh)) {
|
||||||
|
if (wpc->io_type != XFS_IO_DELALLOC) {
|
||||||
|
wpc->io_type = XFS_IO_DELALLOC;
|
||||||
|
wpc->imap_valid = false;
|
||||||
|
}
|
||||||
|
} else if (buffer_uptodate(bh)) {
|
||||||
|
if (wpc->io_type != XFS_IO_OVERWRITE) {
|
||||||
|
wpc->io_type = XFS_IO_OVERWRITE;
|
||||||
|
wpc->imap_valid = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (PageUptodate(page))
|
||||||
|
ASSERT(buffer_mapped(bh));
|
||||||
|
/*
|
||||||
|
* This buffer is not uptodate and will not be
|
||||||
|
* written to disk. Ensure that we will put any
|
||||||
|
* subsequent writeable buffers into a new
|
||||||
|
* ioend.
|
||||||
|
*/
|
||||||
|
wpc->imap_valid = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wpc->imap_valid)
|
||||||
|
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
|
||||||
|
offset);
|
||||||
|
if (!wpc->imap_valid) {
|
||||||
|
error = xfs_map_blocks(inode, offset, &wpc->imap,
|
||||||
|
wpc->io_type);
|
||||||
|
if (error)
|
||||||
|
goto out;
|
||||||
|
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
if (wpc->imap_valid) {
|
||||||
|
lock_buffer(bh);
|
||||||
|
if (wpc->io_type != XFS_IO_OVERWRITE)
|
||||||
|
xfs_map_at_offset(inode, bh, &wpc->imap, offset);
|
||||||
|
xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
} while (offset += len, ((bh = bh->b_this_page) != head));
|
||||||
|
|
||||||
|
if (uptodate && bh == head)
|
||||||
|
SetPageUptodate(page);
|
||||||
|
|
||||||
|
ASSERT(wpc->ioend || list_empty(&submit_list));
|
||||||
|
|
||||||
|
out:
|
||||||
|
/*
|
||||||
|
* On error, we have to fail the ioend here because we have locked
|
||||||
|
* buffers in the ioend. If we don't do this, we'll deadlock
|
||||||
|
* invalidating the page as that tries to lock the buffers on the page.
|
||||||
|
* Also, because we may have set pages under writeback, we have to make
|
||||||
|
* sure we run IO completion to mark the error state of the IO
|
||||||
|
* appropriately, so we can't cancel the ioend directly here. That means
|
||||||
|
* we have to mark this page as under writeback if we included any
|
||||||
|
* buffers from it in the ioend chain so that completion treats it
|
||||||
|
* correctly.
|
||||||
|
*
|
||||||
|
* If we didn't include the page in the ioend, then on error we can
|
||||||
|
* simply discard and unlock it as there are no other users of the page
|
||||||
|
* or its buffers right now. The caller will still need to trigger
|
||||||
|
* submission of outstanding ioends on the writepage context so they are
|
||||||
|
* treated correctly on error.
|
||||||
|
*/
|
||||||
|
if (count) {
|
||||||
|
xfs_start_page_writeback(page, !error);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Preserve the original error if there was one, otherwise catch
|
||||||
|
* submission errors here and propagate into subsequent ioend
|
||||||
|
* submissions.
|
||||||
|
*/
|
||||||
|
list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
|
||||||
|
int error2;
|
||||||
|
|
||||||
|
list_del_init(&ioend->io_list);
|
||||||
|
error2 = xfs_submit_ioend(wbc, ioend, error);
|
||||||
|
if (error2 && !error)
|
||||||
|
error = error2;
|
||||||
|
}
|
||||||
|
} else if (error) {
|
||||||
|
xfs_aops_discard_page(page);
|
||||||
|
ClearPageUptodate(page);
|
||||||
|
unlock_page(page);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* We can end up here with no error and nothing to write if we
|
||||||
|
* race with a partial page truncate on a sub-page block sized
|
||||||
|
* filesystem. In that case we need to mark the page clean.
|
||||||
|
*/
|
||||||
|
xfs_start_page_writeback(page, 1);
|
||||||
|
end_page_writeback(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
mapping_set_error(page->mapping, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Write out a dirty page.
|
* Write out a dirty page.
|
||||||
*
|
*
|
||||||
@ -946,22 +875,16 @@ out_invalidate:
|
|||||||
* For any other dirty buffer heads on the page we should flush them.
|
* For any other dirty buffer heads on the page we should flush them.
|
||||||
*/
|
*/
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_vm_writepage(
|
xfs_do_writepage(
|
||||||
struct page *page,
|
struct page *page,
|
||||||
struct writeback_control *wbc)
|
struct writeback_control *wbc,
|
||||||
|
void *data)
|
||||||
{
|
{
|
||||||
|
struct xfs_writepage_ctx *wpc = data;
|
||||||
struct inode *inode = page->mapping->host;
|
struct inode *inode = page->mapping->host;
|
||||||
struct buffer_head *bh, *head;
|
|
||||||
struct xfs_bmbt_irec imap;
|
|
||||||
xfs_ioend_t *ioend = NULL, *iohead = NULL;
|
|
||||||
loff_t offset;
|
loff_t offset;
|
||||||
unsigned int type;
|
|
||||||
__uint64_t end_offset;
|
__uint64_t end_offset;
|
||||||
pgoff_t end_index, last_index;
|
pgoff_t end_index;
|
||||||
ssize_t len;
|
|
||||||
int err, imap_valid = 0, uptodate = 1;
|
|
||||||
int count = 0;
|
|
||||||
int nonblocking = 0;
|
|
||||||
|
|
||||||
trace_xfs_writepage(inode, page, 0, 0);
|
trace_xfs_writepage(inode, page, 0, 0);
|
||||||
|
|
||||||
@ -988,12 +911,9 @@ xfs_vm_writepage(
|
|||||||
if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
|
if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
|
||||||
goto redirty;
|
goto redirty;
|
||||||
|
|
||||||
/* Is this page beyond the end of the file? */
|
|
||||||
offset = i_size_read(inode);
|
|
||||||
end_index = offset >> PAGE_CACHE_SHIFT;
|
|
||||||
last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* Is this page beyond the end of the file?
|
||||||
|
*
|
||||||
* The page index is less than the end_index, adjust the end_offset
|
* The page index is less than the end_index, adjust the end_offset
|
||||||
* to the highest offset that this page should represent.
|
* to the highest offset that this page should represent.
|
||||||
* -----------------------------------------------------
|
* -----------------------------------------------------
|
||||||
@ -1004,6 +924,8 @@ xfs_vm_writepage(
|
|||||||
* | desired writeback range | see else |
|
* | desired writeback range | see else |
|
||||||
* ---------------------------------^------------------|
|
* ---------------------------------^------------------|
|
||||||
*/
|
*/
|
||||||
|
offset = i_size_read(inode);
|
||||||
|
end_index = offset >> PAGE_CACHE_SHIFT;
|
||||||
if (page->index < end_index)
|
if (page->index < end_index)
|
||||||
end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
|
end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
|
||||||
else {
|
else {
|
||||||
@ -1055,152 +977,7 @@ xfs_vm_writepage(
|
|||||||
end_offset = offset;
|
end_offset = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
len = 1 << inode->i_blkbits;
|
return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
|
||||||
|
|
||||||
bh = head = page_buffers(page);
|
|
||||||
offset = page_offset(page);
|
|
||||||
type = XFS_IO_OVERWRITE;
|
|
||||||
|
|
||||||
if (wbc->sync_mode == WB_SYNC_NONE)
|
|
||||||
nonblocking = 1;
|
|
||||||
|
|
||||||
do {
|
|
||||||
int new_ioend = 0;
|
|
||||||
|
|
||||||
if (offset >= end_offset)
|
|
||||||
break;
|
|
||||||
if (!buffer_uptodate(bh))
|
|
||||||
uptodate = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* set_page_dirty dirties all buffers in a page, independent
|
|
||||||
* of their state. The dirty state however is entirely
|
|
||||||
* meaningless for holes (!mapped && uptodate), so skip
|
|
||||||
* buffers covering holes here.
|
|
||||||
*/
|
|
||||||
if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
|
|
||||||
imap_valid = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buffer_unwritten(bh)) {
|
|
||||||
if (type != XFS_IO_UNWRITTEN) {
|
|
||||||
type = XFS_IO_UNWRITTEN;
|
|
||||||
imap_valid = 0;
|
|
||||||
}
|
|
||||||
} else if (buffer_delay(bh)) {
|
|
||||||
if (type != XFS_IO_DELALLOC) {
|
|
||||||
type = XFS_IO_DELALLOC;
|
|
||||||
imap_valid = 0;
|
|
||||||
}
|
|
||||||
} else if (buffer_uptodate(bh)) {
|
|
||||||
if (type != XFS_IO_OVERWRITE) {
|
|
||||||
type = XFS_IO_OVERWRITE;
|
|
||||||
imap_valid = 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (PageUptodate(page))
|
|
||||||
ASSERT(buffer_mapped(bh));
|
|
||||||
/*
|
|
||||||
* This buffer is not uptodate and will not be
|
|
||||||
* written to disk. Ensure that we will put any
|
|
||||||
* subsequent writeable buffers into a new
|
|
||||||
* ioend.
|
|
||||||
*/
|
|
||||||
imap_valid = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (imap_valid)
|
|
||||||
imap_valid = xfs_imap_valid(inode, &imap, offset);
|
|
||||||
if (!imap_valid) {
|
|
||||||
/*
|
|
||||||
* If we didn't have a valid mapping then we need to
|
|
||||||
* put the new mapping into a separate ioend structure.
|
|
||||||
* This ensures non-contiguous extents always have
|
|
||||||
* separate ioends, which is particularly important
|
|
||||||
* for unwritten extent conversion at I/O completion
|
|
||||||
* time.
|
|
||||||
*/
|
|
||||||
new_ioend = 1;
|
|
||||||
err = xfs_map_blocks(inode, offset, &imap, type,
|
|
||||||
nonblocking);
|
|
||||||
if (err)
|
|
||||||
goto error;
|
|
||||||
imap_valid = xfs_imap_valid(inode, &imap, offset);
|
|
||||||
}
|
|
||||||
if (imap_valid) {
|
|
||||||
lock_buffer(bh);
|
|
||||||
if (type != XFS_IO_OVERWRITE)
|
|
||||||
xfs_map_at_offset(inode, bh, &imap, offset);
|
|
||||||
xfs_add_to_ioend(inode, bh, offset, type, &ioend,
|
|
||||||
new_ioend);
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!iohead)
|
|
||||||
iohead = ioend;
|
|
||||||
|
|
||||||
} while (offset += len, ((bh = bh->b_this_page) != head));
|
|
||||||
|
|
||||||
if (uptodate && bh == head)
|
|
||||||
SetPageUptodate(page);
|
|
||||||
|
|
||||||
xfs_start_page_writeback(page, 1, count);
|
|
||||||
|
|
||||||
/* if there is no IO to be submitted for this page, we are done */
|
|
||||||
if (!ioend)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ASSERT(iohead);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Any errors from this point onwards need to be reported through the IO
|
|
||||||
* completion path as we have marked the initial page as under writeback
|
|
||||||
* and unlocked it.
|
|
||||||
*/
|
|
||||||
if (imap_valid) {
|
|
||||||
xfs_off_t end_index;
|
|
||||||
|
|
||||||
end_index = imap.br_startoff + imap.br_blockcount;
|
|
||||||
|
|
||||||
/* to bytes */
|
|
||||||
end_index <<= inode->i_blkbits;
|
|
||||||
|
|
||||||
/* to pages */
|
|
||||||
end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
|
|
||||||
|
|
||||||
/* check against file size */
|
|
||||||
if (end_index > last_index)
|
|
||||||
end_index = last_index;
|
|
||||||
|
|
||||||
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
|
|
||||||
wbc, end_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Reserve log space if we might write beyond the on-disk inode size.
|
|
||||||
*/
|
|
||||||
err = 0;
|
|
||||||
if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
|
|
||||||
err = xfs_setfilesize_trans_alloc(ioend);
|
|
||||||
|
|
||||||
xfs_submit_ioend(wbc, iohead, err);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
error:
|
|
||||||
if (iohead)
|
|
||||||
xfs_cancel_ioend(iohead);
|
|
||||||
|
|
||||||
if (err == -EAGAIN)
|
|
||||||
goto redirty;
|
|
||||||
|
|
||||||
xfs_aops_discard_page(page);
|
|
||||||
ClearPageUptodate(page);
|
|
||||||
unlock_page(page);
|
|
||||||
return err;
|
|
||||||
|
|
||||||
redirty:
|
redirty:
|
||||||
redirty_page_for_writepage(wbc, page);
|
redirty_page_for_writepage(wbc, page);
|
||||||
@ -1208,13 +985,37 @@ redirty:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
STATIC int
|
||||||
|
xfs_vm_writepage(
|
||||||
|
struct page *page,
|
||||||
|
struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
struct xfs_writepage_ctx wpc = {
|
||||||
|
.io_type = XFS_IO_INVALID,
|
||||||
|
};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = xfs_do_writepage(page, wbc, &wpc);
|
||||||
|
if (wpc.ioend)
|
||||||
|
ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_vm_writepages(
|
xfs_vm_writepages(
|
||||||
struct address_space *mapping,
|
struct address_space *mapping,
|
||||||
struct writeback_control *wbc)
|
struct writeback_control *wbc)
|
||||||
{
|
{
|
||||||
|
struct xfs_writepage_ctx wpc = {
|
||||||
|
.io_type = XFS_IO_INVALID,
|
||||||
|
};
|
||||||
|
int ret;
|
||||||
|
|
||||||
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
|
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
|
||||||
return generic_writepages(mapping, wbc);
|
ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
|
||||||
|
if (wpc.ioend)
|
||||||
|
ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -24,12 +24,14 @@ extern mempool_t *xfs_ioend_pool;
|
|||||||
* Types of I/O for bmap clustering and I/O completion tracking.
|
* Types of I/O for bmap clustering and I/O completion tracking.
|
||||||
*/
|
*/
|
||||||
enum {
|
enum {
|
||||||
|
XFS_IO_INVALID, /* initial state */
|
||||||
XFS_IO_DELALLOC, /* covers delalloc region */
|
XFS_IO_DELALLOC, /* covers delalloc region */
|
||||||
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
|
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
|
||||||
XFS_IO_OVERWRITE, /* covers already allocated extent */
|
XFS_IO_OVERWRITE, /* covers already allocated extent */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define XFS_IO_TYPES \
|
#define XFS_IO_TYPES \
|
||||||
|
{ XFS_IO_INVALID, "invalid" }, \
|
||||||
{ XFS_IO_DELALLOC, "delalloc" }, \
|
{ XFS_IO_DELALLOC, "delalloc" }, \
|
||||||
{ XFS_IO_UNWRITTEN, "unwritten" }, \
|
{ XFS_IO_UNWRITTEN, "unwritten" }, \
|
||||||
{ XFS_IO_OVERWRITE, "overwrite" }
|
{ XFS_IO_OVERWRITE, "overwrite" }
|
||||||
@ -39,7 +41,7 @@ enum {
|
|||||||
* It can manage several multi-page bio's at once.
|
* It can manage several multi-page bio's at once.
|
||||||
*/
|
*/
|
||||||
typedef struct xfs_ioend {
|
typedef struct xfs_ioend {
|
||||||
struct xfs_ioend *io_list; /* next ioend in chain */
|
struct list_head io_list; /* next ioend in chain */
|
||||||
unsigned int io_type; /* delalloc / unwritten */
|
unsigned int io_type; /* delalloc / unwritten */
|
||||||
int io_error; /* I/O error code */
|
int io_error; /* I/O error code */
|
||||||
atomic_t io_remaining; /* hold count */
|
atomic_t io_remaining; /* hold count */
|
||||||
|
Loading…
Reference in New Issue
Block a user