mm: only enforce stable page writes if the backing device requires it
Create a helper function to check if a backing device requires stable page writes and, if so, perform the necessary wait. Then, make it so that all points in the memory manager that handle making pages writable use the helper function. This should provide stable page write support to most filesystems, while eliminating unnecessary waiting for devices that don't require the feature.

Before this patchset, all filesystems would block, regardless of whether or not it was necessary. ext3 would wait, but still generate occasional checksum errors. The network filesystems were left to do their own thing, so they'd wait too.

After this patchset, all the disk filesystems except ext3 and btrfs will wait only if the hardware requires it. ext3 (if necessary) snapshots pages instead of blocking, and btrfs provides its own bdi so the mm will never wait. Network filesystems haven't been touched, so either they provide their own stable page guarantees or they don't block at all. The blocking behavior is back to what it was before 3.0 if you don't have a disk requiring stable page writes.

Here's the result of using dbench to test latency on ext2:

3.8.0-rc3:
 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 WriteX        109347     0.028    59.817
 ReadX         347180     0.004     3.391
 Flush          15514    29.828   287.283

Throughput 57.429 MB/sec  4 clients  4 procs  max_latency=287.290 ms

3.8.0-rc3 + patches:
 WriteX        105556     0.029     4.273
 ReadX         335004     0.005     4.112
 Flush          14982    30.540   298.634

Throughput 55.4496 MB/sec  4 clients  4 procs  max_latency=298.650 ms

As you can see, the maximum write latency drops considerably with this patch enabled. The other filesystems (ext3/ext4/xfs/btrfs) behave similarly, but see the cover letter for those results.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
7d311cdab6
commit
1d1d1a7672
@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||||||
if (unlikely(ret < 0))
|
if (unlikely(ret < 0))
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
wait_on_page_writeback(page);
|
wait_for_stable_page(page);
|
||||||
return 0;
|
return 0;
|
||||||
out_unlock:
|
out_unlock:
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
|
@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||||||
0, len, NULL,
|
0, len, NULL,
|
||||||
ext4_bh_unmapped)) {
|
ext4_bh_unmapped)) {
|
||||||
/* Wait so that we don't change page under IO */
|
/* Wait so that we don't change page under IO */
|
||||||
wait_on_page_writeback(page);
|
wait_for_stable_page(page);
|
||||||
ret = VM_FAULT_LOCKED;
|
ret = VM_FAULT_LOCKED;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -483,7 +483,7 @@ out:
|
|||||||
gfs2_holder_uninit(&gh);
|
gfs2_holder_uninit(&gh);
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
wait_on_page_writeback(page);
|
wait_for_stable_page(page);
|
||||||
}
|
}
|
||||||
sb_end_pagefault(inode->i_sb);
|
sb_end_pagefault(inode->i_sb);
|
||||||
return block_page_mkwrite_return(ret);
|
return block_page_mkwrite_return(ret);
|
||||||
|
@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||||||
nilfs_transaction_commit(inode->i_sb);
|
nilfs_transaction_commit(inode->i_sb);
|
||||||
|
|
||||||
mapped:
|
mapped:
|
||||||
wait_on_page_writeback(page);
|
wait_for_stable_page(page);
|
||||||
out:
|
out:
|
||||||
sb_end_pagefault(inode->i_sb);
|
sb_end_pagefault(inode->i_sb);
|
||||||
return block_page_mkwrite_return(ret);
|
return block_page_mkwrite_return(ret);
|
||||||
|
@ -414,6 +414,7 @@ static inline void wait_on_page_writeback(struct page *page)
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern void end_page_writeback(struct page *page);
|
extern void end_page_writeback(struct page *page);
|
||||||
|
void wait_for_stable_page(struct page *page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add an arbitrary waiter to a page's wait queue
|
* Add an arbitrary waiter to a page's wait queue
|
||||||
|
@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||||||
* see the dirty page and writeprotect it again.
|
* see the dirty page and writeprotect it again.
|
||||||
*/
|
*/
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
|
wait_for_stable_page(page);
|
||||||
out:
|
out:
|
||||||
sb_end_pagefault(inode->i_sb);
|
sb_end_pagefault(inode->i_sb);
|
||||||
return ret;
|
return ret;
|
||||||
@ -2274,7 +2275,7 @@ repeat:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
found:
|
found:
|
||||||
wait_on_page_writeback(page);
|
wait_for_stable_page(page);
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(grab_cache_page_write_begin);
|
EXPORT_SYMBOL(grab_cache_page_write_begin);
|
||||||
|
@ -2290,3 +2290,23 @@ int mapping_tagged(struct address_space *mapping, int tag)
|
|||||||
return radix_tree_tagged(&mapping->page_tree, tag);
|
return radix_tree_tagged(&mapping->page_tree, tag);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(mapping_tagged);
|
EXPORT_SYMBOL(mapping_tagged);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* wait_for_stable_page() - wait for writeback to finish, if necessary.
|
||||||
|
* @page: The page to wait on.
|
||||||
|
*
|
||||||
|
* This function determines if the given page is related to a backing device
|
||||||
|
* that requires page contents to be held stable during writeback. If so, then
|
||||||
|
* it will wait for any pending writeback to complete. Otherwise it returns immediately without waiting.
|
||||||
|
*/
|
||||||
|
void wait_for_stable_page(struct page *page)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = page_mapping(page); /* look up the page's mapping via page_mapping() */
|
||||||
|
struct backing_dev_info *bdi = mapping->backing_dev_info; /* NOTE(review): mapping is dereferenced without a NULL check - presumably callers only pass pages that have a mapping; confirm */
|
||||||
|
|
||||||
|
if (!bdi_cap_stable_pages_required(bdi)) /* backing device does not require stable pages: skip the wait */
|
||||||
|
return;
|
||||||
|
|
||||||
|
wait_on_page_writeback(page); /* stable pages required: wait for pending writeback on this page */
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(wait_for_stable_page);
|
||||||
|
Loading…
Reference in New Issue
Block a user