09cbfeaf1a
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced a *long* time ago with the promise that one day it would be possible to implement the page cache with bigger chunks than PAGE_SIZE. This promise never materialized, and it is unlikely that it ever will. We have many places where PAGE_CACHE_SIZE is assumed to be equal to PAGE_SIZE. And it's a constant source of confusion on whether a PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Globally switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much breakage to be doable. Let's stop pretending that pages in the page cache are special. They are not. The changes are pretty straightforward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using the script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is a revert of the changes to the PAGE_CACHE_ALIGN definition: we are going to drop it later. There are a few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation will also be addressed in a separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
286 lines
7.0 KiB
C
286 lines
7.0 KiB
C
/*
|
|
* fs/logfs/file.c - prepare_write, commit_write and friends
|
|
*
|
|
* As should be obvious for Linux kernel code, license is GPLv2
|
|
*
|
|
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
|
|
*/
|
|
#include "logfs.h"
|
|
#include <linux/sched.h>
|
|
#include <linux/writeback.h>
|
|
|
|
static int logfs_write_begin(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned len, unsigned flags,
|
|
struct page **pagep, void **fsdata)
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
struct page *page;
|
|
pgoff_t index = pos >> PAGE_SHIFT;
|
|
|
|
page = grab_cache_page_write_begin(mapping, index, flags);
|
|
if (!page)
|
|
return -ENOMEM;
|
|
*pagep = page;
|
|
|
|
if ((len == PAGE_SIZE) || PageUptodate(page))
|
|
return 0;
|
|
if ((pos & PAGE_MASK) >= i_size_read(inode)) {
|
|
unsigned start = pos & (PAGE_SIZE - 1);
|
|
unsigned end = start + len;
|
|
|
|
/* Reading beyond i_size is simple: memset to zero */
|
|
zero_user_segments(page, 0, start, end, PAGE_SIZE);
|
|
return 0;
|
|
}
|
|
return logfs_readpage_nolock(page);
|
|
}
|
|
|
|
static int logfs_write_end(struct file *file, struct address_space *mapping,
|
|
loff_t pos, unsigned len, unsigned copied, struct page *page,
|
|
void *fsdata)
|
|
{
|
|
struct inode *inode = mapping->host;
|
|
pgoff_t index = page->index;
|
|
unsigned start = pos & (PAGE_SIZE - 1);
|
|
unsigned end = start + copied;
|
|
int ret = 0;
|
|
|
|
BUG_ON(PAGE_SIZE != inode->i_sb->s_blocksize);
|
|
BUG_ON(page->index > I3_BLOCKS);
|
|
|
|
if (copied < len) {
|
|
/*
|
|
* Short write of a non-initialized paged. Just tell userspace
|
|
* to retry the entire page.
|
|
*/
|
|
if (!PageUptodate(page)) {
|
|
copied = 0;
|
|
goto out;
|
|
}
|
|
}
|
|
if (copied == 0)
|
|
goto out; /* FIXME: do we need to update inode? */
|
|
|
|
if (i_size_read(inode) < (index << PAGE_SHIFT) + end) {
|
|
i_size_write(inode, (index << PAGE_SHIFT) + end);
|
|
mark_inode_dirty_sync(inode);
|
|
}
|
|
|
|
SetPageUptodate(page);
|
|
if (!PageDirty(page)) {
|
|
if (!get_page_reserve(inode, page))
|
|
__set_page_dirty_nobuffers(page);
|
|
else
|
|
ret = logfs_write_buf(inode, page, WF_LOCK);
|
|
}
|
|
out:
|
|
unlock_page(page);
|
|
put_page(page);
|
|
return ret ? ret : copied;
|
|
}
|
|
|
|
/*
 * Read one page through logfs_readpage_nolock() and unlock it afterwards,
 * whether or not the read succeeded.  Returns the helper's result.
 */
int logfs_readpage(struct file *file, struct page *page)
{
	int err = logfs_readpage_nolock(page);

	unlock_page(page);
	return err;
}
|
|
|
|
/* Clear the page's dirty flag in the radix tree. */
|
|
/* TODO: mucking with PageWriteback is silly. Add a generic function to clear
|
|
* the dirty bit from the radix tree for filesystems that don't have to wait
|
|
* for page writeback to finish (i.e. any compressing filesystem).
|
|
*/
|
|
static void clear_radix_tree_dirty(struct page *page)
|
|
{
|
|
BUG_ON(PagePrivate(page) || page->private);
|
|
set_page_writeback(page);
|
|
end_page_writeback(page);
|
|
}
|
|
|
|
static int __logfs_writepage(struct page *page)
|
|
{
|
|
struct inode *inode = page->mapping->host;
|
|
int err;
|
|
|
|
err = logfs_write_buf(inode, page, WF_LOCK);
|
|
if (err)
|
|
set_page_dirty(page);
|
|
else
|
|
clear_radix_tree_dirty(page);
|
|
unlock_page(page);
|
|
return err;
|
|
}
|
|
|
|
static int logfs_writepage(struct page *page, struct writeback_control *wbc)
|
|
{
|
|
struct inode *inode = page->mapping->host;
|
|
loff_t i_size = i_size_read(inode);
|
|
pgoff_t end_index = i_size >> PAGE_SHIFT;
|
|
unsigned offset;
|
|
u64 bix;
|
|
level_t level;
|
|
|
|
log_file("logfs_writepage(%lx, %lx, %p)\n", inode->i_ino, page->index,
|
|
page);
|
|
|
|
logfs_unpack_index(page->index, &bix, &level);
|
|
|
|
/* Indirect blocks are never truncated */
|
|
if (level != 0)
|
|
return __logfs_writepage(page);
|
|
|
|
/*
|
|
* TODO: everything below is a near-verbatim copy of nobh_writepage().
|
|
* The relevant bits should be factored out after logfs is merged.
|
|
*/
|
|
|
|
/* Is the page fully inside i_size? */
|
|
if (bix < end_index)
|
|
return __logfs_writepage(page);
|
|
|
|
/* Is the page fully outside i_size? (truncate in progress) */
|
|
offset = i_size & (PAGE_SIZE-1);
|
|
if (bix > end_index || offset == 0) {
|
|
unlock_page(page);
|
|
return 0; /* don't care */
|
|
}
|
|
|
|
/*
|
|
* The page straddles i_size. It must be zeroed out on each and every
|
|
* writepage invokation because it may be mmapped. "A file is mapped
|
|
* in multiples of the page size. For a file that is not a multiple of
|
|
* the page size, the remaining memory is zeroed when mapped, and
|
|
* writes to that region are not written out to the file."
|
|
*/
|
|
zero_user_segment(page, offset, PAGE_SIZE);
|
|
return __logfs_writepage(page);
|
|
}
|
|
|
|
static void logfs_invalidatepage(struct page *page, unsigned int offset,
|
|
unsigned int length)
|
|
{
|
|
struct logfs_block *block = logfs_block(page);
|
|
|
|
if (block->reserved_bytes) {
|
|
struct super_block *sb = page->mapping->host->i_sb;
|
|
struct logfs_super *super = logfs_super(sb);
|
|
|
|
super->s_dirty_pages -= block->reserved_bytes;
|
|
block->ops->free_block(sb, block);
|
|
BUG_ON(bitmap_weight(block->alias_map, LOGFS_BLOCK_FACTOR));
|
|
} else
|
|
move_page_to_btree(page);
|
|
BUG_ON(PagePrivate(page) || page->private);
|
|
}
|
|
|
|
/*
 * ->releasepage for logfs.  Always returns 0: none of these pages are easy
 * to release.  The gfp argument is ignored.
 */
static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this)
{
	return 0;
}
|
|
|
|
|
|
long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
struct logfs_inode *li = logfs_inode(inode);
|
|
unsigned int oldflags, flags;
|
|
int err;
|
|
|
|
switch (cmd) {
|
|
case FS_IOC_GETFLAGS:
|
|
flags = li->li_flags & LOGFS_FL_USER_VISIBLE;
|
|
return put_user(flags, (int __user *)arg);
|
|
case FS_IOC_SETFLAGS:
|
|
if (IS_RDONLY(inode))
|
|
return -EROFS;
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
err = get_user(flags, (int __user *)arg);
|
|
if (err)
|
|
return err;
|
|
|
|
inode_lock(inode);
|
|
oldflags = li->li_flags;
|
|
flags &= LOGFS_FL_USER_MODIFIABLE;
|
|
flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
|
|
li->li_flags = flags;
|
|
inode_unlock(inode);
|
|
|
|
inode->i_ctime = CURRENT_TIME;
|
|
mark_inode_dirty_sync(inode);
|
|
return 0;
|
|
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
/*
 * fsync for logfs files.
 *
 * Writes back and waits on the byte range [@start, @end] of the file's
 * mapping, then writes the anchor via logfs_write_anchor() while holding
 * the inode lock and the write blocks.  @datasync is not used.
 *
 * Returns 0, or the error from filemap_write_and_wait_range().
 */
int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct super_block *sb = inode->i_sb;
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (err)
		return err;

	inode_lock(inode);
	logfs_get_wblocks(sb, NULL, WF_LOCK);
	logfs_write_anchor(sb);
	logfs_put_wblocks(sb, NULL, WF_LOCK);
	inode_unlock(inode);

	return 0;
}
|
|
|
|
static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
int err = 0;
|
|
|
|
err = inode_change_ok(inode, attr);
|
|
if (err)
|
|
return err;
|
|
|
|
if (attr->ia_valid & ATTR_SIZE) {
|
|
err = logfs_truncate(inode, attr->ia_size);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
setattr_copy(inode, attr);
|
|
mark_inode_dirty(inode);
|
|
return 0;
|
|
}
|
|
|
|
const struct inode_operations logfs_reg_iops = {
|
|
.setattr = logfs_setattr,
|
|
};
|
|
|
|
const struct file_operations logfs_reg_fops = {
|
|
.read_iter = generic_file_read_iter,
|
|
.write_iter = generic_file_write_iter,
|
|
.fsync = logfs_fsync,
|
|
.unlocked_ioctl = logfs_ioctl,
|
|
.llseek = generic_file_llseek,
|
|
.mmap = generic_file_readonly_mmap,
|
|
.open = generic_file_open,
|
|
};
|
|
|
|
const struct address_space_operations logfs_reg_aops = {
|
|
.invalidatepage = logfs_invalidatepage,
|
|
.readpage = logfs_readpage,
|
|
.releasepage = logfs_releasepage,
|
|
.set_page_dirty = __set_page_dirty_nobuffers,
|
|
.writepage = logfs_writepage,
|
|
.writepages = generic_writepages,
|
|
.write_begin = logfs_write_begin,
|
|
.write_end = logfs_write_end,
|
|
};
|