6907e3c00a
Add helpers similar to file_{get,set}_page, but which deal with folios and don't allocate a new folio unless explicitly asked to, and which map to shmem_get_folio instead of calling into the aops. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
417 lines
9.9 KiB
C
417 lines
9.9 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "scrub/xfile.h"
|
|
#include "scrub/xfarray.h"
|
|
#include "scrub/scrub.h"
|
|
#include "scrub/trace.h"
|
|
#include <linux/shmem_fs.h>
|
|
|
|
/*
|
|
* Swappable Temporary Memory
|
|
* ==========================
|
|
*
|
|
* Online checking sometimes needs to be able to stage a large amount of data
|
|
* in memory. This information might not fit in the available memory and it
|
|
* doesn't all need to be accessible at all times. In other words, we want an
|
|
* indexed data buffer to store data that can be paged out.
|
|
*
|
|
* When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
|
|
* requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
|
|
* store our staging data. This file is not installed in the file descriptor
|
|
* table so that user programs cannot access the data, which means that the
|
|
* xfile must be freed with xfile_destroy.
|
|
*
|
|
* xfiles assume that the caller will handle all required concurrency
|
|
* management; standard vfs locks (freezer and inode) are not taken. Reads
|
|
* and writes are satisfied directly from the page cache.
|
|
*/
|
|
|
|
/*
|
|
* xfiles must not be exposed to userspace and require upper layers to
|
|
* coordinate access to the one handle returned by the constructor, so
|
|
* establish a separate lock class for xfiles to avoid confusing lockdep.
|
|
*/
|
|
/* Lock class that xfile_create assigns to the i_rwsem of each xfile inode. */
static struct lock_class_key xfile_i_mutex_key;
|
|
|
|
/*
|
|
* Create an xfile of the given size. The description will be used in the
|
|
* trace output.
|
|
*/
|
|
int
|
|
xfile_create(
|
|
const char *description,
|
|
loff_t isize,
|
|
struct xfile **xfilep)
|
|
{
|
|
struct inode *inode;
|
|
struct xfile *xf;
|
|
int error;
|
|
|
|
xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
|
|
if (!xf)
|
|
return -ENOMEM;
|
|
|
|
xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
|
|
if (IS_ERR(xf->file)) {
|
|
error = PTR_ERR(xf->file);
|
|
goto out_xfile;
|
|
}
|
|
|
|
inode = file_inode(xf->file);
|
|
lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
|
|
|
|
/*
|
|
* We don't want to bother with kmapping data during repair, so don't
|
|
* allow highmem pages to back this mapping.
|
|
*/
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
|
|
|
|
trace_xfile_create(xf);
|
|
|
|
*xfilep = xf;
|
|
return 0;
|
|
out_xfile:
|
|
kfree(xf);
|
|
return error;
|
|
}
|
|
|
|
/* Close the file and release all resources. */
|
|
void
|
|
xfile_destroy(
|
|
struct xfile *xf)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
|
|
trace_xfile_destroy(xf);
|
|
|
|
lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
|
|
fput(xf->file);
|
|
kfree(xf);
|
|
}
|
|
|
|
/*
|
|
* Load an object. Since we're treating this file as "memory", any error or
|
|
* short IO is treated as a failure to allocate memory.
|
|
*/
|
|
int
|
|
xfile_load(
|
|
struct xfile *xf,
|
|
void *buf,
|
|
size_t count,
|
|
loff_t pos)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
unsigned int pflags;
|
|
|
|
if (count > MAX_RW_COUNT)
|
|
return -ENOMEM;
|
|
if (inode->i_sb->s_maxbytes - pos < count)
|
|
return -ENOMEM;
|
|
|
|
trace_xfile_load(xf, pos, count);
|
|
|
|
pflags = memalloc_nofs_save();
|
|
while (count > 0) {
|
|
struct folio *folio;
|
|
unsigned int len;
|
|
unsigned int offset;
|
|
|
|
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
|
|
SGP_READ) < 0)
|
|
break;
|
|
if (!folio) {
|
|
/*
|
|
* No data stored at this offset, just zero the output
|
|
* buffer until the next page boundary.
|
|
*/
|
|
len = min_t(ssize_t, count,
|
|
PAGE_SIZE - offset_in_page(pos));
|
|
memset(buf, 0, len);
|
|
} else {
|
|
if (filemap_check_wb_err(inode->i_mapping, 0)) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
break;
|
|
}
|
|
|
|
offset = offset_in_folio(folio, pos);
|
|
len = min_t(ssize_t, count, folio_size(folio) - offset);
|
|
memcpy(buf, folio_address(folio) + offset, len);
|
|
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
}
|
|
count -= len;
|
|
pos += len;
|
|
buf += len;
|
|
}
|
|
memalloc_nofs_restore(pflags);
|
|
|
|
if (count)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Store an object. Since we're treating this file as "memory", any error or
|
|
* short IO is treated as a failure to allocate memory.
|
|
*/
|
|
int
|
|
xfile_store(
|
|
struct xfile *xf,
|
|
const void *buf,
|
|
size_t count,
|
|
loff_t pos)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
unsigned int pflags;
|
|
|
|
if (count > MAX_RW_COUNT)
|
|
return -ENOMEM;
|
|
if (inode->i_sb->s_maxbytes - pos < count)
|
|
return -ENOMEM;
|
|
|
|
trace_xfile_store(xf, pos, count);
|
|
|
|
/*
|
|
* Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
|
|
* actually allocates a folio instead of erroring out.
|
|
*/
|
|
if (pos + count > i_size_read(inode))
|
|
i_size_write(inode, pos + count);
|
|
|
|
pflags = memalloc_nofs_save();
|
|
while (count > 0) {
|
|
struct folio *folio;
|
|
unsigned int len;
|
|
unsigned int offset;
|
|
|
|
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
|
|
SGP_CACHE) < 0)
|
|
break;
|
|
if (filemap_check_wb_err(inode->i_mapping, 0)) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
break;
|
|
}
|
|
|
|
offset = offset_in_folio(folio, pos);
|
|
len = min_t(ssize_t, count, folio_size(folio) - offset);
|
|
memcpy(folio_address(folio) + offset, buf, len);
|
|
|
|
folio_mark_dirty(folio);
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
|
|
count -= len;
|
|
pos += len;
|
|
buf += len;
|
|
}
|
|
memalloc_nofs_restore(pflags);
|
|
|
|
if (count)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
/* Find the next written area in the xfile data for a given offset. */
|
|
loff_t
|
|
xfile_seek_data(
|
|
struct xfile *xf,
|
|
loff_t pos)
|
|
{
|
|
loff_t ret;
|
|
|
|
ret = vfs_llseek(xf->file, pos, SEEK_DATA);
|
|
trace_xfile_seek_data(xf, pos, ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Grab the (locked) page for a memory object. The object cannot span a page
|
|
* boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if we
|
|
* cannot grab the page, or the usual negative errno.
|
|
*/
|
|
int
|
|
xfile_get_page(
|
|
struct xfile *xf,
|
|
loff_t pos,
|
|
unsigned int len,
|
|
struct xfile_page *xfpage)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
struct address_space *mapping = inode->i_mapping;
|
|
const struct address_space_operations *aops = mapping->a_ops;
|
|
struct page *page = NULL;
|
|
void *fsdata = NULL;
|
|
loff_t key = round_down(pos, PAGE_SIZE);
|
|
unsigned int pflags;
|
|
int error;
|
|
|
|
if (inode->i_sb->s_maxbytes - pos < len)
|
|
return -ENOMEM;
|
|
if (len > PAGE_SIZE - offset_in_page(pos))
|
|
return -ENOTBLK;
|
|
|
|
trace_xfile_get_page(xf, pos, len);
|
|
|
|
pflags = memalloc_nofs_save();
|
|
|
|
/*
|
|
* We call write_begin directly here to avoid all the freezer
|
|
* protection lock-taking that happens in the normal path. shmem
|
|
* doesn't support fs freeze, but lockdep doesn't know that and will
|
|
* trip over that.
|
|
*/
|
|
error = aops->write_begin(NULL, mapping, key, PAGE_SIZE, &page,
|
|
&fsdata);
|
|
if (error)
|
|
goto out_pflags;
|
|
|
|
/* We got the page, so make sure we push out EOF. */
|
|
if (i_size_read(inode) < pos + len)
|
|
i_size_write(inode, pos + len);
|
|
|
|
/*
|
|
* If the page isn't up to date, fill it with zeroes before we hand it
|
|
* to the caller and make sure the backing store will hold on to them.
|
|
*/
|
|
if (!PageUptodate(page)) {
|
|
memset(page_address(page), 0, PAGE_SIZE);
|
|
SetPageUptodate(page);
|
|
}
|
|
|
|
/*
|
|
* Mark each page dirty so that the contents are written to some
|
|
* backing store when we drop this buffer, and take an extra reference
|
|
* to prevent the xfile page from being swapped or removed from the
|
|
* page cache by reclaim if the caller unlocks the page.
|
|
*/
|
|
set_page_dirty(page);
|
|
get_page(page);
|
|
|
|
xfpage->page = page;
|
|
xfpage->fsdata = fsdata;
|
|
xfpage->pos = key;
|
|
out_pflags:
|
|
memalloc_nofs_restore(pflags);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Release the (locked) page for a memory object. Returns 0 or a negative
|
|
* errno.
|
|
*/
|
|
int
|
|
xfile_put_page(
|
|
struct xfile *xf,
|
|
struct xfile_page *xfpage)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
struct address_space *mapping = inode->i_mapping;
|
|
const struct address_space_operations *aops = mapping->a_ops;
|
|
unsigned int pflags;
|
|
int ret;
|
|
|
|
trace_xfile_put_page(xf, xfpage->pos, PAGE_SIZE);
|
|
|
|
/* Give back the reference that we took in xfile_get_page. */
|
|
put_page(xfpage->page);
|
|
|
|
pflags = memalloc_nofs_save();
|
|
ret = aops->write_end(NULL, mapping, xfpage->pos, PAGE_SIZE, PAGE_SIZE,
|
|
xfpage->page, xfpage->fsdata);
|
|
memalloc_nofs_restore(pflags);
|
|
memset(xfpage, 0, sizeof(struct xfile_page));
|
|
|
|
if (ret < 0)
|
|
return ret;
|
|
if (ret != PAGE_SIZE)
|
|
return -EIO;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Grab the (locked) folio for a memory object. The object cannot span a folio
|
|
* boundary. Returns the locked folio if successful, NULL if there was no
|
|
* folio or it didn't cover the range requested, or an ERR_PTR on failure.
|
|
*/
|
|
struct folio *
|
|
xfile_get_folio(
|
|
struct xfile *xf,
|
|
loff_t pos,
|
|
size_t len,
|
|
unsigned int flags)
|
|
{
|
|
struct inode *inode = file_inode(xf->file);
|
|
struct folio *folio = NULL;
|
|
unsigned int pflags;
|
|
int error;
|
|
|
|
if (inode->i_sb->s_maxbytes - pos < len)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
trace_xfile_get_folio(xf, pos, len);
|
|
|
|
/*
|
|
* Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
|
|
* actually allocates a folio instead of erroring out.
|
|
*/
|
|
if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
|
|
i_size_write(inode, pos + len);
|
|
|
|
pflags = memalloc_nofs_save();
|
|
error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
|
|
(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
|
|
memalloc_nofs_restore(pflags);
|
|
if (error)
|
|
return ERR_PTR(error);
|
|
|
|
if (!folio)
|
|
return NULL;
|
|
|
|
if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
return NULL;
|
|
}
|
|
|
|
if (filemap_check_wb_err(inode->i_mapping, 0)) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
return ERR_PTR(-EIO);
|
|
}
|
|
|
|
/*
|
|
* Mark the folio dirty so that it won't be reclaimed once we drop the
|
|
* (potentially last) reference in xfile_put_folio.
|
|
*/
|
|
if (flags & XFILE_ALLOC)
|
|
folio_set_dirty(folio);
|
|
return folio;
|
|
}
|
|
|
|
/*
|
|
* Release the (locked) folio for a memory object.
|
|
*/
|
|
void
|
|
xfile_put_folio(
|
|
struct xfile *xf,
|
|
struct folio *folio)
|
|
{
|
|
trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
|
|
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
}
|