linux/fs/xfs/scrub/xfile.c
Darrick J. Wong 6907e3c00a xfs: add file_{get,put}_folio
Add helpers similar to xfile_{get,put}_page, but which deal with folios,
don't allocate a new folio unless explicitly asked to, and map to
shmem_get_folio instead of calling into the aops.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-02-21 11:36:54 +05:30

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
* Swappable Temporary Memory
* ==========================
*
* Online checking sometimes needs to be able to stage a large amount of data
* in memory. This information might not fit in the available memory and it
* doesn't all need to be accessible at all times. In other words, we want an
* indexed data buffer to store data that can be paged out.
*
* When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
* requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
* store our staging data. This file is not installed in the file descriptor
* table so that user programs cannot access the data, which means that the
* xfile must be freed with xfile_destroy.
*
* xfiles assume that the caller will handle all required concurrency
* management; standard vfs locks (freezer and inode) are not taken. Reads
* and writes are satisfied directly from the page cache.
*/
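
/*
 * Illustrative usage sketch, not part of the original file: create an
 * xfile, store a value at an arbitrary offset, read it back, and tear the
 * xfile down.  The function name, description string, offset, and value
 * type below are all hypothetical.
 */
static int __maybe_unused
xfile_example_roundtrip(void)
{
	struct xfile	*xf;
	u64		val = 42, readback = 0;
	int		error;

	error = xfile_create("example staging data", 0, &xf);
	if (error)
		return error;

	/* Store the value at byte offset 8192; the store extends i_size. */
	error = xfile_store(xf, &val, sizeof(val), 8192);
	if (!error)
		error = xfile_load(xf, &readback, sizeof(readback), 8192);

	xfile_destroy(xf);
	return error;
}
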
/*
* xfiles must not be exposed to userspace and require upper layers to
* coordinate access to the one handle returned by the constructor, so
* establish a separate lock class for xfiles to avoid confusing lockdep.
*/
static struct lock_class_key xfile_i_mutex_key;

/*
* Create an xfile of the given size. The description will be used in the
* trace output.
*/
int
xfile_create(
const char *description,
loff_t isize,
struct xfile **xfilep)
{
struct inode *inode;
struct xfile *xf;
int error;
xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
if (!xf)
return -ENOMEM;
xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
if (IS_ERR(xf->file)) {
error = PTR_ERR(xf->file);
goto out_xfile;
}
inode = file_inode(xf->file);
lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
/*
* We don't want to bother with kmapping data during repair, so don't
* allow highmem pages to back this mapping.
*/
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
trace_xfile_create(xf);
*xfilep = xf;
return 0;
out_xfile:
kfree(xf);
return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
struct xfile *xf)
{
struct inode *inode = file_inode(xf->file);
trace_xfile_destroy(xf);
lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
fput(xf->file);
kfree(xf);
}

/*
* Load an object. Since we're treating this file as "memory", any error or
* short IO is treated as a failure to allocate memory.
*/
int
xfile_load(
struct xfile *xf,
void *buf,
size_t count,
loff_t pos)
{
struct inode *inode = file_inode(xf->file);
unsigned int pflags;
if (count > MAX_RW_COUNT)
return -ENOMEM;
if (inode->i_sb->s_maxbytes - pos < count)
return -ENOMEM;
trace_xfile_load(xf, pos, count);
pflags = memalloc_nofs_save();
while (count > 0) {
struct folio *folio;
unsigned int len;
unsigned int offset;
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
SGP_READ) < 0)
break;
if (!folio) {
/*
* No data stored at this offset, just zero the output
* buffer until the next page boundary.
*/
len = min_t(ssize_t, count,
PAGE_SIZE - offset_in_page(pos));
memset(buf, 0, len);
} else {
if (filemap_check_wb_err(inode->i_mapping, 0)) {
folio_unlock(folio);
folio_put(folio);
break;
}
offset = offset_in_folio(folio, pos);
len = min_t(ssize_t, count, folio_size(folio) - offset);
memcpy(buf, folio_address(folio) + offset, len);
folio_unlock(folio);
folio_put(folio);
}
count -= len;
pos += len;
buf += len;
}
memalloc_nofs_restore(pflags);
if (count)
return -ENOMEM;
return 0;
}

/*
* Store an object. Since we're treating this file as "memory", any error or
* short IO is treated as a failure to allocate memory.
*/
int
xfile_store(
struct xfile *xf,
const void *buf,
size_t count,
loff_t pos)
{
struct inode *inode = file_inode(xf->file);
unsigned int pflags;
if (count > MAX_RW_COUNT)
return -ENOMEM;
if (inode->i_sb->s_maxbytes - pos < count)
return -ENOMEM;
trace_xfile_store(xf, pos, count);
/*
* Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
* actually allocates a folio instead of erroring out.
*/
if (pos + count > i_size_read(inode))
i_size_write(inode, pos + count);
pflags = memalloc_nofs_save();
while (count > 0) {
struct folio *folio;
unsigned int len;
unsigned int offset;
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
SGP_CACHE) < 0)
break;
if (filemap_check_wb_err(inode->i_mapping, 0)) {
folio_unlock(folio);
folio_put(folio);
break;
}
offset = offset_in_folio(folio, pos);
len = min_t(ssize_t, count, folio_size(folio) - offset);
memcpy(folio_address(folio) + offset, buf, len);
folio_mark_dirty(folio);
folio_unlock(folio);
folio_put(folio);
count -= len;
pos += len;
buf += len;
}
memalloc_nofs_restore(pflags);
if (count)
return -ENOMEM;
return 0;
}

/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
struct xfile *xf,
loff_t pos)
{
loff_t ret;
ret = vfs_llseek(xf->file, pos, SEEK_DATA);
trace_xfile_seek_data(xf, pos, ret);
return ret;
}
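
/*
 * Illustrative sketch, not part of the original file: walk only the
 * written regions of an xfile with xfile_seek_data, skipping holes.  The
 * function name and the page-at-a-time stride are hypothetical choices.
 */
static int __maybe_unused
xfile_example_walk_data(
	struct xfile		*xf,
	loff_t			isize)
{
	loff_t			pos = 0;

	while ((pos = xfile_seek_data(xf, pos)) >= 0 && pos < isize) {
		/* ...process the written data starting at pos here... */
		pos = round_up(pos + 1, PAGE_SIZE);
	}

	/* SEEK_DATA reports -ENXIO when there is no data at or after pos. */
	if (pos == -ENXIO)
		return 0;
	return pos < 0 ? pos : 0;
}
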
/*
* Grab the (locked) page for a memory object. The object cannot span a page
* boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if the
* object would span a page boundary, or the usual negative errno.
*/
int
xfile_get_page(
struct xfile *xf,
loff_t pos,
unsigned int len,
struct xfile_page *xfpage)
{
struct inode *inode = file_inode(xf->file);
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
struct page *page = NULL;
void *fsdata = NULL;
loff_t key = round_down(pos, PAGE_SIZE);
unsigned int pflags;
int error;
if (inode->i_sb->s_maxbytes - pos < len)
return -ENOMEM;
if (len > PAGE_SIZE - offset_in_page(pos))
return -ENOTBLK;
trace_xfile_get_page(xf, pos, len);
pflags = memalloc_nofs_save();
/*
* We call write_begin directly here to avoid all the freezer
* protection lock-taking that happens in the normal path. shmem
* doesn't support fs freeze, but lockdep doesn't know that and will
* trip over it.
*/
error = aops->write_begin(NULL, mapping, key, PAGE_SIZE, &page,
&fsdata);
if (error)
goto out_pflags;
/* We got the page, so make sure we push out EOF. */
if (i_size_read(inode) < pos + len)
i_size_write(inode, pos + len);
/*
* If the page isn't up to date, fill it with zeroes before we hand it
* to the caller and make sure the backing store will hold on to them.
*/
if (!PageUptodate(page)) {
memset(page_address(page), 0, PAGE_SIZE);
SetPageUptodate(page);
}
/*
* Mark the page dirty so that its contents are written to some
* backing store when we drop this buffer, and take an extra reference
* to prevent the xfile page from being swapped or removed from the
* page cache by reclaim if the caller unlocks the page.
*/
set_page_dirty(page);
get_page(page);
xfpage->page = page;
xfpage->fsdata = fsdata;
xfpage->pos = key;
out_pflags:
memalloc_nofs_restore(pflags);
return error;
}

/*
* Release the (locked) page for a memory object. Returns 0 or a negative
* errno.
*/
int
xfile_put_page(
struct xfile *xf,
struct xfile_page *xfpage)
{
struct inode *inode = file_inode(xf->file);
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
unsigned int pflags;
int ret;
trace_xfile_put_page(xf, xfpage->pos, PAGE_SIZE);
/* Give back the reference that we took in xfile_get_page. */
put_page(xfpage->page);
pflags = memalloc_nofs_save();
ret = aops->write_end(NULL, mapping, xfpage->pos, PAGE_SIZE, PAGE_SIZE,
xfpage->page, xfpage->fsdata);
memalloc_nofs_restore(pflags);
memset(xfpage, 0, sizeof(struct xfile_page));
if (ret < 0)
return ret;
if (ret != PAGE_SIZE)
return -EIO;
return 0;
}
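
/*
 * Illustrative sketch, not part of the original file: bump a small counter
 * in place by mapping its backing page with xfile_get_page and releasing
 * it with xfile_put_page.  The counter type and its offset are
 * hypothetical; the object must not cross a page boundary.
 */
static int __maybe_unused
xfile_example_bump_counter(
	struct xfile		*xf,
	loff_t			pos)
{
	struct xfile_page	xfpage;
	u32			*counter;
	int			error;

	error = xfile_get_page(xf, pos, sizeof(*counter), &xfpage);
	if (error)
		return error;

	/* xfile pages are never highmem, so page_address always works. */
	counter = page_address(xfpage.page) + offset_in_page(pos);
	(*counter)++;

	return xfile_put_page(xf, &xfpage);
}
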
/*
* Grab the (locked) folio for a memory object. The object cannot span a folio
* boundary. Returns the locked folio if successful, NULL if there was no
* folio or it didn't cover the range requested, or an ERR_PTR on failure.
*/
struct folio *
xfile_get_folio(
struct xfile *xf,
loff_t pos,
size_t len,
unsigned int flags)
{
struct inode *inode = file_inode(xf->file);
struct folio *folio = NULL;
unsigned int pflags;
int error;
if (inode->i_sb->s_maxbytes - pos < len)
return ERR_PTR(-ENOMEM);
trace_xfile_get_folio(xf, pos, len);
/*
* Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
* actually allocates a folio instead of erroring out.
*/
if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
i_size_write(inode, pos + len);
pflags = memalloc_nofs_save();
error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
memalloc_nofs_restore(pflags);
if (error)
return ERR_PTR(error);
if (!folio)
return NULL;
if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
folio_unlock(folio);
folio_put(folio);
return NULL;
}
if (filemap_check_wb_err(inode->i_mapping, 0)) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(-EIO);
}
/*
* Mark the folio dirty so that it won't be reclaimed once we drop the
* (potentially last) reference in xfile_put_folio.
*/
if (flags & XFILE_ALLOC)
folio_set_dirty(folio);
return folio;
}

/*
* Release the (locked) folio for a memory object.
*/
void
xfile_put_folio(
struct xfile *xf,
struct folio *folio)
{
trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
folio_unlock(folio);
folio_put(folio);
}
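
/*
 * Illustrative sketch, not part of the original file: zero-fill a short
 * region by grabbing (and, via XFILE_ALLOC, allocating) the backing folio
 * and writing through its kernel mapping.  The function name is
 * hypothetical; the region must not cross a folio boundary.
 */
static int __maybe_unused
xfile_example_zero_range(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len)
{
	struct folio		*folio;

	folio = xfile_get_folio(xf, pos, len, XFILE_ALLOC);
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	if (!folio)
		return -ENOMEM;

	memset(folio_address(folio) + offset_in_folio(folio, pos), 0, len);
	xfile_put_folio(xf, folio);
	return 0;
}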