a3841f94c7
* Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable 'userspace flush' of persistent memory updates via filesystem-dax mappings. It arranges for any filesystem metadata updates that may be required to satisfy a write fault to also be flushed ("on disk") before the kernel returns to userspace from the fault handler. Effectively every write-fault that dirties metadata completes an fsync() before returning from the fault handler. The new MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag is validated as supported by the filesystem's ->mmap() file operation. * Add support for the standard ACPI 6.2 label access methods that replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods. This enables interoperability with environments that only implement the standardized methods. * Add support for the ACPI 6.2 NVDIMM media error injection methods. * Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for latch last shutdown status, firmware update, SMART error injection, and SMART alarm threshold control. * Cleanup physical address information disclosures to be root-only. * Fix revalidation of the DIMM "locked label area" status to support dynamic unlock of the label area. * Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA (system-physical-address) command and error injection commands. Acknowledgements that came after the commits were pushed to -next: 957ac8c421ad dax: fix PMD faults on zero-length files Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> a39e596baa07 xfs: support for synchronous DAX faults Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> 7b565c9f965b xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault() Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJaDfvcAAoJEB7SkWpmfYgCk7sP/2qJhBH+VTTdg2osDnhAdAhI co/AGEmsHFlUCMBb/Ek7UnMAmhBYiJU2q4ywPsNFBpusXpMlqNy5Iwo7k4/wQHE/ SJcIM0g4zg0ViFuUhwV+C2T0R5UzFR8JLd9EYWj/YS6aJpurtotm5l4UStaM0Hzo AhxSXJLrBDuqCpbOxbctfiGEmdRL7aRfBEAARTNRKBn/iXxJUcYHlp62rtXQS+t4 I6LC/URCWTNTTMGmzW6TRsgSD9WMfd19xKcGzN3qL6ee0KFccxN4ctFqHA/sFGOh iYLeR0XJUjJxyp+PkWGteXPVZL0Kj3bD/lSTG+Co5bm/ra8a/sh3TSFfgFyoBZD1 EqMN8Ryf80hGp3FabeH2Iw2SviYPZpHSWgjddjxLD0RA6OmpzINc+Wm8eqApjMME sbZDTOijiab4QMQ0XamF4GuDHyQtawv5Y/w2Ehhl1tmiqW+5tKhsKqxkQt+/V3Yt RTVSRe2Pkway66b+cD64IdQ6L2tyonPnmi5IzgkKOhlOEGomy+4/U2Jt2bMbhzq6 ymszKmXp2XI8P06wU8sHrIUeXO5I9qoKn/fZA73Eb8aIzgJe3tBE/5+Ab7RG6HB9 1OVfcMWoXU1gNgNktTs63X1Lsg4aW9kt/K4fPHHcqUcaliEJpJTlAbg9GLF2buoW nQ+0fTRgMRihE3ZA0Fs3 =h2vZ -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm and dax updates from Dan Williams: "Save for a few late fixes, all of these commits have shipped in -next releases since before the merge window opened, and 0day has given a build success notification. The ext4 touches came from Jan, and the xfs touches have Darrick's reviewed-by. An xfstest for the MAP_SYNC feature has been through a few round of reviews and is on track to be merged. - Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable 'userspace flush' of persistent memory updates via filesystem-dax mappings. It arranges for any filesystem metadata updates that may be required to satisfy a write fault to also be flushed ("on disk") before the kernel returns to userspace from the fault handler. Effectively every write-fault that dirties metadata completes an fsync() before returning from the fault handler. The new MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag is validated as supported by the filesystem's ->mmap() file operation. - Add support for the standard ACPI 6.2 label access methods that replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods. This enables interoperability with environments that only implement the standardized methods. - Add support for the ACPI 6.2 NVDIMM media error injection methods. - Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for latch last shutdown status, firmware update, SMART error injection, and SMART alarm threshold control. - Cleanup physical address information disclosures to be root-only. - Fix revalidation of the DIMM "locked label area" status to support dynamic unlock of the label area. - Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA (system-physical-address) command and error injection commands. Acknowledgements that came after the commits were pushed to -next: - 957ac8c421ad ("dax: fix PMD faults on zero-length files"): Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> - a39e596baa07 ("xfs: support for synchronous DAX faults") and 7b565c9f965b ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()") Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>" * tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits) acpi, nfit: add 'Enable Latch System Shutdown Status' command support dax: fix general protection fault in dax_alloc_inode dax: fix PMD faults on zero-length files dax: stop requiring a live device for dax_flush() brd: remove dax support dax: quiet bdev_dax_supported() fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core tools/testing/nvdimm: unit test clear-error commands acpi, nfit: validate commands against the device type tools/testing/nvdimm: stricter bounds checking for error injection commands xfs: support for synchronous DAX faults xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault() ext4: Support for synchronous DAX faults ext4: Simplify error handling in ext4_dax_huge_fault() dax: Implement dax_finish_sync_fault() dax, iomap: Add support for synchronous faults mm: Define MAP_SYNC and VM_SYNC flags dax: Allow tuning whether dax_insert_mapping_entry() dirties entry dax: Allow dax_iomap_fault() to return pfn dax: Fix comment describing dax_iomap_fault() ...
208 lines
5.2 KiB
C
208 lines
5.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/ext2/file.c
|
|
*
|
|
* Copyright (C) 1992, 1993, 1994, 1995
|
|
* Remy Card (card@masi.ibp.fr)
|
|
* Laboratoire MASI - Institut Blaise Pascal
|
|
* Universite Pierre et Marie Curie (Paris VI)
|
|
*
|
|
* from
|
|
*
|
|
* linux/fs/minix/file.c
|
|
*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
*
|
|
* ext2 fs regular file handling primitives
|
|
*
|
|
* 64-bit file support on 64-bit platforms by Jakub Jelinek
|
|
* (jj@sunsite.ms.mff.cuni.cz)
|
|
*/
|
|
|
|
#include <linux/time.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/dax.h>
|
|
#include <linux/quotaops.h>
|
|
#include <linux/iomap.h>
|
|
#include <linux/uio.h>
|
|
#include "ext2.h"
|
|
#include "xattr.h"
|
|
#include "acl.h"
|
|
|
|
#ifdef CONFIG_FS_DAX
|
|
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
|
{
|
|
struct inode *inode = iocb->ki_filp->f_mapping->host;
|
|
ssize_t ret;
|
|
|
|
if (!iov_iter_count(to))
|
|
return 0; /* skip atime */
|
|
|
|
inode_lock_shared(inode);
|
|
ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
|
|
inode_unlock_shared(inode);
|
|
|
|
file_accessed(iocb->ki_filp);
|
|
return ret;
|
|
}
|
|
|
|
static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|
{
|
|
struct file *file = iocb->ki_filp;
|
|
struct inode *inode = file->f_mapping->host;
|
|
ssize_t ret;
|
|
|
|
inode_lock(inode);
|
|
ret = generic_write_checks(iocb, from);
|
|
if (ret <= 0)
|
|
goto out_unlock;
|
|
ret = file_remove_privs(file);
|
|
if (ret)
|
|
goto out_unlock;
|
|
ret = file_update_time(file);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
|
|
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
|
|
i_size_write(inode, iocb->ki_pos);
|
|
mark_inode_dirty(inode);
|
|
}
|
|
|
|
out_unlock:
|
|
inode_unlock(inode);
|
|
if (ret > 0)
|
|
ret = generic_write_sync(iocb, ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* The lock ordering for ext2 DAX fault paths is:
|
|
*
|
|
* mmap_sem (MM)
|
|
* sb_start_pagefault (vfs, freeze)
|
|
* ext2_inode_info->dax_sem
|
|
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
|
|
* ext2_inode_info->truncate_mutex
|
|
*
|
|
* The default page_lock and i_size verification done by non-DAX fault paths
|
|
* is sufficient because ext2 doesn't support hole punching.
|
|
*/
|
|
static int ext2_dax_fault(struct vm_fault *vmf)
|
|
{
|
|
struct inode *inode = file_inode(vmf->vma->vm_file);
|
|
struct ext2_inode_info *ei = EXT2_I(inode);
|
|
int ret;
|
|
|
|
if (vmf->flags & FAULT_FLAG_WRITE) {
|
|
sb_start_pagefault(inode->i_sb);
|
|
file_update_time(vmf->vma->vm_file);
|
|
}
|
|
down_read(&ei->dax_sem);
|
|
|
|
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops);
|
|
|
|
up_read(&ei->dax_sem);
|
|
if (vmf->flags & FAULT_FLAG_WRITE)
|
|
sb_end_pagefault(inode->i_sb);
|
|
return ret;
|
|
}
|
|
|
|
static const struct vm_operations_struct ext2_dax_vm_ops = {
|
|
.fault = ext2_dax_fault,
|
|
/*
|
|
* .huge_fault is not supported for DAX because allocation in ext2
|
|
* cannot be reliably aligned to huge page sizes and so pmd faults
|
|
* will always fail and fail back to regular faults.
|
|
*/
|
|
.page_mkwrite = ext2_dax_fault,
|
|
.pfn_mkwrite = ext2_dax_fault,
|
|
};
|
|
|
|
static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
|
|
{
|
|
if (!IS_DAX(file_inode(file)))
|
|
return generic_file_mmap(file, vma);
|
|
|
|
file_accessed(file);
|
|
vma->vm_ops = &ext2_dax_vm_ops;
|
|
vma->vm_flags |= VM_MIXEDMAP;
|
|
return 0;
|
|
}
|
|
#else
|
|
#define ext2_file_mmap generic_file_mmap
|
|
#endif
|
|
|
|
/*
|
|
* Called when filp is released. This happens when all file descriptors
|
|
* for a single struct file are closed. Note that different open() calls
|
|
* for the same file yield different struct file structures.
|
|
*/
|
|
static int ext2_release_file (struct inode * inode, struct file * filp)
|
|
{
|
|
if (filp->f_mode & FMODE_WRITE) {
|
|
mutex_lock(&EXT2_I(inode)->truncate_mutex);
|
|
ext2_discard_reservation(inode);
|
|
mutex_unlock(&EXT2_I(inode)->truncate_mutex);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
|
{
|
|
int ret;
|
|
struct super_block *sb = file->f_mapping->host->i_sb;
|
|
|
|
ret = generic_file_fsync(file, start, end, datasync);
|
|
if (ret == -EIO)
|
|
/* We don't really know where the IO error happened... */
|
|
ext2_error(sb, __func__,
|
|
"detected IO error when writing metadata buffers");
|
|
return ret;
|
|
}
|
|
|
|
static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
|
{
|
|
#ifdef CONFIG_FS_DAX
|
|
if (IS_DAX(iocb->ki_filp->f_mapping->host))
|
|
return ext2_dax_read_iter(iocb, to);
|
|
#endif
|
|
return generic_file_read_iter(iocb, to);
|
|
}
|
|
|
|
static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|
{
|
|
#ifdef CONFIG_FS_DAX
|
|
if (IS_DAX(iocb->ki_filp->f_mapping->host))
|
|
return ext2_dax_write_iter(iocb, from);
|
|
#endif
|
|
return generic_file_write_iter(iocb, from);
|
|
}
|
|
|
|
const struct file_operations ext2_file_operations = {
|
|
.llseek = generic_file_llseek,
|
|
.read_iter = ext2_file_read_iter,
|
|
.write_iter = ext2_file_write_iter,
|
|
.unlocked_ioctl = ext2_ioctl,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_ioctl = ext2_compat_ioctl,
|
|
#endif
|
|
.mmap = ext2_file_mmap,
|
|
.open = dquot_file_open,
|
|
.release = ext2_release_file,
|
|
.fsync = ext2_fsync,
|
|
.get_unmapped_area = thp_get_unmapped_area,
|
|
.splice_read = generic_file_splice_read,
|
|
.splice_write = iter_file_splice_write,
|
|
};
|
|
|
|
const struct inode_operations ext2_file_inode_operations = {
|
|
#ifdef CONFIG_EXT2_FS_XATTR
|
|
.listxattr = ext2_listxattr,
|
|
#endif
|
|
.setattr = ext2_setattr,
|
|
.get_acl = ext2_get_acl,
|
|
.set_acl = ext2_set_acl,
|
|
.fiemap = ext2_fiemap,
|
|
};
|