dax + libnvdimm for v5.17
- Simplify the dax_operations API - Eliminate bdev_dax_pgoff() in favor of the filesystem maintaining and applying a partition offset to all its DAX iomap operations. - Remove wrappers and device-mapper stacked callbacks for ->copy_from_iter() and ->copy_to_iter() in favor of moving block_device relative offset responsibility to the dax_direct_access() caller. - Remove the need for an @bdev in filesystem-DAX infrastructure - Remove unused uio helpers copy_from_iter_flushcache() and copy_mc_to_iter() as only the non-check_copy_size() versions are used for DAX. - Prepare XFS for the pending (next merge window) DAX+reflink support - Remove deprecated DEV_DAX_PMEM_COMPAT support - Cleanup a straggling misuse of the GUID api Tags offered after the branch was cut: Reviewed-by: Mike Snitzer <snitzer@redhat.com> Link: https://lore.kernel.org/r/Ydb/3P+8nvjCjYfO@redhat.com -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCYd3dTAAKCRDfioYZHlFs Z//UAP9zetoTE+O7zJG7CXja4jSopSadbdbh6QKSXaqfKBPvQQD+N4US3wA2bGv8 f/qCY62j2Hj3hUTGHs9RvTyw3JsSYAA= =QvDs -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull dax and libnvdimm updates from Dan Williams: "The bulk of this is a rework of the dax_operations API after discovering the obstacles it posed to the work-in-progress DAX+reflink support for XFS and other copy-on-write filesystem mechanics. Primarily the need to plumb a block_device through the API to handle partition offsets was a sticking point and Christoph untangled that dependency in addition to other cleanups to make landing the DAX+reflink support easier. The DAX_PMEM_COMPAT option has been around for 4 years and not only are distributions shipping userspace that understand the current configuration API, but some are not even bothering to turn this option on anymore, so it seems a good time to remove it per the deprecation schedule. 
Recall that this was added after the device-dax subsystem moved from /sys/class/dax to /sys/bus/dax for its sysfs organization. All recent functionality depends on /sys/bus/dax. Some other miscellaneous cleanups and reflink prep patches are included as well. Summary: - Simplify the dax_operations API: - Eliminate bdev_dax_pgoff() in favor of the filesystem maintaining and applying a partition offset to all its DAX iomap operations. - Remove wrappers and device-mapper stacked callbacks for ->copy_from_iter() and ->copy_to_iter() in favor of moving block_device relative offset responsibility to the dax_direct_access() caller. - Remove the need for an @bdev in filesystem-DAX infrastructure - Remove unused uio helpers copy_from_iter_flushcache() and copy_mc_to_iter() as only the non-check_copy_size() versions are used for DAX. - Prepare XFS for the pending (next merge window) DAX+reflink support - Remove deprecated DEV_DAX_PMEM_COMPAT support - Cleanup a straggling misuse of the GUID api" * tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (38 commits) iomap: Fix error handling in iomap_zero_iter() ACPI: NFIT: Import GUID before use dax: remove the copy_from_iter and copy_to_iter methods dax: remove the DAXDEV_F_SYNC flag dax: simplify dax_synchronous and set_dax_synchronous uio: remove copy_from_iter_flushcache() and copy_mc_to_iter() iomap: turn the byte variable in iomap_zero_iter into a ssize_t memremap: remove support for external pgmap refcounts fsdax: don't require CONFIG_BLOCK iomap: build the block based code conditionally dax: fix up some of the block device related ifdefs fsdax: shift partition offset handling into the file systems dax: return the partition offset from fs_dax_get_by_bdev iomap: add a IOMAP_DAX flag xfs: pass the mapping flags to xfs_bmbt_to_iomap xfs: use xfs_direct_write_iomap_ops for DAX zeroing xfs: move dax device handling into xfs_{alloc,free}_buftarg ext4: cleanup the dax handling in 
ext4_fill_super ext2: cleanup the dax handling in ext2_fill_super fsdax: decouple zeroing from the iomap buffered I/O code ...
This commit is contained in:
commit
3acbdbf42e
@ -1,22 +0,0 @@
|
||||
What: /sys/class/dax/
|
||||
Date: May, 2016
|
||||
KernelVersion: v4.7
|
||||
Contact: nvdimm@lists.linux.dev
|
||||
Description: Device DAX is the device-centric analogue of Filesystem
|
||||
DAX (CONFIG_FS_DAX). It allows memory ranges to be
|
||||
allocated and mapped without need of an intervening file
|
||||
system. Device DAX is strict, precise and predictable.
|
||||
Specifically this interface:
|
||||
|
||||
1. Guarantees fault granularity with respect to a given
|
||||
page size (pte, pmd, or pud) set at configuration time.
|
||||
|
||||
2. Enforces deterministic behavior by being strict about
|
||||
what fault scenarios are supported.
|
||||
|
||||
The /sys/class/dax/ interface enumerates all the
|
||||
device-dax instances in the system. The ABI is
|
||||
deprecated and will be removed after 2020. It is
|
||||
replaced with the DAX bus interface /sys/bus/dax/ where
|
||||
device-dax instances can be found under
|
||||
/sys/bus/dax/devices/
|
@ -678,10 +678,12 @@ static const char *spa_type_name(u16 type)
|
||||
|
||||
int nfit_spa_type(struct acpi_nfit_system_address *spa)
|
||||
{
|
||||
guid_t guid;
|
||||
int i;
|
||||
|
||||
import_guid(&guid, spa->range_guid);
|
||||
for (i = 0; i < NFIT_UUID_MAX; i++)
|
||||
if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
|
||||
if (guid_equal(to_nfit_uuid(i), &guid))
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
@ -1,8 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config DAX_DRIVER
|
||||
select DAX
|
||||
bool
|
||||
|
||||
menuconfig DAX
|
||||
tristate "DAX: direct access to differentiated memory"
|
||||
select SRCU
|
||||
@ -70,13 +66,4 @@ config DEV_DAX_KMEM
|
||||
|
||||
Say N if unsure.
|
||||
|
||||
config DEV_DAX_PMEM_COMPAT
|
||||
tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
|
||||
depends on m && DEV_DAX_PMEM=m
|
||||
default DEV_DAX_PMEM
|
||||
help
|
||||
Older versions of the libdaxctl library expect to find all
|
||||
device-dax instances under /sys/class/dax. If libdaxctl in
|
||||
your distribution is older than v58 say M, otherwise say N.
|
||||
|
||||
endif
|
||||
|
@ -2,10 +2,11 @@
|
||||
obj-$(CONFIG_DAX) += dax.o
|
||||
obj-$(CONFIG_DEV_DAX) += device_dax.o
|
||||
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
|
||||
|
||||
dax-y := super.o
|
||||
dax-y += bus.o
|
||||
device_dax-y := device.o
|
||||
dax_pmem-y := pmem.o
|
||||
|
||||
obj-y += pmem/
|
||||
obj-y += hmem/
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include "dax-private.h"
|
||||
#include "bus.h"
|
||||
|
||||
static struct class *dax_class;
|
||||
|
||||
static DEFINE_MUTEX(dax_bus_lock);
|
||||
|
||||
#define DAX_NAME_LEN 30
|
||||
@ -1323,14 +1321,17 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
|
||||
}
|
||||
|
||||
/*
|
||||
* No 'host' or dax_operations since there is no access to this
|
||||
* device outside of mmap of the resulting character device.
|
||||
* No dax_operations since there is no access to this device outside of
|
||||
* mmap of the resulting character device.
|
||||
*/
|
||||
dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
|
||||
dax_dev = alloc_dax(dev_dax, NULL);
|
||||
if (IS_ERR(dax_dev)) {
|
||||
rc = PTR_ERR(dax_dev);
|
||||
goto err_alloc_dax;
|
||||
}
|
||||
set_dax_synchronous(dax_dev);
|
||||
set_dax_nocache(dax_dev);
|
||||
set_dax_nomc(dax_dev);
|
||||
|
||||
/* a device_dax instance is dead while the driver is not attached */
|
||||
kill_dax(dax_dev);
|
||||
@ -1343,10 +1344,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
|
||||
|
||||
inode = dax_inode(dax_dev);
|
||||
dev->devt = inode->i_rdev;
|
||||
if (data->subsys == DEV_DAX_BUS)
|
||||
dev->bus = &dax_bus_type;
|
||||
else
|
||||
dev->class = dax_class;
|
||||
dev->bus = &dax_bus_type;
|
||||
dev->parent = parent;
|
||||
dev->type = &dev_dax_type;
|
||||
|
||||
@ -1445,22 +1443,10 @@ EXPORT_SYMBOL_GPL(dax_driver_unregister);
|
||||
|
||||
int __init dax_bus_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
|
||||
dax_class = class_create(THIS_MODULE, "dax");
|
||||
if (IS_ERR(dax_class))
|
||||
return PTR_ERR(dax_class);
|
||||
}
|
||||
|
||||
rc = bus_register(&dax_bus_type);
|
||||
if (rc)
|
||||
class_destroy(dax_class);
|
||||
return rc;
|
||||
return bus_register(&dax_bus_type);
|
||||
}
|
||||
|
||||
void __exit dax_bus_exit(void)
|
||||
{
|
||||
bus_unregister(&dax_bus_type);
|
||||
class_destroy(dax_class);
|
||||
}
|
||||
|
@ -16,24 +16,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
|
||||
struct range *range, int target_node, unsigned int align,
|
||||
unsigned long flags);
|
||||
|
||||
enum dev_dax_subsys {
|
||||
DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
|
||||
DEV_DAX_CLASS,
|
||||
};
|
||||
|
||||
struct dev_dax_data {
|
||||
struct dax_region *dax_region;
|
||||
struct dev_pagemap *pgmap;
|
||||
enum dev_dax_subsys subsys;
|
||||
resource_size_t size;
|
||||
int id;
|
||||
};
|
||||
|
||||
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
|
||||
|
||||
/* to be deleted when DEV_DAX_CLASS is removed */
|
||||
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
|
||||
|
||||
struct dax_device_driver {
|
||||
struct device_driver drv;
|
||||
struct list_head ids;
|
||||
@ -49,10 +40,6 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
|
||||
void dax_driver_unregister(struct dax_device_driver *dax_drv);
|
||||
void kill_dev_dax(struct dev_dax *dev_dax);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
|
||||
int dev_dax_probe(struct dev_dax *dev_dax);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* While run_dax() is potentially a generic operation that could be
|
||||
* defined in include/linux/dax.h we don't want to grow any users
|
||||
|
@ -433,11 +433,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
|
||||
inode = dax_inode(dax_dev);
|
||||
cdev = inode->i_cdev;
|
||||
cdev_init(cdev, &dax_fops);
|
||||
if (dev->class) {
|
||||
/* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
|
||||
cdev->owner = dev->parent->driver->owner;
|
||||
} else
|
||||
cdev->owner = dev->driver->owner;
|
||||
cdev->owner = dev->driver->owner;
|
||||
cdev_set_parent(cdev, &dev->kobj);
|
||||
rc = cdev_add(cdev, dev->devt, 1);
|
||||
if (rc)
|
||||
|
@ -3,11 +3,11 @@
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pfn_t.h>
|
||||
#include "../../nvdimm/pfn.h"
|
||||
#include "../../nvdimm/nd.h"
|
||||
#include "../bus.h"
|
||||
#include "../nvdimm/pfn.h"
|
||||
#include "../nvdimm/nd.h"
|
||||
#include "bus.h"
|
||||
|
||||
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
|
||||
static struct dev_dax *__dax_pmem_probe(struct device *dev)
|
||||
{
|
||||
struct range range;
|
||||
int rc, id, region_id;
|
||||
@ -63,7 +63,6 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
|
||||
.dax_region = dax_region,
|
||||
.id = id,
|
||||
.pgmap = &pgmap,
|
||||
.subsys = subsys,
|
||||
.size = range_len(&range),
|
||||
};
|
||||
dev_dax = devm_create_dev_dax(&data);
|
||||
@ -73,7 +72,32 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
|
||||
|
||||
return dev_dax;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_pmem_probe);
|
||||
|
||||
static int dax_pmem_probe(struct device *dev)
|
||||
{
|
||||
return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev));
|
||||
}
|
||||
|
||||
static struct nd_device_driver dax_pmem_driver = {
|
||||
.probe = dax_pmem_probe,
|
||||
.drv = {
|
||||
.name = "dax_pmem",
|
||||
},
|
||||
.type = ND_DRIVER_DAX_PMEM,
|
||||
};
|
||||
|
||||
static int __init dax_pmem_init(void)
|
||||
{
|
||||
return nd_driver_register(&dax_pmem_driver);
|
||||
}
|
||||
module_init(dax_pmem_init);
|
||||
|
||||
static void __exit dax_pmem_exit(void)
|
||||
{
|
||||
driver_unregister(&dax_pmem_driver.drv);
|
||||
}
|
||||
module_exit(dax_pmem_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
|
@ -1,7 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
|
||||
|
||||
dax_pmem-y := pmem.o
|
||||
dax_pmem_core-y := core.o
|
||||
|
@ -1,72 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
|
||||
#include <linux/percpu-refcount.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/nd.h>
|
||||
#include "../bus.h"
|
||||
|
||||
/* we need the private definitions to implement compat support */
|
||||
#include "../dax-private.h"
|
||||
|
||||
static int dax_pmem_compat_probe(struct device *dev)
|
||||
{
|
||||
struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS);
|
||||
int rc;
|
||||
|
||||
if (IS_ERR(dev_dax))
|
||||
return PTR_ERR(dev_dax);
|
||||
|
||||
if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
device_lock(&dev_dax->dev);
|
||||
rc = dev_dax_probe(dev_dax);
|
||||
device_unlock(&dev_dax->dev);
|
||||
|
||||
devres_close_group(&dev_dax->dev, dev_dax);
|
||||
if (rc)
|
||||
devres_release_group(&dev_dax->dev, dev_dax);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int dax_pmem_compat_release(struct device *dev, void *data)
|
||||
{
|
||||
device_lock(dev);
|
||||
devres_release_group(dev, to_dev_dax(dev));
|
||||
device_unlock(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dax_pmem_compat_remove(struct device *dev)
|
||||
{
|
||||
device_for_each_child(dev, NULL, dax_pmem_compat_release);
|
||||
}
|
||||
|
||||
static struct nd_device_driver dax_pmem_compat_driver = {
|
||||
.probe = dax_pmem_compat_probe,
|
||||
.remove = dax_pmem_compat_remove,
|
||||
.drv = {
|
||||
.name = "dax_pmem_compat",
|
||||
},
|
||||
.type = ND_DRIVER_DAX_PMEM,
|
||||
};
|
||||
|
||||
static int __init dax_pmem_compat_init(void)
|
||||
{
|
||||
return nd_driver_register(&dax_pmem_compat_driver);
|
||||
}
|
||||
module_init(dax_pmem_compat_init);
|
||||
|
||||
static void __exit dax_pmem_compat_exit(void)
|
||||
{
|
||||
driver_unregister(&dax_pmem_compat_driver.drv);
|
||||
}
|
||||
module_exit(dax_pmem_compat_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
|
@ -7,34 +7,4 @@
|
||||
#include <linux/nd.h>
|
||||
#include "../bus.h"
|
||||
|
||||
static int dax_pmem_probe(struct device *dev)
|
||||
{
|
||||
return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS));
|
||||
}
|
||||
|
||||
static struct nd_device_driver dax_pmem_driver = {
|
||||
.probe = dax_pmem_probe,
|
||||
.drv = {
|
||||
.name = "dax_pmem",
|
||||
},
|
||||
.type = ND_DRIVER_DAX_PMEM,
|
||||
};
|
||||
|
||||
static int __init dax_pmem_init(void)
|
||||
{
|
||||
return nd_driver_register(&dax_pmem_driver);
|
||||
}
|
||||
module_init(dax_pmem_init);
|
||||
|
||||
static void __exit dax_pmem_exit(void)
|
||||
{
|
||||
driver_unregister(&dax_pmem_driver.drv);
|
||||
}
|
||||
module_exit(dax_pmem_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
|
||||
/* For compat builds, don't load this module by default */
|
||||
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
|
||||
#endif
|
||||
|
@ -7,10 +7,8 @@
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/dax.h>
|
||||
@ -21,15 +19,12 @@
|
||||
* struct dax_device - anchor object for dax services
|
||||
* @inode: core vfs
|
||||
* @cdev: optional character interface for "device dax"
|
||||
* @host: optional name for lookups where the device path is not available
|
||||
* @private: dax driver private data
|
||||
* @flags: state and boolean properties
|
||||
*/
|
||||
struct dax_device {
|
||||
struct hlist_node list;
|
||||
struct inode inode;
|
||||
struct cdev cdev;
|
||||
const char *host;
|
||||
void *private;
|
||||
unsigned long flags;
|
||||
const struct dax_operations *ops;
|
||||
@ -42,10 +37,6 @@ static DEFINE_IDA(dax_minor_ida);
|
||||
static struct kmem_cache *dax_cache __read_mostly;
|
||||
static struct super_block *dax_superblock __read_mostly;
|
||||
|
||||
#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
|
||||
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
|
||||
static DEFINE_SPINLOCK(dax_host_lock);
|
||||
|
||||
int dax_read_lock(void)
|
||||
{
|
||||
return srcu_read_lock(&dax_srcu);
|
||||
@ -58,169 +49,54 @@ void dax_read_unlock(int id)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_read_unlock);
|
||||
|
||||
static int dax_host_hash(const char *host)
|
||||
{
|
||||
return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
|
||||
pgoff_t *pgoff)
|
||||
static DEFINE_XARRAY(dax_hosts);
|
||||
|
||||
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
|
||||
{
|
||||
sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
|
||||
phys_addr_t phys_off = (start_sect + sector) * 512;
|
||||
|
||||
if (pgoff)
|
||||
*pgoff = PHYS_PFN(phys_off);
|
||||
if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL);
|
||||
}
|
||||
EXPORT_SYMBOL(bdev_dax_pgoff);
|
||||
EXPORT_SYMBOL_GPL(dax_add_host);
|
||||
|
||||
void dax_remove_host(struct gendisk *disk)
|
||||
{
|
||||
xa_erase(&dax_hosts, (unsigned long)disk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_remove_host);
|
||||
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
/**
|
||||
* dax_get_by_host() - temporary lookup mechanism for filesystem-dax
|
||||
* @host: alternate name for the device registered by a dax driver
|
||||
* fs_dax_get_by_bdev() - temporary lookup mechanism for filesystem-dax
|
||||
* @bdev: block device to find a dax_device for
|
||||
* @start_off: returns the byte offset into the dax_device at which @bdev starts
|
||||
*/
|
||||
static struct dax_device *dax_get_by_host(const char *host)
|
||||
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
|
||||
{
|
||||
struct dax_device *dax_dev, *found = NULL;
|
||||
int hash, id;
|
||||
|
||||
if (!host)
|
||||
return NULL;
|
||||
|
||||
hash = dax_host_hash(host);
|
||||
|
||||
id = dax_read_lock();
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
|
||||
if (!dax_alive(dax_dev)
|
||||
|| strcmp(host, dax_dev->host) != 0)
|
||||
continue;
|
||||
|
||||
if (igrab(&dax_dev->inode))
|
||||
found = dax_dev;
|
||||
break;
|
||||
}
|
||||
spin_unlock(&dax_host_lock);
|
||||
dax_read_unlock(id);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
|
||||
{
|
||||
if (!blk_queue_dax(bdev->bd_disk->queue))
|
||||
return NULL;
|
||||
return dax_get_by_host(bdev->bd_disk->disk_name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
|
||||
|
||||
bool generic_fsdax_supported(struct dax_device *dax_dev,
|
||||
struct block_device *bdev, int blocksize, sector_t start,
|
||||
sector_t sectors)
|
||||
{
|
||||
bool dax_enabled = false;
|
||||
pgoff_t pgoff, pgoff_end;
|
||||
void *kaddr, *end_kaddr;
|
||||
pfn_t pfn, end_pfn;
|
||||
sector_t last_page;
|
||||
long len, len2;
|
||||
int err, id;
|
||||
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dax_dev) {
|
||||
pr_debug("%pg: error: dax unsupported by block device\n", bdev);
|
||||
return false;
|
||||
}
|
||||
|
||||
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
|
||||
if (err) {
|
||||
pr_info("%pg: error: unaligned partition for dax\n", bdev);
|
||||
return false;
|
||||
}
|
||||
|
||||
last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
|
||||
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
|
||||
if (err) {
|
||||
pr_info("%pg: error: unaligned partition for dax\n", bdev);
|
||||
return false;
|
||||
}
|
||||
|
||||
id = dax_read_lock();
|
||||
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
|
||||
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
|
||||
|
||||
if (len < 1 || len2 < 1) {
|
||||
pr_info("%pg: error: dax access failed (%ld)\n",
|
||||
bdev, len < 1 ? len : len2);
|
||||
dax_read_unlock(id);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
|
||||
/*
|
||||
* An arch that has enabled the pmem api should also
|
||||
* have its drivers support pfn_t_devmap()
|
||||
*
|
||||
* This is a developer warning and should not trigger in
|
||||
* production. dax_flush() will crash since it depends
|
||||
* on being able to do (page_address(pfn_to_page())).
|
||||
*/
|
||||
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
|
||||
dax_enabled = true;
|
||||
} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
|
||||
struct dev_pagemap *pgmap, *end_pgmap;
|
||||
|
||||
pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
|
||||
end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
|
||||
if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
|
||||
&& pfn_t_to_page(pfn)->pgmap == pgmap
|
||||
&& pfn_t_to_page(end_pfn)->pgmap == pgmap
|
||||
&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
|
||||
&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
|
||||
dax_enabled = true;
|
||||
put_dev_pagemap(pgmap);
|
||||
put_dev_pagemap(end_pgmap);
|
||||
|
||||
}
|
||||
dax_read_unlock(id);
|
||||
|
||||
if (!dax_enabled) {
|
||||
pr_info("%pg: error: dax support not enabled\n", bdev);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(generic_fsdax_supported);
|
||||
|
||||
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
|
||||
int blocksize, sector_t start, sector_t len)
|
||||
{
|
||||
bool ret = false;
|
||||
struct dax_device *dax_dev;
|
||||
u64 part_size;
|
||||
int id;
|
||||
|
||||
if (!dax_dev)
|
||||
return false;
|
||||
if (!blk_queue_dax(bdev->bd_disk->queue))
|
||||
return NULL;
|
||||
|
||||
*start_off = get_start_sect(bdev) * SECTOR_SIZE;
|
||||
part_size = bdev_nr_sectors(bdev) * SECTOR_SIZE;
|
||||
if (*start_off % PAGE_SIZE || part_size % PAGE_SIZE) {
|
||||
pr_info("%pg: error: unaligned partition for dax\n", bdev);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
id = dax_read_lock();
|
||||
if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
|
||||
ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
|
||||
start, len);
|
||||
dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
|
||||
if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
|
||||
dax_dev = NULL;
|
||||
dax_read_unlock(id);
|
||||
return ret;
|
||||
|
||||
return dax_dev;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_supported);
|
||||
#endif /* CONFIG_FS_DAX */
|
||||
#endif /* CONFIG_BLOCK */
|
||||
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
|
||||
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
|
||||
|
||||
enum dax_device_flags {
|
||||
/* !alive + rcu grace period == no new operations / mappings */
|
||||
@ -229,6 +105,10 @@ enum dax_device_flags {
|
||||
DAXDEV_WRITE_CACHE,
|
||||
/* flag to check if device supports synchronous flush */
|
||||
DAXDEV_SYNC,
|
||||
/* do not leave the caches dirty after writes */
|
||||
DAXDEV_NOCACHE,
|
||||
/* handle CPU fetch exceptions during reads */
|
||||
DAXDEV_NOMC,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -270,9 +150,15 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
|
||||
if (!dax_alive(dax_dev))
|
||||
return 0;
|
||||
|
||||
return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
/*
|
||||
* The userspace address for the memory copy has already been validated
|
||||
* via access_ok() in vfs_write, so use the 'no check' version to bypass
|
||||
* the HARDENED_USERCOPY overhead.
|
||||
*/
|
||||
if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
|
||||
return _copy_from_iter_flushcache(addr, bytes, i);
|
||||
return _copy_from_iter(addr, bytes, i);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
|
||||
|
||||
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
|
||||
size_t bytes, struct iov_iter *i)
|
||||
@ -280,9 +166,15 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
|
||||
if (!dax_alive(dax_dev))
|
||||
return 0;
|
||||
|
||||
return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
/*
|
||||
* The userspace address for the memory copy has already been validated
|
||||
* via access_ok() in vfs_read, so use the 'no check' version to bypass
|
||||
* the HARDENED_USERCOPY overhead.
|
||||
*/
|
||||
if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
|
||||
return _copy_mc_to_iter(addr, bytes, i);
|
||||
return _copy_to_iter(addr, bytes, i);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
|
||||
|
||||
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
size_t nr_pages)
|
||||
@ -332,17 +224,29 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
|
||||
|
||||
bool __dax_synchronous(struct dax_device *dax_dev)
|
||||
bool dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
return test_bit(DAXDEV_SYNC, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_synchronous);
|
||||
EXPORT_SYMBOL_GPL(dax_synchronous);
|
||||
|
||||
void __set_dax_synchronous(struct dax_device *dax_dev)
|
||||
void set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
set_bit(DAXDEV_SYNC, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__set_dax_synchronous);
|
||||
EXPORT_SYMBOL_GPL(set_dax_synchronous);
|
||||
|
||||
void set_dax_nocache(struct dax_device *dax_dev)
|
||||
{
|
||||
set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_dax_nocache);
|
||||
|
||||
void set_dax_nomc(struct dax_device *dax_dev)
|
||||
{
|
||||
set_bit(DAXDEV_NOMC, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_dax_nomc);
|
||||
|
||||
bool dax_alive(struct dax_device *dax_dev)
|
||||
{
|
||||
@ -363,12 +267,7 @@ void kill_dax(struct dax_device *dax_dev)
|
||||
return;
|
||||
|
||||
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
|
||||
|
||||
synchronize_srcu(&dax_srcu);
|
||||
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_del_init(&dax_dev->list);
|
||||
spin_unlock(&dax_host_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kill_dax);
|
||||
|
||||
@ -400,8 +299,6 @@ static struct dax_device *to_dax_dev(struct inode *inode)
|
||||
static void dax_free_inode(struct inode *inode)
|
||||
{
|
||||
struct dax_device *dax_dev = to_dax_dev(inode);
|
||||
kfree(dax_dev->host);
|
||||
dax_dev->host = NULL;
|
||||
if (inode->i_rdev)
|
||||
ida_simple_remove(&dax_minor_ida, iminor(inode));
|
||||
kmem_cache_free(dax_cache, dax_dev);
|
||||
@ -476,65 +373,30 @@ static struct dax_device *dax_dev_get(dev_t devt)
|
||||
return dax_dev;
|
||||
}
|
||||
|
||||
static void dax_add_host(struct dax_device *dax_dev, const char *host)
|
||||
{
|
||||
int hash;
|
||||
|
||||
/*
|
||||
* Unconditionally init dax_dev since it's coming from a
|
||||
* non-zeroed slab cache
|
||||
*/
|
||||
INIT_HLIST_NODE(&dax_dev->list);
|
||||
dax_dev->host = host;
|
||||
if (!host)
|
||||
return;
|
||||
|
||||
hash = dax_host_hash(host);
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
|
||||
spin_unlock(&dax_host_lock);
|
||||
}
|
||||
|
||||
struct dax_device *alloc_dax(void *private, const char *__host,
|
||||
const struct dax_operations *ops, unsigned long flags)
|
||||
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
const char *host;
|
||||
dev_t devt;
|
||||
int minor;
|
||||
|
||||
if (ops && !ops->zero_page_range) {
|
||||
pr_debug("%s: error: device does not provide dax"
|
||||
" operation zero_page_range()\n",
|
||||
__host ? __host : "Unknown");
|
||||
if (WARN_ON_ONCE(ops && !ops->zero_page_range))
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
host = kstrdup(__host, GFP_KERNEL);
|
||||
if (__host && !host)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
|
||||
if (minor < 0)
|
||||
goto err_minor;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
devt = MKDEV(MAJOR(dax_devt), minor);
|
||||
dax_dev = dax_dev_get(devt);
|
||||
if (!dax_dev)
|
||||
goto err_dev;
|
||||
|
||||
dax_add_host(dax_dev, host);
|
||||
dax_dev->ops = ops;
|
||||
dax_dev->private = private;
|
||||
if (flags & DAXDEV_F_SYNC)
|
||||
set_dax_synchronous(dax_dev);
|
||||
|
||||
return dax_dev;
|
||||
|
||||
err_dev:
|
||||
ida_simple_remove(&dax_minor_ida, minor);
|
||||
err_minor:
|
||||
kfree(host);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alloc_dax);
|
||||
|
@ -162,71 +162,34 @@ static int linear_iterate_devices(struct dm_target *ti,
|
||||
return fn(ti, lc->dev, lc->start, ti->len, data);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX_DRIVER)
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
|
||||
{
|
||||
struct linear_c *lc = ti->private;
|
||||
sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT);
|
||||
|
||||
*pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT;
|
||||
return lc->dev->dax_dev;
|
||||
}
|
||||
|
||||
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
long ret;
|
||||
struct linear_c *lc = ti->private;
|
||||
struct block_device *bdev = lc->dev->bdev;
|
||||
struct dax_device *dax_dev = lc->dev->dax_dev;
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
|
||||
|
||||
dev_sector = linear_map_sector(ti, sector);
|
||||
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
struct linear_c *lc = ti->private;
|
||||
struct block_device *bdev = lc->dev->bdev;
|
||||
struct dax_device *dax_dev = lc->dev->dax_dev;
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
|
||||
dev_sector = linear_map_sector(ti, sector);
|
||||
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
struct linear_c *lc = ti->private;
|
||||
struct block_device *bdev = lc->dev->bdev;
|
||||
struct dax_device *dax_dev = lc->dev->dax_dev;
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
|
||||
dev_sector = linear_map_sector(ti, sector);
|
||||
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
size_t nr_pages)
|
||||
{
|
||||
int ret;
|
||||
struct linear_c *lc = ti->private;
|
||||
struct block_device *bdev = lc->dev->bdev;
|
||||
struct dax_device *dax_dev = lc->dev->dax_dev;
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
|
||||
|
||||
dev_sector = linear_map_sector(ti, sector);
|
||||
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||
}
|
||||
|
||||
#else
|
||||
#define linear_dax_direct_access NULL
|
||||
#define linear_dax_copy_from_iter NULL
|
||||
#define linear_dax_copy_to_iter NULL
|
||||
#define linear_dax_zero_page_range NULL
|
||||
#endif
|
||||
|
||||
@ -244,8 +207,6 @@ static struct target_type linear_target = {
|
||||
.prepare_ioctl = linear_prepare_ioctl,
|
||||
.iterate_devices = linear_iterate_devices,
|
||||
.direct_access = linear_dax_direct_access,
|
||||
.dax_copy_from_iter = linear_dax_copy_from_iter,
|
||||
.dax_copy_to_iter = linear_dax_copy_to_iter,
|
||||
.dax_zero_page_range = linear_dax_zero_page_range,
|
||||
};
|
||||
|
||||
|
@ -901,120 +901,34 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
|
||||
limits->io_min = limits->physical_block_size;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX_DRIVER)
|
||||
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
|
||||
struct iov_iter *i)
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
|
||||
pgoff_t *pgoff)
|
||||
{
|
||||
struct pending_block *block;
|
||||
struct log_writes_c *lc = ti->private;
|
||||
|
||||
if (!bytes)
|
||||
return 0;
|
||||
|
||||
block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
|
||||
if (!block) {
|
||||
DMERR("Error allocating dax pending block");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
block->data = kzalloc(bytes, GFP_KERNEL);
|
||||
if (!block->data) {
|
||||
DMERR("Error allocating dax data space");
|
||||
kfree(block);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* write data provided via the iterator */
|
||||
if (!copy_from_iter(block->data, bytes, i)) {
|
||||
DMERR("Error copying dax data");
|
||||
kfree(block->data);
|
||||
kfree(block);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* rewind the iterator so that the block driver can use it */
|
||||
iov_iter_revert(i, bytes);
|
||||
|
||||
block->datalen = bytes;
|
||||
block->sector = bio_to_dev_sectors(lc, sector);
|
||||
block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
|
||||
|
||||
atomic_inc(&lc->pending_blocks);
|
||||
spin_lock_irq(&lc->blocks_lock);
|
||||
list_add_tail(&block->list, &lc->unflushed_blocks);
|
||||
spin_unlock_irq(&lc->blocks_lock);
|
||||
wake_up_process(lc->log_kthread);
|
||||
|
||||
return 0;
|
||||
*pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT);
|
||||
return lc->dev->dax_dev;
|
||||
}
|
||||
|
||||
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct log_writes_c *lc = ti->private;
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
int ret;
|
||||
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
|
||||
|
||||
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
|
||||
pgoff_t pgoff, void *addr, size_t bytes,
|
||||
struct iov_iter *i)
|
||||
{
|
||||
struct log_writes_c *lc = ti->private;
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
int err;
|
||||
|
||||
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
|
||||
/* Don't bother doing anything if logging has been disabled */
|
||||
if (!lc->logging_enabled)
|
||||
goto dax_copy;
|
||||
|
||||
err = log_dax(lc, sector, bytes, i);
|
||||
if (err) {
|
||||
DMWARN("Error %d logging DAX write", err);
|
||||
return 0;
|
||||
}
|
||||
dax_copy:
|
||||
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
|
||||
pgoff_t pgoff, void *addr, size_t bytes,
|
||||
struct iov_iter *i)
|
||||
{
|
||||
struct log_writes_c *lc = ti->private;
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
|
||||
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
size_t nr_pages)
|
||||
{
|
||||
int ret;
|
||||
struct log_writes_c *lc = ti->private;
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
|
||||
|
||||
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
|
||||
&pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_zero_page_range(lc->dev->dax_dev, pgoff,
|
||||
nr_pages << PAGE_SHIFT);
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
#else
|
||||
#define log_writes_dax_direct_access NULL
|
||||
#define log_writes_dax_copy_from_iter NULL
|
||||
#define log_writes_dax_copy_to_iter NULL
|
||||
#define log_writes_dax_zero_page_range NULL
|
||||
#endif
|
||||
|
||||
@ -1032,8 +946,6 @@ static struct target_type log_writes_target = {
|
||||
.iterate_devices = log_writes_iterate_devices,
|
||||
.io_hints = log_writes_io_hints,
|
||||
.direct_access = log_writes_dax_direct_access,
|
||||
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
|
||||
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
|
||||
.dax_zero_page_range = log_writes_dax_zero_page_range,
|
||||
};
|
||||
|
||||
|
@ -300,91 +300,40 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
|
||||
return DM_MAPIO_REMAPPED;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX_DRIVER)
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
|
||||
{
|
||||
struct stripe_c *sc = ti->private;
|
||||
struct block_device *bdev;
|
||||
sector_t dev_sector;
|
||||
uint32_t stripe;
|
||||
|
||||
stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector);
|
||||
dev_sector += sc->stripe[stripe].physical_start;
|
||||
bdev = sc->stripe[stripe].dev->bdev;
|
||||
|
||||
*pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
|
||||
return sc->stripe[stripe].dev->dax_dev;
|
||||
}
|
||||
|
||||
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct stripe_c *sc = ti->private;
|
||||
struct dax_device *dax_dev;
|
||||
struct block_device *bdev;
|
||||
uint32_t stripe;
|
||||
long ret;
|
||||
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
|
||||
|
||||
stripe_map_sector(sc, sector, &stripe, &dev_sector);
|
||||
dev_sector += sc->stripe[stripe].physical_start;
|
||||
dax_dev = sc->stripe[stripe].dev->dax_dev;
|
||||
bdev = sc->stripe[stripe].dev->bdev;
|
||||
|
||||
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct stripe_c *sc = ti->private;
|
||||
struct dax_device *dax_dev;
|
||||
struct block_device *bdev;
|
||||
uint32_t stripe;
|
||||
|
||||
stripe_map_sector(sc, sector, &stripe, &dev_sector);
|
||||
dev_sector += sc->stripe[stripe].physical_start;
|
||||
dax_dev = sc->stripe[stripe].dev->dax_dev;
|
||||
bdev = sc->stripe[stripe].dev->bdev;
|
||||
|
||||
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct stripe_c *sc = ti->private;
|
||||
struct dax_device *dax_dev;
|
||||
struct block_device *bdev;
|
||||
uint32_t stripe;
|
||||
|
||||
stripe_map_sector(sc, sector, &stripe, &dev_sector);
|
||||
dev_sector += sc->stripe[stripe].physical_start;
|
||||
dax_dev = sc->stripe[stripe].dev->dax_dev;
|
||||
bdev = sc->stripe[stripe].dev->bdev;
|
||||
|
||||
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
|
||||
return 0;
|
||||
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
size_t nr_pages)
|
||||
{
|
||||
int ret;
|
||||
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
|
||||
struct stripe_c *sc = ti->private;
|
||||
struct dax_device *dax_dev;
|
||||
struct block_device *bdev;
|
||||
uint32_t stripe;
|
||||
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
|
||||
|
||||
stripe_map_sector(sc, sector, &stripe, &dev_sector);
|
||||
dev_sector += sc->stripe[stripe].physical_start;
|
||||
dax_dev = sc->stripe[stripe].dev->dax_dev;
|
||||
bdev = sc->stripe[stripe].dev->bdev;
|
||||
|
||||
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
|
||||
if (ret)
|
||||
return ret;
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||
}
|
||||
|
||||
#else
|
||||
#define stripe_dax_direct_access NULL
|
||||
#define stripe_dax_copy_from_iter NULL
|
||||
#define stripe_dax_copy_to_iter NULL
|
||||
#define stripe_dax_zero_page_range NULL
|
||||
#endif
|
||||
|
||||
@ -521,8 +470,6 @@ static struct target_type stripe_target = {
|
||||
.iterate_devices = stripe_iterate_devices,
|
||||
.io_hints = stripe_io_hints,
|
||||
.direct_access = stripe_dax_direct_access,
|
||||
.dax_copy_from_iter = stripe_dax_copy_from_iter,
|
||||
.dax_copy_to_iter = stripe_dax_copy_to_iter,
|
||||
.dax_zero_page_range = stripe_dax_zero_page_range,
|
||||
};
|
||||
|
||||
|
@ -806,12 +806,14 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
|
||||
EXPORT_SYMBOL_GPL(dm_table_set_type);
|
||||
|
||||
/* validate the dax capability of the target device span */
|
||||
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
int blocksize = *(int *) data;
|
||||
if (dev->dax_dev)
|
||||
return false;
|
||||
|
||||
return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
|
||||
DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check devices support synchronous DAX */
|
||||
@ -821,8 +823,8 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
|
||||
return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
|
||||
}
|
||||
|
||||
bool dm_table_supports_dax(struct dm_table *t,
|
||||
iterate_devices_callout_fn iterate_fn, int *blocksize)
|
||||
static bool dm_table_supports_dax(struct dm_table *t,
|
||||
iterate_devices_callout_fn iterate_fn)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i;
|
||||
@ -835,7 +837,7 @@ bool dm_table_supports_dax(struct dm_table *t,
|
||||
return false;
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
ti->type->iterate_devices(ti, iterate_fn, blocksize))
|
||||
ti->type->iterate_devices(ti, iterate_fn, NULL))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -862,7 +864,6 @@ static int dm_table_determine_type(struct dm_table *t)
|
||||
struct dm_target *tgt;
|
||||
struct list_head *devices = dm_table_get_devices(t);
|
||||
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
|
||||
int page_size = PAGE_SIZE;
|
||||
|
||||
if (t->type != DM_TYPE_NONE) {
|
||||
/* target already set the table's type */
|
||||
@ -906,7 +907,7 @@ static int dm_table_determine_type(struct dm_table *t)
|
||||
verify_bio_based:
|
||||
/* We must use this table as bio-based */
|
||||
t->type = DM_TYPE_BIO_BASED;
|
||||
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
|
||||
if (dm_table_supports_dax(t, device_not_dax_capable) ||
|
||||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
|
||||
t->type = DM_TYPE_DAX_BIO_BASED;
|
||||
}
|
||||
@ -1976,7 +1977,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
struct queue_limits *limits)
|
||||
{
|
||||
bool wc = false, fua = false;
|
||||
int page_size = PAGE_SIZE;
|
||||
int r;
|
||||
|
||||
/*
|
||||
@ -2010,9 +2010,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
}
|
||||
blk_queue_write_cache(q, wc, fua);
|
||||
|
||||
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
|
||||
if (dm_table_supports_dax(t, device_not_dax_capable)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
|
||||
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
|
||||
set_dax_synchronous(t->md->dax_dev);
|
||||
}
|
||||
else
|
||||
|
@ -38,7 +38,7 @@
|
||||
#define BITMAP_GRANULARITY PAGE_SIZE
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER)
|
||||
#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX)
|
||||
#define DM_WRITECACHE_HAS_PMEM
|
||||
#endif
|
||||
|
||||
|
@ -637,7 +637,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
|
||||
struct mapped_device *md)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
|
||||
u64 part_off;
|
||||
int r;
|
||||
|
||||
BUG_ON(td->dm_dev.bdev);
|
||||
@ -653,7 +653,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
|
||||
}
|
||||
|
||||
td->dm_dev.bdev = bdev;
|
||||
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
|
||||
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1027,74 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
|
||||
int blocksize, sector_t start, sector_t len)
|
||||
{
|
||||
struct mapped_device *md = dax_get_private(dax_dev);
|
||||
struct dm_table *map;
|
||||
bool ret = false;
|
||||
int srcu_idx;
|
||||
|
||||
map = dm_get_live_table(md, &srcu_idx);
|
||||
if (!map)
|
||||
goto out;
|
||||
|
||||
ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
|
||||
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
struct mapped_device *md = dax_get_private(dax_dev);
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
struct dm_target *ti;
|
||||
long ret = 0;
|
||||
int srcu_idx;
|
||||
|
||||
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
|
||||
|
||||
if (!ti)
|
||||
goto out;
|
||||
if (!ti->type->dax_copy_from_iter) {
|
||||
ret = copy_from_iter(addr, bytes, i);
|
||||
goto out;
|
||||
}
|
||||
ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
struct mapped_device *md = dax_get_private(dax_dev);
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
struct dm_target *ti;
|
||||
long ret = 0;
|
||||
int srcu_idx;
|
||||
|
||||
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
|
||||
|
||||
if (!ti)
|
||||
goto out;
|
||||
if (!ti->type->dax_copy_to_iter) {
|
||||
ret = copy_to_iter(addr, bytes, i);
|
||||
goto out;
|
||||
}
|
||||
ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
size_t nr_pages)
|
||||
{
|
||||
@ -1683,6 +1615,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
|
||||
bioset_exit(&md->io_bs);
|
||||
|
||||
if (md->dax_dev) {
|
||||
dax_remove_host(md->disk);
|
||||
kill_dax(md->dax_dev);
|
||||
put_dax(md->dax_dev);
|
||||
md->dax_dev = NULL;
|
||||
@ -1784,10 +1717,15 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
md->disk->private_data = md;
|
||||
sprintf(md->disk->disk_name, "dm-%d", minor);
|
||||
|
||||
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
|
||||
md->dax_dev = alloc_dax(md, md->disk->disk_name,
|
||||
&dm_dax_ops, 0);
|
||||
if (IS_ERR(md->dax_dev))
|
||||
if (IS_ENABLED(CONFIG_FS_DAX)) {
|
||||
md->dax_dev = alloc_dax(md, &dm_dax_ops);
|
||||
if (IS_ERR(md->dax_dev)) {
|
||||
md->dax_dev = NULL;
|
||||
goto bad;
|
||||
}
|
||||
set_dax_nocache(md->dax_dev);
|
||||
set_dax_nomc(md->dax_dev);
|
||||
if (dax_add_host(md->dax_dev, md->disk))
|
||||
goto bad;
|
||||
}
|
||||
|
||||
@ -3041,9 +2979,6 @@ static const struct block_device_operations dm_rq_blk_dops = {
|
||||
|
||||
static const struct dax_operations dm_dax_ops = {
|
||||
.direct_access = dm_dax_direct_access,
|
||||
.dax_supported = dm_dax_supported,
|
||||
.copy_from_iter = dm_dax_copy_from_iter,
|
||||
.copy_to_iter = dm_dax_copy_to_iter,
|
||||
.zero_page_range = dm_dax_zero_page_range,
|
||||
};
|
||||
|
||||
|
@ -73,10 +73,6 @@ bool dm_table_bio_based(struct dm_table *t);
|
||||
bool dm_table_request_based(struct dm_table *t);
|
||||
void dm_table_free_md_mempools(struct dm_table *t);
|
||||
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
|
||||
bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
|
||||
int *blocksize);
|
||||
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data);
|
||||
|
||||
void dm_lock_md_type(struct mapped_device *md);
|
||||
void dm_unlock_md_type(struct mapped_device *md);
|
||||
|
@ -22,7 +22,7 @@ if LIBNVDIMM
|
||||
config BLK_DEV_PMEM
|
||||
tristate "PMEM: Persistent memory block device support"
|
||||
default LIBNVDIMM
|
||||
select DAX_DRIVER
|
||||
select DAX
|
||||
select ND_BTT if BTT
|
||||
select ND_PFN if NVDIMM_PFN
|
||||
help
|
||||
|
@ -301,29 +301,8 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
|
||||
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the 'no check' versions of copy_from_iter_flushcache() and
|
||||
* copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
|
||||
* checking, both file offset and device offset, is handled by
|
||||
* dax_iomap_actor()
|
||||
*/
|
||||
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return _copy_from_iter_flushcache(addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return _copy_mc_to_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static const struct dax_operations pmem_dax_ops = {
|
||||
.direct_access = pmem_dax_direct_access,
|
||||
.dax_supported = generic_fsdax_supported,
|
||||
.copy_from_iter = pmem_copy_from_iter,
|
||||
.copy_to_iter = pmem_copy_to_iter,
|
||||
.zero_page_range = pmem_dax_zero_page_range,
|
||||
};
|
||||
|
||||
@ -379,6 +358,7 @@ static void pmem_release_disk(void *__pmem)
|
||||
{
|
||||
struct pmem_device *pmem = __pmem;
|
||||
|
||||
dax_remove_host(pmem->disk);
|
||||
kill_dax(pmem->dax_dev);
|
||||
put_dax(pmem->dax_dev);
|
||||
del_gendisk(pmem->disk);
|
||||
@ -402,7 +382,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
struct gendisk *disk;
|
||||
void *addr;
|
||||
int rc;
|
||||
unsigned long flags = 0UL;
|
||||
|
||||
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
|
||||
if (!pmem)
|
||||
@ -495,19 +474,24 @@ static int pmem_attach_disk(struct device *dev,
|
||||
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
|
||||
disk->bb = &pmem->bb;
|
||||
|
||||
if (is_nvdimm_sync(nd_region))
|
||||
flags = DAXDEV_F_SYNC;
|
||||
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
|
||||
dax_dev = alloc_dax(pmem, &pmem_dax_ops);
|
||||
if (IS_ERR(dax_dev)) {
|
||||
rc = PTR_ERR(dax_dev);
|
||||
goto out;
|
||||
}
|
||||
set_dax_nocache(dax_dev);
|
||||
set_dax_nomc(dax_dev);
|
||||
if (is_nvdimm_sync(nd_region))
|
||||
set_dax_synchronous(dax_dev);
|
||||
rc = dax_add_host(dax_dev, disk);
|
||||
if (rc)
|
||||
goto out_cleanup_dax;
|
||||
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
|
||||
pmem->dax_dev = dax_dev;
|
||||
|
||||
rc = device_add_disk(dev, disk, pmem_attribute_groups);
|
||||
if (rc)
|
||||
goto out_cleanup_dax;
|
||||
goto out_remove_host;
|
||||
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
|
||||
return -ENOMEM;
|
||||
|
||||
@ -519,6 +503,8 @@ static int pmem_attach_disk(struct device *dev,
|
||||
dev_warn(dev, "'badblocks' notification disabled\n");
|
||||
return 0;
|
||||
|
||||
out_remove_host:
|
||||
dax_remove_host(pmem->disk);
|
||||
out_cleanup_dax:
|
||||
kill_dax(pmem->dax_dev);
|
||||
put_dax(pmem->dax_dev);
|
||||
|
@ -219,7 +219,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
|
||||
error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
|
||||
pci_bus_address(pdev, bar) + offset,
|
||||
range_len(&pgmap->range), dev_to_node(&pdev->dev),
|
||||
pgmap->ref);
|
||||
&pgmap->ref);
|
||||
if (error)
|
||||
goto pages_free;
|
||||
|
||||
|
@ -5,7 +5,7 @@ comment "S/390 block device drivers"
|
||||
config DCSSBLK
|
||||
def_tristate m
|
||||
select FS_DAX_LIMITED
|
||||
select DAX_DRIVER
|
||||
select DAX
|
||||
prompt "DCSSBLK support"
|
||||
depends on S390 && BLOCK
|
||||
help
|
||||
|
@ -44,18 +44,6 @@ static const struct block_device_operations dcssblk_devops = {
|
||||
.release = dcssblk_release,
|
||||
};
|
||||
|
||||
static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_from_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_to_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, size_t nr_pages)
|
||||
{
|
||||
@ -72,9 +60,6 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
|
||||
|
||||
static const struct dax_operations dcssblk_dax_ops = {
|
||||
.direct_access = dcssblk_dax_direct_access,
|
||||
.dax_supported = generic_fsdax_supported,
|
||||
.copy_from_iter = dcssblk_dax_copy_from_iter,
|
||||
.copy_to_iter = dcssblk_dax_copy_to_iter,
|
||||
.zero_page_range = dcssblk_dax_zero_page_range,
|
||||
};
|
||||
|
||||
@ -687,18 +672,21 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
|
||||
if (rc)
|
||||
goto put_dev;
|
||||
|
||||
dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
|
||||
&dcssblk_dax_ops, DAXDEV_F_SYNC);
|
||||
dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
|
||||
if (IS_ERR(dev_info->dax_dev)) {
|
||||
rc = PTR_ERR(dev_info->dax_dev);
|
||||
dev_info->dax_dev = NULL;
|
||||
goto put_dev;
|
||||
}
|
||||
set_dax_synchronous(dev_info->dax_dev);
|
||||
rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
|
||||
if (rc)
|
||||
goto out_dax;
|
||||
|
||||
get_device(&dev_info->dev);
|
||||
rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
|
||||
if (rc)
|
||||
goto out_dax;
|
||||
goto out_dax_host;
|
||||
|
||||
switch (dev_info->segment_type) {
|
||||
case SEG_TYPE_SR:
|
||||
@ -714,6 +702,8 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
|
||||
rc = count;
|
||||
goto out;
|
||||
|
||||
out_dax_host:
|
||||
dax_remove_host(dev_info->gd);
|
||||
out_dax:
|
||||
put_device(&dev_info->dev);
|
||||
kill_dax(dev_info->dax_dev);
|
||||
|
@ -15,11 +15,11 @@ config VALIDATE_FS_PARSER
|
||||
Enable this to perform validation of the parameter description for a
|
||||
filesystem when it is registered.
|
||||
|
||||
if BLOCK
|
||||
|
||||
config FS_IOMAP
|
||||
bool
|
||||
|
||||
if BLOCK
|
||||
|
||||
source "fs/ext2/Kconfig"
|
||||
source "fs/ext4/Kconfig"
|
||||
source "fs/jbd2/Kconfig"
|
||||
@ -42,6 +42,8 @@ source "fs/nilfs2/Kconfig"
|
||||
source "fs/f2fs/Kconfig"
|
||||
source "fs/zonefs/Kconfig"
|
||||
|
||||
endif # BLOCK
|
||||
|
||||
config FS_DAX
|
||||
bool "File system based Direct Access (DAX) support"
|
||||
depends on MMU
|
||||
@ -89,8 +91,6 @@ config FS_DAX_PMD
|
||||
config FS_DAX_LIMITED
|
||||
bool
|
||||
|
||||
endif # BLOCK
|
||||
|
||||
# Posix ACL utility routines
|
||||
#
|
||||
# Note: Posix ACLs can be implemented without these helpers. Never use
|
||||
|
161
fs/dax.c
161
fs/dax.c
@ -709,26 +709,26 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
||||
return __dax_invalidate_entry(mapping, index, false);
|
||||
}
|
||||
|
||||
static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
|
||||
sector_t sector, struct page *to, unsigned long vaddr)
|
||||
static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
|
||||
}
|
||||
|
||||
static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
|
||||
{
|
||||
pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
|
||||
void *vto, *kaddr;
|
||||
pgoff_t pgoff;
|
||||
long rc;
|
||||
int id;
|
||||
|
||||
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
id = dax_read_lock();
|
||||
rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
if (rc < 0) {
|
||||
dax_read_unlock(id);
|
||||
return rc;
|
||||
}
|
||||
vto = kmap_atomic(to);
|
||||
copy_user_page(vto, (void __force *)kaddr, vaddr, to);
|
||||
vto = kmap_atomic(vmf->cow_page);
|
||||
copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
|
||||
kunmap_atomic(vto);
|
||||
dax_read_unlock(id);
|
||||
return 0;
|
||||
@ -1005,22 +1005,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
|
||||
|
||||
static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
|
||||
}
|
||||
|
||||
static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
|
||||
pfn_t *pfnp)
|
||||
{
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
pgoff_t pgoff;
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
int id, rc;
|
||||
long length;
|
||||
|
||||
rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
|
||||
if (rc)
|
||||
return rc;
|
||||
id = dax_read_lock();
|
||||
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
|
||||
NULL, pfnp);
|
||||
@ -1126,50 +1117,94 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
|
||||
}
|
||||
#endif /* CONFIG_FS_DAX_PMD */
|
||||
|
||||
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
|
||||
static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
unsigned int offset, size_t size)
|
||||
{
|
||||
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
|
||||
pgoff_t pgoff;
|
||||
long rc, id;
|
||||
void *kaddr;
|
||||
bool page_aligned = false;
|
||||
unsigned offset = offset_in_page(pos);
|
||||
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||
long ret;
|
||||
|
||||
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
|
||||
(size == PAGE_SIZE))
|
||||
page_aligned = true;
|
||||
|
||||
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
id = dax_read_lock();
|
||||
|
||||
if (page_aligned)
|
||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||
else
|
||||
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
if (rc < 0) {
|
||||
dax_read_unlock(id);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!page_aligned) {
|
||||
ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
if (ret > 0) {
|
||||
memset(kaddr + offset, 0, size);
|
||||
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
||||
dax_flush(dax_dev, kaddr + offset, size);
|
||||
}
|
||||
dax_read_unlock(id);
|
||||
return size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
{
|
||||
const struct iomap *iomap = &iter->iomap;
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||
loff_t pos = iter->pos;
|
||||
u64 length = iomap_length(iter);
|
||||
s64 written = 0;
|
||||
|
||||
/* already zeroed? we're done. */
|
||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||
return length;
|
||||
|
||||
do {
|
||||
unsigned offset = offset_in_page(pos);
|
||||
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
long rc;
|
||||
int id;
|
||||
|
||||
id = dax_read_lock();
|
||||
if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
|
||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||
else
|
||||
rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
|
||||
dax_read_unlock(id);
|
||||
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
pos += size;
|
||||
length -= size;
|
||||
written += size;
|
||||
if (did_zero)
|
||||
*did_zero = true;
|
||||
} while (length > 0);
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
struct iomap_iter iter = {
|
||||
.inode = inode,
|
||||
.pos = pos,
|
||||
.len = len,
|
||||
.flags = IOMAP_DAX | IOMAP_ZERO,
|
||||
};
|
||||
int ret;
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = dax_zero_iter(&iter, did_zero);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_zero_range);
|
||||
|
||||
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
unsigned int off = pos & (blocksize - 1);
|
||||
|
||||
/* Block boundary? Nothing to do */
|
||||
if (!off)
|
||||
return 0;
|
||||
return dax_zero_range(inode, pos, blocksize - off, did_zero, ops);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_truncate_page);
|
||||
|
||||
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
struct iov_iter *iter)
|
||||
{
|
||||
const struct iomap *iomap = &iomi->iomap;
|
||||
loff_t length = iomap_length(iomi);
|
||||
loff_t pos = iomi->pos;
|
||||
struct block_device *bdev = iomap->bdev;
|
||||
struct dax_device *dax_dev = iomap->dax_dev;
|
||||
loff_t end = pos + length, done = 0;
|
||||
ssize_t ret = 0;
|
||||
@ -1203,9 +1238,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
while (pos < end) {
|
||||
unsigned offset = pos & (PAGE_SIZE - 1);
|
||||
const size_t size = ALIGN(length + offset, PAGE_SIZE);
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
ssize_t map_len;
|
||||
pgoff_t pgoff;
|
||||
void *kaddr;
|
||||
|
||||
if (fatal_signal_pending(current)) {
|
||||
@ -1213,10 +1247,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
|
||||
&kaddr, NULL);
|
||||
if (map_len < 0) {
|
||||
@ -1230,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
if (map_len > end - pos)
|
||||
map_len = end - pos;
|
||||
|
||||
/*
|
||||
* The userspace address for the memory copy has already been
|
||||
* validated via access_ok() in either vfs_read() or
|
||||
* vfs_write(), depending on which operation we are doing.
|
||||
*/
|
||||
if (iov_iter_rw(iter) == WRITE)
|
||||
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
|
||||
map_len, iter);
|
||||
@ -1274,6 +1299,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
.inode = iocb->ki_filp->f_mapping->host,
|
||||
.pos = iocb->ki_pos,
|
||||
.len = iov_iter_count(iter),
|
||||
.flags = IOMAP_DAX,
|
||||
};
|
||||
loff_t done = 0;
|
||||
int ret;
|
||||
@ -1332,19 +1358,16 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
|
||||
static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
|
||||
const struct iomap_iter *iter)
|
||||
{
|
||||
sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
|
||||
unsigned long vaddr = vmf->address;
|
||||
vm_fault_t ret;
|
||||
int error = 0;
|
||||
|
||||
switch (iter->iomap.type) {
|
||||
case IOMAP_HOLE:
|
||||
case IOMAP_UNWRITTEN:
|
||||
clear_user_highpage(vmf->cow_page, vaddr);
|
||||
clear_user_highpage(vmf->cow_page, vmf->address);
|
||||
break;
|
||||
case IOMAP_MAPPED:
|
||||
error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
|
||||
sector, vmf->cow_page, vaddr);
|
||||
error = copy_cow_page_dax(vmf, iter);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
@ -1430,7 +1453,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
.inode = mapping->host,
|
||||
.pos = (loff_t)vmf->pgoff << PAGE_SHIFT,
|
||||
.len = PAGE_SIZE,
|
||||
.flags = IOMAP_FAULT,
|
||||
.flags = IOMAP_DAX | IOMAP_FAULT,
|
||||
};
|
||||
vm_fault_t ret = 0;
|
||||
void *entry;
|
||||
@ -1539,7 +1562,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
struct iomap_iter iter = {
|
||||
.inode = mapping->host,
|
||||
.len = PMD_SIZE,
|
||||
.flags = IOMAP_FAULT,
|
||||
.flags = IOMAP_DAX | IOMAP_FAULT,
|
||||
};
|
||||
vm_fault_t ret = VM_FAULT_FALLBACK;
|
||||
pgoff_t max_pgoff;
|
||||
|
@ -192,6 +192,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
|
||||
/* primary device by default */
|
||||
map->m_bdev = sb->s_bdev;
|
||||
map->m_daxdev = EROFS_SB(sb)->dax_dev;
|
||||
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
|
||||
|
||||
if (map->m_deviceid) {
|
||||
down_read(&devs->rwsem);
|
||||
@ -202,6 +203,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
|
||||
}
|
||||
map->m_bdev = dif->bdev;
|
||||
map->m_daxdev = dif->dax_dev;
|
||||
map->m_dax_part_off = dif->dax_part_off;
|
||||
up_read(&devs->rwsem);
|
||||
} else if (devs->extra_devices) {
|
||||
down_read(&devs->rwsem);
|
||||
@ -218,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
|
||||
map->m_pa -= startoff;
|
||||
map->m_bdev = dif->bdev;
|
||||
map->m_daxdev = dif->dax_dev;
|
||||
map->m_dax_part_off = dif->dax_part_off;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -248,9 +251,13 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
iomap->bdev = mdev.m_bdev;
|
||||
iomap->dax_dev = mdev.m_daxdev;
|
||||
iomap->offset = map.m_la;
|
||||
if (flags & IOMAP_DAX) {
|
||||
iomap->dax_dev = mdev.m_daxdev;
|
||||
iomap->offset += mdev.m_dax_part_off;
|
||||
} else {
|
||||
iomap->bdev = mdev.m_bdev;
|
||||
}
|
||||
iomap->length = map.m_llen;
|
||||
iomap->flags = 0;
|
||||
iomap->private = NULL;
|
||||
|
@ -51,6 +51,7 @@ struct erofs_device_info {
|
||||
char *path;
|
||||
struct block_device *bdev;
|
||||
struct dax_device *dax_dev;
|
||||
u64 dax_part_off;
|
||||
|
||||
u32 blocks;
|
||||
u32 mapped_blkaddr;
|
||||
@ -115,6 +116,7 @@ struct erofs_sb_info {
|
||||
#endif /* CONFIG_EROFS_FS_ZIP */
|
||||
struct erofs_dev_context *devs;
|
||||
struct dax_device *dax_dev;
|
||||
u64 dax_part_off;
|
||||
u64 total_blocks;
|
||||
u32 primarydevice_blocks;
|
||||
|
||||
@ -467,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
|
||||
struct erofs_map_dev {
|
||||
struct block_device *m_bdev;
|
||||
struct dax_device *m_daxdev;
|
||||
u64 m_dax_part_off;
|
||||
|
||||
erofs_off_t m_pa;
|
||||
unsigned int m_deviceid;
|
||||
|
@ -267,7 +267,7 @@ static int erofs_init_devices(struct super_block *sb,
|
||||
break;
|
||||
}
|
||||
dif->bdev = bdev;
|
||||
dif->dax_dev = fs_dax_get_by_bdev(bdev);
|
||||
dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
|
||||
dif->blocks = le32_to_cpu(dis->blocks);
|
||||
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
|
||||
sbi->total_blocks += dif->blocks;
|
||||
@ -597,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||
|
||||
sb->s_fs_info = sbi;
|
||||
sbi->opt = ctx->opt;
|
||||
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
|
||||
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
|
||||
sbi->devs = ctx->devs;
|
||||
ctx->devs = NULL;
|
||||
|
||||
@ -605,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (test_opt(&sbi->opt, DAX_ALWAYS) &&
|
||||
!dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
|
||||
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
|
||||
clear_opt(&sbi->opt, DAX_ALWAYS);
|
||||
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
|
||||
BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
|
||||
|
||||
if (!sbi->dax_dev) {
|
||||
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
|
||||
clear_opt(&sbi->opt, DAX_ALWAYS);
|
||||
}
|
||||
}
|
||||
sb->s_flags |= SB_RDONLY | SB_NOATIME;
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
|
@ -118,6 +118,7 @@ struct ext2_sb_info {
|
||||
spinlock_t s_lock;
|
||||
struct mb_cache *s_ea_block_cache;
|
||||
struct dax_device *s_daxdev;
|
||||
u64 s_dax_part_off;
|
||||
};
|
||||
|
||||
static inline spinlock_t *
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/dax.h>
|
||||
#include "ext2.h"
|
||||
#include "acl.h"
|
||||
#include "xattr.h"
|
||||
@ -816,9 +817,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
return ret;
|
||||
|
||||
iomap->flags = 0;
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->offset = (u64)first_block << blkbits;
|
||||
iomap->dax_dev = sbi->s_daxdev;
|
||||
if (flags & IOMAP_DAX)
|
||||
iomap->dax_dev = sbi->s_daxdev;
|
||||
else
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
|
||||
if (ret == 0) {
|
||||
iomap->type = IOMAP_HOLE;
|
||||
@ -827,6 +830,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
} else {
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
iomap->addr = (u64)bno << blkbits;
|
||||
if (flags & IOMAP_DAX)
|
||||
iomap->addr += sbi->s_dax_part_off;
|
||||
iomap->length = (u64)ret << blkbits;
|
||||
iomap->flags |= IOMAP_F_MERGED;
|
||||
}
|
||||
@ -1297,9 +1302,9 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
|
||||
inode_dio_wait(inode);
|
||||
|
||||
if (IS_DAX(inode)) {
|
||||
error = iomap_zero_range(inode, newsize,
|
||||
PAGE_ALIGN(newsize) - newsize, NULL,
|
||||
&ext2_iomap_ops);
|
||||
error = dax_zero_range(inode, newsize,
|
||||
PAGE_ALIGN(newsize) - newsize, NULL,
|
||||
&ext2_iomap_ops);
|
||||
} else if (test_opt(inode->i_sb, NOBH))
|
||||
error = nobh_truncate_page(inode->i_mapping,
|
||||
newsize, ext2_get_block);
|
||||
|
@ -802,7 +802,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
|
||||
|
||||
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
{
|
||||
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
|
||||
struct buffer_head * bh;
|
||||
struct ext2_sb_info * sbi;
|
||||
struct ext2_super_block * es;
|
||||
@ -822,17 +821,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
|
||||
if (!sbi)
|
||||
goto failed;
|
||||
return -ENOMEM;
|
||||
|
||||
sbi->s_blockgroup_lock =
|
||||
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
|
||||
if (!sbi->s_blockgroup_lock) {
|
||||
kfree(sbi);
|
||||
goto failed;
|
||||
return -ENOMEM;
|
||||
}
|
||||
sb->s_fs_info = sbi;
|
||||
sbi->s_sb_block = sb_block;
|
||||
sbi->s_daxdev = dax_dev;
|
||||
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
|
||||
|
||||
spin_lock_init(&sbi->s_lock);
|
||||
ret = -EINVAL;
|
||||
@ -946,11 +945,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
|
||||
|
||||
if (test_opt(sb, DAX)) {
|
||||
if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
|
||||
bdev_nr_sectors(sb->s_bdev))) {
|
||||
if (!sbi->s_daxdev) {
|
||||
ext2_msg(sb, KERN_ERR,
|
||||
"DAX unsupported by block device. Turning off DAX.");
|
||||
clear_opt(sbi->s_mount_opt, DAX);
|
||||
} else if (blocksize != PAGE_SIZE) {
|
||||
ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
|
||||
clear_opt(sbi->s_mount_opt, DAX);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1199,11 +1200,10 @@ failed_mount_group_desc:
|
||||
failed_mount:
|
||||
brelse(bh);
|
||||
failed_sbi:
|
||||
fs_put_dax(sbi->s_daxdev);
|
||||
sb->s_fs_info = NULL;
|
||||
kfree(sbi->s_blockgroup_lock);
|
||||
kfree(sbi);
|
||||
failed:
|
||||
fs_put_dax(dax_dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1699,6 +1699,7 @@ struct ext4_sb_info {
|
||||
*/
|
||||
struct percpu_rw_semaphore s_writepages_rwsem;
|
||||
struct dax_device *s_daxdev;
|
||||
u64 s_dax_part_off;
|
||||
#ifdef CONFIG_EXT4_DEBUG
|
||||
unsigned long s_simulate_fail;
|
||||
#endif
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/dax.h>
|
||||
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
@ -3253,7 +3254,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
|
||||
|
||||
static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
|
||||
struct ext4_map_blocks *map, loff_t offset,
|
||||
loff_t length)
|
||||
loff_t length, unsigned int flags)
|
||||
{
|
||||
u8 blkbits = inode->i_blkbits;
|
||||
|
||||
@ -3270,8 +3271,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
|
||||
if (map->m_flags & EXT4_MAP_NEW)
|
||||
iomap->flags |= IOMAP_F_NEW;
|
||||
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
|
||||
if (flags & IOMAP_DAX)
|
||||
iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
|
||||
else
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->offset = (u64) map->m_lblk << blkbits;
|
||||
iomap->length = (u64) map->m_len << blkbits;
|
||||
|
||||
@ -3291,9 +3294,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
|
||||
if (map->m_flags & EXT4_MAP_UNWRITTEN) {
|
||||
iomap->type = IOMAP_UNWRITTEN;
|
||||
iomap->addr = (u64) map->m_pblk << blkbits;
|
||||
if (flags & IOMAP_DAX)
|
||||
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
|
||||
} else if (map->m_flags & EXT4_MAP_MAPPED) {
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
iomap->addr = (u64) map->m_pblk << blkbits;
|
||||
if (flags & IOMAP_DAX)
|
||||
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
|
||||
} else {
|
||||
iomap->type = IOMAP_HOLE;
|
||||
iomap->addr = IOMAP_NULL_ADDR;
|
||||
@ -3330,8 +3337,8 @@ retry:
|
||||
* DAX and direct I/O are the only two operations that are currently
|
||||
* supported with IOMAP_WRITE.
|
||||
*/
|
||||
WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
|
||||
if (IS_DAX(inode))
|
||||
WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
|
||||
if (flags & IOMAP_DAX)
|
||||
m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
|
||||
/*
|
||||
* We use i_size instead of i_disksize here because delalloc writeback
|
||||
@ -3402,7 +3409,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
out:
|
||||
ext4_set_iomap(inode, iomap, &map, offset, length);
|
||||
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -3522,7 +3529,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
|
||||
delalloc = ext4_iomap_is_delalloc(inode, &map);
|
||||
|
||||
set_iomap:
|
||||
ext4_set_iomap(inode, iomap, &map, offset, length);
|
||||
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
|
||||
if (delalloc && iomap->type == IOMAP_HOLE)
|
||||
iomap->type = IOMAP_DELALLOC;
|
||||
|
||||
@ -3762,8 +3769,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
|
||||
length = max;
|
||||
|
||||
if (IS_DAX(inode)) {
|
||||
return iomap_zero_range(inode, from, length, NULL,
|
||||
&ext4_iomap_ops);
|
||||
return dax_zero_range(inode, from, length, NULL,
|
||||
&ext4_iomap_ops);
|
||||
}
|
||||
return __ext4_block_zero_page_range(handle, mapping, from, length);
|
||||
}
|
||||
|
@ -4338,7 +4338,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
|
||||
if (!sbi)
|
||||
return NULL;
|
||||
|
||||
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev);
|
||||
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
|
||||
|
||||
sbi->s_blockgroup_lock =
|
||||
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
|
||||
@ -4756,9 +4756,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
||||
goto failed_mount;
|
||||
}
|
||||
|
||||
if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0,
|
||||
bdev_nr_sectors(sb->s_bdev)))
|
||||
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
|
||||
if (sbi->s_daxdev) {
|
||||
if (blocksize == PAGE_SIZE)
|
||||
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
|
||||
else
|
||||
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
|
||||
}
|
||||
|
||||
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
|
||||
if (ext4_has_feature_inline_data(sb)) {
|
||||
|
@ -45,7 +45,7 @@ config FUSE_DAX
|
||||
select INTERVAL_TREE
|
||||
depends on VIRTIO_FS
|
||||
depends on FS_DAX
|
||||
depends on DAX_DRIVER
|
||||
depends on DAX
|
||||
help
|
||||
This allows bypassing guest page cache and allows mapping host page
|
||||
cache directly in guest address space.
|
||||
|
@ -765,20 +765,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
|
||||
}
|
||||
|
||||
static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr,
|
||||
size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_from_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr,
|
||||
size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_to_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, size_t nr_pages)
|
||||
{
|
||||
@ -795,8 +781,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
|
||||
|
||||
static const struct dax_operations virtio_fs_dax_ops = {
|
||||
.direct_access = virtio_fs_direct_access,
|
||||
.copy_from_iter = virtio_fs_copy_from_iter,
|
||||
.copy_to_iter = virtio_fs_copy_to_iter,
|
||||
.zero_page_range = virtio_fs_zero_page_range,
|
||||
};
|
||||
|
||||
@ -862,7 +846,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
|
||||
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
|
||||
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
|
||||
|
||||
fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
|
||||
fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
|
||||
if (IS_ERR(fs->dax_dev))
|
||||
return PTR_ERR(fs->dax_dev);
|
||||
|
||||
|
@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src) # needed for trace events
|
||||
obj-$(CONFIG_FS_IOMAP) += iomap.o
|
||||
|
||||
iomap-y += trace.o \
|
||||
buffered-io.o \
|
||||
iter.o
|
||||
iomap-$(CONFIG_BLOCK) += buffered-io.o \
|
||||
direct-io.o \
|
||||
fiemap.o \
|
||||
iter.o \
|
||||
seek.o
|
||||
iomap-$(CONFIG_SWAP) += swapfile.o
|
||||
|
@ -897,7 +897,6 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
|
||||
|
||||
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
{
|
||||
struct iomap *iomap = &iter->iomap;
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||
loff_t pos = iter->pos;
|
||||
loff_t length = iomap_length(iter);
|
||||
@ -913,14 +912,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
size_t offset;
|
||||
size_t bytes = min_t(u64, SIZE_MAX, length);
|
||||
|
||||
if (IS_DAX(iter->inode)) {
|
||||
s64 tmp = dax_iomap_zero(pos, bytes, iomap);
|
||||
if (tmp < 0)
|
||||
return tmp;
|
||||
bytes = tmp;
|
||||
goto good;
|
||||
}
|
||||
|
||||
status = iomap_write_begin(iter, pos, bytes, &folio);
|
||||
if (status)
|
||||
return status;
|
||||
@ -933,7 +924,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
folio_mark_accessed(folio);
|
||||
|
||||
bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
|
||||
good:
|
||||
if (WARN_ON_ONCE(bytes == 0))
|
||||
return -EIO;
|
||||
|
||||
|
@ -4551,7 +4551,7 @@ xfs_bmapi_convert_delalloc(
|
||||
* the extent. Just return the real extent at this offset.
|
||||
*/
|
||||
if (!isnullstartblock(bma.got.br_startblock)) {
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
@ -4598,7 +4598,7 @@ xfs_bmapi_convert_delalloc(
|
||||
XFS_STATS_INC(mp, xs_xstrat_quick);
|
||||
|
||||
ASSERT(!isnullstartblock(bma.got.br_startblock));
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
|
||||
if (whichfork == XFS_COW_FORK)
|
||||
|
@ -359,7 +359,7 @@ retry:
|
||||
isnullstartblock(imap.br_startblock))
|
||||
goto allocate_blocks;
|
||||
|
||||
xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
|
||||
xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
|
||||
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
|
||||
return 0;
|
||||
allocate_blocks:
|
||||
|
@ -1001,7 +1001,7 @@ xfs_free_file_space(
|
||||
|
||||
/*
|
||||
* Now that we've unmapped all full blocks we'll have to zero out any
|
||||
* partial block at the beginning and/or end. iomap_zero_range is smart
|
||||
* partial block at the beginning and/or end. xfs_zero_range is smart
|
||||
* enough to skip any holes, including those we just created, but we
|
||||
* must take care not to zero beyond EOF and enlarge i_size.
|
||||
*/
|
||||
@ -1009,15 +1009,14 @@ xfs_free_file_space(
|
||||
return 0;
|
||||
if (offset + len > XFS_ISIZE(ip))
|
||||
len = XFS_ISIZE(ip) - offset;
|
||||
error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
error = xfs_zero_range(ip, offset, len, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* If we zeroed right up to EOF and EOF straddles a page boundary we
|
||||
* must make sure that the post-EOF area is also zeroed because the
|
||||
* page could be mmap'd and iomap_zero_range doesn't do that for us.
|
||||
* page could be mmap'd and xfs_zero_range doesn't do that for us.
|
||||
* Writeback of the eof page will do this, albeit clumsily.
|
||||
*/
|
||||
if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
|
||||
|
@ -1892,6 +1892,7 @@ xfs_free_buftarg(
|
||||
list_lru_destroy(&btp->bt_lru);
|
||||
|
||||
blkdev_issue_flush(btp->bt_bdev);
|
||||
fs_put_dax(btp->bt_daxdev);
|
||||
|
||||
kmem_free(btp);
|
||||
}
|
||||
@ -1932,11 +1933,10 @@ xfs_setsize_buftarg_early(
|
||||
return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
|
||||
}
|
||||
|
||||
xfs_buftarg_t *
|
||||
struct xfs_buftarg *
|
||||
xfs_alloc_buftarg(
|
||||
struct xfs_mount *mp,
|
||||
struct block_device *bdev,
|
||||
struct dax_device *dax_dev)
|
||||
struct block_device *bdev)
|
||||
{
|
||||
xfs_buftarg_t *btp;
|
||||
|
||||
@ -1945,7 +1945,7 @@ xfs_alloc_buftarg(
|
||||
btp->bt_mount = mp;
|
||||
btp->bt_dev = bdev->bd_dev;
|
||||
btp->bt_bdev = bdev;
|
||||
btp->bt_daxdev = dax_dev;
|
||||
btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
|
||||
|
||||
/*
|
||||
* Buffer IO error rate limiting. Limit it to no more than 10 messages
|
||||
|
@ -89,6 +89,7 @@ typedef struct xfs_buftarg {
|
||||
dev_t bt_dev;
|
||||
struct block_device *bt_bdev;
|
||||
struct dax_device *bt_daxdev;
|
||||
u64 bt_dax_part_off;
|
||||
struct xfs_mount *bt_mount;
|
||||
unsigned int bt_meta_sectorsize;
|
||||
size_t bt_meta_sectormask;
|
||||
@ -338,8 +339,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
|
||||
/*
|
||||
* Handling of buftargs.
|
||||
*/
|
||||
extern struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *,
|
||||
struct block_device *, struct dax_device *);
|
||||
struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
|
||||
struct block_device *bdev);
|
||||
extern void xfs_free_buftarg(struct xfs_buftarg *);
|
||||
extern void xfs_buftarg_wait(struct xfs_buftarg *);
|
||||
extern void xfs_buftarg_drain(struct xfs_buftarg *);
|
||||
|
@ -437,8 +437,7 @@ restart:
|
||||
}
|
||||
|
||||
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
|
||||
error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
|
||||
NULL, &xfs_buffered_write_iomap_ops);
|
||||
error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
} else
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include "xfs_dquot.h"
|
||||
#include "xfs_reflink.h"
|
||||
|
||||
|
||||
#define XFS_ALLOC_ALIGN(mp, off) \
|
||||
(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
|
||||
|
||||
@ -54,7 +53,8 @@ xfs_bmbt_to_iomap(
|
||||
struct xfs_inode *ip,
|
||||
struct iomap *iomap,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
u16 flags)
|
||||
unsigned int mapping_flags,
|
||||
u16 iomap_flags)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
|
||||
@ -71,16 +71,22 @@ xfs_bmbt_to_iomap(
|
||||
iomap->type = IOMAP_DELALLOC;
|
||||
} else {
|
||||
iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
|
||||
if (mapping_flags & IOMAP_DAX)
|
||||
iomap->addr += target->bt_dax_part_off;
|
||||
|
||||
if (imap->br_state == XFS_EXT_UNWRITTEN)
|
||||
iomap->type = IOMAP_UNWRITTEN;
|
||||
else
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
|
||||
}
|
||||
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
|
||||
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
|
||||
iomap->bdev = target->bt_bdev;
|
||||
iomap->dax_dev = target->bt_daxdev;
|
||||
iomap->flags = flags;
|
||||
if (mapping_flags & IOMAP_DAX)
|
||||
iomap->dax_dev = target->bt_daxdev;
|
||||
else
|
||||
iomap->bdev = target->bt_bdev;
|
||||
iomap->flags = iomap_flags;
|
||||
|
||||
if (xfs_ipincount(ip) &&
|
||||
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
|
||||
@ -188,6 +194,7 @@ xfs_iomap_write_direct(
|
||||
struct xfs_inode *ip,
|
||||
xfs_fileoff_t offset_fsb,
|
||||
xfs_fileoff_t count_fsb,
|
||||
unsigned int flags,
|
||||
struct xfs_bmbt_irec *imap)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
@ -229,7 +236,7 @@ xfs_iomap_write_direct(
|
||||
* the reserve block pool for bmbt block allocation if there is no space
|
||||
* left but we need to do unwritten extent conversion.
|
||||
*/
|
||||
if (IS_DAX(VFS_I(ip))) {
|
||||
if (flags & IOMAP_DAX) {
|
||||
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
|
||||
if (imap->br_state == XFS_EXT_UNWRITTEN) {
|
||||
force = true;
|
||||
@ -620,7 +627,7 @@ imap_needs_alloc(
|
||||
imap->br_startblock == DELAYSTARTBLOCK)
|
||||
return true;
|
||||
/* we convert unwritten extents before copying the data for DAX */
|
||||
if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
|
||||
if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@ -800,7 +807,7 @@ xfs_direct_write_iomap_begin(
|
||||
|
||||
xfs_iunlock(ip, lockmode);
|
||||
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
|
||||
|
||||
allocate_blocks:
|
||||
error = -EAGAIN;
|
||||
@ -826,23 +833,24 @@ allocate_blocks:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
|
||||
error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
|
||||
&imap);
|
||||
flags, &imap);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
|
||||
iomap_flags | IOMAP_F_NEW);
|
||||
|
||||
out_found_cow:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
|
||||
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
|
||||
if (imap.br_startblock != HOLESTARTBLOCK) {
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
|
||||
|
||||
out_unlock:
|
||||
if (lockmode)
|
||||
@ -1052,23 +1060,24 @@ retry:
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
|
||||
|
||||
found_imap:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
|
||||
|
||||
found_cow:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (imap.br_startoff <= offset_fsb) {
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
|
||||
if (error)
|
||||
return error;
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
|
||||
IOMAP_F_SHARED);
|
||||
}
|
||||
|
||||
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
@ -1177,7 +1186,8 @@ xfs_read_iomap_begin(
|
||||
if (error)
|
||||
return error;
|
||||
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
|
||||
shared ? IOMAP_F_SHARED : 0);
|
||||
}
|
||||
|
||||
const struct iomap_ops xfs_read_iomap_ops = {
|
||||
@ -1236,7 +1246,8 @@ xfs_seek_iomap_begin(
|
||||
if (data_fsb < cow_fsb + cmap.br_blockcount)
|
||||
end_fsb = min(end_fsb, data_fsb);
|
||||
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
|
||||
IOMAP_F_SHARED);
|
||||
/*
|
||||
* This is a COW extent, so we must probe the page cache
|
||||
* because there could be dirty page cache being backed
|
||||
@ -1258,7 +1269,7 @@ xfs_seek_iomap_begin(
|
||||
imap.br_state = XFS_EXT_NORM;
|
||||
done:
|
||||
xfs_trim_extent(&imap, offset_fsb, end_fsb);
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return error;
|
||||
@ -1305,9 +1316,40 @@ out_unlock:
|
||||
if (error)
|
||||
return error;
|
||||
ASSERT(nimaps);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
|
||||
}
|
||||
|
||||
const struct iomap_ops xfs_xattr_iomap_ops = {
|
||||
.iomap_begin = xfs_xattr_iomap_begin,
|
||||
};
|
||||
|
||||
int
|
||||
xfs_zero_range(
|
||||
struct xfs_inode *ip,
|
||||
loff_t pos,
|
||||
loff_t len,
|
||||
bool *did_zero)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return dax_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_direct_write_iomap_ops);
|
||||
return iomap_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
}
|
||||
|
||||
int
|
||||
xfs_truncate_page(
|
||||
struct xfs_inode *ip,
|
||||
loff_t pos,
|
||||
bool *did_zero)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return dax_truncate_page(inode, pos, did_zero,
|
||||
&xfs_direct_write_iomap_ops);
|
||||
return iomap_truncate_page(inode, pos, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
}
|
||||
|
@ -12,13 +12,19 @@ struct xfs_inode;
|
||||
struct xfs_bmbt_irec;
|
||||
|
||||
int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
|
||||
xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
|
||||
xfs_fileoff_t count_fsb, unsigned int flags,
|
||||
struct xfs_bmbt_irec *imap);
|
||||
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
|
||||
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
|
||||
xfs_fileoff_t end_fsb);
|
||||
|
||||
int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
|
||||
struct xfs_bmbt_irec *, u16);
|
||||
int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
|
||||
struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
|
||||
u16 iomap_flags);
|
||||
|
||||
int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
|
||||
bool *did_zero);
|
||||
int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero);
|
||||
|
||||
static inline xfs_filblks_t
|
||||
xfs_aligned_fsb_count(
|
||||
|
@ -890,8 +890,8 @@ xfs_setattr_size(
|
||||
*/
|
||||
if (newsize > oldsize) {
|
||||
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
|
||||
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
|
||||
&did_zeroing, &xfs_buffered_write_iomap_ops);
|
||||
error = xfs_zero_range(ip, oldsize, newsize - oldsize,
|
||||
&did_zeroing);
|
||||
} else {
|
||||
/*
|
||||
* iomap won't detect a dirty page over an unwritten block (or a
|
||||
@ -903,8 +903,7 @@ xfs_setattr_size(
|
||||
newsize);
|
||||
if (error)
|
||||
return error;
|
||||
error = iomap_truncate_page(inode, newsize, &did_zeroing,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
error = xfs_truncate_page(ip, newsize, &did_zeroing);
|
||||
}
|
||||
|
||||
if (error)
|
||||
|
@ -155,7 +155,7 @@ xfs_fs_map_blocks(
|
||||
xfs_iunlock(ip, lock_flags);
|
||||
|
||||
error = xfs_iomap_write_direct(ip, offset_fsb,
|
||||
end_fsb - offset_fsb, &imap);
|
||||
end_fsb - offset_fsb, 0, &imap);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
@ -173,7 +173,7 @@ xfs_fs_map_blocks(
|
||||
}
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
|
||||
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
|
||||
*device_generation = mp->m_generation;
|
||||
return error;
|
||||
out_unlock:
|
||||
|
@ -1272,8 +1272,7 @@ xfs_reflink_zero_posteof(
|
||||
return 0;
|
||||
|
||||
trace_xfs_zero_eof(ip, isize, pos - isize);
|
||||
return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
return xfs_zero_range(ip, isize, pos - isize, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -331,13 +331,34 @@ xfs_set_inode_alloc(
|
||||
return xfs_is_inode32(mp) ? maxagi : agcount;
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_buftarg_is_dax(
|
||||
struct super_block *sb,
|
||||
struct xfs_buftarg *bt)
|
||||
static int
|
||||
xfs_setup_dax_always(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
|
||||
bdev_nr_sectors(bt->bt_bdev));
|
||||
if (!mp->m_ddev_targp->bt_daxdev &&
|
||||
(!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
|
||||
xfs_alert(mp,
|
||||
"DAX unsupported by block device. Turning off DAX.");
|
||||
goto disable_dax;
|
||||
}
|
||||
|
||||
if (mp->m_super->s_blocksize != PAGE_SIZE) {
|
||||
xfs_alert(mp,
|
||||
"DAX not supported for blocksize. Turning off DAX.");
|
||||
goto disable_dax;
|
||||
}
|
||||
|
||||
if (xfs_has_reflink(mp)) {
|
||||
xfs_alert(mp, "DAX and reflink cannot be used together!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
|
||||
return 0;
|
||||
|
||||
disable_dax:
|
||||
xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
@ -370,26 +391,19 @@ STATIC void
|
||||
xfs_close_devices(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
|
||||
|
||||
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
|
||||
struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
|
||||
struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
|
||||
|
||||
xfs_free_buftarg(mp->m_logdev_targp);
|
||||
xfs_blkdev_put(logdev);
|
||||
fs_put_dax(dax_logdev);
|
||||
}
|
||||
if (mp->m_rtdev_targp) {
|
||||
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
|
||||
struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
|
||||
|
||||
xfs_free_buftarg(mp->m_rtdev_targp);
|
||||
xfs_blkdev_put(rtdev);
|
||||
fs_put_dax(dax_rtdev);
|
||||
}
|
||||
xfs_free_buftarg(mp->m_ddev_targp);
|
||||
fs_put_dax(dax_ddev);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -407,8 +421,6 @@ xfs_open_devices(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct block_device *ddev = mp->m_super->s_bdev;
|
||||
struct dax_device *dax_ddev = fs_dax_get_by_bdev(ddev);
|
||||
struct dax_device *dax_logdev = NULL, *dax_rtdev = NULL;
|
||||
struct block_device *logdev = NULL, *rtdev = NULL;
|
||||
int error;
|
||||
|
||||
@ -418,8 +430,7 @@ xfs_open_devices(
|
||||
if (mp->m_logname) {
|
||||
error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
|
||||
if (error)
|
||||
goto out;
|
||||
dax_logdev = fs_dax_get_by_bdev(logdev);
|
||||
return error;
|
||||
}
|
||||
|
||||
if (mp->m_rtname) {
|
||||
@ -433,25 +444,24 @@ xfs_open_devices(
|
||||
error = -EINVAL;
|
||||
goto out_close_rtdev;
|
||||
}
|
||||
dax_rtdev = fs_dax_get_by_bdev(rtdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup xfs_mount buffer target pointers
|
||||
*/
|
||||
error = -ENOMEM;
|
||||
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
|
||||
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
|
||||
if (!mp->m_ddev_targp)
|
||||
goto out_close_rtdev;
|
||||
|
||||
if (rtdev) {
|
||||
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
|
||||
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
|
||||
if (!mp->m_rtdev_targp)
|
||||
goto out_free_ddev_targ;
|
||||
}
|
||||
|
||||
if (logdev && logdev != ddev) {
|
||||
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
|
||||
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
|
||||
if (!mp->m_logdev_targp)
|
||||
goto out_free_rtdev_targ;
|
||||
} else {
|
||||
@ -467,14 +477,9 @@ xfs_open_devices(
|
||||
xfs_free_buftarg(mp->m_ddev_targp);
|
||||
out_close_rtdev:
|
||||
xfs_blkdev_put(rtdev);
|
||||
fs_put_dax(dax_rtdev);
|
||||
out_close_logdev:
|
||||
if (logdev && logdev != ddev) {
|
||||
if (logdev && logdev != ddev)
|
||||
xfs_blkdev_put(logdev);
|
||||
fs_put_dax(dax_logdev);
|
||||
}
|
||||
out:
|
||||
fs_put_dax(dax_ddev);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -1593,26 +1598,9 @@ xfs_fs_fill_super(
|
||||
sb->s_flags |= SB_I_VERSION;
|
||||
|
||||
if (xfs_has_dax_always(mp)) {
|
||||
bool rtdev_is_dax = false, datadev_is_dax;
|
||||
|
||||
xfs_warn(mp,
|
||||
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
|
||||
|
||||
datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
|
||||
if (mp->m_rtdev_targp)
|
||||
rtdev_is_dax = xfs_buftarg_is_dax(sb,
|
||||
mp->m_rtdev_targp);
|
||||
if (!rtdev_is_dax && !datadev_is_dax) {
|
||||
xfs_alert(mp,
|
||||
"DAX unsupported by block device. Turning off DAX.");
|
||||
xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
|
||||
}
|
||||
if (xfs_has_reflink(mp)) {
|
||||
xfs_alert(mp,
|
||||
"DAX and reflink cannot be used together!");
|
||||
error = -EINVAL;
|
||||
error = xfs_setup_dax_always(mp);
|
||||
if (error)
|
||||
goto out_filestream_unmount;
|
||||
}
|
||||
}
|
||||
|
||||
if (xfs_has_discard(mp)) {
|
||||
|
@ -6,14 +6,14 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/radix-tree.h>
|
||||
|
||||
/* Flag for synchronous flush */
|
||||
#define DAXDEV_F_SYNC (1UL << 0)
|
||||
|
||||
typedef unsigned long dax_entry_t;
|
||||
|
||||
struct iomap_ops;
|
||||
struct iomap;
|
||||
struct dax_device;
|
||||
struct gendisk;
|
||||
struct iomap_ops;
|
||||
struct iomap_iter;
|
||||
struct iomap;
|
||||
|
||||
struct dax_operations {
|
||||
/*
|
||||
* direct_access: translate a device-relative
|
||||
@ -28,33 +28,18 @@ struct dax_operations {
|
||||
*/
|
||||
bool (*dax_supported)(struct dax_device *, struct block_device *, int,
|
||||
sector_t, sector_t);
|
||||
/* copy_from_iter: required operation for fs-dax direct-i/o */
|
||||
size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
|
||||
struct iov_iter *);
|
||||
/* copy_to_iter: required operation for fs-dax direct-i/o */
|
||||
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
|
||||
struct iov_iter *);
|
||||
/* zero_page_range: required operation. Zero page range */
|
||||
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX)
|
||||
struct dax_device *alloc_dax(void *private, const char *host,
|
||||
const struct dax_operations *ops, unsigned long flags);
|
||||
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
|
||||
void put_dax(struct dax_device *dax_dev);
|
||||
void kill_dax(struct dax_device *dax_dev);
|
||||
void dax_write_cache(struct dax_device *dax_dev, bool wc);
|
||||
bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
||||
bool __dax_synchronous(struct dax_device *dax_dev);
|
||||
static inline bool dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
return __dax_synchronous(dax_dev);
|
||||
}
|
||||
void __set_dax_synchronous(struct dax_device *dax_dev);
|
||||
static inline void set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
__set_dax_synchronous(dax_dev);
|
||||
}
|
||||
bool dax_synchronous(struct dax_device *dax_dev);
|
||||
void set_dax_synchronous(struct dax_device *dax_dev);
|
||||
/*
|
||||
* Check if given mapping is supported by the file / underlying device.
|
||||
*/
|
||||
@ -68,8 +53,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
||||
return dax_synchronous(dax_dev);
|
||||
}
|
||||
#else
|
||||
static inline struct dax_device *alloc_dax(void *private, const char *host,
|
||||
const struct dax_operations *ops, unsigned long flags)
|
||||
static inline struct dax_device *alloc_dax(void *private,
|
||||
const struct dax_operations *ops)
|
||||
{
|
||||
/*
|
||||
* Callers should check IS_ENABLED(CONFIG_DAX) to know if this
|
||||
@ -104,22 +89,38 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
||||
}
|
||||
#endif
|
||||
|
||||
void set_dax_nocache(struct dax_device *dax_dev);
|
||||
void set_dax_nomc(struct dax_device *dax_dev);
|
||||
|
||||
struct writeback_control;
|
||||
int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
bool generic_fsdax_supported(struct dax_device *dax_dev,
|
||||
struct block_device *bdev, int blocksize, sector_t start,
|
||||
sector_t sectors);
|
||||
|
||||
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
|
||||
int blocksize, sector_t start, sector_t len);
|
||||
|
||||
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
|
||||
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
|
||||
void dax_remove_host(struct gendisk *disk);
|
||||
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
|
||||
u64 *start_off);
|
||||
static inline void fs_put_dax(struct dax_device *dax_dev)
|
||||
{
|
||||
put_dax(dax_dev);
|
||||
}
|
||||
#else
|
||||
static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void dax_remove_host(struct gendisk *disk)
|
||||
{
|
||||
}
|
||||
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
|
||||
u64 *start_off)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void fs_put_dax(struct dax_device *dax_dev)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
|
||||
|
||||
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
int dax_writeback_mapping_range(struct address_space *mapping,
|
||||
struct dax_device *dax_dev, struct writeback_control *wbc);
|
||||
|
||||
@ -128,24 +129,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
|
||||
dax_entry_t dax_lock_page(struct page *page);
|
||||
void dax_unlock_page(struct page *page, dax_entry_t cookie);
|
||||
#else
|
||||
#define generic_fsdax_supported NULL
|
||||
|
||||
static inline bool dax_supported(struct dax_device *dax_dev,
|
||||
struct block_device *bdev, int blocksize, sector_t start,
|
||||
sector_t len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void fs_put_dax(struct dax_device *dax_dev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct page *dax_layout_busy_page(struct address_space *mapping)
|
||||
{
|
||||
return NULL;
|
||||
@ -174,6 +157,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
|
||||
}
|
||||
#endif
|
||||
|
||||
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
const struct iomap_ops *ops);
|
||||
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX)
|
||||
int dax_read_lock(void);
|
||||
void dax_read_unlock(int id);
|
||||
@ -208,7 +196,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
|
||||
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
|
||||
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
||||
pgoff_t index);
|
||||
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
|
||||
static inline bool dax_mapping(struct address_space *mapping)
|
||||
{
|
||||
return mapping->host && IS_DAX(mapping->host);
|
||||
|
@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
|
||||
*/
|
||||
typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn);
|
||||
typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i);
|
||||
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||
size_t nr_pages);
|
||||
|
||||
@ -200,8 +198,6 @@ struct target_type {
|
||||
dm_iterate_devices_fn iterate_devices;
|
||||
dm_io_hints_fn io_hints;
|
||||
dm_dax_direct_access_fn direct_access;
|
||||
dm_dax_copy_iter_fn dax_copy_from_iter;
|
||||
dm_dax_copy_iter_fn dax_copy_to_iter;
|
||||
dm_dax_zero_page_range_fn dax_zero_page_range;
|
||||
|
||||
/* For internal device-mapper use. */
|
||||
|
@ -141,6 +141,11 @@ struct iomap_page_ops {
|
||||
#define IOMAP_NOWAIT (1 << 5) /* do not block */
|
||||
#define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */
|
||||
#define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */
|
||||
#ifdef CONFIG_FS_DAX
|
||||
#define IOMAP_DAX (1 << 8) /* DAX mapping */
|
||||
#else
|
||||
#define IOMAP_DAX 0
|
||||
#endif /* CONFIG_FS_DAX */
|
||||
|
||||
struct iomap_ops {
|
||||
/*
|
||||
|
@ -72,16 +72,6 @@ struct dev_pagemap_ops {
|
||||
*/
|
||||
void (*page_free)(struct page *page);
|
||||
|
||||
/*
|
||||
* Transition the refcount in struct dev_pagemap to the dead state.
|
||||
*/
|
||||
void (*kill)(struct dev_pagemap *pgmap);
|
||||
|
||||
/*
|
||||
* Wait for refcount in struct dev_pagemap to be idle and reap it.
|
||||
*/
|
||||
void (*cleanup)(struct dev_pagemap *pgmap);
|
||||
|
||||
/*
|
||||
* Used for private (un-addressable) device memory only. Must migrate
|
||||
* the page back to a CPU accessible page.
|
||||
@ -95,8 +85,7 @@ struct dev_pagemap_ops {
|
||||
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
|
||||
* @altmap: pre-allocated/reserved memory for vmemmap allocations
|
||||
* @ref: reference count that pins the devm_memremap_pages() mapping
|
||||
* @internal_ref: internal reference if @ref is not provided by the caller
|
||||
* @done: completion for @internal_ref
|
||||
* @done: completion for @ref
|
||||
* @type: memory type: see MEMORY_* in memory_hotplug.h
|
||||
* @flags: PGMAP_* flags to specify detailed behavior
|
||||
* @ops: method table
|
||||
@ -109,8 +98,7 @@ struct dev_pagemap_ops {
|
||||
*/
|
||||
struct dev_pagemap {
|
||||
struct vmem_altmap altmap;
|
||||
struct percpu_ref *ref;
|
||||
struct percpu_ref internal_ref;
|
||||
struct percpu_ref ref;
|
||||
struct completion done;
|
||||
enum memory_type type;
|
||||
unsigned int flags;
|
||||
@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void)
|
||||
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
|
||||
{
|
||||
if (pgmap)
|
||||
percpu_ref_put(pgmap->ref);
|
||||
percpu_ref_put(&pgmap->ref);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_MEMREMAP_H_ */
|
||||
|
@ -203,7 +203,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
|
||||
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
|
||||
/*
|
||||
* Note, users like pmem that depend on the stricter semantics of
|
||||
* copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
|
||||
* _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
|
||||
* IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
|
||||
* destination is flushed from the cache on return.
|
||||
*/
|
||||
@ -218,24 +218,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
|
||||
#define _copy_mc_to_iter _copy_to_iter
|
||||
#endif
|
||||
|
||||
static __always_inline __must_check
|
||||
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
if (unlikely(!check_copy_size(addr, bytes, false)))
|
||||
return 0;
|
||||
else
|
||||
return _copy_from_iter_flushcache(addr, bytes, i);
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
if (unlikely(!check_copy_size(addr, bytes, true)))
|
||||
return 0;
|
||||
else
|
||||
return _copy_mc_to_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
|
||||
unsigned long iov_iter_alignment(const struct iov_iter *i);
|
||||
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
|
||||
|
@ -112,30 +112,6 @@ static unsigned long pfn_next(unsigned long pfn)
|
||||
#define for_each_device_pfn(pfn, map, i) \
|
||||
for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
|
||||
|
||||
static void dev_pagemap_kill(struct dev_pagemap *pgmap)
|
||||
{
|
||||
if (pgmap->ops && pgmap->ops->kill)
|
||||
pgmap->ops->kill(pgmap);
|
||||
else
|
||||
percpu_ref_kill(pgmap->ref);
|
||||
}
|
||||
|
||||
static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
|
||||
{
|
||||
if (pgmap->ops && pgmap->ops->cleanup) {
|
||||
pgmap->ops->cleanup(pgmap);
|
||||
} else {
|
||||
wait_for_completion(&pgmap->done);
|
||||
percpu_ref_exit(pgmap->ref);
|
||||
}
|
||||
/*
|
||||
* Undo the pgmap ref assignment for the internal case as the
|
||||
* caller may re-enable the same pgmap.
|
||||
*/
|
||||
if (pgmap->ref == &pgmap->internal_ref)
|
||||
pgmap->ref = NULL;
|
||||
}
|
||||
|
||||
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
|
||||
{
|
||||
struct range *range = &pgmap->ranges[range_id];
|
||||
@ -167,11 +143,12 @@ void memunmap_pages(struct dev_pagemap *pgmap)
|
||||
unsigned long pfn;
|
||||
int i;
|
||||
|
||||
dev_pagemap_kill(pgmap);
|
||||
percpu_ref_kill(&pgmap->ref);
|
||||
for (i = 0; i < pgmap->nr_range; i++)
|
||||
for_each_device_pfn(pfn, pgmap, i)
|
||||
put_page(pfn_to_page(pfn));
|
||||
dev_pagemap_cleanup(pgmap);
|
||||
wait_for_completion(&pgmap->done);
|
||||
percpu_ref_exit(&pgmap->ref);
|
||||
|
||||
for (i = 0; i < pgmap->nr_range; i++)
|
||||
pageunmap_range(pgmap, i);
|
||||
@ -188,8 +165,7 @@ static void devm_memremap_pages_release(void *data)
|
||||
|
||||
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
|
||||
{
|
||||
struct dev_pagemap *pgmap =
|
||||
container_of(ref, struct dev_pagemap, internal_ref);
|
||||
struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
|
||||
|
||||
complete(&pgmap->done);
|
||||
}
|
||||
@ -295,8 +271,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
|
||||
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
|
||||
PHYS_PFN(range->start),
|
||||
PHYS_PFN(range_len(range)), pgmap);
|
||||
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
|
||||
- pfn_first(pgmap, range_id));
|
||||
percpu_ref_get_many(&pgmap->ref,
|
||||
pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
|
||||
return 0;
|
||||
|
||||
err_add_memory:
|
||||
@ -362,22 +338,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pgmap->ref) {
|
||||
if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
init_completion(&pgmap->done);
|
||||
error = percpu_ref_init(&pgmap->internal_ref,
|
||||
dev_pagemap_percpu_release, 0, GFP_KERNEL);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
pgmap->ref = &pgmap->internal_ref;
|
||||
} else {
|
||||
if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
|
||||
WARN(1, "Missing reference count teardown definition\n");
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
}
|
||||
init_completion(&pgmap->done);
|
||||
error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
|
||||
GFP_KERNEL);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
|
||||
devmap_managed_enable_get(pgmap);
|
||||
|
||||
@ -486,7 +451,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
|
||||
/* fall back to slow path lookup */
|
||||
rcu_read_lock();
|
||||
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
|
||||
if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
|
||||
if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
|
||||
pgmap = NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
|
@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o
|
||||
endif
|
||||
obj-$(CONFIG_DEV_DAX) += device_dax.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
|
||||
|
||||
nfit-y := $(ACPI_SRC)/core.o
|
||||
nfit-y += $(ACPI_SRC)/intel.o
|
||||
@ -67,12 +65,8 @@ device_dax-y += dax-dev.o
|
||||
device_dax-y += device_dax_test.o
|
||||
device_dax-y += config_check.o
|
||||
|
||||
dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
|
||||
dax_pmem-y := $(DAX_SRC)/pmem.o
|
||||
dax_pmem-y += dax_pmem_test.o
|
||||
dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
|
||||
dax_pmem_core-y += dax_pmem_core_test.o
|
||||
dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
|
||||
dax_pmem_compat-y += dax_pmem_compat_test.o
|
||||
dax_pmem-y += config_check.o
|
||||
|
||||
libnvdimm-y := $(NVDIMM_SRC)/core.o
|
||||
|
@ -1,8 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright(c) 2019 Intel Corporation. All rights reserved.
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/printk.h>
|
||||
#include "watermark.h"
|
||||
|
||||
nfit_test_watermark(dax_pmem_compat);
|
@ -1,8 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright(c) 2019 Intel Corporation. All rights reserved.
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/printk.h>
|
||||
#include "watermark.h"
|
||||
|
||||
nfit_test_watermark(dax_pmem_core);
|
@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap)
|
||||
{
|
||||
struct dev_pagemap *pgmap = _pgmap;
|
||||
|
||||
WARN_ON(!pgmap || !pgmap->ref);
|
||||
WARN_ON(!pgmap);
|
||||
|
||||
if (pgmap->ops && pgmap->ops->kill)
|
||||
pgmap->ops->kill(pgmap);
|
||||
else
|
||||
percpu_ref_kill(pgmap->ref);
|
||||
percpu_ref_kill(&pgmap->ref);
|
||||
|
||||
if (pgmap->ops && pgmap->ops->cleanup) {
|
||||
pgmap->ops->cleanup(pgmap);
|
||||
} else {
|
||||
wait_for_completion(&pgmap->done);
|
||||
percpu_ref_exit(pgmap->ref);
|
||||
}
|
||||
wait_for_completion(&pgmap->done);
|
||||
percpu_ref_exit(&pgmap->ref);
|
||||
}
|
||||
|
||||
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
|
||||
{
|
||||
struct dev_pagemap *pgmap =
|
||||
container_of(ref, struct dev_pagemap, internal_ref);
|
||||
struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
|
||||
|
||||
complete(&pgmap->done);
|
||||
}
|
||||
@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
||||
if (!nfit_res)
|
||||
return devm_memremap_pages(dev, pgmap);
|
||||
|
||||
if (!pgmap->ref) {
|
||||
if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
init_completion(&pgmap->done);
|
||||
error = percpu_ref_init(&pgmap->internal_ref,
|
||||
dev_pagemap_percpu_release, 0, GFP_KERNEL);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
pgmap->ref = &pgmap->internal_ref;
|
||||
} else {
|
||||
if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
|
||||
WARN(1, "Missing reference count teardown definition\n");
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
}
|
||||
init_completion(&pgmap->done);
|
||||
error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
|
||||
GFP_KERNEL);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
|
||||
error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
|
||||
if (error)
|
||||
|
@ -1054,10 +1054,6 @@ static __init int ndtest_init(void)
|
||||
libnvdimm_test();
|
||||
device_dax_test();
|
||||
dax_pmem_test();
|
||||
dax_pmem_core_test();
|
||||
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
|
||||
dax_pmem_compat_test();
|
||||
#endif
|
||||
|
||||
nfit_test_setup(ndtest_resource_lookup, NULL);
|
||||
|
||||
|
@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void)
|
||||
acpi_nfit_test();
|
||||
device_dax_test();
|
||||
dax_pmem_test();
|
||||
dax_pmem_core_test();
|
||||
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
|
||||
dax_pmem_compat_test();
|
||||
#endif
|
||||
|
||||
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user