// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include "dax-private.h"

/**
 * struct dax_device - anchor object for dax services
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @private: dax driver private data
 * @flags: state and boolean properties
 * @ops: operations for this device
 */
struct dax_device {
	struct inode inode;
	struct cdev cdev;
	void *private;
	unsigned long flags;
	const struct dax_operations *ops;
};

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;

int dax_read_lock(void)
{
	return srcu_read_lock(&dax_srcu);
}
EXPORT_SYMBOL_GPL(dax_read_lock);

void dax_read_unlock(int id)
{
	srcu_read_unlock(&dax_srcu, id);
}
EXPORT_SYMBOL_GPL(dax_read_unlock);
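
/*
 * Illustrative sketch only (not part of this file): callers are expected to
 * bracket dax operations with dax_read_lock()/dax_read_unlock() so that
 * kill_dax() can wait for in-flight users to drain. The function name below
 * is hypothetical.
 *
 *	static bool example_dax_is_usable(struct dax_device *dax_dev)
 *	{
 *		bool alive;
 *		int id;
 *
 *		id = dax_read_lock();
 *		alive = dax_alive(dax_dev);
 *		dax_read_unlock(id);
 *		return alive;
 *	}
 */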

#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
#include <linux/blkdev.h>

static DEFINE_XARRAY(dax_hosts);

int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{
	return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(dax_add_host);

void dax_remove_host(struct gendisk *disk)
{
	xa_erase(&dax_hosts, (unsigned long)disk);
}
EXPORT_SYMBOL_GPL(dax_remove_host);

/**
 * fs_dax_get_by_bdev() - temporary lookup mechanism for filesystem-dax
 * @bdev: block device to find a dax_device for
 * @start_off: returns the byte offset into the dax_device that @bdev starts
 */
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
{
	struct dax_device *dax_dev;
	u64 part_size;
	int id;

	if (!blk_queue_dax(bdev->bd_disk->queue))
		return NULL;

	*start_off = get_start_sect(bdev) * SECTOR_SIZE;
	part_size = bdev_nr_sectors(bdev) * SECTOR_SIZE;
	if (*start_off % PAGE_SIZE || part_size % PAGE_SIZE) {
		pr_info("%pg: error: unaligned partition for dax\n", bdev);
		return NULL;
	}

	id = dax_read_lock();
	dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
	if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
		dax_dev = NULL;
	dax_read_unlock(id);

	return dax_dev;
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
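
/*
 * Illustrative sketch only: a filesystem mount path might resolve its
 * dax_device and partition offset roughly as follows (names are
 * hypothetical). A NULL return means the filesystem should fall back to
 * non-dax I/O; a non-NULL return holds a reference that is later dropped
 * with put_dax().
 *
 *	static struct dax_device *example_mount_dax(struct block_device *bdev,
 *			u64 *dax_part_off)
 *	{
 *		struct dax_device *dax_dev;
 *
 *		dax_dev = fs_dax_get_by_bdev(bdev, dax_part_off);
 *		if (!dax_dev)
 *			return NULL;
 *		return dax_dev;
 *	}
 */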

#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */

enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE,
	/* gate whether dax_flush() calls the low level flush routine */
	DAXDEV_WRITE_CACHE,
	/* flag to check if device supports synchronous flush */
	DAXDEV_SYNC,
	/* do not leave the caches dirty after writes */
	DAXDEV_NOCACHE,
	/* handle CPU fetch exceptions during reads */
	DAXDEV_NOMC,
};

/**
 * dax_direct_access() - translate a device pgoff to an absolute pfn
 * @dax_dev: a dax_device instance representing the logical memory range
 * @pgoff: offset in pages from the start of the device to translate
 * @nr_pages: number of consecutive pages caller can handle relative to @pfn
 * @kaddr: output parameter that returns a virtual address mapping of pfn
 * @pfn: output parameter that returns an absolute pfn translation of @pgoff
 *
 * Return: negative errno if an error occurs, otherwise the number of
 * pages accessible at the device relative @pgoff.
 */
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
		void **kaddr, pfn_t *pfn)
{
	long avail;

	if (!dax_dev)
		return -EOPNOTSUPP;

	if (!dax_alive(dax_dev))
		return -ENXIO;

	if (nr_pages < 0)
		return -EINVAL;

	avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
			kaddr, pfn);
	if (!avail)
		return -ERANGE;
	return min(avail, nr_pages);
}
EXPORT_SYMBOL_GPL(dax_direct_access);
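
/*
 * Illustrative sketch only: a caller that wants a kernel mapping for a single
 * page of the device, under the SRCU read lock (names are hypothetical).
 *
 *	static long example_write_page(struct dax_device *dax_dev, pgoff_t pgoff,
 *			const void *src)
 *	{
 *		void *kaddr;
 *		pfn_t pfn;
 *		long nr;
 *		int id;
 *
 *		id = dax_read_lock();
 *		nr = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
 *		if (nr < 1) {
 *			dax_read_unlock(id);
 *			return nr < 0 ? nr : -EIO;
 *		}
 *		memcpy_flushcache(kaddr, src, PAGE_SIZE);
 *		dax_read_unlock(id);
 *		return 0;
 *	}
 */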

size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
	if (!dax_alive(dax_dev))
		return 0;

	/*
	 * The userspace address for the memory copy has already been validated
	 * via access_ok() in vfs_write, so use the 'no check' version to bypass
	 * the HARDENED_USERCOPY overhead.
	 */
	if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
		return _copy_from_iter_flushcache(addr, bytes, i);
	return _copy_from_iter(addr, bytes, i);
}

size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
	if (!dax_alive(dax_dev))
		return 0;

	/*
	 * The userspace address for the memory copy has already been validated
	 * via access_ok() in vfs_read, so use the 'no check' version to bypass
	 * the HARDENED_USERCOPY overhead.
	 */
	if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
		return _copy_mc_to_iter(addr, bytes, i);
	return _copy_to_iter(addr, bytes, i);
}

int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages)
{
	if (!dax_alive(dax_dev))
		return -ENXIO;
	/*
	 * There are no callers that want to zero more than one page as of now.
	 * Once users are there, this check can be removed after the
	 * device mapper code has been updated to split ranges across targets.
	 */
	if (nr_pages != 1)
		return -EIO;

	return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);
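
/*
 * Illustrative sketch only: a minimal ->zero_page_range() implementation a
 * dax provider might register in its dax_operations (names are hypothetical;
 * real providers also account for bad blocks and media errors).
 *
 *	static int example_zero_page_range(struct dax_device *dax_dev,
 *			pgoff_t pgoff, size_t nr_pages)
 *	{
 *		void *kaddr;
 *		pfn_t pfn;
 *		long rc;
 *
 *		rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, &pfn);
 *		if (rc < 0)
 *			return rc;
 *		memset(kaddr, 0, nr_pages << PAGE_SHIFT);
 *		dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
 *		return 0;
 *	}
 */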

#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
	if (unlikely(!dax_write_cache_enabled(dax_dev)))
		return;

	arch_wb_cache_pmem(addr, size);
}
#else
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
}
#endif
EXPORT_SYMBOL_GPL(dax_flush);

void dax_write_cache(struct dax_device *dax_dev, bool wc)
{
	if (wc)
		set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
	else
		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_write_cache);

bool dax_write_cache_enabled(struct dax_device *dax_dev)
{
	return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);

bool dax_synchronous(struct dax_device *dax_dev)
{
	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_synchronous);

void set_dax_synchronous(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_SYNC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_synchronous);

void set_dax_nocache(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nocache);

void set_dax_nomc(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_NOMC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nomc);
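
/*
 * Illustrative sketch only: a provider typically configures these properties
 * right after alloc_dax(), before exposing the device (the ops structure and
 * the "synchronous" condition below are hypothetical).
 *
 *	dax_dev = alloc_dax(drv_data, &example_dax_ops);
 *	if (IS_ERR(dax_dev))
 *		return PTR_ERR(dax_dev);
 *	set_dax_nocache(dax_dev);
 *	set_dax_nomc(dax_dev);
 *	if (media_is_flush_synchronous)
 *		set_dax_synchronous(dax_dev);
 */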

bool dax_alive(struct dax_device *dax_dev)
{
	lockdep_assert_held(&dax_srcu);
	return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_alive);

/*
 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
 * that any fault handlers or operations that might have seen
 * dax_alive(), have completed.  Any operations that start after
 * synchronize_srcu() has run will abort upon seeing !dax_alive().
 */
void kill_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;

	clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
	synchronize_srcu(&dax_srcu);
}
EXPORT_SYMBOL_GPL(kill_dax);
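
/*
 * Illustrative sketch only: teardown order for a provider (hypothetical
 * driver remove path). kill_dax() fences in-flight operations before the
 * final reference is dropped with put_dax().
 *
 *	static void example_dax_teardown(struct dax_device *dax_dev)
 *	{
 *		kill_dax(dax_dev);
 *		put_dax(dax_dev);
 *	}
 */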

void run_dax(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_ALIVE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(run_dax);

static struct inode *dax_alloc_inode(struct super_block *sb)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
	if (!dax_dev)
		return NULL;

	inode = &dax_dev->inode;
	inode->i_rdev = 0;
	return inode;
}

static struct dax_device *to_dax_dev(struct inode *inode)
{
	return container_of(inode, struct dax_device, inode);
}

static void dax_free_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	if (inode->i_rdev)
		ida_simple_remove(&dax_minor_ida, iminor(inode));
	kmem_cache_free(dax_cache, dax_dev);
}

static void dax_destroy_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
			"kill_dax() must be called before final iput()\n");
}

static const struct super_operations dax_sops = {
	.statfs = simple_statfs,
	.alloc_inode = dax_alloc_inode,
	.destroy_inode = dax_destroy_inode,
	.free_inode = dax_free_inode,
	.drop_inode = generic_delete_inode,
};

static int dax_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, DAXFS_MAGIC);

	if (!ctx)
		return -ENOMEM;
	ctx->ops = &dax_sops;
	return 0;
}

static struct file_system_type dax_fs_type = {
	.name		= "dax",
	.init_fs_context = dax_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	return inode->i_rdev == devt;
}

static int dax_set(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	inode->i_rdev = devt;
	return 0;
}

static struct dax_device *dax_dev_get(dev_t devt)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
			dax_test, dax_set, &devt);
	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (inode->i_state & I_NEW) {
		set_bit(DAXDEV_ALIVE, &dax_dev->flags);
		inode->i_cdev = &dax_dev->cdev;
		inode->i_mode = S_IFCHR;
		inode->i_flags = S_DAX;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}

	return dax_dev;
}

struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
{
	struct dax_device *dax_dev;
	dev_t devt;
	int minor;

	if (WARN_ON_ONCE(ops && !ops->zero_page_range))
		return ERR_PTR(-EINVAL);

	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
	if (minor < 0)
		return ERR_PTR(-ENOMEM);

	devt = MKDEV(MAJOR(dax_devt), minor);
	dax_dev = dax_dev_get(devt);
	if (!dax_dev)
		goto err_dev;

	dax_dev->ops = ops;
	dax_dev->private = private;
	return dax_dev;

err_dev:
	ida_simple_remove(&dax_minor_ida, minor);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(alloc_dax);

void put_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;
	iput(&dax_dev->inode);
}
EXPORT_SYMBOL_GPL(put_dax);

/**
 * inode_dax: convert a public inode into its dax_dev
 * @inode: An inode with i_cdev pointing to a dax_dev
 *
 * Note this is not equivalent to to_dax_dev() which is for private
 * internal use where we know the inode filesystem type == dax_fs_type.
 */
struct dax_device *inode_dax(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct dax_device, cdev);
}
EXPORT_SYMBOL_GPL(inode_dax);
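
/*
 * Illustrative sketch only: a character device open() path can recover the
 * dax_device from the inode set up by dax_dev_get() and then fetch the
 * provider's private data (handler and type names are hypothetical).
 *
 *	static int example_dax_open(struct inode *inode, struct file *filp)
 *	{
 *		struct dax_device *dax_dev = inode_dax(inode);
 *		struct example_dev *edev = dax_get_private(dax_dev);
 *
 *		if (!edev)
 *			return -ENXIO;
 *		filp->private_data = edev;
 *		return 0;
 *	}
 */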

struct inode *dax_inode(struct dax_device *dax_dev)
{
	return &dax_dev->inode;
}
EXPORT_SYMBOL_GPL(dax_inode);

void *dax_get_private(struct dax_device *dax_dev)
{
	if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
		return NULL;
	return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);

static void init_once(void *_dax_dev)
{
	struct dax_device *dax_dev = _dax_dev;
	struct inode *inode = &dax_dev->inode;

	memset(dax_dev, 0, sizeof(*dax_dev));
	inode_init_once(inode);
}

static int dax_fs_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	dax_mnt = kern_mount(&dax_fs_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto err_mount;
	}
	dax_superblock = dax_mnt->mnt_sb;

	return 0;

err_mount:
	kmem_cache_destroy(dax_cache);

	return rc;
}

static void dax_fs_exit(void)
{
	kern_unmount(dax_mnt);
	kmem_cache_destroy(dax_cache);
}

static int __init dax_core_init(void)
{
	int rc;

	rc = dax_fs_init();
	if (rc)
		return rc;

	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
	if (rc)
		goto err_chrdev;

	rc = dax_bus_init();
	if (rc)
		goto err_bus;
	return 0;

err_bus:
	unregister_chrdev_region(dax_devt, MINORMASK+1);
err_chrdev:
	dax_fs_exit();
	return rc;
}

static void __exit dax_core_exit(void)
{
	dax_bus_exit();
	unregister_chrdev_region(dax_devt, MINORMASK+1);
	ida_destroy(&dax_minor_ida);
	dax_fs_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_core_init);
module_exit(dax_core_exit);