2011-01-20 20:50:14 +03:00
/*
* NVM Express device driver
2014-03-24 18:11:22 +04:00
* Copyright ( c ) 2011 - 2014 , Intel Corporation .
2011-01-20 20:50:14 +03:00
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms and conditions of the GNU General Public License ,
* version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*/
# include <linux/nvme.h>
2011-05-12 21:50:28 +04:00
# include <linux/bitops.h>
2011-01-20 20:50:14 +03:00
# include <linux/blkdev.h>
2014-11-04 18:20:14 +03:00
# include <linux/blk-mq.h>
2014-03-24 20:46:25 +04:00
# include <linux/cpu.h>
2011-05-06 16:37:54 +04:00
# include <linux/delay.h>
2011-01-20 20:50:14 +03:00
# include <linux/errno.h>
# include <linux/fs.h>
# include <linux/genhd.h>
2014-04-03 01:45:37 +04:00
# include <linux/hdreg.h>
2011-05-06 16:45:47 +04:00
# include <linux/idr.h>
2011-01-20 20:50:14 +03:00
# include <linux/init.h>
# include <linux/interrupt.h>
# include <linux/io.h>
# include <linux/kdev_t.h>
2011-03-03 02:37:18 +03:00
# include <linux/kthread.h>
2011-01-20 20:50:14 +03:00
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/module.h>
# include <linux/moduleparam.h>
# include <linux/pci.h>
2011-02-06 15:53:23 +03:00
# include <linux/poison.h>
2013-07-09 01:26:25 +04:00
# include <linux/ptrace.h>
2011-01-20 20:50:14 +03:00
# include <linux/sched.h>
# include <linux/slab.h>
# include <linux/types.h>
2013-03-05 05:40:58 +04:00
# include <scsi/sg.h>
2012-02-07 06:45:33 +04:00
# include <asm-generic/io-64-nonatomic-lo-hi.h>
2014-05-13 21:42:02 +04:00
/* Queue depths, in entries (NVME_AQ_DEPTH is presumably the admin queue). */
#define NVME_Q_DEPTH		1024
#define NVME_AQ_DEPTH		64
/*
 * Size in bytes of a submission / completion queue holding 'depth' entries.
 * The argument is parenthesized so that expressions such as
 * SQ_SIZE(depth + 1) expand correctly.
 */
#define SQ_SIZE(depth)		((depth) * sizeof(struct nvme_command))
#define CQ_SIZE(depth)		((depth) * sizeof(struct nvme_completion))
/* Module-parameter timeouts (given in seconds) converted to jiffies. */
#define ADMIN_TIMEOUT		(admin_timeout * HZ)
#define SHUTDOWN_TIMEOUT	(shutdown_timeout * HZ)
#define IOD_TIMEOUT		(retry_time * HZ)
/* Admin command timeout in seconds; ADMIN_TIMEOUT scales it by HZ. */
static unsigned char admin_timeout = 60 ;
module_param ( admin_timeout , byte , 0644 ) ;
MODULE_PARM_DESC ( admin_timeout , " timeout in seconds for admin commands " ) ;
2011-01-20 20:50:14 +03:00
2014-06-04 07:04:30 +04:00
/*
 * I/O timeout in seconds. Not static, and exported to user space under the
 * parameter name "io_timeout" rather than the variable name.
 */
unsigned char nvme_io_timeout = 30 ;
module_param_named ( io_timeout , nvme_io_timeout , byte , 0644 ) ;
2014-04-04 21:43:36 +04:00
MODULE_PARM_DESC ( io_timeout , " timeout in seconds for I/O " ) ;
2011-01-20 20:50:14 +03:00
2014-05-13 21:42:01 +04:00
/* Retry window for failed I/O in seconds; IOD_TIMEOUT scales it by HZ. */
static unsigned char retry_time = 30 ;
module_param ( retry_time , byte , 0644 ) ;
MODULE_PARM_DESC ( retry_time , " time in seconds to retry failed I/O " ) ;
2014-07-01 19:33:32 +04:00
/* Controller shutdown timeout in seconds; SHUTDOWN_TIMEOUT scales it by HZ. */
static unsigned char shutdown_timeout = 5 ;
module_param ( shutdown_timeout , byte , 0644 ) ;
MODULE_PARM_DESC ( shutdown_timeout , " timeout in seconds for controller shutdown " ) ;
2011-01-20 20:50:14 +03:00
/* Load-time only parameters (permission 0); no description is registered. */
static int nvme_major ;
module_param ( nvme_major , int , 0 ) ;
2011-02-06 15:28:06 +03:00
static int use_threaded_interrupts ;
module_param ( use_threaded_interrupts , int , 0 ) ;
2011-03-03 02:37:18 +03:00
/*
 * Driver-wide state. NOTE(review): dev_list_lock presumably guards dev_list;
 * the users of these objects are outside this part of the file.
 */
static DEFINE_SPINLOCK ( dev_list_lock ) ;
static LIST_HEAD ( dev_list ) ;
static struct task_struct * nvme_thread ;
2013-12-11 00:10:36 +04:00
static struct workqueue_struct * nvme_workq ;
2014-04-08 03:10:11 +04:00
static wait_queue_head_t nvme_kthread_wait ;
2014-06-11 21:51:35 +04:00
static struct notifier_block nvme_nb ;
2011-03-03 02:37:18 +03:00
2013-12-11 00:10:37 +04:00
/* Forward declarations for functions used before their definition. */
static void nvme_reset_failed_dev ( struct work_struct * ws ) ;
2014-11-04 18:20:14 +03:00
static int nvme_process_cq ( struct nvme_queue * nvmeq ) ;
2013-12-11 00:10:37 +04:00
2013-12-11 00:10:40 +04:00
/*
 * Bookkeeping for a command whose completion is handed to a kthread worker
 * (see async_completion()).
 */
struct async_cmd_info {
struct kthread_work work ; /* queued on 'worker' when the command completes */
struct kthread_worker * worker ; /* worker that runs 'work' */
2014-11-04 18:20:14 +03:00
struct request * req ; /* blk-mq request backing the command; freed on completion */
2013-12-11 00:10:40 +04:00
u32 result ; /* CQE result field, CPU byte order */
int status ; /* CQE status field with the phase bit shifted out */
void * ctx ; /* not used in this part of the file */
} ;
2011-03-03 02:37:18 +03:00
2011-01-20 20:50:14 +03:00
/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 *
 * A queue pairs a submission ring (sq_cmds) with a completion ring (cqes).
 * The code in this file takes q_lock around submission and completion
 * processing.
 */
struct nvme_queue {
2014-07-07 19:14:42 +04:00
struct llist_node node ; /* lock-less list linkage; users not visible here */
2011-01-20 20:50:14 +03:00
struct device * q_dmadev ; /* device used for DMA mapping and dev_warn() */
2011-02-10 17:56:01 +03:00
struct nvme_dev * dev ; /* owning controller */
2014-01-28 00:57:22 +04:00
char irqname [ 24 ] ; /* nvme4294967295-65535\0 */
2011-01-20 20:50:14 +03:00
spinlock_t q_lock ; /* held while submitting and while processing the CQ */
struct nvme_command * sq_cmds ; /* submission ring; new commands go in at sq_tail */
volatile struct nvme_completion * cqes ; /* completion ring, consumed at cq_head */
dma_addr_t sq_dma_addr ; /* presumably the bus address of sq_cmds - TODO confirm */
dma_addr_t cq_dma_addr ; /* presumably the bus address of cqes - TODO confirm */
u32 __iomem * q_db ; /* SQ tail doorbell; CQ head doorbell is q_db + dev->db_stride */
u16 q_depth ; /* ring size in entries; indices wrap to 0 at this value */
u16 cq_vector ; /* not used in this part of the file */
u16 sq_head ; /* SQ head last reported in a completion entry */
u16 sq_tail ; /* next submission slot to fill */
u16 cq_head ; /* next completion entry to examine */
2013-12-11 00:10:38 +04:00
u16 qid ; /* not used in this part of the file */
2013-06-24 19:47:34 +04:00
u8 cq_phase ; /* phase bit value that marks a new completion entry */
u8 cqe_seen ; /* set by nvme_process_cq(), cleared by nvme_irq() */
2013-12-11 00:10:40 +04:00
struct async_cmd_info cmdinfo ; /* not used in this part of the file */
2014-11-04 18:20:14 +03:00
struct blk_mq_hw_ctx * hctx ; /* bound blk-mq hardware context; source of tags */
2011-01-20 20:50:14 +03:00
} ;
/*
 * Check we didn't inadvertently grow the command struct.
 *
 * Each BUILD_BUG_ON() pins the size of a structure to the literal given
 * here, so a mismatch is caught when the file is built. The function has
 * no other statements.
 */
static inline void _nvme_check_size ( void )
{
BUILD_BUG_ON ( sizeof ( struct nvme_rw_command ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_create_cq ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_create_sq ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_delete_queue ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_features ) ! = 64 ) ;
2013-03-27 15:13:41 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_format_cmd ) ! = 64 ) ;
2013-12-11 00:10:38 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_abort_cmd ) ! = 64 ) ;
2011-01-20 20:50:14 +03:00
BUILD_BUG_ON ( sizeof ( struct nvme_command ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_id_ctrl ) ! = 4096 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_id_ns ) ! = 4096 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_lba_range_type ) ! = 64 ) ;
2012-09-26 22:49:27 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_smart_log ) ! = 512 ) ;
2011-01-20 20:50:14 +03:00
}
2014-04-04 02:45:23 +04:00
/*
 * Completion handler type. nvme_process_cq() calls it with the queue, the
 * context pointer stored for the command, and a copy of the completion entry.
 */
typedef void ( * nvme_completion_fn ) ( struct nvme_queue * , void * ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * ) ;
2011-02-07 02:30:16 +03:00
/*
 * Per-request driver data, obtained from a request with blk_mq_rq_to_pdu().
 */
struct nvme_cmd_info {
2011-10-15 15:33:46 +04:00
nvme_completion_fn fn ; /* handler to run when the command completes */
void * ctx ; /* argument for fn, or one of the CMD_CTX_* markers */
2013-12-11 00:10:38 +04:00
int aborted ; /* reset to 0 by nvme_set_info(); req_completion() warns if set */
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq ; /* queue assigned by the *_init_request callbacks */
2011-02-07 02:30:16 +03:00
} ;
2014-11-04 18:20:14 +03:00
static int nvme_admin_init_hctx ( struct blk_mq_hw_ctx * hctx , void * data ,
unsigned int hctx_idx )
2011-02-07 02:30:16 +03:00
{
2014-11-04 18:20:14 +03:00
struct nvme_dev * dev = data ;
struct nvme_queue * nvmeq = dev - > queues [ 0 ] ;
WARN_ON ( nvmeq - > hctx ) ;
nvmeq - > hctx = hctx ;
hctx - > driver_data = nvmeq ;
return 0 ;
2011-02-07 02:30:16 +03:00
}
2014-11-04 18:20:14 +03:00
static int nvme_admin_init_request ( void * data , struct request * req ,
unsigned int hctx_idx , unsigned int rq_idx ,
unsigned int numa_node )
2013-07-16 01:02:20 +04:00
{
2014-11-04 18:20:14 +03:00
struct nvme_dev * dev = data ;
struct nvme_cmd_info * cmd = blk_mq_rq_to_pdu ( req ) ;
struct nvme_queue * nvmeq = dev - > queues [ 0 ] ;
BUG_ON ( ! nvmeq ) ;
cmd - > nvmeq = nvmeq ;
return 0 ;
2013-07-16 01:02:20 +04:00
}
2014-11-14 19:47:32 +03:00
static void nvme_exit_hctx ( struct blk_mq_hw_ctx * hctx , unsigned int hctx_idx )
{
struct nvme_queue * nvmeq = hctx - > driver_data ;
nvmeq - > hctx = NULL ;
}
2014-11-04 18:20:14 +03:00
static int nvme_init_hctx ( struct blk_mq_hw_ctx * hctx , void * data ,
unsigned int hctx_idx )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct nvme_dev * dev = data ;
struct nvme_queue * nvmeq = dev - > queues [
( hctx_idx % dev - > queue_count ) + 1 ] ;
2011-01-20 20:50:14 +03:00
2014-11-04 18:20:14 +03:00
if ( ! nvmeq - > hctx )
nvmeq - > hctx = hctx ;
/* nvmeq queues are shared between namespaces. We assume here that
* blk - mq map the tags so they match up with the nvme queue tags . */
WARN_ON ( nvmeq - > hctx - > tags ! = hctx - > tags ) ;
2011-01-20 20:50:14 +03:00
2014-11-04 18:20:14 +03:00
hctx - > driver_data = nvmeq ;
return 0 ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
static int nvme_init_request ( void * data , struct request * req ,
unsigned int hctx_idx , unsigned int rq_idx ,
unsigned int numa_node )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct nvme_dev * dev = data ;
struct nvme_cmd_info * cmd = blk_mq_rq_to_pdu ( req ) ;
struct nvme_queue * nvmeq = dev - > queues [ hctx_idx + 1 ] ;
BUG_ON ( ! nvmeq ) ;
cmd - > nvmeq = nvmeq ;
return 0 ;
}
/*
 * Arm a command: store the completion handler and its context, clear the
 * aborted flag, then tell blk-mq the owning request has started.
 */
static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
			  nvme_completion_fn handler)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	cmd->aborted = 0;
	cmd->ctx = ctx;
	cmd->fn = handler;

	blk_mq_start_request(rq);
}
2011-10-15 15:33:46 +04:00
/*
 * Marker values stored in nvme_cmd_info.ctx in place of a real context
 * pointer; special_completion() interprets them.
 * Special values must be less than 0x1000
 */
# define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA)
2011-02-07 23:55:59 +03:00
/* Command was cancelled by the submitter; a late completion is ignored. */
# define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE)
/* Command already completed; a second completion is reported. */
# define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
/* Completion carried a command id outside the queue depth. */
# define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
2011-02-06 15:53:23 +03:00
2014-04-04 02:45:23 +04:00
static void special_completion ( struct nvme_queue * nvmeq , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
if ( ctx = = CMD_CTX_CANCELLED )
return ;
if ( ctx = = CMD_CTX_COMPLETED ) {
2014-04-04 02:45:23 +04:00
dev_warn ( nvmeq - > q_dmadev ,
2011-10-15 15:33:46 +04:00
" completed id %d twice on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
if ( ctx = = CMD_CTX_INVALID ) {
2014-04-04 02:45:23 +04:00
dev_warn ( nvmeq - > q_dmadev ,
2011-10-15 15:33:46 +04:00
" invalid id %d completed on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
2014-04-04 02:45:23 +04:00
dev_warn ( nvmeq - > q_dmadev , " Unknown special completion %p \n " , ctx ) ;
2011-10-15 15:33:46 +04:00
}
2014-11-04 18:20:14 +03:00
/*
 * Mark a command as cancelled.
 *
 * The previous handler is stored through @fn when @fn is non-NULL, and the
 * previous context is returned. Afterwards the command points at
 * special_completion() with CMD_CTX_CANCELLED.
 */
static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
{
	void *prev_ctx;

	if (fn)
		*fn = cmd->fn;
	prev_ctx = cmd->ctx;

	cmd->ctx = CMD_CTX_CANCELLED;
	cmd->fn = special_completion;

	return prev_ctx;
}
2014-11-04 18:20:14 +03:00
static void async_req_completion ( struct nvme_queue * nvmeq , void * ctx ,
struct nvme_completion * cqe )
2011-02-05 00:03:56 +03:00
{
2014-11-04 18:20:14 +03:00
struct request * req = ctx ;
2011-02-05 00:03:56 +03:00
2014-11-04 18:20:14 +03:00
u32 result = le32_to_cpup ( & cqe - > result ) ;
u16 status = le16_to_cpup ( & cqe - > status ) > > 1 ;
if ( status = = NVME_SC_SUCCESS | | status = = NVME_SC_ABORT_REQ )
+ + nvmeq - > dev - > event_limit ;
if ( status = = NVME_SC_SUCCESS )
dev_warn ( nvmeq - > q_dmadev ,
" async event result %08x \n " , result ) ;
2014-11-17 20:43:42 +03:00
blk_mq_free_hctx_request ( nvmeq - > hctx , req ) ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
static void abort_completion ( struct nvme_queue * nvmeq , void * ctx ,
struct nvme_completion * cqe )
2014-02-22 01:13:44 +04:00
{
2014-11-04 18:20:14 +03:00
struct request * req = ctx ;
u16 status = le16_to_cpup ( & cqe - > status ) > > 1 ;
u32 result = le32_to_cpup ( & cqe - > result ) ;
2014-05-13 20:32:46 +04:00
2014-11-17 20:43:42 +03:00
blk_mq_free_hctx_request ( nvmeq - > hctx , req ) ;
2014-05-13 20:32:46 +04:00
2014-11-04 18:20:14 +03:00
dev_warn ( nvmeq - > q_dmadev , " Abort status:%x result:%x " , status , result ) ;
+ + nvmeq - > dev - > abort_limit ;
2014-02-22 01:13:44 +04:00
}
2014-11-04 18:20:14 +03:00
static void async_completion ( struct nvme_queue * nvmeq , void * ctx ,
struct nvme_completion * cqe )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct async_cmd_info * cmdinfo = ctx ;
cmdinfo - > result = le32_to_cpup ( & cqe - > result ) ;
cmdinfo - > status = le16_to_cpup ( & cqe - > status ) > > 1 ;
queue_kthread_work ( cmdinfo - > worker , & cmdinfo - > work ) ;
2014-11-17 20:43:42 +03:00
blk_mq_free_hctx_request ( nvmeq - > hctx , cmdinfo - > req ) ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
static inline struct nvme_cmd_info * get_cmd_from_tag ( struct nvme_queue * nvmeq ,
unsigned int tag )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct blk_mq_hw_ctx * hctx = nvmeq - > hctx ;
struct request * req = blk_mq_tag_to_rq ( hctx - > tags , tag ) ;
2014-05-13 20:32:46 +04:00
2014-11-04 18:20:14 +03:00
return blk_mq_rq_to_pdu ( req ) ;
2014-03-04 03:39:13 +04:00
}
2014-11-04 18:20:14 +03:00
/*
 * Called with local interrupts disabled and the q_lock held. May not sleep.
 *
 * Retire the command identified by @tag on @nvmeq.
 *
 * @fn may be NULL; when non-NULL it receives the handler to run.
 * Returns the context to pass to that handler. A tag outside the queue
 * depth yields special_completion() / CMD_CTX_INVALID. Otherwise the
 * command's own handler and context are handed back and the command is
 * re-armed with special_completion() / CMD_CTX_COMPLETED, so a duplicate
 * completion gets reported.
 *
 * The tag is validated before it is used for the lookup, and @fn is
 * NULL-checked on the invalid-tag path as well, because callers such as
 * nvme_submit_sync_cmd() pass NULL.
 */
static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
			     nvme_completion_fn *fn)
{
	struct nvme_cmd_info *cmd;
	void *ctx;

	if (tag < 0 || tag >= nvmeq->q_depth) {
		if (fn)
			*fn = special_completion;
		return CMD_CTX_INVALID;
	}

	cmd = get_cmd_from_tag(nvmeq, tag);
	if (fn)
		*fn = cmd->fn;
	ctx = cmd->ctx;
	cmd->fn = special_completion;
	cmd->ctx = CMD_CTX_COMPLETED;
	return ctx;
}
/**
2011-03-16 23:28:24 +03:00
* nvme_submit_cmd ( ) - Copy a command into a queue and ring the doorbell
2011-01-20 20:50:14 +03:00
* @ nvmeq : The queue to use
* @ cmd : The command to send
*
* Safe to use from interrupt context
*/
2014-11-04 18:20:14 +03:00
static int __nvme_submit_cmd ( struct nvme_queue * nvmeq , struct nvme_command * cmd )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
u16 tail = nvmeq - > sq_tail ;
2011-01-20 20:50:14 +03:00
memcpy ( & nvmeq - > sq_cmds [ tail ] , cmd , sizeof ( * cmd ) ) ;
if ( + + tail = = nvmeq - > q_depth )
tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > sq_tail = tail ;
return 0 ;
}
2014-11-04 18:20:14 +03:00
static int nvme_submit_cmd ( struct nvme_queue * nvmeq , struct nvme_command * cmd )
{
unsigned long flags ;
int ret ;
spin_lock_irqsave ( & nvmeq - > q_lock , flags ) ;
ret = __nvme_submit_cmd ( nvmeq , cmd ) ;
spin_unlock_irqrestore ( & nvmeq - > q_lock , flags ) ;
return ret ;
}
2011-12-20 22:34:52 +04:00
static __le64 * * iod_list ( struct nvme_iod * iod )
2011-02-10 16:51:24 +03:00
{
2011-12-20 22:34:52 +04:00
return ( ( void * ) iod ) + iod - > offset ;
2011-02-10 16:51:24 +03:00
}
2011-12-20 22:34:52 +04:00
/*
* Will slightly overestimate the number of pages needed . This is OK
* as it only leads to a small amount of wasted memory for the lifetime of
* the I / O .
*/
2014-06-23 21:34:01 +04:00
static int nvme_npages ( unsigned size , struct nvme_dev * dev )
2011-12-20 22:34:52 +04:00
{
2014-06-23 21:34:01 +04:00
unsigned nprps = DIV_ROUND_UP ( size + dev - > page_size , dev - > page_size ) ;
return DIV_ROUND_UP ( 8 * nprps , dev - > page_size - 8 ) ;
2011-12-20 22:34:52 +04:00
}
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
static struct nvme_iod *
2014-06-23 21:34:01 +04:00
nvme_alloc_iod ( unsigned nseg , unsigned nbytes , struct nvme_dev * dev , gfp_t gfp )
2011-01-20 20:50:14 +03:00
{
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod = kmalloc ( sizeof ( struct nvme_iod ) +
2014-06-23 21:34:01 +04:00
sizeof ( __le64 * ) * nvme_npages ( nbytes , dev ) +
2011-12-20 22:34:52 +04:00
sizeof ( struct scatterlist ) * nseg , gfp ) ;
if ( iod ) {
iod - > offset = offsetof ( struct nvme_iod , sg [ nseg ] ) ;
iod - > npages = - 1 ;
iod - > length = nbytes ;
2012-11-06 22:59:23 +04:00
iod - > nents = 0 ;
2014-04-04 02:45:23 +04:00
iod - > first_dma = 0ULL ;
2011-12-20 22:34:52 +04:00
}
return iod ;
2011-01-20 20:50:14 +03:00
}
2013-03-05 05:40:58 +04:00
void nvme_free_iod ( struct nvme_dev * dev , struct nvme_iod * iod )
2011-01-20 20:50:14 +03:00
{
2014-06-23 21:34:01 +04:00
const int last_prp = dev - > page_size / 8 - 1 ;
2011-12-20 22:34:52 +04:00
int i ;
__le64 * * list = iod_list ( iod ) ;
dma_addr_t prp_dma = iod - > first_dma ;
if ( iod - > npages = = 0 )
dma_pool_free ( dev - > prp_small_pool , list [ 0 ] , prp_dma ) ;
for ( i = 0 ; i < iod - > npages ; i + + ) {
__le64 * prp_list = list [ i ] ;
dma_addr_t next_prp_dma = le64_to_cpu ( prp_list [ last_prp ] ) ;
dma_pool_free ( dev - > prp_page_pool , prp_list , prp_dma ) ;
prp_dma = next_prp_dma ;
}
kfree ( iod ) ;
2011-01-20 20:50:14 +03:00
}
2014-08-29 19:06:12 +04:00
/*
 * Map an NVMe status value to a Linux error code. Only the low 11 bits
 * (mask 0x7ff) are examined: success gives 0, "capacity exceeded" gives
 * -ENOSPC, and anything else gives -EIO.
 */
static int nvme_error_status(u16 status)
{
	const u16 code = status & 0x7ff;

	if (code == NVME_SC_SUCCESS)
		return 0;
	if (code == NVME_SC_CAP_EXCEEDED)
		return -ENOSPC;
	return -EIO;
}
2014-11-04 18:20:14 +03:00
static void req_completion ( struct nvme_queue * nvmeq , void * ctx ,
2011-01-20 20:50:14 +03:00
struct nvme_completion * cqe )
{
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod = ctx ;
2014-11-04 18:20:14 +03:00
struct request * req = iod - > private ;
struct nvme_cmd_info * cmd_rq = blk_mq_rq_to_pdu ( req ) ;
2011-01-20 20:50:14 +03:00
u16 status = le16_to_cpup ( & cqe - > status ) > > 1 ;
2014-04-04 02:45:23 +04:00
if ( unlikely ( status ) ) {
2014-11-04 18:20:14 +03:00
if ( ! ( status & NVME_SC_DNR | | blk_noretry_request ( req ) )
& & ( jiffies - req - > start_time ) < req - > timeout ) {
2015-01-08 04:55:52 +03:00
unsigned long flags ;
2014-11-04 18:20:14 +03:00
blk_mq_requeue_request ( req ) ;
2015-01-08 04:55:52 +03:00
spin_lock_irqsave ( req - > q - > queue_lock , flags ) ;
if ( ! blk_queue_stopped ( req - > q ) )
blk_mq_kick_requeue_list ( req - > q ) ;
spin_unlock_irqrestore ( req - > q - > queue_lock , flags ) ;
2014-04-04 02:45:23 +04:00
return ;
}
2014-11-04 18:20:14 +03:00
req - > errors = nvme_error_status ( status ) ;
} else
req - > errors = 0 ;
if ( cmd_rq - > aborted )
dev_warn ( & nvmeq - > dev - > pci_dev - > dev ,
" completing aborted command with status:%04x \n " ,
status ) ;
if ( iod - > nents )
dma_unmap_sg ( & nvmeq - > dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
rq_data_dir ( req ) ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2014-04-04 02:45:23 +04:00
nvme_free_iod ( nvmeq - > dev , iod ) ;
2014-04-28 22:30:52 +04:00
2014-11-04 18:20:14 +03:00
blk_mq_complete_request ( req ) ;
2011-01-20 20:50:14 +03:00
}
2011-05-12 05:36:38 +04:00
/* length is in bytes. gfp flags indicates whether we may sleep. */
2014-04-04 02:45:23 +04:00
int nvme_setup_prps ( struct nvme_dev * dev , struct nvme_iod * iod , int total_len ,
gfp_t gfp )
2011-01-26 18:02:29 +03:00
{
2011-02-10 18:30:34 +03:00
struct dma_pool * pool ;
2011-12-20 22:34:52 +04:00
int length = total_len ;
struct scatterlist * sg = iod - > sg ;
2011-01-26 18:02:29 +03:00
int dma_len = sg_dma_len ( sg ) ;
u64 dma_addr = sg_dma_address ( sg ) ;
int offset = offset_in_page ( dma_addr ) ;
2011-02-10 16:51:24 +03:00
__le64 * prp_list ;
2011-12-20 22:34:52 +04:00
__le64 * * list = iod_list ( iod ) ;
2011-02-10 16:51:24 +03:00
dma_addr_t prp_dma ;
2011-12-20 22:34:52 +04:00
int nprps , i ;
2014-06-23 21:34:01 +04:00
u32 page_size = dev - > page_size ;
2011-01-26 18:02:29 +03:00
2014-06-23 21:34:01 +04:00
length - = ( page_size - offset ) ;
2011-01-26 18:02:29 +03:00
if ( length < = 0 )
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
2014-06-23 21:34:01 +04:00
dma_len - = ( page_size - offset ) ;
2011-01-26 18:02:29 +03:00
if ( dma_len ) {
2014-06-23 21:34:01 +04:00
dma_addr + = ( page_size - offset ) ;
2011-01-26 18:02:29 +03:00
} else {
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
}
2014-06-23 21:34:01 +04:00
if ( length < = page_size ) {
2014-04-04 02:45:23 +04:00
iod - > first_dma = dma_addr ;
2011-12-20 22:34:52 +04:00
return total_len ;
2011-02-10 16:51:24 +03:00
}
2014-06-23 21:34:01 +04:00
nprps = DIV_ROUND_UP ( length , page_size ) ;
2011-02-10 18:30:34 +03:00
if ( nprps < = ( 256 / 8 ) ) {
pool = dev - > prp_small_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 0 ;
2011-02-10 18:30:34 +03:00
} else {
pool = dev - > prp_page_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 1 ;
2011-02-10 18:30:34 +03:00
}
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
if ( ! prp_list ) {
2014-04-04 02:45:23 +04:00
iod - > first_dma = dma_addr ;
2011-12-20 22:34:52 +04:00
iod - > npages = - 1 ;
2014-06-23 21:34:01 +04:00
return ( total_len - length ) + page_size ;
2011-05-12 21:51:41 +04:00
}
2011-12-20 22:34:52 +04:00
list [ 0 ] = prp_list ;
iod - > first_dma = prp_dma ;
2011-02-10 16:51:24 +03:00
i = 0 ;
for ( ; ; ) {
2014-06-23 21:34:01 +04:00
if ( i = = page_size > > 3 ) {
2011-02-10 16:51:24 +03:00
__le64 * old_prp_list = prp_list ;
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
2011-12-20 22:34:52 +04:00
if ( ! prp_list )
return total_len - length ;
list [ iod - > npages + + ] = prp_list ;
2011-03-16 23:43:40 +03:00
prp_list [ 0 ] = old_prp_list [ i - 1 ] ;
old_prp_list [ i - 1 ] = cpu_to_le64 ( prp_dma ) ;
i = 1 ;
2011-02-10 16:51:24 +03:00
}
prp_list [ i + + ] = cpu_to_le64 ( dma_addr ) ;
2014-06-23 21:34:01 +04:00
dma_len - = page_size ;
dma_addr + = page_size ;
length - = page_size ;
2011-02-10 16:51:24 +03:00
if ( length < = 0 )
break ;
if ( dma_len > 0 )
continue ;
BUG_ON ( dma_len < 0 ) ;
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
2011-01-26 18:02:29 +03:00
}
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
}
2014-11-04 18:20:14 +03:00
/*
* We reuse the small pool to allocate the 16 - byte range here as it is not
* worth having a special pool for these or additional cases to handle freeing
* the iod .
*/
static void nvme_submit_discard ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
struct request * req , struct nvme_iod * iod )
2012-11-10 03:33:05 +04:00
{
2014-04-04 02:45:23 +04:00
struct nvme_dsm_range * range =
( struct nvme_dsm_range * ) iod_list ( iod ) [ 0 ] ;
2012-11-10 03:33:05 +04:00
struct nvme_command * cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
range - > cattr = cpu_to_le32 ( 0 ) ;
2014-11-04 18:20:14 +03:00
range - > nlb = cpu_to_le32 ( blk_rq_bytes ( req ) > > ns - > lba_shift ) ;
range - > slba = cpu_to_le64 ( nvme_block_nr ( ns , blk_rq_pos ( req ) ) ) ;
2012-11-10 03:33:05 +04:00
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
cmnd - > dsm . opcode = nvme_cmd_dsm ;
2014-11-04 18:20:14 +03:00
cmnd - > dsm . command_id = req - > tag ;
2012-11-10 03:33:05 +04:00
cmnd - > dsm . nsid = cpu_to_le32 ( ns - > ns_id ) ;
cmnd - > dsm . prp1 = cpu_to_le64 ( iod - > first_dma ) ;
cmnd - > dsm . nr = 0 ;
cmnd - > dsm . attributes = cpu_to_le32 ( NVME_DSMGMT_AD ) ;
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
}
2014-11-04 18:20:14 +03:00
static void nvme_submit_flush ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
2011-02-22 22:18:30 +03:00
int cmdid )
{
struct nvme_command * cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
cmnd - > common . opcode = nvme_cmd_flush ;
cmnd - > common . command_id = cmdid ;
cmnd - > common . nsid = cpu_to_le32 ( ns - > ns_id ) ;
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
}
2014-11-04 18:20:14 +03:00
static int nvme_submit_iod ( struct nvme_queue * nvmeq , struct nvme_iod * iod ,
struct nvme_ns * ns )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct request * req = iod - > private ;
2011-01-26 18:02:29 +03:00
struct nvme_command * cmnd ;
2014-11-04 18:20:14 +03:00
u16 control = 0 ;
u32 dsmgmt = 0 ;
2011-02-22 22:18:30 +03:00
2014-11-04 18:20:14 +03:00
if ( req - > cmd_flags & REQ_FUA )
2011-01-20 20:50:14 +03:00
control | = NVME_RW_FUA ;
2014-11-04 18:20:14 +03:00
if ( req - > cmd_flags & ( REQ_FAILFAST_DEV | REQ_RAHEAD ) )
2011-01-20 20:50:14 +03:00
control | = NVME_RW_LR ;
2014-11-04 18:20:14 +03:00
if ( req - > cmd_flags & REQ_RAHEAD )
2011-01-20 20:50:14 +03:00
dsmgmt | = NVME_RW_DSM_FREQ_PREFETCH ;
2011-01-26 18:02:29 +03:00
cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
2011-01-26 18:08:25 +03:00
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
2011-01-20 20:50:14 +03:00
2014-11-04 18:20:14 +03:00
cmnd - > rw . opcode = ( rq_data_dir ( req ) ? nvme_cmd_write : nvme_cmd_read ) ;
cmnd - > rw . command_id = req - > tag ;
2011-01-26 18:02:29 +03:00
cmnd - > rw . nsid = cpu_to_le32 ( ns - > ns_id ) ;
2014-04-04 02:45:23 +04:00
cmnd - > rw . prp1 = cpu_to_le64 ( sg_dma_address ( iod - > sg ) ) ;
cmnd - > rw . prp2 = cpu_to_le64 ( iod - > first_dma ) ;
2014-11-04 18:20:14 +03:00
cmnd - > rw . slba = cpu_to_le64 ( nvme_block_nr ( ns , blk_rq_pos ( req ) ) ) ;
cmnd - > rw . length = cpu_to_le16 ( ( blk_rq_bytes ( req ) > > ns - > lba_shift ) - 1 ) ;
2011-01-26 18:02:29 +03:00
cmnd - > rw . control = cpu_to_le16 ( control ) ;
cmnd - > rw . dsmgmt = cpu_to_le32 ( dsmgmt ) ;
2011-01-20 20:50:14 +03:00
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
2011-02-10 20:01:09 +03:00
return 0 ;
2014-04-04 02:45:23 +04:00
}
2014-11-04 18:20:14 +03:00
/*
 * Submit one block-layer request to the NVMe queue bound to @hctx.
 *
 * Steps: allocate an iod, prepare the data description (a range buffer for
 * discard, or a DMA-mapped scatterlist plus PRPs for data transfers), arm
 * the completion handler, then build the command and ring the doorbell
 * under q_lock.
 *
 * Returns BLK_MQ_RQ_QUEUE_OK once the command has been queued,
 * BLK_MQ_RQ_QUEUE_BUSY when an allocation or mapping failed, and
 * BLK_MQ_RQ_QUEUE_ERROR when the request produced no scatterlist entries.
 * All allocations use GFP_ATOMIC.
 */
static int nvme_queue_rq ( struct blk_mq_hw_ctx * hctx ,
const struct blk_mq_queue_data * bd )
2014-04-04 02:45:23 +04:00
{
2014-11-04 18:20:14 +03:00
struct nvme_ns * ns = hctx - > queue - > queuedata ;
struct nvme_queue * nvmeq = hctx - > driver_data ;
struct request * req = bd - > rq ;
struct nvme_cmd_info * cmd = blk_mq_rq_to_pdu ( req ) ;
2014-04-04 02:45:23 +04:00
struct nvme_iod * iod ;
2014-11-04 18:20:14 +03:00
int psegs = req - > nr_phys_segments ;
enum dma_data_direction dma_dir ;
/* A discard carries only one range descriptor, not the request payload. */
unsigned size = ! ( req - > cmd_flags & REQ_DISCARD ) ? blk_rq_bytes ( req ) :
2014-10-07 01:23:06 +04:00
sizeof ( struct nvme_dsm_range ) ;
2014-04-04 02:45:23 +04:00
2014-10-07 01:23:06 +04:00
iod = nvme_alloc_iod ( psegs , size , ns - > dev , GFP_ATOMIC ) ;
2014-04-04 02:45:23 +04:00
if ( ! iod )
2014-12-11 23:58:39 +03:00
return BLK_MQ_RQ_QUEUE_BUSY ;
2014-11-04 18:20:14 +03:00
/* req_completion() recovers the request from iod->private. */
iod - > private = req ;
2014-04-04 02:45:23 +04:00
2014-11-04 18:20:14 +03:00
if ( req - > cmd_flags & REQ_DISCARD ) {
2014-04-04 02:45:23 +04:00
void * range ;
/*
* We reuse the small pool to allocate the 16 - byte range here
* as it is not worth having a special pool for these or
* additional cases to handle freeing the iod .
*/
range = dma_pool_alloc ( nvmeq - > dev - > prp_small_pool ,
GFP_ATOMIC ,
& iod - > first_dma ) ;
2014-11-04 18:20:14 +03:00
if ( ! range )
2014-12-11 23:58:39 +03:00
goto retry_cmd ;
2014-04-04 02:45:23 +04:00
/* npages == 0 makes nvme_free_iod() return the buffer to the small pool. */
iod_list ( iod ) [ 0 ] = ( __le64 * ) range ;
iod - > npages = 0 ;
} else if ( psegs ) {
2014-11-04 18:20:14 +03:00
dma_dir = rq_data_dir ( req ) ? DMA_TO_DEVICE : DMA_FROM_DEVICE ;
sg_init_table ( iod - > sg , psegs ) ;
iod - > nents = blk_rq_map_sg ( req - > q , req , iod - > sg ) ;
2014-12-11 23:58:39 +03:00
if ( ! iod - > nents )
goto error_cmd ;
2014-11-04 18:20:14 +03:00
if ( ! dma_map_sg ( nvmeq - > q_dmadev , iod - > sg , iod - > nents , dma_dir ) )
2014-12-11 23:58:39 +03:00
goto retry_cmd ;
2014-11-04 18:20:14 +03:00
2014-12-11 23:58:39 +03:00
/* A short return means a PRP list allocation failed: unmap and retry. */
if ( blk_rq_bytes ( req ) ! =
nvme_setup_prps ( nvmeq - > dev , iod , blk_rq_bytes ( req ) , GFP_ATOMIC ) ) {
dma_unmap_sg ( & nvmeq - > dev - > pci_dev - > dev , iod - > sg ,
iod - > nents , dma_dir ) ;
goto retry_cmd ;
}
2014-04-04 02:45:23 +04:00
}
2011-02-10 20:01:09 +03:00
2014-12-04 03:07:13 +03:00
/* Arm the completion handler; this also marks the request as started. */
nvme_set_info ( cmd , iod , req_completion ) ;
2014-11-04 18:20:14 +03:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
if ( req - > cmd_flags & REQ_DISCARD )
nvme_submit_discard ( nvmeq , ns , req , iod ) ;
else if ( req - > cmd_flags & REQ_FLUSH )
nvme_submit_flush ( nvmeq , ns , req - > tag ) ;
else
nvme_submit_iod ( nvmeq , iod , ns ) ;
/* Reap any completions already posted while the lock is held. */
nvme_process_cq ( nvmeq ) ;
spin_unlock_irq ( & nvmeq - > q_lock ) ;
return BLK_MQ_RQ_QUEUE_OK ;
2014-12-11 23:58:39 +03:00
error_cmd :
nvme_free_iod ( nvmeq - > dev , iod ) ;
return BLK_MQ_RQ_QUEUE_ERROR ;
retry_cmd :
2011-12-20 22:34:52 +04:00
nvme_free_iod ( nvmeq - > dev , iod ) ;
2014-12-11 23:58:39 +03:00
return BLK_MQ_RQ_QUEUE_BUSY ;
2011-01-20 20:50:14 +03:00
}
2013-06-24 19:47:34 +04:00
/*
 * Consume every new entry in the completion queue and run the matching
 * completion handlers.
 *
 * An entry is new when the low bit of its status equals the phase value the
 * queue currently expects; the expected phase flips each time the head
 * wraps around. The callers in this file hold q_lock around this function.
 *
 * Returns 1 (and sets cqe_seen) when the head or phase changed and the CQ
 * head doorbell was written, 0 when they did not.
 */
static int nvme_process_cq ( struct nvme_queue * nvmeq )
2011-01-20 20:50:14 +03:00
{
2011-01-20 21:24:06 +03:00
u16 head , phase ;
2011-01-20 20:50:14 +03:00
head = nvmeq - > cq_head ;
2011-01-20 21:24:06 +03:00
phase = nvmeq - > cq_phase ;
2011-01-20 20:50:14 +03:00
for ( ; ; ) {
2011-10-15 15:33:46 +04:00
void * ctx ;
nvme_completion_fn fn ;
2011-01-20 20:50:14 +03:00
/* Work on a local copy of the entry read from the volatile ring. */
struct nvme_completion cqe = nvmeq - > cqes [ head ] ;
2011-01-20 21:24:06 +03:00
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = phase )
2011-01-20 20:50:14 +03:00
break ;
nvmeq - > sq_head = le16_to_cpu ( cqe . sq_head ) ;
if ( + + head = = nvmeq - > q_depth ) {
head = 0 ;
2011-01-20 21:24:06 +03:00
phase = ! phase ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
/* Look up the handler and context for this command id, then run it. */
ctx = nvme_finish_cmd ( nvmeq , cqe . command_id , & fn ) ;
2014-04-04 02:45:23 +04:00
fn ( nvmeq , ctx , & cqe ) ;
2011-01-20 20:50:14 +03:00
}
/* If the controller ignores the cq head doorbell and continuously
* writes to the queue , it is theoretically possible to wrap around
* the queue twice and mistakenly return IRQ_NONE . Linux only
* requires that 0.1 % of your interrupts are handled , so this isn ' t
* a big problem .
*/
2011-01-20 21:24:06 +03:00
if ( head = = nvmeq - > cq_head & & phase = = nvmeq - > cq_phase )
2013-06-24 19:47:34 +04:00
return 0 ;
2011-01-20 20:50:14 +03:00
2013-09-10 07:25:37 +04:00
/* The CQ head doorbell sits db_stride entries after the SQ tail doorbell. */
writel ( head , nvmeq - > q_db + nvmeq - > dev - > db_stride ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > cq_head = head ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = phase ;
2011-01-20 20:50:14 +03:00
2013-06-24 19:47:34 +04:00
nvmeq - > cqe_seen = 1 ;
return 1 ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
/* Admin queue isn't initialized as a request queue. If at some point this
* happens anyway , make sure to notify the user */
static int nvme_admin_queue_rq ( struct blk_mq_hw_ctx * hctx ,
const struct blk_mq_queue_data * bd )
2013-06-24 20:03:57 +04:00
{
2014-11-04 18:20:14 +03:00
WARN_ON_ONCE ( 1 ) ;
return BLK_MQ_RQ_QUEUE_ERROR ;
2013-06-24 20:03:57 +04:00
}
2011-01-20 20:50:14 +03:00
static irqreturn_t nvme_irq ( int irq , void * data )
2011-02-06 15:28:06 +03:00
{
irqreturn_t result ;
struct nvme_queue * nvmeq = data ;
spin_lock ( & nvmeq - > q_lock ) ;
2013-06-24 19:47:34 +04:00
nvme_process_cq ( nvmeq ) ;
result = nvmeq - > cqe_seen ? IRQ_HANDLED : IRQ_NONE ;
nvmeq - > cqe_seen = 0 ;
2011-02-06 15:28:06 +03:00
spin_unlock ( & nvmeq - > q_lock ) ;
return result ;
}
static irqreturn_t nvme_irq_check ( int irq , void * data )
{
struct nvme_queue * nvmeq = data ;
struct nvme_completion cqe = nvmeq - > cqes [ nvmeq - > cq_head ] ;
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = nvmeq - > cq_phase )
return IRQ_NONE ;
return IRQ_WAKE_THREAD ;
}
2014-11-04 18:20:14 +03:00
static void nvme_abort_cmd_info ( struct nvme_queue * nvmeq , struct nvme_cmd_info *
cmd_info )
2011-02-05 00:03:56 +03:00
{
spin_lock_irq ( & nvmeq - > q_lock ) ;
2014-11-04 18:20:14 +03:00
cancel_cmd_info ( cmd_info , NULL ) ;
2011-02-05 00:03:56 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
2011-10-15 15:33:46 +04:00
/*
 * State shared between a task waiting in nvme_submit_sync_cmd() and
 * sync_completion().
 */
struct sync_cmd_info {
struct task_struct * task ; /* task to wake when the command completes */
u32 result ; /* CQE result field, CPU byte order */
int status ; /* CQE status without the phase bit; -EINTR until completed */
} ;
2014-04-04 02:45:23 +04:00
static void sync_completion ( struct nvme_queue * nvmeq , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
struct sync_cmd_info * cmdinfo = ctx ;
cmdinfo - > result = le32_to_cpup ( & cqe - > result ) ;
cmdinfo - > status = le16_to_cpup ( & cqe - > status ) > > 1 ;
wake_up_process ( cmdinfo - > task ) ;
}
2011-01-20 20:50:14 +03:00
/*
 * Submit @cmd on the queue recorded for @req and sleep (killable) until it
 * completes or @timeout jiffies pass. The command id is set to req->tag.
 *
 * Returns 0 on success . If the result is negative , it ' s a Linux error code ;
 * if the result is positive , it ' s an NVM Express status code .
 * -EINTR is returned when no completion was recorded. When @result is
 * non-NULL and a completion arrived, the CQE result is stored through it.
 */
2014-11-04 18:20:14 +03:00
static int nvme_submit_sync_cmd ( struct request * req , struct nvme_command * cmd ,
2013-03-05 05:40:58 +04:00
u32 * result , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
int ret ;
2011-01-20 20:50:14 +03:00
struct sync_cmd_info cmdinfo ;
2014-11-04 18:20:14 +03:00
struct nvme_cmd_info * cmd_rq = blk_mq_rq_to_pdu ( req ) ;
struct nvme_queue * nvmeq = cmd_rq - > nvmeq ;
2011-01-20 20:50:14 +03:00
cmdinfo . task = current ;
/* -EINTR doubles as the "no completion seen" marker checked below. */
cmdinfo . status = - EINTR ;
2014-11-04 18:20:14 +03:00
cmd - > common . command_id = req - > tag ;
nvme_set_info ( cmd_rq , & cmdinfo , sync_completion ) ;
2011-01-20 20:50:14 +03:00
2011-02-05 00:03:56 +03:00
/*
 * The task state is changed before submitting, presumably so that a
 * wake-up from sync_completion() arriving before schedule_timeout()
 * is not lost.
 */
set_current_state ( TASK_KILLABLE ) ;
2014-03-04 03:39:13 +04:00
ret = nvme_submit_cmd ( nvmeq , cmd ) ;
if ( ret ) {
2014-11-04 18:20:14 +03:00
nvme_finish_cmd ( nvmeq , req - > tag , NULL ) ;
2014-03-04 03:39:13 +04:00
set_current_state ( TASK_RUNNING ) ;
}
2014-12-12 18:53:40 +03:00
/*
 * NOTE(review): neither the submit error nor the schedule_timeout()
 * remainder stored in ret is used afterwards; a failed submit would
 * surface as -EINTR. __nvme_submit_cmd() as written always returns 0.
 */
ret = schedule_timeout ( timeout ) ;
2011-01-20 20:50:14 +03:00
2014-12-12 18:53:40 +03:00
/*
* Ensure that sync_completion has either run , or that it will
* never run .
*/
nvme_abort_cmd_info ( nvmeq , blk_mq_rq_to_pdu ( req ) ) ;
/*
* We never got the completion
*/
if ( cmdinfo . status = = - EINTR )
2011-02-05 00:03:56 +03:00
return - EINTR ;
2011-01-20 20:50:14 +03:00
if ( result )
* result = cmdinfo . result ;
return cmdinfo . status ;
}
2014-11-04 18:20:14 +03:00
static int nvme_submit_async_admin_req ( struct nvme_dev * dev )
{
struct nvme_queue * nvmeq = dev - > queues [ 0 ] ;
struct nvme_command c ;
struct nvme_cmd_info * cmd_info ;
struct request * req ;
2014-11-18 18:21:18 +03:00
req = blk_mq_alloc_request ( dev - > admin_q , WRITE , GFP_ATOMIC , false ) ;
2014-11-05 23:39:09 +03:00
if ( IS_ERR ( req ) )
return PTR_ERR ( req ) ;
2014-11-04 18:20:14 +03:00
2015-01-08 04:55:48 +03:00
req - > cmd_flags | = REQ_NO_TIMEOUT ;
2014-11-04 18:20:14 +03:00
cmd_info = blk_mq_rq_to_pdu ( req ) ;
nvme_set_info ( cmd_info , req , async_req_completion ) ;
memset ( & c , 0 , sizeof ( c ) ) ;
c . common . opcode = nvme_admin_async_event ;
c . common . command_id = req - > tag ;
return __nvme_submit_cmd ( nvmeq , & c ) ;
}
static int nvme_submit_admin_async_cmd ( struct nvme_dev * dev ,
2013-12-11 00:10:40 +04:00
struct nvme_command * cmd ,
struct async_cmd_info * cmdinfo , unsigned timeout )
{
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ 0 ] ;
struct request * req ;
struct nvme_cmd_info * cmd_rq ;
2013-12-11 00:10:40 +04:00
2014-11-04 18:20:14 +03:00
req = blk_mq_alloc_request ( dev - > admin_q , WRITE , GFP_KERNEL , false ) ;
2014-11-05 23:39:09 +03:00
if ( IS_ERR ( req ) )
return PTR_ERR ( req ) ;
2014-11-04 18:20:14 +03:00
req - > timeout = timeout ;
cmd_rq = blk_mq_rq_to_pdu ( req ) ;
cmdinfo - > req = req ;
nvme_set_info ( cmd_rq , cmdinfo , async_completion ) ;
2013-12-11 00:10:40 +04:00
cmdinfo - > status = - EINTR ;
2014-11-04 18:20:14 +03:00
cmd - > common . command_id = req - > tag ;
2014-03-04 03:39:13 +04:00
return nvme_submit_cmd ( nvmeq , cmd ) ;
2013-12-11 00:10:40 +04:00
}
2014-11-05 13:47:07 +03:00
static int __nvme_submit_admin_cmd ( struct nvme_dev * dev , struct nvme_command * cmd ,
2014-11-04 18:20:14 +03:00
u32 * result , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
int res ;
struct request * req ;
req = blk_mq_alloc_request ( dev - > admin_q , WRITE , GFP_KERNEL , false ) ;
2014-12-10 23:02:44 +03:00
if ( IS_ERR ( req ) )
return PTR_ERR ( req ) ;
2014-11-04 18:20:14 +03:00
res = nvme_submit_sync_cmd ( req , cmd , result , timeout ) ;
2014-11-17 20:43:42 +03:00
blk_mq_free_request ( req ) ;
2014-11-04 18:20:14 +03:00
return res ;
2014-03-04 03:39:13 +04:00
}
2014-11-04 18:20:14 +03:00
int nvme_submit_admin_cmd ( struct nvme_dev * dev , struct nvme_command * cmd ,
2014-03-04 03:39:13 +04:00
u32 * result )
{
2014-11-04 18:20:14 +03:00
return __nvme_submit_admin_cmd ( dev , cmd , result , ADMIN_TIMEOUT ) ;
2011-01-20 20:50:14 +03:00
}
2014-11-04 18:20:14 +03:00
/*
 * Run @cmd synchronously on the namespace's I/O request queue, bounded by
 * NVME_IO_TIMEOUT. Return value follows nvme_submit_sync_cmd().
 */
int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
		       struct nvme_command *cmd, u32 *result)
{
	struct request *req;
	int status;

	req = blk_mq_alloc_request(ns->queue, WRITE,
				   (GFP_KERNEL | __GFP_WAIT), false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	status = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT);
	blk_mq_free_request(req);

	return status;
}
2011-01-20 20:50:14 +03:00
/*
 * Issue a queue-deletion admin command. @opcode selects which kind of
 * queue is deleted; @id is the queue identifier.
 */
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &cmd, NULL);
}
/*
 * Ask the controller to create completion queue @qid backed by @nvmeq's
 * CQ memory, physically contiguous, with interrupts enabled on the queue's
 * cq_vector.
 */
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.create_cq.opcode = nvme_admin_create_cq;
	cmd.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
	cmd.create_cq.cqid = cpu_to_le16(qid);
	/* The queue size field holds depth - 1. */
	cmd.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	cmd.create_cq.cq_flags =
		cpu_to_le16(NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED);
	cmd.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &cmd, NULL);
}
/*
 * Ask the controller to create submission queue @qid backed by @nvmeq's
 * SQ memory, physically contiguous, medium priority, paired with the
 * completion queue of the same id.
 */
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.create_sq.opcode = nvme_admin_create_sq;
	cmd.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
	cmd.create_sq.sqid = cpu_to_le16(qid);
	/* The queue size field holds depth - 1. */
	cmd.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	cmd.create_sq.sq_flags =
		cpu_to_le16(NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM);
	cmd.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &cmd, NULL);
}
/* Delete completion queue @cqid on the controller. */
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
}
/* Delete submission queue @sqid on the controller. */
static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
2013-03-05 05:40:58 +04:00
int nvme_identify ( struct nvme_dev * dev , unsigned nsid , unsigned cns ,
2011-09-20 01:08:14 +04:00
dma_addr_t dma_addr )
{
struct nvme_command c ;
memset ( & c , 0 , sizeof ( c ) ) ;
c . identify . opcode = nvme_admin_identify ;
c . identify . nsid = cpu_to_le32 ( nsid ) ;
c . identify . prp1 = cpu_to_le64 ( dma_addr ) ;
c . identify . cns = cpu_to_le32 ( cns ) ;
return nvme_submit_admin_cmd ( dev , & c , NULL ) ;
}
2013-03-05 05:40:58 +04:00
int nvme_get_features ( struct nvme_dev * dev , unsigned fid , unsigned nsid ,
2012-09-21 20:52:13 +04:00
dma_addr_t dma_addr , u32 * result )
2011-09-20 01:08:14 +04:00
{
struct nvme_command c ;
memset ( & c , 0 , sizeof ( c ) ) ;
c . features . opcode = nvme_admin_get_features ;
2012-07-26 02:06:38 +04:00
c . features . nsid = cpu_to_le32 ( nsid ) ;
2011-09-20 01:08:14 +04:00
c . features . prp1 = cpu_to_le64 ( dma_addr ) ;
c . features . fid = cpu_to_le32 ( fid ) ;
2012-09-21 20:52:13 +04:00
return nvme_submit_admin_cmd ( dev , & c , result ) ;
2012-01-11 18:29:56 +04:00
}
2013-03-05 05:40:58 +04:00
/*
 * Send a Set Features admin command for feature @fid, with @dword11 as the
 * feature-specific value and @dma_addr as PRP1. The completion's result
 * dword is stored in *@result when @result is non-NULL.
 */
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.features.opcode = nvme_admin_set_features;
	cmd.features.prp1 = cpu_to_le64(dma_addr);
	cmd.features.fid = cpu_to_le32(fid);
	cmd.features.dword11 = cpu_to_le32(dword11);

	return nvme_submit_admin_cmd(dev, &cmd, result);
}
2013-12-11 00:10:38 +04:00
/**
2014-11-04 18:20:14 +03:00
* nvme_abort_req - Attempt aborting a request
2013-12-11 00:10:38 +04:00
*
* Schedule controller reset if the command was already aborted once before and
* still hasn ' t been returned to the driver , or if this is the admin queue .
*/
2014-11-04 18:20:14 +03:00
static void nvme_abort_req ( struct request * req )
2013-12-11 00:10:38 +04:00
{
2014-11-04 18:20:14 +03:00
struct nvme_cmd_info * cmd_rq = blk_mq_rq_to_pdu ( req ) ;
struct nvme_queue * nvmeq = cmd_rq - > nvmeq ;
2013-12-11 00:10:38 +04:00
struct nvme_dev * dev = nvmeq - > dev ;
2014-11-04 18:20:14 +03:00
struct request * abort_req ;
struct nvme_cmd_info * abort_cmd ;
struct nvme_command cmd ;
2013-12-11 00:10:38 +04:00
2014-11-04 18:20:14 +03:00
if ( ! nvmeq - > qid | | cmd_rq - > aborted ) {
2013-12-11 00:10:38 +04:00
if ( work_busy ( & dev - > reset_work ) )
return ;
list_del_init ( & dev - > node ) ;
dev_warn ( & dev - > pci_dev - > dev ,
2014-11-04 18:20:14 +03:00
" I/O %d QID %d timeout, reset controller \n " ,
req - > tag , nvmeq - > qid ) ;
2014-03-07 19:24:49 +04:00
dev - > reset_workfn = nvme_reset_failed_dev ;
2013-12-11 00:10:38 +04:00
queue_work ( nvme_workq , & dev - > reset_work ) ;
return ;
}
if ( ! dev - > abort_limit )
return ;
2014-11-04 18:20:14 +03:00
abort_req = blk_mq_alloc_request ( dev - > admin_q , WRITE , GFP_ATOMIC ,
false ) ;
2014-11-05 23:39:09 +03:00
if ( IS_ERR ( abort_req ) )
2013-12-11 00:10:38 +04:00
return ;
2014-11-04 18:20:14 +03:00
abort_cmd = blk_mq_rq_to_pdu ( abort_req ) ;
nvme_set_info ( abort_cmd , abort_req , abort_completion ) ;
2013-12-11 00:10:38 +04:00
memset ( & cmd , 0 , sizeof ( cmd ) ) ;
cmd . abort . opcode = nvme_admin_abort_cmd ;
2014-11-04 18:20:14 +03:00
cmd . abort . cid = req - > tag ;
2013-12-11 00:10:38 +04:00
cmd . abort . sqid = cpu_to_le16 ( nvmeq - > qid ) ;
2014-11-04 18:20:14 +03:00
cmd . abort . command_id = abort_req - > tag ;
2013-12-11 00:10:38 +04:00
- - dev - > abort_limit ;
2014-11-04 18:20:14 +03:00
cmd_rq - > aborted = 1 ;
2013-12-11 00:10:38 +04:00
2014-11-04 18:20:14 +03:00
dev_warn ( nvmeq - > q_dmadev , " Aborting I/O %d QID %d \n " , req - > tag ,
2013-12-11 00:10:38 +04:00
nvmeq - > qid ) ;
2014-11-04 18:20:14 +03:00
if ( nvme_submit_cmd ( dev - > queues [ 0 ] , & cmd ) < 0 ) {
dev_warn ( nvmeq - > q_dmadev ,
" Could not abort I/O %d QID %d " ,
req - > tag , nvmeq - > qid ) ;
2014-12-10 23:00:31 +03:00
blk_mq_free_request ( abort_req ) ;
2014-11-04 18:20:14 +03:00
}
2013-12-11 00:10:38 +04:00
}
2014-11-04 18:20:14 +03:00
/*
 * Tag iterator callback: force-complete one started request on @data (the
 * nvme_queue) with a synthetic "abort requested" completion.
 *
 * Requests that were never started, or whose context is already
 * CMD_CTX_CANCELLED, are left alone. When the request queue is dying the
 * status also carries NVME_SC_DNR.
 */
static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
				struct request *req, void *data, bool reserved)
{
	struct nvme_queue *nvmeq = data;
	void *ctx;
	nvme_completion_fn fn;
	struct nvme_cmd_info *cmd;
	struct nvme_completion cqe;

	if (!blk_mq_request_started(req))
		return;

	cmd = blk_mq_rq_to_pdu(req);

	if (cmd->ctx == CMD_CTX_CANCELLED)
		return;

	/*
	 * Zero the whole synthetic entry: completion callbacks such as
	 * sync_completion() read cqe->result as well as cqe->status, and must
	 * not pick up stale stack contents.
	 */
	memset(&cqe, 0, sizeof(cqe));
	if (blk_queue_dying(req->q))
		cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
	else
		cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);

	dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
						req->tag, nvmeq->qid);
	ctx = cancel_cmd_info(cmd, &fn);
	fn(nvmeq, ctx, &cqe);
}
2014-11-04 18:20:14 +03:00
/*
 * blk-mq timeout handler. While the device is not yet initialized the
 * request is cancelled outright; otherwise an abort is attempted and the
 * timer is re-armed.
 */
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
	struct nvme_cmd_info *info = blk_mq_rq_to_pdu(req);
	struct nvme_queue *nvmeq = info->nvmeq;

	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
							nvmeq->qid);

	if (nvmeq->dev->initialized) {
		nvme_abort_req(req);

		/*
		 * The aborted request completes once the abort itself is
		 * answered. Re-arm the timer: timing out a second time leads
		 * to a device reset, because the device is then considered
		 * faulty.
		 */
		return BLK_EH_RESET_TIMER;
	}

	/*
	 * Force-cancelling the command frees the request, which requires
	 * that we return BLK_EH_NOT_HANDLED.
	 */
	nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
	return BLK_EH_NOT_HANDLED;
}
2013-07-16 01:02:20 +04:00
2014-11-04 18:20:14 +03:00
static void nvme_free_queue ( struct nvme_queue * nvmeq )
{
2012-08-03 21:55:56 +04:00
dma_free_coherent ( nvmeq - > q_dmadev , CQ_SIZE ( nvmeq - > q_depth ) ,
( void * ) nvmeq - > cqes , nvmeq - > cq_dma_addr ) ;
dma_free_coherent ( nvmeq - > q_dmadev , SQ_SIZE ( nvmeq - > q_depth ) ,
nvmeq - > sq_cmds , nvmeq - > sq_dma_addr ) ;
kfree ( nvmeq ) ;
}
2013-12-16 22:50:00 +04:00
static void nvme_free_queues ( struct nvme_dev * dev , int lowest )
2013-07-16 01:02:20 +04:00
{
2014-07-07 19:14:42 +04:00
LLIST_HEAD ( q_list ) ;
struct nvme_queue * nvmeq , * next ;
struct llist_node * entry ;
2013-07-16 01:02:20 +04:00
int i ;
2013-12-16 22:50:00 +04:00
for ( i = dev - > queue_count - 1 ; i > = lowest ; i - - ) {
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2014-07-07 19:14:42 +04:00
llist_add ( & nvmeq - > node , & q_list ) ;
2013-07-16 01:02:20 +04:00
dev - > queue_count - - ;
2014-11-04 18:20:14 +03:00
dev - > queues [ i ] = NULL ;
2013-07-16 01:02:20 +04:00
}
2014-07-07 19:14:42 +04:00
synchronize_rcu ( ) ;
entry = llist_del_all ( & q_list ) ;
llist_for_each_entry_safe ( nvmeq , next , entry , node )
nvme_free_queue ( nvmeq ) ;
2013-07-16 01:02:20 +04:00
}
2013-12-11 00:10:40 +04:00
/**
* nvme_suspend_queue - put queue into suspended state
* @ nvmeq - queue to suspend
*/
static int nvme_suspend_queue ( struct nvme_queue * nvmeq )
2011-01-20 20:50:14 +03:00
{
2014-12-22 22:59:04 +03:00
int vector ;
2011-01-20 20:50:14 +03:00
2012-08-07 23:56:23 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2014-12-22 22:59:04 +03:00
if ( nvmeq - > cq_vector = = - 1 ) {
spin_unlock_irq ( & nvmeq - > q_lock ) ;
return 1 ;
}
vector = nvmeq - > dev - > entry [ nvmeq - > cq_vector ] . vector ;
2014-03-24 20:46:25 +04:00
nvmeq - > dev - > online_queues - - ;
2014-12-22 22:59:04 +03:00
nvmeq - > cq_vector = - 1 ;
2012-08-07 23:56:23 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-03-27 16:52:06 +04:00
irq_set_affinity_hint ( vector , NULL ) ;
free_irq ( vector , nvmeq ) ;
2011-01-20 20:50:14 +03:00
2013-12-11 00:10:40 +04:00
return 0 ;
}
2011-01-20 20:50:14 +03:00
2013-12-11 00:10:40 +04:00
static void nvme_clear_queue ( struct nvme_queue * nvmeq )
{
2014-11-04 18:20:14 +03:00
struct blk_mq_hw_ctx * hctx = nvmeq - > hctx ;
2013-07-16 01:02:20 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
nvme_process_cq ( nvmeq ) ;
2014-11-04 18:20:14 +03:00
if ( hctx & & hctx - > tags )
blk_mq_tag_busy_iter ( hctx , nvme_cancel_queue_ios , nvmeq ) ;
2013-07-16 01:02:20 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-01-20 20:50:14 +03:00
}
2013-12-11 00:10:40 +04:00
static void nvme_disable_queue ( struct nvme_dev * dev , int qid )
{
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ qid ] ;
2013-12-11 00:10:40 +04:00
if ( ! nvmeq )
return ;
if ( nvme_suspend_queue ( nvmeq ) )
return ;
2013-12-11 00:10:39 +04:00
/* Don't tell the adapter to delete the admin queue.
* Don ' t tell a removed adapter to delete IO queues . */
if ( qid & & readl ( & dev - > bar - > csts ) ! = - 1 ) {
2011-01-20 20:50:14 +03:00
adapter_delete_sq ( dev , qid ) ;
adapter_delete_cq ( dev , qid ) ;
}
2015-01-08 04:55:50 +03:00
if ( ! qid & & dev - > admin_q )
blk_mq_freeze_queue_start ( dev - > admin_q ) ;
2013-12-11 00:10:40 +04:00
nvme_clear_queue ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
}
static struct nvme_queue * nvme_alloc_queue ( struct nvme_dev * dev , int qid ,
2014-12-22 22:59:04 +03:00
int depth )
2011-01-20 20:50:14 +03:00
{
struct device * dmadev = & dev - > pci_dev - > dev ;
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = kzalloc ( sizeof ( * nvmeq ) , GFP_KERNEL ) ;
2011-01-20 20:50:14 +03:00
if ( ! nvmeq )
return NULL ;
2014-06-16 00:37:33 +04:00
nvmeq - > cqes = dma_zalloc_coherent ( dmadev , CQ_SIZE ( depth ) ,
& nvmeq - > cq_dma_addr , GFP_KERNEL ) ;
2011-01-20 20:50:14 +03:00
if ( ! nvmeq - > cqes )
goto free_nvmeq ;
nvmeq - > sq_cmds = dma_alloc_coherent ( dmadev , SQ_SIZE ( depth ) ,
& nvmeq - > sq_dma_addr , GFP_KERNEL ) ;
if ( ! nvmeq - > sq_cmds )
goto free_cqdma ;
nvmeq - > q_dmadev = dmadev ;
2011-02-10 17:56:01 +03:00
nvmeq - > dev = dev ;
2014-01-28 00:57:22 +04:00
snprintf ( nvmeq - > irqname , sizeof ( nvmeq - > irqname ) , " nvme%dq%d " ,
dev - > instance , qid ) ;
2011-01-20 20:50:14 +03:00
spin_lock_init ( & nvmeq - > q_lock ) ;
nvmeq - > cq_head = 0 ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = 1 ;
2013-09-10 07:25:37 +04:00
nvmeq - > q_db = & dev - > dbs [ qid * 2 * dev - > db_stride ] ;
2011-01-20 20:50:14 +03:00
nvmeq - > q_depth = depth ;
2013-12-11 00:10:38 +04:00
nvmeq - > qid = qid ;
2013-07-16 01:02:20 +04:00
dev - > queue_count + + ;
2014-11-04 18:20:14 +03:00
dev - > queues [ qid ] = nvmeq ;
2011-01-20 20:50:14 +03:00
return nvmeq ;
free_cqdma :
2013-05-01 23:07:47 +04:00
dma_free_coherent ( dmadev , CQ_SIZE ( depth ) , ( void * ) nvmeq - > cqes ,
2011-01-20 20:50:14 +03:00
nvmeq - > cq_dma_addr ) ;
free_nvmeq :
kfree ( nvmeq ) ;
return NULL ;
}
2011-01-20 17:10:15 +03:00
static int queue_request_irq ( struct nvme_dev * dev , struct nvme_queue * nvmeq ,
const char * name )
{
2011-02-06 15:28:06 +03:00
if ( use_threaded_interrupts )
return request_threaded_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector ,
2013-10-12 08:23:29 +04:00
nvme_irq_check , nvme_irq , IRQF_SHARED ,
2011-02-06 15:28:06 +03:00
name , nvmeq ) ;
2011-01-20 17:10:15 +03:00
return request_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector , nvme_irq ,
2013-10-12 08:23:29 +04:00
IRQF_SHARED , name , nvmeq ) ;
2011-01-20 17:10:15 +03:00
}
2013-07-16 01:02:20 +04:00
/*
 * Reset the host-side ring state of @nvmeq (tail, head, phase, doorbell
 * pointer, zeroed completion ring) under the queue lock and count the
 * queue as online.
 */
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	spin_lock_irq(&nvmeq->q_lock);

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
	dev->online_queues++;

	spin_unlock_irq(&nvmeq->q_lock);
}
static int nvme_create_queue ( struct nvme_queue * nvmeq , int qid )
{
struct nvme_dev * dev = nvmeq - > dev ;
int result ;
2011-02-01 16:39:04 +03:00
2014-12-22 22:59:04 +03:00
nvmeq - > cq_vector = qid - 1 ;
2011-01-20 20:50:14 +03:00
result = adapter_alloc_cq ( dev , qid , nvmeq ) ;
if ( result < 0 )
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
result = adapter_alloc_sq ( dev , qid , nvmeq ) ;
if ( result < 0 )
goto release_cq ;
2014-01-28 00:57:22 +04:00
result = queue_request_irq ( dev , nvmeq , nvmeq - > irqname ) ;
2011-01-20 20:50:14 +03:00
if ( result < 0 )
goto release_sq ;
2013-07-16 01:02:20 +04:00
nvme_init_queue ( nvmeq , qid ) ;
return result ;
2011-01-20 20:50:14 +03:00
release_sq :
adapter_delete_sq ( dev , qid ) ;
release_cq :
adapter_delete_cq ( dev , qid ) ;
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
}
2013-05-04 14:43:16 +04:00
/*
 * Poll CSTS.RDY every 100ms until it reads set (@enabled true) or clear
 * (@enabled false). The deadline is derived from the timeout field of
 * @cap.
 *
 * Returns 0 once the bit has the wanted value, -EINTR if a fatal signal
 * is pending, or -ENODEV when the deadline passes.
 */
static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
{
	u32 wanted = enabled ? NVME_CSTS_RDY : 0;
	unsigned long deadline;

	deadline = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;

	for (;;) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == wanted)
			return 0;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, deadline)) {
			dev_err(&dev->pci_dev->dev,
				"Device not ready; aborting %s\n", enabled ?
						"initialisation" : "reset");
			return -ENODEV;
		}
	}
}
/*
 * If the device was handed to us already enabled, simply clear the enable
 * bit. The spec says the 'shutdown notification bits' should be set, but
 * that may make the device complete commands to the admin queue ... and we
 * have no idea what memory that might be pointing at!
 *
 * Returns the result of waiting for the ready bit to clear.
 */
static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
{
	dev->ctrl_config &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, cap, false);
}
/*
 * Clear any shutdown notification, set the enable bit in the controller
 * configuration, and wait for the controller to report ready.
 */
static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
{
	u32 cc = dev->ctrl_config;

	cc &= ~NVME_CC_SHN_MASK;
	cc |= NVME_CC_ENABLE;
	dev->ctrl_config = cc;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_ready(dev, cap, true);
}
2013-07-16 01:02:22 +04:00
static int nvme_shutdown_ctrl ( struct nvme_dev * dev )
{
unsigned long timeout ;
2014-06-23 18:24:36 +04:00
dev - > ctrl_config & = ~ NVME_CC_SHN_MASK ;
dev - > ctrl_config | = NVME_CC_SHN_NORMAL ;
writel ( dev - > ctrl_config , & dev - > bar - > cc ) ;
2013-07-16 01:02:22 +04:00
2014-07-01 19:33:32 +04:00
timeout = SHUTDOWN_TIMEOUT + jiffies ;
2013-07-16 01:02:22 +04:00
while ( ( readl ( & dev - > bar - > csts ) & NVME_CSTS_SHST_MASK ) ! =
NVME_CSTS_SHST_CMPLT ) {
msleep ( 100 ) ;
if ( fatal_signal_pending ( current ) )
return - EINTR ;
if ( time_after ( jiffies , timeout ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Device shutdown incomplete; abort shutdown \n " ) ;
return - ENODEV ;
}
}
return 0 ;
}
2014-11-04 18:20:14 +03:00
static struct blk_mq_ops nvme_mq_admin_ops = {
. queue_rq = nvme_admin_queue_rq ,
. map_queue = blk_mq_map_queue ,
. init_hctx = nvme_admin_init_hctx ,
2014-11-14 19:47:32 +03:00
. exit_hctx = nvme_exit_hctx ,
2014-11-04 18:20:14 +03:00
. init_request = nvme_admin_init_request ,
. timeout = nvme_timeout ,
} ;
static struct blk_mq_ops nvme_mq_ops = {
. queue_rq = nvme_queue_rq ,
. map_queue = blk_mq_map_queue ,
. init_hctx = nvme_init_hctx ,
2014-11-14 19:47:32 +03:00
. exit_hctx = nvme_exit_hctx ,
2014-11-04 18:20:14 +03:00
. init_request = nvme_init_request ,
. timeout = nvme_timeout ,
} ;
2015-01-08 04:55:49 +03:00
static void nvme_dev_remove_admin ( struct nvme_dev * dev )
{
if ( dev - > admin_q & & ! blk_queue_dying ( dev - > admin_q ) ) {
blk_cleanup_queue ( dev - > admin_q ) ;
blk_mq_free_tag_set ( & dev - > admin_tagset ) ;
}
}
2014-11-04 18:20:14 +03:00
static int nvme_alloc_admin_tags ( struct nvme_dev * dev )
{
if ( ! dev - > admin_q ) {
dev - > admin_tagset . ops = & nvme_mq_admin_ops ;
dev - > admin_tagset . nr_hw_queues = 1 ;
dev - > admin_tagset . queue_depth = NVME_AQ_DEPTH - 1 ;
dev - > admin_tagset . timeout = ADMIN_TIMEOUT ;
dev - > admin_tagset . numa_node = dev_to_node ( & dev - > pci_dev - > dev ) ;
dev - > admin_tagset . cmd_size = sizeof ( struct nvme_cmd_info ) ;
dev - > admin_tagset . driver_data = dev ;
if ( blk_mq_alloc_tag_set ( & dev - > admin_tagset ) )
return - ENOMEM ;
dev - > admin_q = blk_mq_init_queue ( & dev - > admin_tagset ) ;
2015-01-02 17:25:27 +03:00
if ( IS_ERR ( dev - > admin_q ) ) {
2014-11-04 18:20:14 +03:00
blk_mq_free_tag_set ( & dev - > admin_tagset ) ;
return - ENOMEM ;
}
2015-01-08 04:55:49 +03:00
if ( ! blk_get_queue ( dev - > admin_q ) ) {
nvme_dev_remove_admin ( dev ) ;
return - ENODEV ;
}
2015-01-08 04:55:50 +03:00
} else
blk_mq_unfreeze_queue ( dev - > admin_q ) ;
2014-11-04 18:20:14 +03:00
return 0 ;
}
2012-12-22 03:13:49 +04:00
static int nvme_configure_admin_queue ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-05-04 14:43:16 +04:00
int result ;
2011-01-20 20:50:14 +03:00
u32 aqa ;
2013-05-04 14:43:16 +04:00
u64 cap = readq ( & dev - > bar - > cap ) ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * nvmeq ;
2014-06-23 21:34:01 +04:00
unsigned page_shift = PAGE_SHIFT ;
unsigned dev_page_min = NVME_CAP_MPSMIN ( cap ) + 12 ;
unsigned dev_page_max = NVME_CAP_MPSMAX ( cap ) + 12 ;
if ( page_shift < dev_page_min ) {
dev_err ( & dev - > pci_dev - > dev ,
" Minimum device page size (%u) too large for "
" host (%u) \n " , 1 < < dev_page_min ,
1 < < page_shift ) ;
return - ENODEV ;
}
if ( page_shift > dev_page_max ) {
dev_info ( & dev - > pci_dev - > dev ,
" Device maximum page size (%u) smaller than "
" host (%u); enabling work-around \n " ,
1 < < dev_page_max , 1 < < page_shift ) ;
page_shift = dev_page_max ;
}
2011-01-20 20:50:14 +03:00
2013-05-04 14:43:16 +04:00
result = nvme_disable_ctrl ( dev , cap ) ;
if ( result < 0 )
return result ;
2011-01-20 20:50:14 +03:00
2014-11-04 18:20:14 +03:00
nvmeq = dev - > queues [ 0 ] ;
2013-07-16 01:02:23 +04:00
if ( ! nvmeq ) {
2014-12-22 22:59:04 +03:00
nvmeq = nvme_alloc_queue ( dev , 0 , NVME_AQ_DEPTH ) ;
2013-07-16 01:02:23 +04:00
if ( ! nvmeq )
return - ENOMEM ;
}
2011-01-20 20:50:14 +03:00
aqa = nvmeq - > q_depth - 1 ;
aqa | = aqa < < 16 ;
2014-06-23 21:34:01 +04:00
dev - > page_size = 1 < < page_shift ;
2014-06-23 18:24:36 +04:00
dev - > ctrl_config = NVME_CC_CSS_NVM ;
2014-06-23 21:34:01 +04:00
dev - > ctrl_config | = ( page_shift - 12 ) < < NVME_CC_MPS_SHIFT ;
2011-01-20 20:50:14 +03:00
dev - > ctrl_config | = NVME_CC_ARB_RR | NVME_CC_SHN_NONE ;
2011-03-22 22:55:45 +03:00
dev - > ctrl_config | = NVME_CC_IOSQES | NVME_CC_IOCQES ;
2011-01-20 20:50:14 +03:00
writel ( aqa , & dev - > bar - > aqa ) ;
writeq ( nvmeq - > sq_dma_addr , & dev - > bar - > asq ) ;
writeq ( nvmeq - > cq_dma_addr , & dev - > bar - > acq ) ;
2013-05-04 14:43:16 +04:00
result = nvme_enable_ctrl ( dev , cap ) ;
2013-05-01 23:07:51 +04:00
if ( result )
2014-11-04 18:20:14 +03:00
goto free_nvmeq ;
2014-12-22 22:59:04 +03:00
nvmeq - > cq_vector = 0 ;
2014-01-28 00:57:22 +04:00
result = queue_request_irq ( dev , nvmeq , nvmeq - > irqname ) ;
2013-05-01 23:07:51 +04:00
if ( result )
2015-01-08 04:55:50 +03:00
goto free_nvmeq ;
2013-05-01 23:07:51 +04:00
2011-01-20 20:50:14 +03:00
return result ;
2014-11-04 18:20:14 +03:00
free_nvmeq :
nvme_free_queues ( dev , 0 ) ;
return result ;
2011-01-20 20:50:14 +03:00
}
2013-03-05 05:40:58 +04:00
struct nvme_iod * nvme_map_user_pages ( struct nvme_dev * dev , int write ,
2011-12-20 22:34:52 +04:00
unsigned long addr , unsigned length )
2011-01-20 20:50:14 +03:00
{
2011-01-24 15:52:07 +03:00
int i , err , count , nents , offset ;
2011-01-27 01:05:50 +03:00
struct scatterlist * sg ;
struct page * * pages ;
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod ;
2011-01-24 15:52:07 +03:00
if ( addr & 3 )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2013-05-13 18:59:50 +04:00
if ( ! length | | length > INT_MAX - PAGE_SIZE )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2011-01-27 01:05:50 +03:00
2011-01-24 15:52:07 +03:00
offset = offset_in_page ( addr ) ;
2011-01-27 01:05:50 +03:00
count = DIV_ROUND_UP ( offset + length , PAGE_SIZE ) ;
pages = kcalloc ( count , sizeof ( * pages ) , GFP_KERNEL ) ;
2012-01-20 16:55:30 +04:00
if ( ! pages )
return ERR_PTR ( - ENOMEM ) ;
2011-01-24 15:52:07 +03:00
err = get_user_pages_fast ( addr , count , 1 , pages ) ;
if ( err < count ) {
count = err ;
err = - EFAULT ;
goto put_pages ;
}
2011-01-27 01:05:50 +03:00
2014-05-29 08:31:52 +04:00
err = - ENOMEM ;
2014-06-23 21:34:01 +04:00
iod = nvme_alloc_iod ( count , length , dev , GFP_KERNEL ) ;
2014-05-29 08:31:52 +04:00
if ( ! iod )
goto put_pages ;
2011-12-20 22:34:52 +04:00
sg = iod - > sg ;
2011-01-24 15:52:07 +03:00
sg_init_table ( sg , count ) ;
2011-09-14 01:01:39 +04:00
for ( i = 0 ; i < count ; i + + ) {
sg_set_page ( & sg [ i ] , pages [ i ] ,
2013-05-13 18:59:50 +04:00
min_t ( unsigned , length , PAGE_SIZE - offset ) ,
offset ) ;
2011-09-14 01:01:39 +04:00
length - = ( PAGE_SIZE - offset ) ;
offset = 0 ;
2011-01-27 01:05:50 +03:00
}
2012-01-07 00:49:25 +04:00
sg_mark_end ( & sg [ i - 1 ] ) ;
2012-01-07 00:52:56 +04:00
iod - > nents = count ;
2011-01-27 01:05:50 +03:00
nents = dma_map_sg ( & dev - > pci_dev - > dev , sg , count ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-24 15:52:07 +03:00
if ( ! nents )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-01-20 20:50:14 +03:00
2011-01-27 01:05:50 +03:00
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return iod ;
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
free_iod :
kfree ( iod ) ;
2011-01-27 01:05:50 +03:00
put_pages :
for ( i = 0 ; i < count ; i + + )
put_page ( pages [ i ] ) ;
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return ERR_PTR ( err ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2013-03-05 05:40:58 +04:00
void nvme_unmap_user_pages ( struct nvme_dev * dev , int write ,
2012-01-07 00:52:56 +04:00
struct nvme_iod * iod )
2011-01-27 01:05:50 +03:00
{
2012-01-07 00:52:56 +04:00
int i ;
2011-01-20 20:50:14 +03:00
2012-01-07 00:52:56 +04:00
dma_unmap_sg ( & dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-27 01:05:50 +03:00
2012-01-07 00:52:56 +04:00
for ( i = 0 ; i < iod - > nents ; i + + )
put_page ( sg_page ( & iod - > sg [ i ] ) ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2011-02-02 00:13:29 +03:00
/*
 * Handle a user-submitted read, write or compare on namespace @ns.
 *
 * The user data buffer is pinned and mapped directly. Metadata, when the
 * namespace has any (ns->ms != 0), goes through a coherent bounce buffer:
 * it is copied in from the user pages before submission when bit 0 of the
 * opcode is set, and copied back out afterwards when the command succeeded
 * and bit 0 of the opcode is clear.
 *
 * Returns a negative errno, or the status from nvme_submit_io_cmd().
 */
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_dev *dev = ns->dev;
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	int status, i;
	struct nvme_iod *iod, *meta_iod = NULL;
	dma_addr_t meta_dma_addr;
	void *meta, *uninitialized_var(meta_mem);

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;

	/* nblocks is zero-based. */
	length = (io.nblocks + 1) << ns->lba_shift;
	meta_len = (io.nblocks + 1) * ns->ms;

	/* Metadata needs a non-NULL, 4-byte aligned user pointer. */
	if (meta_len && ((io.metadata & 3) || !io.metadata))
		return -EINVAL;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
		break;
	default:
		return -EINVAL;
	}

	if (IS_ERR(iod))
		return PTR_ERR(iod);

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.apptag = cpu_to_le16(io.apptag);
	c.rw.appmask = cpu_to_le16(io.appmask);

	if (meta_len) {
		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
								meta_len);
		if (IS_ERR(meta_iod)) {
			status = PTR_ERR(meta_iod);
			/* Keep the cleanup below away from the error pointer. */
			meta_iod = NULL;
			goto unmap;
		}

		meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
						&meta_dma_addr, GFP_KERNEL);
		if (!meta_mem) {
			status = -ENOMEM;
			goto unmap;
		}

		if (io.opcode & 1) {
			/* Gather the user's metadata into the bounce buffer. */
			int copied = 0;

			for (i = 0; i < meta_iod->nents; i++) {
				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
						meta_iod->sg[i].offset;
				memcpy(meta_mem + copied, meta,
						meta_iod->sg[i].length);
				kunmap_atomic(meta);
				copied += meta_iod->sg[i].length;
			}
		}

		c.rw.metadata = cpu_to_le64(meta_dma_addr);
	}

	length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
	c.rw.prp2 = cpu_to_le64(iod->first_dma);

	/* The PRP setup must cover the whole transfer. */
	if (length != (io.nblocks + 1) << ns->lba_shift)
		status = -ENOMEM;
	else
		status = nvme_submit_io_cmd(dev, ns, &c, NULL);

	if (meta_len) {
		if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
			/* Scatter the bounce buffer back to the user pages. */
			int copied = 0;

			for (i = 0; i < meta_iod->nents; i++) {
				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
						meta_iod->sg[i].offset;
				memcpy(meta, meta_mem + copied,
						meta_iod->sg[i].length);
				kunmap_atomic(meta);
				copied += meta_iod->sg[i].length;
			}
		}

		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
						meta_dma_addr);
	}

 unmap:
	nvme_unmap_user_pages(dev, io.opcode & 1, iod);
	nvme_free_iod(dev, iod);

	if (meta_iod) {
		nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
		nvme_free_iod(dev, meta_iod);
	}

	return status;
}
2014-11-04 18:20:14 +03:00
/*
 * Execute a passthrough command supplied by user space.
 *
 * The descriptor at @ucmd is copied in and translated into a struct
 * nvme_command.  If it describes a data buffer, the buffer is mapped with
 * nvme_map_user_pages() for the duration of the command.  The command is
 * submitted through a request allocated from @ns's queue when @ns is
 * non-NULL, and through __nvme_submit_admin_cmd() when @ns is NULL.
 *
 * The caller must hold CAP_SYS_ADMIN.
 *
 * Return: -EACCES or -EFAULT if the caller or descriptor is rejected, the
 * error from nvme_map_user_pages() if mapping fails, -ENOMEM if
 * nvme_setup_prps() covered a different length than requested, otherwise
 * the status of the submission.  When that status is non-negative the
 * command result is copied back to @ucmd->result (-EFAULT if that copy
 * fails).
 */
static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
			struct nvme_passthru_cmd __user *ucmd)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	struct nvme_iod *uninitialized_var(iod);
	unsigned timeout;
	int status, length;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;

	/* Translate the user descriptor into an on-the-wire command. */
	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);

	length = cmd.data_len;
	if (cmd.data_len) {
		/*
		 * The low opcode bit is handed to the mapping helper
		 * (presumably the transfer direction - confirm against
		 * nvme_map_user_pages()).
		 */
		iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
								length);
		if (IS_ERR(iod))
			return PTR_ERR(iod);
		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
		c.common.prp2 = cpu_to_le64(iod->first_dma);
	}

	/* A zero timeout_ms selects the default admin timeout. */
	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);
	else
		timeout = ADMIN_TIMEOUT;

	if (length != cmd.data_len) {
		/* nvme_setup_prps() did not cover the whole buffer. */
		status = -ENOMEM;
	} else if (!ns) {
		status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);
	} else {
		struct request *req;

		req = blk_mq_alloc_request(ns->queue, WRITE,
						(GFP_KERNEL|__GFP_WAIT), false);
		if (IS_ERR(req)) {
			status = PTR_ERR(req);
		} else {
			status = nvme_submit_sync_cmd(req, &c, &cmd.result,
								timeout);
			blk_mq_free_request(req);
		}
	}

	/* iod is only valid when a data buffer was mapped above. */
	if (cmd.data_len) {
		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
		nvme_free_iod(dev, iod);
	}

	if (status >= 0 &&
	    copy_to_user(&ucmd->result, &cmd.result, sizeof(cmd.result)))
		status = -EFAULT;

	return status;
}
2011-01-20 20:50:14 +03:00
/*
 * Block-device ioctl entry point: dispatch @cmd to the matching handler.
 *
 * Return: the handler's result, or -ENOTTY for an unrecognised @cmd.
 * NVME_IOCTL_ID returns the namespace id itself.
 */
static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
							unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		ret = ns->ns_id;
		break;
	case NVME_IOCTL_ADMIN_CMD:
		/* A NULL namespace routes the command to the admin path. */
		ret = nvme_user_cmd(ns->dev, NULL, argp);
		break;
	case NVME_IOCTL_IO_CMD:
		ret = nvme_user_cmd(ns->dev, ns, argp);
		break;
	case NVME_IOCTL_SUBMIT_IO:
		ret = nvme_submit_io(ns, argp);
		break;
	case SG_GET_VERSION_NUM:
		ret = nvme_sg_get_version_num(argp);
		break;
	case SG_IO:
		ret = nvme_sg_io(ns, argp);
		break;
	default:
		ret = -ENOTTY;
		break;
	}

	return ret;
}
2013-10-23 23:07:34 +04:00
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point.  SG_IO is refused with -ENOIOCTLCMD; every
 * other command is forwarded unchanged to nvme_ioctl().
 */
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
					unsigned int cmd, unsigned long arg)
{
	if (cmd == SG_IO)
		return -ENOIOCTLCMD;

	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif
2014-02-01 03:53:39 +04:00
/*
 * Open a namespace block device.
 *
 * Under dev_list_lock, looks up the namespace behind the disk and takes a
 * reference on its device.
 *
 * Return: 0 on success, -ENXIO if the disk no longer has a namespace
 * attached or the device reference count has already dropped to zero.
 */
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
	struct nvme_ns *ns;
	int ret = -ENXIO;

	spin_lock(&dev_list_lock);
	ns = bdev->bd_disk->private_data;
	if (ns && kref_get_unless_zero(&ns->dev->kref))
		ret = 0;
	spin_unlock(&dev_list_lock);

	return ret;
}
static void nvme_free_dev(struct kref *kref);

/*
 * Release a namespace block device: drop a reference on the device's kref;
 * nvme_free_dev() runs when the last reference goes away.
 */
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
	struct nvme_ns *ns = disk->private_data;

	kref_put(&ns->dev->kref, nvme_free_dev);
}
2014-04-03 01:45:37 +04:00
static int nvme_getgeo ( struct block_device * bd , struct hd_geometry * geo )
{
/* some standard values */
geo - > heads = 1 < < 6 ;
geo - > sectors = 1 < < 5 ;
geo - > cylinders = get_capacity ( bd - > bd_disk ) > > 11 ;
return 0 ;
}
2014-09-11 03:21:14 +04:00
static int nvme_revalidate_disk ( struct gendisk * disk )
{
struct nvme_ns * ns = disk - > private_data ;
struct nvme_dev * dev = ns - > dev ;
struct nvme_id_ns * id ;
dma_addr_t dma_addr ;
int lbaf ;
id = dma_alloc_coherent ( & dev - > pci_dev - > dev , 4096 , & dma_addr ,
GFP_KERNEL ) ;
if ( ! id ) {
dev_warn ( & dev - > pci_dev - > dev , " %s: Memory alocation failure \n " ,
__func__ ) ;
return 0 ;
}
if ( nvme_identify ( dev , ns - > ns_id , 0 , dma_addr ) )
goto free ;
lbaf = id - > flbas & 0xf ;
ns - > lba_shift = id - > lbaf [ lbaf ] . ds ;
blk_queue_logical_block_size ( ns - > queue , 1 < < ns - > lba_shift ) ;
set_capacity ( disk , le64_to_cpup ( & id - > nsze ) < < ( ns - > lba_shift - 9 ) ) ;
free :
dma_free_coherent ( & dev - > pci_dev - > dev , 4096 , id , dma_addr ) ;
return 0 ;
}
2011-01-20 20:50:14 +03:00
static const struct block_device_operations nvme_fops = {
. owner = THIS_MODULE ,
. ioctl = nvme_ioctl ,
2013-10-23 23:07:34 +04:00
. compat_ioctl = nvme_compat_ioctl ,
2014-02-01 03:53:39 +04:00
. open = nvme_open ,
. release = nvme_release ,
2014-04-03 01:45:37 +04:00
. getgeo = nvme_getgeo ,
2014-09-11 03:21:14 +04:00
. revalidate_disk = nvme_revalidate_disk ,
2011-01-20 20:50:14 +03:00
} ;
2011-03-03 02:37:18 +03:00
/*
 * Polling thread body.  Until asked to stop, it walks dev_list under
 * dev_list_lock, then sleeps for about one second (HZ jiffies, rounded).
 *
 * For each device:
 *  - if the controller status register has NVME_CSTS_CFS set and the device
 *    is marked initialized, the device is taken off the list and its reset
 *    work is queued, unless that work is already busy;
 *  - otherwise every allocated queue has its completion queue processed
 *    under q_lock, and on queue 0 asynchronous admin requests are submitted
 *    while event_limit is positive.
 *
 * @data is unused.  Return: always 0.
 */
static int nvme_kthread ( void * data )
{
2013-12-11 00:10:37 +04:00
struct nvme_dev * dev , * next ;
2011-03-03 02:37:18 +03:00
while ( ! kthread_should_stop ( ) ) {
2013-05-02 00:38:23 +04:00
/* Set the sleep state before scanning so the schedule_timeout() below sleeps. */
set_current_state ( TASK_INTERRUPTIBLE ) ;
2011-03-03 02:37:18 +03:00
spin_lock ( & dev_list_lock ) ;
2013-12-11 00:10:37 +04:00
/* The _safe iterator is needed because a failed device is unlinked below. */
list_for_each_entry_safe ( dev , next , & dev_list , node ) {
2011-03-03 02:37:18 +03:00
int i ;
2013-12-11 00:10:37 +04:00
/* Failed controller: hand it to the reset work instead of polling it. */
if ( readl ( & dev - > bar - > csts ) & NVME_CSTS_CFS & &
dev - > initialized ) {
/* A reset is already in flight; leave the device alone. */
if ( work_busy ( & dev - > reset_work ) )
continue ;
list_del_init ( & dev - > node ) ;
dev_warn ( & dev - > pci_dev - > dev ,
2014-11-04 18:20:14 +03:00
" Failed status: %x, reset controller \n " ,
readl ( & dev - > bar - > csts ) ) ;
2014-03-07 19:24:49 +04:00
dev - > reset_workfn = nvme_reset_failed_dev ;
2013-12-11 00:10:37 +04:00
queue_work ( nvme_workq , & dev - > reset_work ) ;
continue ;
}
2011-03-03 02:37:18 +03:00
for ( i = 0 ; i < dev - > queue_count ; i + + ) {
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2011-02-16 00:28:20 +03:00
/* Slots in dev->queues may be empty. */
if ( ! nvmeq )
continue ;
2011-03-03 02:37:18 +03:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-06-24 19:56:42 +04:00
nvme_process_cq ( nvmeq ) ;
2014-06-18 23:58:57 +04:00
/* Queue 0 only: submit async admin requests until one fails or the budget is used up. */
while ( ( i = = 0 ) & & ( dev - > event_limit > 0 ) ) {
2014-11-04 18:20:14 +03:00
if ( nvme_submit_async_admin_req ( dev ) )
2014-06-18 23:58:57 +04:00
break ;
dev - > event_limit - - ;
}
2011-03-03 02:37:18 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
}
spin_unlock ( & dev_list_lock ) ;
2013-02-05 02:44:33 +04:00
schedule_timeout ( round_jiffies_relative ( HZ ) ) ;
2011-03-03 02:37:18 +03:00
}
return 0 ;
}
2012-11-10 03:33:05 +04:00
static void nvme_config_discard ( struct nvme_ns * ns )
{
u32 logical_block_size = queue_logical_block_size ( ns - > queue ) ;
ns - > queue - > limits . discard_zeroes_data = 0 ;
ns - > queue - > limits . discard_alignment = logical_block_size ;
ns - > queue - > limits . discard_granularity = logical_block_size ;
ns - > queue - > limits . max_discard_sectors = 0xffffffff ;
queue_flag_set_unlocked ( QUEUE_FLAG_DISCARD , ns - > queue ) ;
}
2013-07-09 01:26:25 +04:00
static struct nvme_ns * nvme_alloc_ns ( struct nvme_dev * dev , unsigned nsid ,
2011-01-20 20:50:14 +03:00
struct nvme_id_ns * id , struct nvme_lba_range_type * rt )
{
struct nvme_ns * ns ;
struct gendisk * disk ;
2014-11-04 18:20:14 +03:00
int node = dev_to_node ( & dev - > pci_dev - > dev ) ;
2011-01-20 20:50:14 +03:00
int lbaf ;
if ( rt - > attributes & NVME_LBART_ATTRIB_HIDE )
return NULL ;
2014-11-04 18:20:14 +03:00
ns = kzalloc_node ( sizeof ( * ns ) , GFP_KERNEL , node ) ;
2011-01-20 20:50:14 +03:00
if ( ! ns )
return NULL ;
2014-11-04 18:20:14 +03:00
ns - > queue = blk_mq_init_queue ( & dev - > tagset ) ;
2014-11-05 23:39:09 +03:00
if ( IS_ERR ( ns - > queue ) )
2011-01-20 20:50:14 +03:00
goto out_free_ns ;
2012-01-11 01:35:08 +04:00
queue_flag_set_unlocked ( QUEUE_FLAG_NOMERGES , ns - > queue ) ;
queue_flag_set_unlocked ( QUEUE_FLAG_NONROT , ns - > queue ) ;
2014-11-04 18:20:14 +03:00
queue_flag_set_unlocked ( QUEUE_FLAG_SG_GAPS , ns - > queue ) ;
2011-01-20 20:50:14 +03:00
ns - > dev = dev ;
ns - > queue - > queuedata = ns ;
2014-11-04 18:20:14 +03:00
disk = alloc_disk_node ( 0 , node ) ;
2011-01-20 20:50:14 +03:00
if ( ! disk )
goto out_free_queue ;
2014-11-04 18:20:14 +03:00
2011-05-06 16:45:47 +04:00
ns - > ns_id = nsid ;
2011-01-20 20:50:14 +03:00
ns - > disk = disk ;
lbaf = id - > flbas & 0xf ;
ns - > lba_shift = id - > lbaf [ lbaf ] . ds ;
2013-04-24 03:23:59 +04:00
ns - > ms = le16_to_cpu ( id - > lbaf [ lbaf ] . ms ) ;
2012-07-25 01:01:04 +04:00
blk_queue_logical_block_size ( ns - > queue , 1 < < ns - > lba_shift ) ;
2012-07-26 21:29:57 +04:00
if ( dev - > max_hw_sectors )
blk_queue_max_hw_sectors ( ns - > queue , dev - > max_hw_sectors ) ;
2014-11-04 18:20:14 +03:00
if ( dev - > stripe_size )
blk_queue_chunk_sectors ( ns - > queue , dev - > stripe_size > > 9 ) ;
2014-04-29 21:41:28 +04:00
if ( dev - > vwc & NVME_CTRL_VWC_PRESENT )
blk_queue_flush ( ns - > queue , REQ_FLUSH | REQ_FUA ) ;
2011-01-20 20:50:14 +03:00
disk - > major = nvme_major ;
2013-12-09 21:58:46 +04:00
disk - > first_minor = 0 ;
2011-01-20 20:50:14 +03:00
disk - > fops = & nvme_fops ;
disk - > private_data = ns ;
disk - > queue = ns - > queue ;
2011-02-01 20:49:38 +03:00
disk - > driverfs_dev = & dev - > pci_dev - > dev ;
2013-12-09 21:58:46 +04:00
disk - > flags = GENHD_FL_EXT_DEVT ;
2011-05-06 16:45:47 +04:00
sprintf ( disk - > disk_name , " nvme%dn%d " , dev - > instance , nsid ) ;
2011-01-20 20:50:14 +03:00
set_capacity ( disk , le64_to_cpup ( & id - > nsze ) < < ( ns - > lba_shift - 9 ) ) ;
2012-11-10 03:33:05 +04:00
if ( dev - > oncs & NVME_CTRL_ONCS_DSM )
nvme_config_discard ( ns ) ;
2011-01-20 20:50:14 +03:00
return ns ;
out_free_queue :
blk_cleanup_queue ( ns - > queue ) ;
out_free_ns :
kfree ( ns ) ;
return NULL ;
}
2014-03-24 20:46:25 +04:00
static void nvme_create_io_queues ( struct nvme_dev * dev )
{
2014-11-04 18:20:14 +03:00
unsigned i ;
2014-03-24 20:46:25 +04:00
2014-11-04 18:20:14 +03:00
for ( i = dev - > queue_count ; i < = dev - > max_qid ; i + + )
2014-12-22 22:59:04 +03:00
if ( ! nvme_alloc_queue ( dev , i , dev - > q_depth ) )
2014-03-24 20:46:25 +04:00
break ;
2014-11-04 18:20:14 +03:00
for ( i = dev - > online_queues ; i < = dev - > queue_count - 1 ; i + + )
if ( nvme_create_queue ( dev - > queues [ i ] , i ) )
2014-03-24 20:46:25 +04:00
break ;
}
2011-01-20 17:14:34 +03:00
static int set_queue_count ( struct nvme_dev * dev , int count )
2011-01-20 20:50:14 +03:00
{
int status ;
u32 result ;
2011-01-20 17:14:34 +03:00
u32 q_count = ( count - 1 ) | ( ( count - 1 ) < < 16 ) ;
2011-01-20 20:50:14 +03:00
2012-01-11 18:29:56 +04:00
status = nvme_set_features ( dev , NVME_FEAT_NUM_QUEUES , q_count , 0 ,
2011-09-20 01:08:14 +04:00
& result ) ;
2014-04-11 19:58:45 +04:00
if ( status < 0 )
return status ;
if ( status > 0 ) {
dev_err ( & dev - > pci_dev - > dev , " Could not set queue count (%d) \n " ,
status ) ;
2014-06-24 00:25:35 +04:00
return 0 ;
2014-04-11 19:58:45 +04:00
}
2011-01-20 20:50:14 +03:00
return min ( result & 0xffff , result > > 16 ) + 1 ;
}
2013-07-16 01:02:24 +04:00
static size_t db_bar_size ( struct nvme_dev * dev , unsigned nr_io_queues )
{
2013-09-10 07:25:37 +04:00
return 4096 + ( ( nr_io_queues + 1 ) * 8 * dev - > db_stride ) ;
2013-07-16 01:02:24 +04:00
}
2012-12-22 03:13:49 +04:00
/*
 * Negotiate the number of I/O queues with the controller, size the BAR
 * mapping and interrupt vectors to match, then create the I/O queues.
 *
 * Steps, in order:
 *  1. request one queue per possible CPU and accept what the controller
 *     grants;
 *  2. if the doorbells no longer fit in the 8192-byte mapping, remap the
 *     BAR, lowering the queue count until the mapping succeeds;
 *  3. free the admin queue's interrupt, enable MSI-X (falling back to MSI,
 *     then to a single vector) and re-request the admin interrupt;
 *  4. free queues beyond the new limit and create the I/O queues.
 *
 * Return: 0 on success - also when set_queue_count() returned 0, in which
 * case nothing past step 1 is done; a negative value if set_queue_count()
 * fails; -ENOMEM if the BAR cannot be remapped even for one queue; or the
 * error from queue_request_irq().
 */
static int nvme_setup_io_queues ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
struct nvme_queue * adminq = dev - > queues [ 0 ] ;
2013-05-12 02:19:31 +04:00
struct pci_dev * pdev = dev - > pci_dev ;
2014-03-24 20:46:25 +04:00
int result , i , vecs , nr_io_queues , size ;
2011-01-20 20:50:14 +03:00
2014-03-24 20:46:25 +04:00
nr_io_queues = num_possible_cpus ( ) ;
2011-02-16 00:16:02 +03:00
result = set_queue_count ( dev , nr_io_queues ) ;
2014-06-24 00:25:35 +04:00
/* Negative: error.  Zero: the controller rejected the request; reported as success. */
if ( result < = 0 )
2011-01-20 21:01:49 +03:00
return result ;
2011-02-16 00:16:02 +03:00
if ( result < nr_io_queues )
nr_io_queues = result ;
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:24 +04:00
size = db_bar_size ( dev , nr_io_queues ) ;
/* The current mapping may be too small for this many doorbells. */
if ( size > 8192 ) {
2011-10-21 01:00:41 +04:00
iounmap ( dev - > bar ) ;
2013-07-16 01:02:24 +04:00
/* Retry with one queue fewer each time the mapping fails. */
do {
dev - > bar = ioremap ( pci_resource_start ( pdev , 0 ) , size ) ;
if ( dev - > bar )
break ;
if ( ! - - nr_io_queues )
return - ENOMEM ;
size = db_bar_size ( dev , nr_io_queues ) ;
} while ( 1 ) ;
2011-10-21 01:00:41 +04:00
/* The BAR moved, so pointers derived from the old mapping must be refreshed. */
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
2014-02-22 01:13:44 +04:00
adminq - > q_db = dev - > dbs ;
2011-10-21 01:00:41 +04:00
}
2013-07-16 01:02:24 +04:00
/* Deregister the admin queue's interrupt */
2014-01-28 00:57:22 +04:00
free_irq ( dev - > entry [ 0 ] . vector , adminq ) ;
2013-07-16 01:02:24 +04:00
2014-11-14 19:49:26 +03:00
/*
 * If MSI-X was enabled early because the device has no INTx interrupt,
 * disable it again before setting up the full range we need.
 */
if ( ! pdev - > irq )
pci_disable_msix ( pdev ) ;
2014-03-04 19:22:00 +04:00
for ( i = 0 ; i < nr_io_queues ; i + + )
2011-01-20 21:01:49 +03:00
dev - > entry [ i ] . entry = i ;
2014-03-04 19:22:00 +04:00
vecs = pci_enable_msix_range ( pdev , dev - > entry , 1 , nr_io_queues ) ;
/* Fallback chain: MSI-X, then MSI (at most 32 vectors), then one vector. */
if ( vecs < 0 ) {
vecs = pci_enable_msi_range ( pdev , 1 , min ( nr_io_queues , 32 ) ) ;
if ( vecs < 0 ) {
vecs = 1 ;
} else {
for ( i = 0 ; i < vecs ; i + + )
dev - > entry [ i ] . vector = i + pdev - > irq ;
2013-05-12 02:19:31 +04:00
}
}
2013-06-20 18:53:48 +04:00
/*
 * Should investigate if there's a performance win from allocating
 * more queues than interrupt vectors; it might allow the submission
 * path to scale better, even if the receive path is limited by the
 * number of interrupts.
 */
nr_io_queues = vecs ;
2014-03-24 20:46:25 +04:00
dev - > max_qid = nr_io_queues ;
2013-06-20 18:53:48 +04:00
2014-01-28 00:57:22 +04:00
result = queue_request_irq ( dev , adminq , adminq - > irqname ) ;
2014-11-04 18:20:14 +03:00
if ( result )
2013-07-16 01:02:20 +04:00
goto free_queues ;
2011-01-20 21:01:49 +03:00
2013-07-16 01:02:23 +04:00
/* Free previously allocated queues that are no longer usable */
2014-03-24 20:46:25 +04:00
nvme_free_queues ( dev , nr_io_queues + 1 ) ;
2014-11-04 18:20:14 +03:00
nvme_create_io_queues ( dev ) ;
2011-03-16 23:52:19 +03:00
2013-07-16 01:02:20 +04:00
return 0 ;
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:20 +04:00
free_queues :
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 1 ) ;
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
}
2013-04-16 19:22:36 +04:00
/*
* Return : error value if an error occurred setting up the queues or calling
* Identify Device . 0 if these succeeded , even if adding some of the
* namespaces failed . At the moment , these failures are silent . TBD which
* failures should be reported .
*/
2012-12-22 03:13:49 +04:00
static int nvme_dev_add ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-06-21 22:36:34 +04:00
struct pci_dev * pdev = dev - > pci_dev ;
2013-07-09 01:26:25 +04:00
int res ;
unsigned nn , i ;
2013-05-01 23:07:49 +04:00
struct nvme_ns * ns ;
2011-02-02 00:18:08 +03:00
struct nvme_id_ctrl * ctrl ;
2011-09-20 01:08:14 +04:00
struct nvme_id_ns * id_ns ;
void * mem ;
2011-01-20 20:50:14 +03:00
dma_addr_t dma_addr ;
2013-04-10 03:13:20 +04:00
int shift = NVME_CAP_MPSMIN ( readq ( & dev - > bar - > cap ) ) + 12 ;
2011-01-20 20:50:14 +03:00
2013-06-21 22:36:34 +04:00
mem = dma_alloc_coherent ( & pdev - > dev , 8192 , & dma_addr , GFP_KERNEL ) ;
2013-05-01 23:07:48 +04:00
if ( ! mem )
return - ENOMEM ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , 0 , 1 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res ) {
2014-04-11 19:58:45 +04:00
dev_err ( & pdev - > dev , " Identify Controller failed (%d) \n " , res ) ;
2011-01-20 20:50:14 +03:00
res = - EIO ;
2013-05-01 23:07:49 +04:00
goto out ;
2011-01-20 20:50:14 +03:00
}
2011-09-20 01:08:14 +04:00
ctrl = mem ;
2011-02-02 00:18:08 +03:00
nn = le32_to_cpup ( & ctrl - > nn ) ;
2012-11-10 03:33:05 +04:00
dev - > oncs = le16_to_cpup ( & ctrl - > oncs ) ;
2013-12-11 00:10:38 +04:00
dev - > abort_limit = ctrl - > acl + 1 ;
2014-04-29 21:41:28 +04:00
dev - > vwc = ctrl - > vwc ;
2014-06-18 23:58:57 +04:00
dev - > event_limit = min ( ctrl - > aerl + 1 , 8 ) ;
2011-02-02 00:18:08 +03:00
memcpy ( dev - > serial , ctrl - > sn , sizeof ( ctrl - > sn ) ) ;
memcpy ( dev - > model , ctrl - > mn , sizeof ( ctrl - > mn ) ) ;
memcpy ( dev - > firmware_rev , ctrl - > fr , sizeof ( ctrl - > fr ) ) ;
2013-04-10 03:13:20 +04:00
if ( ctrl - > mdts )
2012-07-26 21:29:57 +04:00
dev - > max_hw_sectors = 1 < < ( ctrl - > mdts + shift - 9 ) ;
2013-06-21 22:36:34 +04:00
if ( ( pdev - > vendor = = PCI_VENDOR_ID_INTEL ) & &
2014-11-04 18:20:14 +03:00
( pdev - > device = = 0x0953 ) & & ctrl - > vs [ 3 ] ) {
unsigned int max_hw_sectors ;
2013-04-10 03:13:20 +04:00
dev - > stripe_size = 1 < < ( ctrl - > vs [ 3 ] + shift ) ;
2014-11-04 18:20:14 +03:00
max_hw_sectors = dev - > stripe_size > > ( shift - 9 ) ;
if ( dev - > max_hw_sectors ) {
dev - > max_hw_sectors = min ( max_hw_sectors ,
dev - > max_hw_sectors ) ;
} else
dev - > max_hw_sectors = max_hw_sectors ;
}
dev - > tagset . ops = & nvme_mq_ops ;
dev - > tagset . nr_hw_queues = dev - > online_queues - 1 ;
dev - > tagset . timeout = NVME_IO_TIMEOUT ;
dev - > tagset . numa_node = dev_to_node ( & dev - > pci_dev - > dev ) ;
dev - > tagset . queue_depth =
min_t ( int , dev - > q_depth , BLK_MQ_MAX_DEPTH ) - 1 ;
dev - > tagset . cmd_size = sizeof ( struct nvme_cmd_info ) ;
dev - > tagset . flags = BLK_MQ_F_SHOULD_MERGE ;
dev - > tagset . driver_data = dev ;
if ( blk_mq_alloc_tag_set ( & dev - > tagset ) )
goto out ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
id_ns = mem ;
2011-10-07 21:10:13 +04:00
for ( i = 1 ; i < = nn ; i + + ) {
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , i , 0 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res )
continue ;
2011-09-20 01:08:14 +04:00
if ( id_ns - > ncap = = 0 )
2011-01-20 20:50:14 +03:00
continue ;
2011-09-20 01:08:14 +04:00
res = nvme_get_features ( dev , NVME_FEAT_LBA_RANGE , i ,
2012-09-21 20:52:13 +04:00
dma_addr + 4096 , NULL ) ;
2011-01-20 20:50:14 +03:00
if ( res )
2013-02-01 01:40:38 +04:00
memset ( mem + 4096 , 0 , 4096 ) ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
ns = nvme_alloc_ns ( dev , i , mem , mem + 4096 ) ;
2011-01-20 20:50:14 +03:00
if ( ns )
list_add_tail ( & ns - > list , & dev - > namespaces ) ;
}
list_for_each_entry ( ns , & dev - > namespaces , list )
add_disk ( ns - > disk ) ;
2013-04-16 19:22:36 +04:00
res = 0 ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
out :
2011-09-20 01:14:53 +04:00
dma_free_coherent ( & dev - > pci_dev - > dev , 8192 , mem , dma_addr ) ;
2011-01-20 20:50:14 +03:00
return res ;
}
2013-07-16 01:02:19 +04:00
/*
 * Enable the PCI device, claim its memory BARs, set the DMA mask, map the
 * first 8192 bytes of BAR 0 and read the basic controller capabilities
 * (queue depth, doorbell stride).
 *
 * Return: 0 on success.  -ENODEV if the status register reads back as all
 * ones, the pci_enable_msix() error if the early MSI-X setup fails, and
 * -ENOMEM for every other failure.  On failure everything acquired so far
 * is released again.
 */
static int nvme_dev_map ( struct nvme_dev * dev )
{
2014-03-24 20:46:25 +04:00
u64 cap ;
2013-07-16 01:02:19 +04:00
int bars , result = - ENOMEM ;
struct pci_dev * pdev = dev - > pci_dev ;
if ( pci_enable_device_mem ( pdev ) )
return result ;
dev - > entry [ 0 ] . vector = pdev - > irq ;
pci_set_master ( pdev ) ;
bars = pci_select_bars ( pdev , IORESOURCE_MEM ) ;
2014-11-14 19:50:19 +03:00
/* No memory BARs at all: nothing to map. */
if ( ! bars )
goto disable_pci ;
2013-07-16 01:02:19 +04:00
if ( pci_request_selected_regions ( pdev , bars , " nvme " ) )
goto disable_pci ;
2013-06-27 02:49:11 +04:00
/* Prefer a 64-bit DMA mask; fall back to 32-bit; give up if both fail. */
if ( dma_set_mask_and_coherent ( & pdev - > dev , DMA_BIT_MASK ( 64 ) ) & &
dma_set_mask_and_coherent ( & pdev - > dev , DMA_BIT_MASK ( 32 ) ) )
goto disable ;
2013-07-16 01:02:19 +04:00
dev - > bar = ioremap ( pci_resource_start ( pdev , 0 ) , 8192 ) ;
if ( ! dev - > bar )
goto disable ;
2014-11-14 19:49:26 +03:00
2013-12-11 00:10:39 +04:00
/* An all-ones status register is treated as "no device". */
if ( readl ( & dev - > bar - > csts ) = = - 1 ) {
result = - ENODEV ;
goto unmap ;
}
2014-11-14 19:49:26 +03:00
/*
 * Some devices don't advertise INTx interrupts; pre-enable a single
 * MSI-X vector for setup.  We'll adjust this later.
 */
if ( ! pdev - > irq ) {
result = pci_enable_msix ( pdev , dev - > entry , 1 ) ;
if ( result < 0 )
goto unmap ;
}
2014-03-24 20:46:25 +04:00
cap = readq ( & dev - > bar - > cap ) ;
/* Queue depth is capped at NVME_Q_DEPTH. */
dev - > q_depth = min_t ( int , NVME_CAP_MQES ( cap ) + 1 , NVME_Q_DEPTH ) ;
dev - > db_stride = 1 < < NVME_CAP_STRIDE ( cap ) ;
2013-07-16 01:02:19 +04:00
/* The doorbell registers start 4096 bytes into the BAR. */
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
return 0 ;
2013-12-11 00:10:39 +04:00
/* Error unwind: each label undoes one step and falls through to the next. */
unmap :
iounmap ( dev - > bar ) ;
dev - > bar = NULL ;
2013-07-16 01:02:19 +04:00
disable :
pci_release_regions ( pdev ) ;
disable_pci :
pci_disable_device ( pdev ) ;
return result ;
}
static void nvme_dev_unmap ( struct nvme_dev * dev )
{
if ( dev - > pci_dev - > msi_enabled )
pci_disable_msi ( dev - > pci_dev ) ;
else if ( dev - > pci_dev - > msix_enabled )
pci_disable_msix ( dev - > pci_dev ) ;
if ( dev - > bar ) {
iounmap ( dev - > bar ) ;
dev - > bar = NULL ;
2013-12-11 00:10:36 +04:00
pci_release_regions ( dev - > pci_dev ) ;
2013-07-16 01:02:19 +04:00
}
if ( pci_is_enabled ( dev - > pci_dev ) )
pci_disable_device ( dev - > pci_dev ) ;
}
2013-12-11 00:10:40 +04:00
/* Shared state for one round of asynchronous I/O queue deletion. */
struct nvme_delq_ctx {
	/* Task to wake from nvme_put_dq(); set by nvme_wait_dq(). */
	struct task_struct *waiter;
	/* kthread worker that runs the per-queue deletion work items. */
	struct kthread_worker *worker;
	/* Outstanding deletions: nvme_get_dq() adds, nvme_put_dq() drops. */
	atomic_t refcount;
};
/*
 * Wait until every outstanding queue deletion tracked by @dq has finished,
 * i.e. until dq->refcount drops to zero.
 *
 * The caller registers itself as dq->waiter so nvme_put_dq() can wake it.
 * If a wait of ADMIN_TIMEOUT expires, or a fatal signal is pending, the
 * controller is disabled and the remaining work is flushed instead.
 */
static void nvme_wait_dq ( struct nvme_delq_ctx * dq , struct nvme_dev * dev )
{
dq - > waiter = current ;
mb ( ) ;
for ( ; ; ) {
/* Set the task state before testing the count, so a wake-up in between is not lost. */
set_current_state ( TASK_KILLABLE ) ;
if ( ! atomic_read ( & dq - > refcount ) )
break ;
/* schedule_timeout() returning 0 means the full timeout elapsed. */
if ( ! schedule_timeout ( ADMIN_TIMEOUT ) | |
fatal_signal_pending ( current ) ) {
2015-01-08 04:55:50 +03:00
/*
 * Disable the controller first since we can't trust it
 * at this point, but leave the admin queue enabled
 * until all queue deletion requests are flushed.
 * FIXME: This may take a while if there are more h/w
 * queues than admin tags.
 */
2013-12-11 00:10:40 +04:00
set_current_state ( TASK_RUNNING ) ;
nvme_disable_ctrl ( dev , readq ( & dev - > bar - > cap ) ) ;
2015-01-08 04:55:50 +03:00
nvme_clear_queue ( dev - > queues [ 0 ] ) ;
2013-12-11 00:10:40 +04:00
flush_kthread_worker ( dq - > worker ) ;
2015-01-08 04:55:50 +03:00
nvme_disable_queue ( dev , 0 ) ;
2013-12-11 00:10:40 +04:00
return ;
}
}
set_current_state ( TASK_RUNNING ) ;
}
static void nvme_put_dq ( struct nvme_delq_ctx * dq )
{
atomic_dec ( & dq - > refcount ) ;
if ( dq - > waiter )
wake_up_process ( dq - > waiter ) ;
}
static struct nvme_delq_ctx * nvme_get_dq ( struct nvme_delq_ctx * dq )
{
atomic_inc ( & dq - > refcount ) ;
return dq ;
}
static void nvme_del_queue_end ( struct nvme_queue * nvmeq )
{
struct nvme_delq_ctx * dq = nvmeq - > cmdinfo . ctx ;
nvme_clear_queue ( nvmeq ) ;
nvme_put_dq ( dq ) ;
}
/*
 * Submit an asynchronous delete-queue admin command for @nvmeq.
 *
 * @opcode: admin opcode to place in the command
 * @fn:     work function installed on the queue's cmdinfo work item before
 *          the command is submitted
 *
 * Return: the result of nvme_submit_admin_async_cmd().
 */
static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
						kthread_work_func_t fn)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);

	init_kthread_work(&nvmeq->cmdinfo.work, fn);

	return nvme_submit_admin_async_cmd(nvmeq->dev, &cmd, &nvmeq->cmdinfo,
								ADMIN_TIMEOUT);
}
/*
 * Work function installed by nvme_delete_cq(): finish the deletion of the
 * queue that embeds @work.
 */
static void nvme_del_cq_work_handler(struct kthread_work *work)
{
	struct nvme_queue *q = container_of(work, struct nvme_queue,
							cmdinfo.work);

	nvme_del_queue_end(q);
}
/*
 * Submit the asynchronous delete-completion-queue command for @nvmeq, with
 * nvme_del_cq_work_handler() installed as the follow-up work.
 *
 * Return: the result of adapter_async_del_queue().
 */
static int nvme_delete_cq(struct nvme_queue *nvmeq)
{
	int ret;

	ret = adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
						nvme_del_cq_work_handler);
	return ret;
}
static void nvme_del_sq_work_handler ( struct kthread_work * work )
{
struct nvme_queue * nvmeq = container_of ( work , struct nvme_queue ,
cmdinfo . work ) ;
int status = nvmeq - > cmdinfo . status ;
if ( ! status )
status = nvme_delete_cq ( nvmeq ) ;
if ( status )
nvme_del_queue_end ( nvmeq ) ;
}
/*
 * Submit the asynchronous delete-submission-queue command for @nvmeq, with
 * nvme_del_sq_work_handler() installed as the follow-up work.
 *
 * Return: the result of adapter_async_del_queue().
 */
static int nvme_delete_sq(struct nvme_queue *nvmeq)
{
	int ret;

	ret = adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
						nvme_del_sq_work_handler);
	return ret;
}
/*
 * First work item of a queue deletion: start by deleting the submission
 * queue.  If that command cannot be submitted, finish the deletion
 * immediately.
 */
static void nvme_del_queue_start(struct kthread_work *work)
{
	struct nvme_queue *q = container_of(work, struct nvme_queue,
							cmdinfo.work);

	if (nvme_delete_sq(q) != 0)
		nvme_del_queue_end(q);
}
/*
 * Delete all I/O queues (every queue except queue 0), highest id first.
 *
 * A temporary kthread worker runs the per-queue deletion work items, and
 * the function waits for all of them in nvme_wait_dq() before stopping the
 * worker thread.  If the worker thread cannot be created, each I/O queue
 * is disabled directly with nvme_disable_queue() instead.
 */
static void nvme_disable_io_queues ( struct nvme_dev * dev )
{
int i ;
/* The worker and the deletion context live on this stack frame, so the function must not return while work is still pending. */
DEFINE_KTHREAD_WORKER_ONSTACK ( worker ) ;
struct nvme_delq_ctx dq ;
struct task_struct * kworker_task = kthread_run ( kthread_worker_fn ,
& worker , " nvme%d " , dev - > instance ) ;
/* Fallback: no worker thread, so disable each I/O queue directly. */
if ( IS_ERR ( kworker_task ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Failed to create queue del task \n " ) ;
for ( i = dev - > queue_count - 1 ; i > 0 ; i - - )
nvme_disable_queue ( dev , i ) ;
return ;
}
dq . waiter = NULL ;
atomic_set ( & dq . refcount , 0 ) ;
dq . worker = & worker ;
for ( i = dev - > queue_count - 1 ; i > 0 ; i - - ) {
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2013-12-11 00:10:40 +04:00
/* A non-zero return from nvme_suspend_queue() means this queue is skipped. */
if ( nvme_suspend_queue ( nvmeq ) )
continue ;
/* Each queued deletion holds one reference on dq until nvme_del_queue_end(). */
nvmeq - > cmdinfo . ctx = nvme_get_dq ( & dq ) ;
nvmeq - > cmdinfo . worker = dq . worker ;
init_kthread_work ( & nvmeq - > cmdinfo . work , nvme_del_queue_start ) ;
queue_kthread_work ( dq . worker , & nvmeq - > cmdinfo . work ) ;
}
nvme_wait_dq ( & dq , dev ) ;
kthread_stop ( kworker_task ) ;
}
2014-04-08 03:10:11 +04:00
/*
* Remove the node from the device list and check
* for whether or not we need to stop the nvme_thread .
*/
static void nvme_dev_list_remove ( struct nvme_dev * dev )
{
struct task_struct * tmp = NULL ;
spin_lock ( & dev_list_lock ) ;
list_del_init ( & dev - > node ) ;
if ( list_empty ( & dev_list ) & & ! IS_ERR_OR_NULL ( nvme_thread ) ) {
tmp = nvme_thread ;
nvme_thread = NULL ;
}
spin_unlock ( & dev_list_lock ) ;
if ( tmp )
kthread_stop ( tmp ) ;
}
2015-01-08 04:55:52 +03:00
static void nvme_freeze_queues ( struct nvme_dev * dev )
{
struct nvme_ns * ns ;
list_for_each_entry ( ns , & dev - > namespaces , list ) {
blk_mq_freeze_queue_start ( ns - > queue ) ;
spin_lock ( ns - > queue - > queue_lock ) ;
queue_flag_set ( QUEUE_FLAG_STOPPED , ns - > queue ) ;
spin_unlock ( ns - > queue - > queue_lock ) ;
blk_mq_cancel_requeue_work ( ns - > queue ) ;
blk_mq_stop_hw_queues ( ns - > queue ) ;
}
}
static void nvme_unfreeze_queues ( struct nvme_dev * dev )
{
struct nvme_ns * ns ;
list_for_each_entry ( ns , & dev - > namespaces , list ) {
queue_flag_clear_unlocked ( QUEUE_FLAG_STOPPED , ns - > queue ) ;
blk_mq_unfreeze_queue ( ns - > queue ) ;
blk_mq_start_stopped_hw_queues ( ns - > queue , true ) ;
blk_mq_kick_requeue_list ( ns - > queue ) ;
}
}
2013-07-16 01:02:21 +04:00
/*
 * Quiesce and shut down the controller, then release its PCI resources.
 *
 * The device is marked uninitialized and taken off the device list.  If
 * the BAR is mapped, the namespace queues are frozen and the controller
 * status is read.  If the controller reports a fatal status, is not ready,
 * or the BAR is not mapped, every queue is only suspended and cleared.
 * Otherwise the I/O queues are deleted, the controller is shut down and
 * the admin queue is disabled.
 */
static void nvme_dev_shutdown ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-07-16 01:02:20 +04:00
int i ;
2014-06-25 21:18:12 +04:00
/* All ones by default, so an unmapped BAR takes the "failed" branch below. */
u32 csts = - 1 ;
2013-07-16 01:02:20 +04:00
2013-12-11 00:10:37 +04:00
dev - > initialized = 0 ;
2014-04-08 03:10:11 +04:00
nvme_dev_list_remove ( dev ) ;
2011-03-03 02:37:18 +03:00
2015-01-08 04:55:52 +03:00
if ( dev - > bar ) {
nvme_freeze_queues ( dev ) ;
2014-06-25 21:18:12 +04:00
csts = readl ( & dev - > bar - > csts ) ;
2015-01-08 04:55:52 +03:00
}
2014-06-25 21:18:12 +04:00
/* Controller failed or not ready: issue no admin commands, only tear down queue state (queue 0 included). */
if ( csts & NVME_CSTS_CFS | | ! ( csts & NVME_CSTS_RDY ) ) {
2013-12-11 00:10:40 +04:00
for ( i = dev - > queue_count - 1 ; i > = 0 ; i - - ) {
2014-11-04 18:20:14 +03:00
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2013-12-11 00:10:40 +04:00
nvme_suspend_queue ( nvmeq ) ;
nvme_clear_queue ( nvmeq ) ;
}
} else {
/* Orderly path: I/O queues first, then the controller, then the admin queue. */
nvme_disable_io_queues ( dev ) ;
2013-07-16 01:02:22 +04:00
nvme_shutdown_ctrl ( dev ) ;
2013-12-11 00:10:40 +04:00
nvme_disable_queue ( dev , 0 ) ;
}
2013-07-16 01:02:21 +04:00
nvme_dev_unmap ( dev ) ;
}
static void nvme_dev_remove ( struct nvme_dev * dev )
{
2014-02-01 03:53:39 +04:00
struct nvme_ns * ns ;
2013-07-16 01:02:21 +04:00
2014-02-01 03:53:39 +04:00
list_for_each_entry ( ns , & dev - > namespaces , list ) {
if ( ns - > disk - > flags & GENHD_FL_UP )
del_gendisk ( ns - > disk ) ;
2015-01-08 04:55:51 +03:00
if ( ! blk_queue_dying ( ns - > queue ) ) {
blk_mq_abort_requeue_list ( ns - > queue ) ;
2014-02-01 03:53:39 +04:00
blk_cleanup_queue ( ns - > queue ) ;
2015-01-08 04:55:51 +03:00
}
2011-01-20 20:50:14 +03:00
}
}
2011-02-10 17:56:01 +03:00
static int nvme_setup_prp_pools ( struct nvme_dev * dev )
{
struct device * dmadev = & dev - > pci_dev - > dev ;
dev - > prp_page_pool = dma_pool_create ( " prp list page " , dmadev ,
PAGE_SIZE , PAGE_SIZE , 0 ) ;
if ( ! dev - > prp_page_pool )
return - ENOMEM ;
2011-02-10 18:30:34 +03:00
/* Optimisation for I/Os between 4k and 128k */
dev - > prp_small_pool = dma_pool_create ( " prp list 256 " , dmadev ,
256 , 256 , 0 ) ;
if ( ! dev - > prp_small_pool ) {
dma_pool_destroy ( dev - > prp_page_pool ) ;
return - ENOMEM ;
}
2011-02-10 17:56:01 +03:00
return 0 ;
}
static void nvme_release_prp_pools ( struct nvme_dev * dev )
{
dma_pool_destroy ( dev - > prp_page_pool ) ;
2011-02-10 18:30:34 +03:00
dma_pool_destroy ( dev - > prp_small_pool ) ;
2011-02-10 17:56:01 +03:00
}
2012-02-22 03:50:53 +04:00
/*
 * ID allocator for controller instance numbers (dev->instance).  Accesses in
 * nvme_set_instance() and nvme_release_instance() are made under
 * dev_list_lock.
 */
static DEFINE_IDA ( nvme_instance_ida ) ;
static int nvme_set_instance ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2012-02-22 03:50:53 +04:00
int instance , error ;
do {
if ( ! ida_pre_get ( & nvme_instance_ida , GFP_KERNEL ) )
return - ENODEV ;
spin_lock ( & dev_list_lock ) ;
error = ida_get_new ( & nvme_instance_ida , & instance ) ;
spin_unlock ( & dev_list_lock ) ;
} while ( error = = - EAGAIN ) ;
if ( error )
return - ENODEV ;
dev - > instance = instance ;
return 0 ;
2011-01-20 20:50:14 +03:00
}
static void nvme_release_instance ( struct nvme_dev * dev )
{
2012-02-22 03:50:53 +04:00
spin_lock ( & dev_list_lock ) ;
ida_remove ( & nvme_instance_ida , dev - > instance ) ;
spin_unlock ( & dev_list_lock ) ;
2011-01-20 20:50:14 +03:00
}
2014-02-01 03:53:39 +04:00
static void nvme_free_namespaces ( struct nvme_dev * dev )
{
struct nvme_ns * ns , * next ;
list_for_each_entry_safe ( ns , next , & dev - > namespaces , list ) {
list_del ( & ns - > list ) ;
2014-10-03 21:15:47 +04:00
spin_lock ( & dev_list_lock ) ;
ns - > disk - > private_data = NULL ;
spin_unlock ( & dev_list_lock ) ;
2014-02-01 03:53:39 +04:00
put_disk ( ns - > disk ) ;
kfree ( ns ) ;
}
}
2013-02-19 21:17:58 +04:00
static void nvme_free_dev ( struct kref * kref )
{
struct nvme_dev * dev = container_of ( kref , struct nvme_dev , kref ) ;
2014-02-01 03:53:39 +04:00
2014-08-20 05:15:59 +04:00
pci_dev_put ( dev - > pci_dev ) ;
2014-02-01 03:53:39 +04:00
nvme_free_namespaces ( dev ) ;
2014-12-11 18:24:18 +03:00
nvme_release_instance ( dev ) ;
2014-11-04 18:20:14 +03:00
blk_mq_free_tag_set ( & dev - > tagset ) ;
2015-01-08 04:55:49 +03:00
blk_put_queue ( dev - > admin_q ) ;
2013-02-19 21:17:58 +04:00
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
}
static int nvme_dev_open ( struct inode * inode , struct file * f )
{
struct nvme_dev * dev = container_of ( f - > private_data , struct nvme_dev ,
miscdev ) ;
kref_get ( & dev - > kref ) ;
f - > private_data = dev ;
return 0 ;
}
static int nvme_dev_release ( struct inode * inode , struct file * f )
{
struct nvme_dev * dev = f - > private_data ;
kref_put ( & dev - > kref , nvme_free_dev ) ;
return 0 ;
}
static long nvme_dev_ioctl ( struct file * f , unsigned int cmd , unsigned long arg )
{
struct nvme_dev * dev = f - > private_data ;
2014-11-04 18:20:14 +03:00
struct nvme_ns * ns ;
2013-02-19 21:17:58 +04:00
switch ( cmd ) {
case NVME_IOCTL_ADMIN_CMD :
2014-11-04 18:20:14 +03:00
return nvme_user_cmd ( dev , NULL , ( void __user * ) arg ) ;
2014-09-13 02:07:20 +04:00
case NVME_IOCTL_IO_CMD :
2014-11-04 18:20:14 +03:00
if ( list_empty ( & dev - > namespaces ) )
return - ENOTTY ;
ns = list_first_entry ( & dev - > namespaces , struct nvme_ns , list ) ;
return nvme_user_cmd ( dev , ns , ( void __user * ) arg ) ;
2013-02-19 21:17:58 +04:00
default :
return - ENOTTY ;
}
}
/*
 * File operations for the controller's misc character device.  The same
 * handler serves both the native and the compat ioctl entry points.
 */
static const struct file_operations nvme_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= nvme_dev_open,
	.release	= nvme_dev_release,
	.unlocked_ioctl	= nvme_dev_ioctl,
	.compat_ioctl	= nvme_dev_ioctl,
};
2014-11-04 18:20:14 +03:00
static void nvme_set_irq_hints ( struct nvme_dev * dev )
{
struct nvme_queue * nvmeq ;
int i ;
for ( i = 0 ; i < dev - > online_queues ; i + + ) {
nvmeq = dev - > queues [ i ] ;
if ( ! nvmeq - > hctx )
continue ;
irq_set_affinity_hint ( dev - > entry [ nvmeq - > cq_vector ] . vector ,
nvmeq - > hctx - > cpumask ) ;
}
}
2013-07-16 01:02:21 +04:00
static int nvme_dev_start ( struct nvme_dev * dev )
{
int result ;
2014-04-08 03:10:11 +04:00
bool start_thread = false ;
2013-07-16 01:02:21 +04:00
result = nvme_dev_map ( dev ) ;
if ( result )
return result ;
result = nvme_configure_admin_queue ( dev ) ;
if ( result )
goto unmap ;
spin_lock ( & dev_list_lock ) ;
2014-04-08 03:10:11 +04:00
if ( list_empty ( & dev_list ) & & IS_ERR_OR_NULL ( nvme_thread ) ) {
start_thread = true ;
nvme_thread = NULL ;
}
2013-07-16 01:02:21 +04:00
list_add ( & dev - > node , & dev_list ) ;
spin_unlock ( & dev_list_lock ) ;
2014-04-08 03:10:11 +04:00
if ( start_thread ) {
nvme_thread = kthread_run ( nvme_kthread , NULL , " nvme " ) ;
2014-09-22 23:46:19 +04:00
wake_up_all ( & nvme_kthread_wait ) ;
2014-04-08 03:10:11 +04:00
} else
wait_event_killable ( nvme_kthread_wait , nvme_thread ) ;
if ( IS_ERR_OR_NULL ( nvme_thread ) ) {
result = nvme_thread ? PTR_ERR ( nvme_thread ) : - EINTR ;
goto disable ;
}
2014-11-04 18:20:14 +03:00
nvme_init_queue ( dev - > queues [ 0 ] , 0 ) ;
2015-01-08 04:55:50 +03:00
result = nvme_alloc_admin_tags ( dev ) ;
if ( result )
goto disable ;
2014-04-08 03:10:11 +04:00
2013-07-16 01:02:21 +04:00
result = nvme_setup_io_queues ( dev ) ;
2014-06-24 00:25:35 +04:00
if ( result )
2015-01-08 04:55:50 +03:00
goto free_tags ;
2013-07-16 01:02:21 +04:00
2014-11-04 18:20:14 +03:00
nvme_set_irq_hints ( dev ) ;
2013-09-06 00:45:07 +04:00
return result ;
2013-07-16 01:02:21 +04:00
2015-01-08 04:55:50 +03:00
free_tags :
nvme_dev_remove_admin ( dev ) ;
2013-07-16 01:02:21 +04:00
disable :
2013-12-16 22:50:00 +04:00
nvme_disable_queue ( dev , 0 ) ;
2014-04-08 03:10:11 +04:00
nvme_dev_list_remove ( dev ) ;
2013-07-16 01:02:21 +04:00
unmap :
nvme_dev_unmap ( dev ) ;
return result ;
}
2013-12-11 00:10:36 +04:00
static int nvme_remove_dead_ctrl ( void * arg )
{
struct nvme_dev * dev = ( struct nvme_dev * ) arg ;
struct pci_dev * pdev = dev - > pci_dev ;
if ( pci_get_drvdata ( pdev ) )
2014-06-24 01:24:53 +04:00
pci_stop_and_remove_bus_device_locked ( pdev ) ;
2013-12-11 00:10:36 +04:00
kref_put ( & dev - > kref , nvme_free_dev ) ;
return 0 ;
}
/*
 * Work function installed by nvme_dev_resume() when fewer than two queues
 * came online: frees queues via nvme_free_queues(dev, 1) and removes the
 * namespaces' disks and request queues.
 *
 * NOTE(review): the second argument of nvme_free_queues() is presumably the
 * lowest queue id to free (i.e. the admin queue is kept) -- confirm against
 * its definition.
 */
static void nvme_remove_disks(struct work_struct *ws)
{
	struct nvme_dev *dev;

	dev = container_of(ws, struct nvme_dev, reset_work);
	nvme_free_queues(dev, 1);
	nvme_dev_remove(dev);
}
static int nvme_dev_resume ( struct nvme_dev * dev )
{
int ret ;
ret = nvme_dev_start ( dev ) ;
2014-06-24 00:25:35 +04:00
if ( ret )
2013-12-11 00:10:36 +04:00
return ret ;
2014-06-24 00:25:35 +04:00
if ( dev - > online_queues < 2 ) {
2013-12-11 00:10:36 +04:00
spin_lock ( & dev_list_lock ) ;
2014-03-07 19:24:49 +04:00
dev - > reset_workfn = nvme_remove_disks ;
2013-12-11 00:10:36 +04:00
queue_work ( nvme_workq , & dev - > reset_work ) ;
spin_unlock ( & dev_list_lock ) ;
2015-01-08 04:55:52 +03:00
} else {
nvme_unfreeze_queues ( dev ) ;
nvme_set_irq_hints ( dev ) ;
2013-12-11 00:10:36 +04:00
}
2013-12-11 00:10:37 +04:00
dev - > initialized = 1 ;
2013-12-11 00:10:36 +04:00
return 0 ;
}
static void nvme_dev_reset ( struct nvme_dev * dev )
{
nvme_dev_shutdown ( dev ) ;
if ( nvme_dev_resume ( dev ) ) {
2014-11-04 18:20:14 +03:00
dev_warn ( & dev - > pci_dev - > dev , " Device failed to resume \n " ) ;
2013-12-11 00:10:36 +04:00
kref_get ( & dev - > kref ) ;
if ( IS_ERR ( kthread_run ( nvme_remove_dead_ctrl , dev , " nvme%d " ,
dev - > instance ) ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Failed to start controller remove task \n " ) ;
kref_put ( & dev - > kref , nvme_free_dev ) ;
}
}
}
/* Work function: reset the controller that owns @ws (its reset_work). */
static void nvme_reset_failed_dev(struct work_struct *ws)
{
	nvme_dev_reset(container_of(ws, struct nvme_dev, reset_work));
}
2014-03-07 19:24:49 +04:00
static void nvme_reset_workfn ( struct work_struct * work )
{
struct nvme_dev * dev = container_of ( work , struct nvme_dev , reset_work ) ;
dev - > reset_workfn ( work ) ;
}
2012-12-22 03:13:49 +04:00
static int nvme_probe ( struct pci_dev * pdev , const struct pci_device_id * id )
2011-01-20 20:50:14 +03:00
{
2014-11-04 18:20:14 +03:00
int node , result = - ENOMEM ;
2011-01-20 20:50:14 +03:00
struct nvme_dev * dev ;
2014-11-04 18:20:14 +03:00
node = dev_to_node ( & pdev - > dev ) ;
if ( node = = NUMA_NO_NODE )
set_dev_node ( & pdev - > dev , 0 ) ;
dev = kzalloc_node ( sizeof ( * dev ) , GFP_KERNEL , node ) ;
2011-01-20 20:50:14 +03:00
if ( ! dev )
return - ENOMEM ;
2014-11-04 18:20:14 +03:00
dev - > entry = kzalloc_node ( num_possible_cpus ( ) * sizeof ( * dev - > entry ) ,
GFP_KERNEL , node ) ;
2011-01-20 20:50:14 +03:00
if ( ! dev - > entry )
goto free ;
2014-11-04 18:20:14 +03:00
dev - > queues = kzalloc_node ( ( num_possible_cpus ( ) + 1 ) * sizeof ( void * ) ,
GFP_KERNEL , node ) ;
2011-01-20 20:50:14 +03:00
if ( ! dev - > queues )
goto free ;
INIT_LIST_HEAD ( & dev - > namespaces ) ;
2014-03-07 19:24:49 +04:00
dev - > reset_workfn = nvme_reset_failed_dev ;
INIT_WORK ( & dev - > reset_work , nvme_reset_workfn ) ;
2014-08-20 05:15:59 +04:00
dev - > pci_dev = pci_dev_get ( pdev ) ;
2013-12-11 00:10:36 +04:00
pci_set_drvdata ( pdev , dev ) ;
2012-02-22 03:50:53 +04:00
result = nvme_set_instance ( dev ) ;
if ( result )
2014-08-20 05:15:59 +04:00
goto put_pci ;
2011-01-20 20:50:14 +03:00
2011-02-10 17:56:01 +03:00
result = nvme_setup_prp_pools ( dev ) ;
if ( result )
2013-07-16 01:02:19 +04:00
goto release ;
2011-02-10 17:56:01 +03:00
2014-03-03 22:09:47 +04:00
kref_init ( & dev - > kref ) ;
2013-07-16 01:02:21 +04:00
result = nvme_dev_start ( dev ) ;
2014-06-24 00:25:35 +04:00
if ( result )
2013-07-16 01:02:19 +04:00
goto release_pools ;
2011-01-20 20:50:14 +03:00
2014-06-24 00:25:35 +04:00
if ( dev - > online_queues > 1 )
result = nvme_dev_add ( dev ) ;
2013-09-06 00:45:07 +04:00
if ( result )
2013-07-16 01:02:21 +04:00
goto shutdown ;
2011-02-16 00:28:20 +03:00
2013-02-19 21:17:58 +04:00
scnprintf ( dev - > name , sizeof ( dev - > name ) , " nvme%d " , dev - > instance ) ;
dev - > miscdev . minor = MISC_DYNAMIC_MINOR ;
dev - > miscdev . parent = & pdev - > dev ;
dev - > miscdev . name = dev - > name ;
dev - > miscdev . fops = & nvme_dev_fops ;
result = misc_register ( & dev - > miscdev ) ;
if ( result )
goto remove ;
2014-11-04 18:20:14 +03:00
nvme_set_irq_hints ( dev ) ;
2013-12-11 00:10:37 +04:00
dev - > initialized = 1 ;
2011-01-20 20:50:14 +03:00
return 0 ;
2013-02-19 21:17:58 +04:00
remove :
nvme_dev_remove ( dev ) ;
2014-11-04 18:20:14 +03:00
nvme_dev_remove_admin ( dev ) ;
2014-02-01 03:53:39 +04:00
nvme_free_namespaces ( dev ) ;
2013-07-16 01:02:21 +04:00
shutdown :
nvme_dev_shutdown ( dev ) ;
2013-07-16 01:02:19 +04:00
release_pools :
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 0 ) ;
2011-02-10 17:56:01 +03:00
nvme_release_prp_pools ( dev ) ;
2013-07-16 01:02:19 +04:00
release :
nvme_release_instance ( dev ) ;
2014-08-20 05:15:59 +04:00
put_pci :
pci_dev_put ( dev - > pci_dev ) ;
2011-01-20 20:50:14 +03:00
free :
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
return result ;
}
2014-05-02 20:40:43 +04:00
/*
 * PCI reset_notify callback.  With @prepare set the controller is shut down;
 * otherwise it is resumed.  The result of the resume is not checked.
 */
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	if (!prepare) {
		nvme_dev_resume(dev);
		return;
	}

	nvme_dev_shutdown(dev);
}
2014-01-27 20:29:40 +04:00
/* PCI shutdown callback: shut down the controller bound to @pdev. */
static void nvme_shutdown(struct pci_dev *pdev)
{
	nvme_dev_shutdown(pci_get_drvdata(pdev));
}
2012-12-22 03:13:49 +04:00
static void nvme_remove ( struct pci_dev * pdev )
2011-01-20 20:50:14 +03:00
{
struct nvme_dev * dev = pci_get_drvdata ( pdev ) ;
2013-12-11 00:10:36 +04:00
spin_lock ( & dev_list_lock ) ;
list_del_init ( & dev - > node ) ;
spin_unlock ( & dev_list_lock ) ;
pci_set_drvdata ( pdev , NULL ) ;
flush_work ( & dev - > reset_work ) ;
2013-02-19 21:17:58 +04:00
misc_deregister ( & dev - > miscdev ) ;
2013-12-11 00:10:36 +04:00
nvme_dev_shutdown ( dev ) ;
2015-01-08 04:55:52 +03:00
nvme_dev_remove ( dev ) ;
2014-11-04 18:20:14 +03:00
nvme_dev_remove_admin ( dev ) ;
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 0 ) ;
2013-12-11 00:10:36 +04:00
nvme_release_prp_pools ( dev ) ;
2013-02-19 21:17:58 +04:00
kref_put ( & dev - > kref , nvme_free_dev ) ;
2011-01-20 20:50:14 +03:00
}
/*
 * These PCI error-recovery callbacks are yet to be implemented.  Each name
 * expands to NULL, so the matching slots in nvme_err_handler are left empty.
 */
# define nvme_error_detected NULL
# define nvme_dump_registers NULL
# define nvme_link_reset NULL
# define nvme_slot_reset NULL
# define nvme_error_resume NULL
2013-07-16 01:02:23 +04:00
2014-02-13 06:19:14 +04:00
# ifdef CONFIG_PM_SLEEP
2013-07-16 01:02:23 +04:00
/* PM suspend callback: shut the controller down.  Always returns 0. */
static int nvme_suspend(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));

	nvme_dev_shutdown(ndev);

	return 0;
}
static int nvme_resume ( struct device * dev )
{
struct pci_dev * pdev = to_pci_dev ( dev ) ;
struct nvme_dev * ndev = pci_get_drvdata ( pdev ) ;
2013-12-11 00:10:36 +04:00
if ( nvme_dev_resume ( ndev ) & & ! work_busy ( & ndev - > reset_work ) ) {
2014-03-07 19:24:49 +04:00
ndev - > reset_workfn = nvme_reset_failed_dev ;
2013-12-11 00:10:36 +04:00
queue_work ( nvme_workq , & ndev - > reset_work ) ;
}
return 0 ;
2013-07-16 01:02:23 +04:00
}
2014-02-13 06:19:14 +04:00
# endif
2013-07-16 01:02:23 +04:00
/*
 * Power-management operations built from nvme_suspend()/nvme_resume();
 * referenced from nvme_driver.driver.pm.
 */
static SIMPLE_DEV_PM_OPS ( nvme_dev_pm_ops , nvme_suspend , nvme_resume ) ;
2011-01-20 20:50:14 +03:00
2012-09-07 20:33:17 +04:00
static const struct pci_error_handlers nvme_err_handler = {
2011-01-20 20:50:14 +03:00
. error_detected = nvme_error_detected ,
. mmio_enabled = nvme_dump_registers ,
. link_reset = nvme_link_reset ,
. slot_reset = nvme_slot_reset ,
. resume = nvme_error_resume ,
2014-05-02 20:40:43 +04:00
. reset_notify = nvme_reset_notify ,
2011-01-20 20:50:14 +03:00
} ;
/*
 * PCI class code that nvme_id_table matches on (with a full 0xffffff mask).
 * Move to pci_ids.h later.
 */
# define PCI_CLASS_STORAGE_EXPRESS 0x010802
2014-03-24 18:11:22 +04:00
static const struct pci_device_id nvme_id_table [ ] = {
2011-01-20 20:50:14 +03:00
{ PCI_DEVICE_CLASS ( PCI_CLASS_STORAGE_EXPRESS , 0xffffff ) } ,
{ 0 , }
} ;
MODULE_DEVICE_TABLE ( pci , nvme_id_table ) ;
static struct pci_driver nvme_driver = {
. name = " nvme " ,
. id_table = nvme_id_table ,
. probe = nvme_probe ,
2012-12-22 03:13:49 +04:00
. remove = nvme_remove ,
2014-01-27 20:29:40 +04:00
. shutdown = nvme_shutdown ,
2013-07-16 01:02:23 +04:00
. driver = {
. pm = & nvme_dev_pm_ops ,
} ,
2011-01-20 20:50:14 +03:00
. err_handler = & nvme_err_handler ,
} ;
/*
 * Module init: set up the kthread wait queue, create the single-threaded
 * workqueue, register the block major and register the PCI driver.
 *
 * register_blkdev() returning a positive value is stored as the new
 * nvme_major.  Returns 0 on success or a negative errno, undoing the earlier
 * steps on failure.
 */
static int __init nvme_init(void)
{
	int rc;

	init_waitqueue_head(&nvme_kthread_wait);

	nvme_workq = create_singlethread_workqueue(" nvme ");
	if (!nvme_workq)
		return -ENOMEM;

	rc = register_blkdev(nvme_major, " nvme ");
	if (rc < 0)
		goto kill_workq;
	if (rc > 0)
		nvme_major = rc;

	rc = pci_register_driver(&nvme_driver);
	if (!rc)
		return 0;

	unregister_blkdev(nvme_major, " nvme ");
kill_workq:
	destroy_workqueue(nvme_workq);
	return rc;
}
/*
 * Module exit: unregister the PCI driver, the hotcpu notifier and the block
 * major, destroy the workqueue, and assert that no valid nvme_thread pointer
 * is left behind.
 *
 * NOTE(review): nvme_init() as written never registers nvme_nb, so the
 * unregister_hotcpu_notifier() call looks like a leftover -- confirm against
 * the rest of the file before removing it.
 */
static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);
	unregister_hotcpu_notifier(&nvme_nb);
	unregister_blkdev(nvme_major, " nvme ");
	destroy_workqueue(nvme_workq);

	/* Trips if nvme_thread still holds a non-error, non-NULL pointer. */
	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
	_nvme_check_size();
}
/* Module metadata. */
MODULE_AUTHOR ( " Matthew Wilcox <willy@linux.intel.com> " ) ;
MODULE_LICENSE ( " GPL " ) ;
2014-11-22 01:16:32 +03:00
MODULE_VERSION ( " 1.0 " ) ;
2011-01-20 20:50:14 +03:00
/* Module entry points: nvme_init() and nvme_exit(). */
module_init ( nvme_init ) ;
module_exit ( nvme_exit ) ;