2011-01-20 20:50:14 +03:00
/*
* NVM Express device driver
* Copyright ( c ) 2011 , Intel Corporation .
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms and conditions of the GNU General Public License ,
* version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*
* You should have received a copy of the GNU General Public License along with
* this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin St - Fifth Floor , Boston , MA 02110 - 1301 USA .
*/
# include <linux/nvme.h>
# include <linux/bio.h>
2011-05-12 21:50:28 +04:00
# include <linux/bitops.h>
2011-01-20 20:50:14 +03:00
# include <linux/blkdev.h>
2011-05-06 16:37:54 +04:00
# include <linux/delay.h>
2011-01-20 20:50:14 +03:00
# include <linux/errno.h>
# include <linux/fs.h>
# include <linux/genhd.h>
2011-05-06 16:45:47 +04:00
# include <linux/idr.h>
2011-01-20 20:50:14 +03:00
# include <linux/init.h>
# include <linux/interrupt.h>
# include <linux/io.h>
# include <linux/kdev_t.h>
2011-03-03 02:37:18 +03:00
# include <linux/kthread.h>
2011-01-20 20:50:14 +03:00
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/module.h>
# include <linux/moduleparam.h>
# include <linux/pci.h>
2011-02-06 15:53:23 +03:00
# include <linux/poison.h>
2013-07-09 01:26:25 +04:00
# include <linux/ptrace.h>
2011-01-20 20:50:14 +03:00
# include <linux/sched.h>
# include <linux/slab.h>
# include <linux/types.h>
2013-03-05 05:40:58 +04:00
# include <scsi/sg.h>
2012-02-07 06:45:33 +04:00
# include <asm-generic/io-64-nonatomic-lo-hi.h>
2011-01-20 20:50:14 +03:00
/* Queue depth constant used by this driver, in entries. */
#define NVME_Q_DEPTH 1024

/*
 * Size in bytes of a submission / completion queue with @depth entries.
 * The argument is parenthesized so that an expression argument such as
 * SQ_SIZE(n + 1) multiplies the whole expression by the entry size.
 */
#define SQ_SIZE(depth)		((depth) * sizeof(struct nvme_command))
#define CQ_SIZE(depth)		((depth) * sizeof(struct nvme_completion))

/* Admin timeout: 60 seconds expressed in jiffies. */
#define ADMIN_TIMEOUT	(60 * HZ)
2011-01-20 20:50:14 +03:00
/*
 * Module parameter holding the driver's major number. It is registered
 * with permission bits 0 and stays at its zero-initialised default unless
 * set at module load time.
 */
static int nvme_major ;
module_param ( nvme_major , int , 0 ) ;
2011-02-06 15:28:06 +03:00
/*
 * Module parameter; presumably non-zero selects threaded interrupt
 * handling -- confirm at the IRQ registration site (not in this excerpt).
 */
static int use_threaded_interrupts ;
module_param ( use_threaded_interrupts , int , 0 ) ;
2011-03-03 02:37:18 +03:00
/* Device list and a spinlock declared with it (presumably guarding it -- confirm). */
static DEFINE_SPINLOCK ( dev_list_lock ) ;
static LIST_HEAD ( dev_list ) ;
/* Kernel thread handle; creation and purpose are outside this excerpt. */
static struct task_struct * nvme_thread ;
2013-12-11 00:10:36 +04:00
/* Driver workqueue handle; users are outside this excerpt. */
static struct workqueue_struct * nvme_workq ;
2011-03-03 02:37:18 +03:00
2013-12-11 00:10:37 +04:00
/* Forward declaration of a work handler defined elsewhere in the file. */
static void nvme_reset_failed_dev ( struct work_struct * ws ) ;
2013-12-11 00:10:40 +04:00
/*
 * Per-command state for commands completed through async_completion():
 * the completion handler records the outcome here and then queues @work
 * on @worker.
 */
struct async_cmd_info {
/* Work item queued by async_completion() once the command completes. */
struct kthread_work work ;
/* Worker that @work is queued on. */
struct kthread_worker * worker ;
/* Completion entry's result field, converted to CPU byte order. */
u32 result ;
/* Completion entry's status field shifted right by one (phase bit dropped). */
int status ;
/* Opaque pointer; not touched by the code visible in this excerpt. */
void * ctx ;
} ;
2011-01-20 20:50:14 +03:00
/*
* An NVM Express queue . Each device has at least two ( one for admin
* commands and one for I / O commands ) .
*
* The structure ends in a flexible array , cmdid_data , which holds the
* command - ID bitmap immediately followed by an array of struct
* nvme_cmd_info ( see nvme_cmd_info ( ) ) .
*/
struct nvme_queue {
/* Device handed to dma_map_sg() when mapping bios for this queue. */
struct device * q_dmadev ;
2011-02-10 17:56:01 +03:00
/* Owning controller. */
struct nvme_dev * dev ;
2011-01-20 20:50:14 +03:00
/* Taken around submission-ring updates and completion processing. */
spinlock_t q_lock ;
/* Submission ring, indexed by sq_tail. */
struct nvme_command * sq_cmds ;
/* Completion ring, indexed by cq_head; read by value in nvme_process_cq(). */
volatile struct nvme_completion * cqes ;
dma_addr_t sq_dma_addr ;
dma_addr_t cq_dma_addr ;
/* Woken by free_cmdid() whenever a command ID is released. */
wait_queue_head_t sq_full ;
2011-03-03 02:37:18 +03:00
/* Wait entry added to sq_full while sq_cong holds deferred bios. */
wait_queue_t sq_cong_wait ;
2011-01-20 20:50:14 +03:00
/* Bios that could not be submitted immediately. */
struct bio_list sq_cong ;
/*
 * Doorbell register: the new sq_tail is written here, and the new
 * cq_head is written at q_db + dev->db_stride.
 */
u32 __iomem * q_db ;
/* Number of entries in each ring; the indices wrap to 0 at this value. */
u16 q_depth ;
u16 cq_vector ;
/* Last submission-queue head reported in a completion entry. */
u16 sq_head ;
u16 sq_tail ;
u16 cq_head ;
2013-12-11 00:10:38 +04:00
u16 qid ;
2013-06-24 19:47:34 +04:00
/* Phase bit value expected in the next valid completion entry. */
u8 cq_phase ;
/* Set by nvme_process_cq() when it consumed entries; cleared by nvme_irq(). */
u8 cqe_seen ;
2013-07-16 01:02:20 +04:00
/* When non-zero, nvme_make_request() defers bios instead of submitting. */
u8 q_suspended ;
2013-12-11 00:10:40 +04:00
struct async_cmd_info cmdinfo ;
2011-01-20 20:50:14 +03:00
/* Command-ID bitmap followed by the nvme_cmd_info array. */
unsigned long cmdid_data [ ] ;
} ;
/*
* Check we didin ' t inadvertently grow the command struct
*/
static inline void _nvme_check_size ( void )
{
BUILD_BUG_ON ( sizeof ( struct nvme_rw_command ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_create_cq ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_create_sq ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_delete_queue ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_features ) ! = 64 ) ;
2013-03-27 15:13:41 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_format_cmd ) ! = 64 ) ;
2013-12-11 00:10:38 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_abort_cmd ) ! = 64 ) ;
2011-01-20 20:50:14 +03:00
BUILD_BUG_ON ( sizeof ( struct nvme_command ) ! = 64 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_id_ctrl ) ! = 4096 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_id_ns ) ! = 4096 ) ;
BUILD_BUG_ON ( sizeof ( struct nvme_lba_range_type ) ! = 64 ) ;
2012-09-26 22:49:27 +04:00
BUILD_BUG_ON ( sizeof ( struct nvme_smart_log ) ! = 512 ) ;
2011-01-20 20:50:14 +03:00
}
2011-12-20 20:54:53 +04:00
/*
 * Completion callback type. nvme_process_cq() invokes it with the owning
 * device, the context pointer registered for the command ID, and a copy
 * of the completion queue entry.
 */
typedef void ( * nvme_completion_fn ) ( struct nvme_dev * , void * ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * ) ;
2011-02-07 02:30:16 +03:00
/*
 * Per-command-ID bookkeeping. One entry exists for each command ID of a
 * queue; the array lives after the bitmap in nvme_queue.cmdid_data.
 */
struct nvme_cmd_info {
2011-10-15 15:33:46 +04:00
/* Callback to run when the command completes. */
nvme_completion_fn fn ;
/* Argument handed to @fn, or one of the CMD_CTX_* sentinel values. */
void * ctx ;
2011-02-07 02:30:16 +03:00
/* Set by alloc_cmdid() to jiffies + the caller's timeout. */
unsigned long timeout ;
2013-12-11 00:10:38 +04:00
/* Cleared by alloc_cmdid(); the code that sets it is outside this excerpt. */
int aborted ;
2011-02-07 02:30:16 +03:00
} ;
static struct nvme_cmd_info * nvme_cmd_info ( struct nvme_queue * nvmeq )
{
return ( void * ) & nvmeq - > cmdid_data [ BITS_TO_LONGS ( nvmeq - > q_depth ) ] ;
}
2013-07-16 01:02:20 +04:00
/*
 * Number of bytes needed for the cmdid_data[] tail of struct nvme_queue
 * for a queue of @depth entries: the command-ID bitmap followed by one
 * struct nvme_cmd_info per entry.
 *
 * The bitmap is sized in whole longs, not bytes, because nvme_cmd_info()
 * locates the info array at cmdid_data[BITS_TO_LONGS(depth)].  Sizing it
 * as DIV_ROUND_UP(depth, 8) bytes under-allocates whenever @depth is not
 * a multiple of BITS_PER_LONG.
 */
static unsigned nvme_queue_extra(int depth)
{
	return BITS_TO_LONGS(depth) * sizeof(unsigned long) +
				(depth * sizeof(struct nvme_cmd_info));
}
2011-01-20 20:50:14 +03:00
/**
2011-03-16 23:28:24 +03:00
* alloc_cmdid ( ) - Allocate a Command ID
* @ nvmeq : The queue that will be used for this command
* @ ctx : A pointer that will be passed to the handler
2011-10-15 15:33:46 +04:00
* @ handler : The function to call on completion
2011-01-20 20:50:14 +03:00
*
* Allocate a Command ID for a queue . The data passed in will
* be passed to the completion handler . This is implemented by using
* the bottom two bits of the ctx pointer to store the handler ID .
* Passing in a pointer that ' s not 4 - byte aligned will cause a BUG .
* We can change this if it becomes a problem .
2011-05-12 05:36:38 +04:00
*
* May be called with local interrupts disabled and the q_lock held ,
* or with interrupts enabled and no locks held .
2011-01-20 20:50:14 +03:00
*/
2011-10-15 15:33:46 +04:00
static int alloc_cmdid ( struct nvme_queue * nvmeq , void * ctx ,
nvme_completion_fn handler , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
2011-02-24 16:49:41 +03:00
int depth = nvmeq - > q_depth - 1 ;
2011-02-07 02:30:16 +03:00
struct nvme_cmd_info * info = nvme_cmd_info ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
int cmdid ;
do {
cmdid = find_first_zero_bit ( nvmeq - > cmdid_data , depth ) ;
if ( cmdid > = depth )
return - EBUSY ;
} while ( test_and_set_bit ( cmdid , nvmeq - > cmdid_data ) ) ;
2011-10-15 15:33:46 +04:00
info [ cmdid ] . fn = handler ;
info [ cmdid ] . ctx = ctx ;
2011-02-07 02:30:16 +03:00
info [ cmdid ] . timeout = jiffies + timeout ;
2013-12-11 00:10:38 +04:00
info [ cmdid ] . aborted = 0 ;
2011-01-20 20:50:14 +03:00
return cmdid ;
}
static int alloc_cmdid_killable ( struct nvme_queue * nvmeq , void * ctx ,
2011-10-15 15:33:46 +04:00
nvme_completion_fn handler , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
int cmdid ;
wait_event_killable ( nvmeq - > sq_full ,
2011-02-07 02:30:16 +03:00
( cmdid = alloc_cmdid ( nvmeq , ctx , handler , timeout ) ) > = 0 ) ;
2011-01-20 20:50:14 +03:00
return ( cmdid < 0 ) ? - EINTR : cmdid ;
}
2011-10-15 15:33:46 +04:00
/*
 * Sentinel values stored in nvme_cmd_info.ctx in place of a real context
 * pointer; special_completion() recognises them by pointer comparison.
 * They are built by adding a small offset to POISON_POINTER_DELTA.
 */
/* Special values must be less than 0x1000 */
# define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA)
2011-02-07 23:55:59 +03:00
/* Installed by cancel_cmdid(); a later completion is silently ignored. */
# define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE)
/* Installed by free_cmdid(); a second completion of the ID logs a warning. */
# define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
/* Returned by free_cmdid() for a command ID outside the queue depth. */
# define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
2011-02-22 22:18:30 +03:00
/* Context used by nvme_submit_flush_data(); completion needs no action. */
# define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE)
2013-12-11 00:10:38 +04:00
/* On completion special_completion() increments dev->abort_limit. */
# define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE)
2011-02-06 15:53:23 +03:00
2011-12-20 20:54:53 +04:00
static void special_completion ( struct nvme_dev * dev , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
if ( ctx = = CMD_CTX_CANCELLED )
return ;
if ( ctx = = CMD_CTX_FLUSH )
return ;
2013-12-11 00:10:38 +04:00
if ( ctx = = CMD_CTX_ABORT ) {
+ + dev - > abort_limit ;
return ;
}
2011-10-15 15:33:46 +04:00
if ( ctx = = CMD_CTX_COMPLETED ) {
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev ,
2011-10-15 15:33:46 +04:00
" completed id %d twice on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
if ( ctx = = CMD_CTX_INVALID ) {
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev ,
2011-10-15 15:33:46 +04:00
" invalid id %d completed on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev , " Unknown special completion %p \n " , ctx ) ;
2011-10-15 15:33:46 +04:00
}
2013-12-11 00:10:40 +04:00
static void async_completion ( struct nvme_dev * dev , void * ctx ,
struct nvme_completion * cqe )
{
struct async_cmd_info * cmdinfo = ctx ;
cmdinfo - > result = le32_to_cpup ( & cqe - > result ) ;
cmdinfo - > status = le16_to_cpup ( & cqe - > status ) > > 1 ;
queue_kthread_work ( cmdinfo - > worker , & cmdinfo - > work ) ;
}
2011-05-12 05:36:38 +04:00
/*
* Called with local interrupts disabled and the q_lock held . May not sleep .
*/
2011-10-15 15:33:46 +04:00
static void * free_cmdid ( struct nvme_queue * nvmeq , int cmdid ,
nvme_completion_fn * fn )
2011-01-20 20:50:14 +03:00
{
2011-10-15 15:33:46 +04:00
void * ctx ;
2011-02-07 02:30:16 +03:00
struct nvme_cmd_info * info = nvme_cmd_info ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
2011-10-15 15:33:46 +04:00
if ( cmdid > = nvmeq - > q_depth ) {
* fn = special_completion ;
2011-02-06 16:51:15 +03:00
return CMD_CTX_INVALID ;
2011-10-15 15:33:46 +04:00
}
2012-08-03 00:05:59 +04:00
if ( fn )
* fn = info [ cmdid ] . fn ;
2011-10-15 15:33:46 +04:00
ctx = info [ cmdid ] . ctx ;
info [ cmdid ] . fn = special_completion ;
2011-02-07 02:30:16 +03:00
info [ cmdid ] . ctx = CMD_CTX_COMPLETED ;
2011-01-20 20:50:14 +03:00
clear_bit ( cmdid , nvmeq - > cmdid_data ) ;
wake_up ( & nvmeq - > sq_full ) ;
2011-10-15 15:33:46 +04:00
return ctx ;
2011-01-20 20:50:14 +03:00
}
2011-10-15 15:33:46 +04:00
/*
 * Mark a command as cancelled without releasing its ID: the slot's handler
 * becomes special_completion and its context CMD_CTX_CANCELLED.
 *
 * If @fn is non-NULL it receives the previously registered handler.
 * Returns the previously registered context pointer.
 */
static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid,
						nvme_completion_fn *fn)
{
	struct nvme_cmd_info *slot = &nvme_cmd_info(nvmeq)[cmdid];
	void *old_ctx;

	if (fn)
		*fn = slot->fn;
	old_ctx = slot->ctx;

	slot->fn = special_completion;
	slot->ctx = CMD_CTX_CANCELLED;
	return old_ctx;
}
2013-03-05 05:40:58 +04:00
struct nvme_queue * get_nvmeq ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2011-12-20 20:04:12 +04:00
return dev - > queues [ get_cpu ( ) + 1 ] ;
2011-01-20 20:50:14 +03:00
}
2013-03-05 05:40:58 +04:00
/*
 * Counterpart of get_nvmeq(): issues the put_cpu() that balances the
 * get_cpu() done there. The @nvmeq argument itself is not used (callers
 * in this file also pass NULL).
 */
void put_nvmeq ( struct nvme_queue * nvmeq )
2011-01-20 20:50:14 +03:00
{
2011-01-20 21:01:49 +03:00
put_cpu ( ) ;
2011-01-20 20:50:14 +03:00
}
/**
2011-03-16 23:28:24 +03:00
* nvme_submit_cmd ( ) - Copy a command into a queue and ring the doorbell
2011-01-20 20:50:14 +03:00
* @ nvmeq : The queue to use
* @ cmd : The command to send
*
* Safe to use from interrupt context
*/
static int nvme_submit_cmd ( struct nvme_queue * nvmeq , struct nvme_command * cmd )
{
unsigned long flags ;
u16 tail ;
spin_lock_irqsave ( & nvmeq - > q_lock , flags ) ;
tail = nvmeq - > sq_tail ;
memcpy ( & nvmeq - > sq_cmds [ tail ] , cmd , sizeof ( * cmd ) ) ;
if ( + + tail = = nvmeq - > q_depth )
tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > sq_tail = tail ;
spin_unlock_irqrestore ( & nvmeq - > q_lock , flags ) ;
return 0 ;
}
2011-12-20 22:34:52 +04:00
static __le64 * * iod_list ( struct nvme_iod * iod )
2011-02-10 16:51:24 +03:00
{
2011-12-20 22:34:52 +04:00
return ( ( void * ) iod ) + iod - > offset ;
2011-02-10 16:51:24 +03:00
}
2011-12-20 22:34:52 +04:00
/*
 * Number of PRP-list pages to reserve pointers for, for a transfer of
 * @size bytes.  Each PRP entry is 8 bytes and the divisor leaves 8 bytes
 * of every page out of the count.
 *
 * Will slightly overestimate the number of pages needed.  This is OK
 * as it only leads to a small amount of wasted memory for the lifetime of
 * the I/O.
 */
static int nvme_npages(unsigned size)
{
	unsigned entries = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
	unsigned entry_bytes = 8 * entries;

	return DIV_ROUND_UP(entry_bytes, PAGE_SIZE - 8);
}
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
/*
 * Allocate an I/O descriptor with room for @nseg scatterlist entries
 * followed by the PRP-list page pointers needed for @nbytes of data.
 *
 * Returns NULL if the allocation fails.  On success npages is -1 (no PRP
 * list allocated yet), nents is 0 and start_time is the current jiffies.
 */
static struct nvme_iod *
nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
{
	struct nvme_iod *iod;

	iod = kmalloc(sizeof(struct nvme_iod) +
				sizeof(__le64 *) * nvme_npages(nbytes) +
				sizeof(struct scatterlist) * nseg, gfp);
	if (!iod)
		return NULL;

	/* The pointer array sits directly after the last sg entry. */
	iod->offset = offsetof(struct nvme_iod, sg[nseg]);
	iod->npages = -1;
	iod->length = nbytes;
	iod->nents = 0;
	iod->start_time = jiffies;
	return iod;
}
2013-03-05 05:40:58 +04:00
void nvme_free_iod ( struct nvme_dev * dev , struct nvme_iod * iod )
2011-01-20 20:50:14 +03:00
{
2011-12-20 22:34:52 +04:00
const int last_prp = PAGE_SIZE / 8 - 1 ;
int i ;
__le64 * * list = iod_list ( iod ) ;
dma_addr_t prp_dma = iod - > first_dma ;
if ( iod - > npages = = 0 )
dma_pool_free ( dev - > prp_small_pool , list [ 0 ] , prp_dma ) ;
for ( i = 0 ; i < iod - > npages ; i + + ) {
__le64 * prp_list = list [ i ] ;
dma_addr_t next_prp_dma = le64_to_cpu ( prp_list [ last_prp ] ) ;
dma_pool_free ( dev - > prp_page_pool , prp_list , prp_dma ) ;
prp_dma = next_prp_dma ;
}
kfree ( iod ) ;
2011-01-20 20:50:14 +03:00
}
2013-05-30 01:59:39 +04:00
static void nvme_start_io_acct ( struct bio * bio )
{
struct gendisk * disk = bio - > bi_bdev - > bd_disk ;
const int rw = bio_data_dir ( bio ) ;
int cpu = part_stat_lock ( ) ;
part_round_stats ( cpu , & disk - > part0 ) ;
part_stat_inc ( cpu , & disk - > part0 , ios [ rw ] ) ;
part_stat_add ( cpu , & disk - > part0 , sectors [ rw ] , bio_sectors ( bio ) ) ;
part_inc_in_flight ( & disk - > part0 , rw ) ;
part_stat_unlock ( ) ;
}
static void nvme_end_io_acct ( struct bio * bio , unsigned long start_time )
{
struct gendisk * disk = bio - > bi_bdev - > bd_disk ;
const int rw = bio_data_dir ( bio ) ;
unsigned long duration = jiffies - start_time ;
int cpu = part_stat_lock ( ) ;
part_stat_add ( cpu , & disk - > part0 , ticks [ rw ] , duration ) ;
part_round_stats ( cpu , & disk - > part0 ) ;
part_dec_in_flight ( & disk - > part0 , rw ) ;
part_stat_unlock ( ) ;
}
2011-12-20 20:54:53 +04:00
static void bio_completion ( struct nvme_dev * dev , void * ctx ,
2011-01-20 20:50:14 +03:00
struct nvme_completion * cqe )
{
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod = ctx ;
struct bio * bio = iod - > private ;
2011-01-20 20:50:14 +03:00
u16 status = le16_to_cpup ( & cqe - > status ) > > 1 ;
2013-08-08 20:25:38 +04:00
if ( iod - > nents ) {
2012-11-06 22:59:23 +04:00
dma_unmap_sg ( & dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
2011-01-20 20:50:14 +03:00
bio_data_dir ( bio ) ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2013-08-08 20:25:38 +04:00
nvme_end_io_acct ( bio , iod - > start_time ) ;
}
2011-12-20 22:34:52 +04:00
nvme_free_iod ( dev , iod ) ;
2013-04-09 21:59:32 +04:00
if ( status )
2011-02-23 23:20:00 +03:00
bio_endio ( bio , - EIO ) ;
2013-04-09 21:59:32 +04:00
else
2011-02-23 23:20:00 +03:00
bio_endio ( bio , 0 ) ;
2011-01-20 20:50:14 +03:00
}
2011-05-12 05:36:38 +04:00
/*
 * Fill in the PRP fields of @cmd for the DMA-mapped scatterlist in @iod.
 *
 * prp1 always gets the first DMA address. If the data extends at most one
 * page beyond the end of the first page, prp2 gets the address of that
 * second page directly. Otherwise one or more PRP lists are allocated
 * from a DMA pool, chained together, and prp2 gets the address of the
 * first list.
 *
 * Returns the number of bytes the command describes: total_len on
 * success, or a smaller value if a PRP list allocation failed.
 */
/* length is in bytes. gfp flags indicates whether we may sleep. */
2013-03-05 05:40:58 +04:00
int nvme_setup_prps ( struct nvme_dev * dev , struct nvme_common_command * cmd ,
struct nvme_iod * iod , int total_len , gfp_t gfp )
2011-01-26 18:02:29 +03:00
{
2011-02-10 18:30:34 +03:00
struct dma_pool * pool ;
2011-12-20 22:34:52 +04:00
int length = total_len ;
struct scatterlist * sg = iod - > sg ;
2011-01-26 18:02:29 +03:00
int dma_len = sg_dma_len ( sg ) ;
u64 dma_addr = sg_dma_address ( sg ) ;
int offset = offset_in_page ( dma_addr ) ;
2011-02-10 16:51:24 +03:00
__le64 * prp_list ;
2011-12-20 22:34:52 +04:00
__le64 * * list = iod_list ( iod ) ;
2011-02-10 16:51:24 +03:00
dma_addr_t prp_dma ;
2011-12-20 22:34:52 +04:00
int nprps , i ;
2011-01-26 18:02:29 +03:00
cmd - > prp1 = cpu_to_le64 ( dma_addr ) ;
/* prp1 covers the rest of the first page; is anything left after it? */
length - = ( PAGE_SIZE - offset ) ;
if ( length < = 0 )
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
/* Advance to the next page: within this sg entry, or into the next one. */
dma_len - = ( PAGE_SIZE - offset ) ;
if ( dma_len ) {
dma_addr + = ( PAGE_SIZE - offset ) ;
} else {
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
}
/* At most one more page: prp2 points at the data itself. */
if ( length < = PAGE_SIZE ) {
cmd - > prp2 = cpu_to_le64 ( dma_addr ) ;
2011-12-20 22:34:52 +04:00
return total_len ;
2011-02-10 16:51:24 +03:00
}
nprps = DIV_ROUND_UP ( length , PAGE_SIZE ) ;
2011-02-10 18:30:34 +03:00
/*
 * Up to 256 / 8 = 32 entries use the small pool ; npages == 0 tells
 * nvme_free_iod ( ) to free list [ 0 ] to that pool . Larger transfers
 * use the page pool and npages counts the lists allocated .
 */
if ( nprps < = ( 256 / 8 ) ) {
pool = dev - > prp_small_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 0 ;
2011-02-10 18:30:34 +03:00
} else {
pool = dev - > prp_page_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 1 ;
2011-02-10 18:30:34 +03:00
}
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
if ( ! prp_list ) {
/*
 * No list available : fall back to a two - entry command covering
 * what prp1 describes plus one page via prp2 , and report that
 * shorter length .
 */
cmd - > prp2 = cpu_to_le64 ( dma_addr ) ;
2011-12-20 22:34:52 +04:00
iod - > npages = - 1 ;
return ( total_len - length ) + PAGE_SIZE ;
2011-05-12 21:51:41 +04:00
}
2011-12-20 22:34:52 +04:00
list [ 0 ] = prp_list ;
iod - > first_dma = prp_dma ;
2011-02-10 16:51:24 +03:00
cmd - > prp2 = cpu_to_le64 ( prp_dma ) ;
i = 0 ;
for ( ; ; ) {
2011-03-16 23:43:40 +03:00
/*
 * Current list is full : allocate another , move the last data entry
 * into slot 0 of the new list and replace it with a link to the new
 * list .
 */
if ( i = = PAGE_SIZE / 8 ) {
2011-02-10 16:51:24 +03:00
__le64 * old_prp_list = prp_list ;
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
2011-12-20 22:34:52 +04:00
/* On failure report only the bytes described so far . */
if ( ! prp_list )
return total_len - length ;
list [ iod - > npages + + ] = prp_list ;
2011-03-16 23:43:40 +03:00
prp_list [ 0 ] = old_prp_list [ i - 1 ] ;
old_prp_list [ i - 1 ] = cpu_to_le64 ( prp_dma ) ;
i = 1 ;
2011-02-10 16:51:24 +03:00
}
prp_list [ i + + ] = cpu_to_le64 ( dma_addr ) ;
dma_len - = PAGE_SIZE ;
dma_addr + = PAGE_SIZE ;
length - = PAGE_SIZE ;
if ( length < = 0 )
break ;
if ( dma_len > 0 )
continue ;
/* A segment that does not end on a page boundary is a bug here . */
BUG_ON ( dma_len < 0 ) ;
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
2011-01-26 18:02:29 +03:00
}
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
}
2013-04-09 21:59:32 +04:00
/*
 * Bookkeeping for a bio that nvme_bio_split() divided in two. The two
 * halves share this structure through bi_private; when both have ended,
 * nvme_bio_pair_endio() completes the parent and frees everything.
 */
struct nvme_bio_pair {
/* b1 / b2: the two halves; parent: the original bio. */
struct bio b1 , b2 , * parent ;
/* Private bio_vec copies for b1 / b2, or NULL when the split fell on a vector boundary. */
struct bio_vec * bv1 , * bv2 ;
/* Last non-zero error reported by either half; 0 if none. */
int err ;
/* Halves still outstanding; starts at 2. */
atomic_t cnt ;
} ;
static void nvme_bio_pair_endio ( struct bio * bio , int err )
{
struct nvme_bio_pair * bp = bio - > bi_private ;
if ( err )
bp - > err = err ;
if ( atomic_dec_and_test ( & bp - > cnt ) ) {
bio_endio ( bp - > parent , bp - > err ) ;
2013-07-18 22:13:51 +04:00
kfree ( bp - > bv1 ) ;
kfree ( bp - > bv2 ) ;
2013-04-09 21:59:32 +04:00
kfree ( bp ) ;
}
}
static struct nvme_bio_pair * nvme_bio_split ( struct bio * bio , int idx ,
int len , int offset )
{
struct nvme_bio_pair * bp ;
BUG_ON ( len > bio - > bi_size ) ;
BUG_ON ( idx > bio - > bi_vcnt ) ;
bp = kmalloc ( sizeof ( * bp ) , GFP_ATOMIC ) ;
if ( ! bp )
return NULL ;
bp - > err = 0 ;
bp - > b1 = * bio ;
bp - > b2 = * bio ;
bp - > b1 . bi_size = len ;
bp - > b2 . bi_size - = len ;
bp - > b1 . bi_vcnt = idx ;
bp - > b2 . bi_idx = idx ;
bp - > b2 . bi_sector + = len > > 9 ;
if ( offset ) {
bp - > bv1 = kmalloc ( bio - > bi_max_vecs * sizeof ( struct bio_vec ) ,
GFP_ATOMIC ) ;
if ( ! bp - > bv1 )
goto split_fail_1 ;
bp - > bv2 = kmalloc ( bio - > bi_max_vecs * sizeof ( struct bio_vec ) ,
GFP_ATOMIC ) ;
if ( ! bp - > bv2 )
goto split_fail_2 ;
memcpy ( bp - > bv1 , bio - > bi_io_vec ,
bio - > bi_max_vecs * sizeof ( struct bio_vec ) ) ;
memcpy ( bp - > bv2 , bio - > bi_io_vec ,
bio - > bi_max_vecs * sizeof ( struct bio_vec ) ) ;
bp - > b1 . bi_io_vec = bp - > bv1 ;
bp - > b2 . bi_io_vec = bp - > bv2 ;
bp - > b2 . bi_io_vec [ idx ] . bv_offset + = offset ;
bp - > b2 . bi_io_vec [ idx ] . bv_len - = offset ;
bp - > b1 . bi_io_vec [ idx ] . bv_len = offset ;
bp - > b1 . bi_vcnt + + ;
} else
bp - > bv1 = bp - > bv2 = NULL ;
bp - > b1 . bi_private = bp ;
bp - > b2 . bi_private = bp ;
bp - > b1 . bi_end_io = nvme_bio_pair_endio ;
bp - > b2 . bi_end_io = nvme_bio_pair_endio ;
bp - > parent = bio ;
atomic_set ( & bp - > cnt , 2 ) ;
return bp ;
split_fail_2 :
kfree ( bp - > bv1 ) ;
split_fail_1 :
kfree ( bp ) ;
return NULL ;
}
static int nvme_split_and_submit ( struct bio * bio , struct nvme_queue * nvmeq ,
int idx , int len , int offset )
{
struct nvme_bio_pair * bp = nvme_bio_split ( bio , idx , len , offset ) ;
if ( ! bp )
return - ENOMEM ;
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
add_wait_queue ( & nvmeq - > sq_full , & nvmeq - > sq_cong_wait ) ;
bio_list_add ( & nvmeq - > sq_cong , & bp - > b1 ) ;
bio_list_add ( & nvmeq - > sq_cong , & bp - > b2 ) ;
return 0 ;
}
2011-02-23 23:20:00 +03:00
/*
 * NVMe scatterlists require no holes in the virtual address .
 * Evaluates to non - zero when @vec2 does not start at offset 0 of its
 * page or @vec1 does not end exactly on a page boundary .
 */
# define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2) ((vec2)->bv_offset || \
( ( ( vec1 ) - > bv_offset + ( vec1 ) - > bv_len ) % PAGE_SIZE ) )
2013-04-09 21:59:32 +04:00
/*
 * Build the scatterlist in @iod from @bio and DMA-map it.
 *
 * Physically contiguous vectors are merged into one sg entry. If two
 * neighbouring vectors would leave a hole (see BIOVEC_NOT_VIRT_MERGEABLE),
 * or the bio crosses the stripe boundary computed from dev->stripe_size,
 * the bio is split and both halves are queued on sq_cong instead.
 *
 * Returns the mapped length in bytes on success, -ENOMEM if dma_map_sg()
 * maps nothing, or the result of nvme_split_and_submit() (0 or -ENOMEM)
 * when the bio had to be split.
 */
static int nvme_map_bio ( struct nvme_queue * nvmeq , struct nvme_iod * iod ,
2011-01-20 20:50:14 +03:00
struct bio * bio , enum dma_data_direction dma_dir , int psegs )
{
2011-02-10 21:55:39 +03:00
struct bio_vec * bvec , * bvprv = NULL ;
struct scatterlist * sg = NULL ;
2013-04-10 03:13:20 +04:00
int i , length = 0 , nsegs = 0 , split_len = bio - > bi_size ;
/*
 * With a stripe size set , only the bytes up to the next stripe
 * boundary may go into one command . The mask arithmetic assumes
 * stripe_size is a power of two -- TODO confirm where it is set .
 */
if ( nvmeq - > dev - > stripe_size )
split_len = nvmeq - > dev - > stripe_size -
( ( bio - > bi_sector < < 9 ) & ( nvmeq - > dev - > stripe_size - 1 ) ) ;
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
sg_init_table ( iod - > sg , psegs ) ;
2011-01-20 20:50:14 +03:00
bio_for_each_segment ( bvec , bio , i ) {
2011-02-10 21:55:39 +03:00
/* Contiguous with the previous vector : grow the current sg entry . */
if ( bvprv & & BIOVEC_PHYS_MERGEABLE ( bvprv , bvec ) ) {
sg - > length + = bvec - > bv_len ;
} else {
2011-02-23 23:20:00 +03:00
/* A hole between vectors : split the bio at this vector . */
if ( bvprv & & BIOVEC_NOT_VIRT_MERGEABLE ( bvprv , bvec ) )
2013-04-09 21:59:32 +04:00
return nvme_split_and_submit ( bio , nvmeq , i ,
length , 0 ) ;
2011-12-20 22:34:52 +04:00
/* Start a new sg entry ( the first one on the first pass ) . */
sg = sg ? sg + 1 : iod - > sg ;
2011-02-10 21:55:39 +03:00
sg_set_page ( sg , bvec - > bv_page , bvec - > bv_len ,
bvec - > bv_offset ) ;
nsegs + + ;
}
2013-04-10 03:13:20 +04:00
/* This vector crosses the stripe boundary : split inside it . */
if ( split_len - length < bvec - > bv_len )
return nvme_split_and_submit ( bio , nvmeq , i , split_len ,
split_len - length ) ;
2011-02-23 23:20:00 +03:00
length + = bvec - > bv_len ;
2011-02-10 21:55:39 +03:00
bvprv = bvec ;
2011-01-20 20:50:14 +03:00
}
2011-12-20 22:34:52 +04:00
iod - > nents = nsegs ;
2011-02-10 21:55:39 +03:00
sg_mark_end ( sg ) ;
2013-04-09 21:59:32 +04:00
if ( dma_map_sg ( nvmeq - > q_dmadev , iod - > sg , iod - > nents , dma_dir ) = = 0 )
2011-02-23 23:20:00 +03:00
return - ENOMEM ;
2013-04-09 21:59:32 +04:00
2013-04-10 03:13:20 +04:00
/* Without a split , every byte of the bio must have been mapped . */
BUG_ON ( length ! = bio - > bi_size ) ;
2011-02-23 23:20:00 +03:00
return length ;
2011-01-20 20:50:14 +03:00
}
2012-11-10 03:33:05 +04:00
/*
* We reuse the small pool to allocate the 16 - byte range here as it is not
* worth having a special pool for these or additional cases to handle freeing
* the iod .
*/
static int nvme_submit_discard ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
struct bio * bio , struct nvme_iod * iod , int cmdid )
{
struct nvme_dsm_range * range ;
struct nvme_command * cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
range = dma_pool_alloc ( nvmeq - > dev - > prp_small_pool , GFP_ATOMIC ,
& iod - > first_dma ) ;
if ( ! range )
return - ENOMEM ;
iod_list ( iod ) [ 0 ] = ( __le64 * ) range ;
iod - > npages = 0 ;
range - > cattr = cpu_to_le32 ( 0 ) ;
range - > nlb = cpu_to_le32 ( bio - > bi_size > > ns - > lba_shift ) ;
2013-03-28 05:28:22 +04:00
range - > slba = cpu_to_le64 ( nvme_block_nr ( ns , bio - > bi_sector ) ) ;
2012-11-10 03:33:05 +04:00
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
cmnd - > dsm . opcode = nvme_cmd_dsm ;
cmnd - > dsm . command_id = cmdid ;
cmnd - > dsm . nsid = cpu_to_le32 ( ns - > ns_id ) ;
cmnd - > dsm . prp1 = cpu_to_le64 ( iod - > first_dma ) ;
cmnd - > dsm . nr = 0 ;
cmnd - > dsm . attributes = cpu_to_le32 ( NVME_DSMGMT_AD ) ;
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
return 0 ;
}
2011-02-22 22:18:30 +03:00
static int nvme_submit_flush ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
int cmdid )
{
struct nvme_command * cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
cmnd - > common . opcode = nvme_cmd_flush ;
cmnd - > common . command_id = cmdid ;
cmnd - > common . nsid = cpu_to_le32 ( ns - > ns_id ) ;
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
return 0 ;
}
2013-03-05 05:40:58 +04:00
int nvme_submit_flush_data ( struct nvme_queue * nvmeq , struct nvme_ns * ns )
2011-02-22 22:18:30 +03:00
{
int cmdid = alloc_cmdid ( nvmeq , ( void * ) CMD_CTX_FLUSH ,
2011-12-20 22:53:01 +04:00
special_completion , NVME_IO_TIMEOUT ) ;
2011-02-22 22:18:30 +03:00
if ( unlikely ( cmdid < 0 ) )
return cmdid ;
return nvme_submit_flush ( nvmeq , ns , cmdid ) ;
}
2011-05-12 05:36:38 +04:00
/*
 * Turn @bio into an NVMe command on @nvmeq and ring the doorbell.
 *
 * Returns 0 when the bio was submitted, or when nvme_map_bio() split it
 * and queued the halves on sq_cong. Returns a negative errno (-ENOMEM,
 * -EBUSY, or an error from a helper) when it could not be submitted; the
 * caller in this file then queues the bio on sq_cong for a later retry.
 *
* Called with local interrupts disabled and the q_lock held . May not sleep .
*/
2011-01-20 20:50:14 +03:00
static int nvme_submit_bio_queue ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
struct bio * bio )
{
2011-01-26 18:02:29 +03:00
struct nvme_command * cmnd ;
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod ;
2011-01-20 20:50:14 +03:00
enum dma_data_direction dma_dir ;
2013-05-13 18:29:04 +04:00
int cmdid , length , result ;
2011-01-20 20:50:14 +03:00
u16 control ;
u32 dsmgmt ;
int psegs = bio_phys_segments ( ns - > queue , bio ) ;
2011-02-22 22:18:30 +03:00
/* A flush request that also carries data : send a separate flush first . */
if ( ( bio - > bi_rw & REQ_FLUSH ) & & psegs ) {
result = nvme_submit_flush_data ( nvmeq , ns ) ;
if ( result )
return result ;
}
2013-05-13 18:29:04 +04:00
result = - ENOMEM ;
2011-12-20 22:34:52 +04:00
iod = nvme_alloc_iod ( psegs , bio - > bi_size , GFP_ATOMIC ) ;
if ( ! iod )
2011-02-14 23:55:33 +03:00
goto nomem ;
2011-12-20 22:34:52 +04:00
iod - > private = bio ;
2011-01-20 20:50:14 +03:00
2011-02-14 23:55:33 +03:00
result = - EBUSY ;
2011-12-20 22:53:01 +04:00
cmdid = alloc_cmdid ( nvmeq , iod , bio_completion , NVME_IO_TIMEOUT ) ;
2011-01-20 20:50:14 +03:00
if ( unlikely ( cmdid < 0 ) )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-01-20 20:50:14 +03:00
2012-11-10 03:33:05 +04:00
/* Discards are built and submitted by their own helper . */
if ( bio - > bi_rw & REQ_DISCARD ) {
result = nvme_submit_discard ( nvmeq , ns , bio , iod , cmdid ) ;
if ( result )
goto free_cmdid ;
return result ;
}
2011-02-22 22:18:30 +03:00
/* A flush without data : the flush command itself completes the bio . */
if ( ( bio - > bi_rw & REQ_FLUSH ) & & ! psegs )
return nvme_submit_flush ( nvmeq , ns , cmdid ) ;
2011-01-20 20:50:14 +03:00
/* Translate bio flags into the command ' s control and dsmgmt fields . */
control = 0 ;
if ( bio - > bi_rw & REQ_FUA )
control | = NVME_RW_FUA ;
if ( bio - > bi_rw & ( REQ_FAILFAST_DEV | REQ_RAHEAD ) )
control | = NVME_RW_LR ;
dsmgmt = 0 ;
if ( bio - > bi_rw & REQ_RAHEAD )
dsmgmt | = NVME_RW_DSM_FREQ_PREFETCH ;
2011-01-26 18:02:29 +03:00
/* The command is built in place in the next submission slot . */
cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
2011-01-20 20:50:14 +03:00
2011-01-26 18:08:25 +03:00
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
2011-01-20 20:50:14 +03:00
if ( bio_data_dir ( bio ) ) {
2011-01-26 18:02:29 +03:00
cmnd - > rw . opcode = nvme_cmd_write ;
2011-01-20 20:50:14 +03:00
dma_dir = DMA_TO_DEVICE ;
} else {
2011-01-26 18:02:29 +03:00
cmnd - > rw . opcode = nvme_cmd_read ;
2011-01-20 20:50:14 +03:00
dma_dir = DMA_FROM_DEVICE ;
}
2013-04-09 21:59:32 +04:00
/*
 * A result of 0 means the bio was split and its halves were queued on
 * sq_cong ; the command ID and iod are released and 0 is returned .
 */
result = nvme_map_bio ( nvmeq , iod , bio , dma_dir , psegs ) ;
if ( result < = 0 )
2012-08-03 00:05:59 +04:00
goto free_cmdid ;
2011-02-23 23:20:00 +03:00
length = result ;
2011-01-20 20:50:14 +03:00
2011-01-26 18:02:29 +03:00
cmnd - > rw . command_id = cmdid ;
cmnd - > rw . nsid = cpu_to_le32 ( ns - > ns_id ) ;
2011-12-20 22:34:52 +04:00
/* nvme_setup_prps ( ) may describe fewer bytes than were mapped . */
length = nvme_setup_prps ( nvmeq - > dev , & cmnd - > common , iod , length ,
GFP_ATOMIC ) ;
2013-03-28 05:28:22 +04:00
cmnd - > rw . slba = cpu_to_le64 ( nvme_block_nr ( ns , bio - > bi_sector ) ) ;
2011-02-23 23:20:00 +03:00
/* The length field holds the block count minus one . */
cmnd - > rw . length = cpu_to_le16 ( ( length > > ns - > lba_shift ) - 1 ) ;
2011-01-26 18:02:29 +03:00
cmnd - > rw . control = cpu_to_le16 ( control ) ;
cmnd - > rw . dsmgmt = cpu_to_le32 ( dsmgmt ) ;
2011-01-20 20:50:14 +03:00
2013-05-30 01:59:39 +04:00
nvme_start_io_acct ( bio ) ;
2011-01-20 20:50:14 +03:00
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
2011-02-10 20:01:09 +03:00
return 0 ;
2012-08-03 00:05:59 +04:00
/* Unwind in reverse order of acquisition . */
free_cmdid :
free_cmdid ( nvmeq , cmdid , NULL ) ;
2011-12-20 22:34:52 +04:00
free_iod :
nvme_free_iod ( nvmeq - > dev , iod ) ;
2011-02-14 23:55:33 +03:00
nomem :
return result ;
2011-01-20 20:50:14 +03:00
}
2013-06-24 19:47:34 +04:00
static int nvme_process_cq ( struct nvme_queue * nvmeq )
2011-01-20 20:50:14 +03:00
{
2011-01-20 21:24:06 +03:00
u16 head , phase ;
2011-01-20 20:50:14 +03:00
head = nvmeq - > cq_head ;
2011-01-20 21:24:06 +03:00
phase = nvmeq - > cq_phase ;
2011-01-20 20:50:14 +03:00
for ( ; ; ) {
2011-10-15 15:33:46 +04:00
void * ctx ;
nvme_completion_fn fn ;
2011-01-20 20:50:14 +03:00
struct nvme_completion cqe = nvmeq - > cqes [ head ] ;
2011-01-20 21:24:06 +03:00
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = phase )
2011-01-20 20:50:14 +03:00
break ;
nvmeq - > sq_head = le16_to_cpu ( cqe . sq_head ) ;
if ( + + head = = nvmeq - > q_depth ) {
head = 0 ;
2011-01-20 21:24:06 +03:00
phase = ! phase ;
2011-01-20 20:50:14 +03:00
}
2011-10-15 15:33:46 +04:00
ctx = free_cmdid ( nvmeq , cqe . command_id , & fn ) ;
2011-12-20 20:54:53 +04:00
fn ( nvmeq - > dev , ctx , & cqe ) ;
2011-01-20 20:50:14 +03:00
}
/* If the controller ignores the cq head doorbell and continuously
* writes to the queue , it is theoretically possible to wrap around
* the queue twice and mistakenly return IRQ_NONE . Linux only
* requires that 0.1 % of your interrupts are handled , so this isn ' t
* a big problem .
*/
2011-01-20 21:24:06 +03:00
if ( head = = nvmeq - > cq_head & & phase = = nvmeq - > cq_phase )
2013-06-24 19:47:34 +04:00
return 0 ;
2011-01-20 20:50:14 +03:00
2013-09-10 07:25:37 +04:00
writel ( head , nvmeq - > q_db + nvmeq - > dev - > db_stride ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > cq_head = head ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = phase ;
2011-01-20 20:50:14 +03:00
2013-06-24 19:47:34 +04:00
nvmeq - > cqe_seen = 1 ;
return 1 ;
2011-01-20 20:50:14 +03:00
}
2013-06-24 20:03:57 +04:00
static void nvme_make_request ( struct request_queue * q , struct bio * bio )
{
struct nvme_ns * ns = q - > queuedata ;
struct nvme_queue * nvmeq = get_nvmeq ( ns - > dev ) ;
int result = - EBUSY ;
2013-07-16 01:02:23 +04:00
if ( ! nvmeq ) {
put_nvmeq ( NULL ) ;
bio_endio ( bio , - EIO ) ;
return ;
}
2013-06-24 20:03:57 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
if ( ! nvmeq - > q_suspended & & bio_list_empty ( & nvmeq - > sq_cong ) )
2013-06-24 20:03:57 +04:00
result = nvme_submit_bio_queue ( nvmeq , ns , bio ) ;
if ( unlikely ( result ) ) {
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
add_wait_queue ( & nvmeq - > sq_full , & nvmeq - > sq_cong_wait ) ;
bio_list_add ( & nvmeq - > sq_cong , bio ) ;
}
nvme_process_cq ( nvmeq ) ;
spin_unlock_irq ( & nvmeq - > q_lock ) ;
put_nvmeq ( nvmeq ) ;
}
2011-01-20 20:50:14 +03:00
static irqreturn_t nvme_irq ( int irq , void * data )
2011-02-06 15:28:06 +03:00
{
irqreturn_t result ;
struct nvme_queue * nvmeq = data ;
spin_lock ( & nvmeq - > q_lock ) ;
2013-06-24 19:47:34 +04:00
nvme_process_cq ( nvmeq ) ;
result = nvmeq - > cqe_seen ? IRQ_HANDLED : IRQ_NONE ;
nvmeq - > cqe_seen = 0 ;
2011-02-06 15:28:06 +03:00
spin_unlock ( & nvmeq - > q_lock ) ;
return result ;
}
static irqreturn_t nvme_irq_check ( int irq , void * data )
{
struct nvme_queue * nvmeq = data ;
struct nvme_completion cqe = nvmeq - > cqes [ nvmeq - > cq_head ] ;
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = nvmeq - > cq_phase )
return IRQ_NONE ;
return IRQ_WAKE_THREAD ;
}
2011-02-05 00:03:56 +03:00
static void nvme_abort_command ( struct nvme_queue * nvmeq , int cmdid )
{
spin_lock_irq ( & nvmeq - > q_lock ) ;
2011-10-15 15:33:46 +04:00
cancel_cmdid ( nvmeq , cmdid , NULL ) ;
2011-02-05 00:03:56 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
2011-10-15 15:33:46 +04:00
/*
 * Per-command context shared between nvme_submit_sync_cmd() and
 * sync_completion().
 */
struct sync_cmd_info {
/* task that submitted the command; woken by sync_completion() */
struct task_struct * task ;
/* result field copied from the completion entry */
u32 result ;
/* completion status shifted right by one, or -EINTR until completed */
int status ;
} ;
2011-12-20 20:54:53 +04:00
static void sync_completion ( struct nvme_dev * dev , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
struct sync_cmd_info * cmdinfo = ctx ;
cmdinfo - > result = le32_to_cpup ( & cqe - > result ) ;
cmdinfo - > status = le16_to_cpup ( & cqe - > status ) > > 1 ;
wake_up_process ( cmdinfo - > task ) ;
}
2011-01-20 20:50:14 +03:00
/*
 * Submit @cmd on @nvmeq and sleep (TASK_KILLABLE) for up to @timeout
 * jiffies waiting for sync_completion() to run.
 *
 * Returns 0 on success.  If the result is negative, it's a Linux error code;
 * if the result is positive, it's an NVM Express status code.  When @result
 * is non-NULL it receives the result field of the completion.
 */
2013-03-05 05:40:58 +04:00
int nvme_submit_sync_cmd ( struct nvme_queue * nvmeq , struct nvme_command * cmd ,
u32 * result , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
int cmdid ;
struct sync_cmd_info cmdinfo ;
cmdinfo . task = current ;
/* Sentinel: sync_completion() overwrites this when the command completes. */
cmdinfo . status = - EINTR ;
2011-10-15 15:33:46 +04:00
cmdid = alloc_cmdid_killable ( nvmeq , & cmdinfo , sync_completion ,
2011-02-07 02:30:16 +03:00
timeout ) ;
2011-01-20 20:50:14 +03:00
if ( cmdid < 0 )
return cmdid ;
cmd - > common . command_id = cmdid ;
2011-02-05 00:03:56 +03:00
/*
 * The task state is set before the command is submitted; presumably so a
 * completion that fires immediately cannot be missed by the sleep below.
 */
set_current_state ( TASK_KILLABLE ) ;
nvme_submit_cmd ( nvmeq , cmd ) ;
2013-04-20 00:11:06 +04:00
schedule_timeout ( timeout ) ;
2011-01-20 20:50:14 +03:00
2011-02-05 00:03:56 +03:00
/* Still the sentinel: no completion was delivered, so cancel the command. */
if ( cmdinfo . status = = - EINTR ) {
nvme_abort_command ( nvmeq , cmdid ) ;
return - EINTR ;
}
2011-01-20 20:50:14 +03:00
if ( result )
* result = cmdinfo . result ;
return cmdinfo . status ;
}
2013-12-11 00:10:40 +04:00
static int nvme_submit_async_cmd ( struct nvme_queue * nvmeq ,
struct nvme_command * cmd ,
struct async_cmd_info * cmdinfo , unsigned timeout )
{
int cmdid ;
cmdid = alloc_cmdid_killable ( nvmeq , cmdinfo , async_completion , timeout ) ;
if ( cmdid < 0 )
return cmdid ;
cmdinfo - > status = - EINTR ;
cmd - > common . command_id = cmdid ;
nvme_submit_cmd ( nvmeq , cmd ) ;
return 0 ;
}
2013-03-05 05:40:58 +04:00
int nvme_submit_admin_cmd ( struct nvme_dev * dev , struct nvme_command * cmd ,
2011-01-20 20:50:14 +03:00
u32 * result )
{
2011-02-07 02:30:16 +03:00
return nvme_submit_sync_cmd ( dev - > queues [ 0 ] , cmd , result , ADMIN_TIMEOUT ) ;
2011-01-20 20:50:14 +03:00
}
2013-12-11 00:10:40 +04:00
static int nvme_submit_admin_cmd_async ( struct nvme_dev * dev ,
struct nvme_command * cmd , struct async_cmd_info * cmdinfo )
{
return nvme_submit_async_cmd ( dev - > queues [ 0 ] , cmd , cmdinfo ,
ADMIN_TIMEOUT ) ;
}
2011-01-20 20:50:14 +03:00
/*
 * Issue a delete-queue admin command with the given @opcode for queue @id.
 * Any non-zero status from the admin command is reported as -EIO.
 */
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL) ? -EIO : 0;
}
/*
 * Ask the controller to create completion queue @qid backed by
 * @nvmeq->cq_dma_addr, physically contiguous and with interrupts enabled
 * on @nvmeq->cq_vector.  Any non-zero admin status is reported as -EIO.
 */
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
	c.create_cq.cqid = cpu_to_le16(qid);
	/* The size field is written as depth - 1. */
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	if (nvme_submit_admin_cmd(dev, &c, NULL))
		return -EIO;
	return 0;
}
/*
 * Ask the controller to create submission queue @qid backed by
 * @nvmeq->sq_dma_addr, physically contiguous, medium priority, and paired
 * with the completion queue of the same id.  Any non-zero admin status is
 * reported as -EIO.
 */
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
	c.create_sq.sqid = cpu_to_le16(qid);
	/* The size field is written as depth - 1. */
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	if (nvme_submit_admin_cmd(dev, &c, NULL))
		return -EIO;
	return 0;
}
/* Delete completion queue @cqid; returns 0 or -EIO. */
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	const u8 opcode = nvme_admin_delete_cq;

	return adapter_delete_queue(dev, opcode, cqid);
}
/* Delete submission queue @sqid; returns 0 or -EIO. */
static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	const u8 opcode = nvme_admin_delete_sq;

	return adapter_delete_queue(dev, opcode, sqid);
}
2013-03-05 05:40:58 +04:00
int nvme_identify ( struct nvme_dev * dev , unsigned nsid , unsigned cns ,
2011-09-20 01:08:14 +04:00
dma_addr_t dma_addr )
{
struct nvme_command c ;
memset ( & c , 0 , sizeof ( c ) ) ;
c . identify . opcode = nvme_admin_identify ;
c . identify . nsid = cpu_to_le32 ( nsid ) ;
c . identify . prp1 = cpu_to_le64 ( dma_addr ) ;
c . identify . cns = cpu_to_le32 ( cns ) ;
return nvme_submit_admin_cmd ( dev , & c , NULL ) ;
}
2013-03-05 05:40:58 +04:00
int nvme_get_features ( struct nvme_dev * dev , unsigned fid , unsigned nsid ,
2012-09-21 20:52:13 +04:00
dma_addr_t dma_addr , u32 * result )
2011-09-20 01:08:14 +04:00
{
struct nvme_command c ;
memset ( & c , 0 , sizeof ( c ) ) ;
c . features . opcode = nvme_admin_get_features ;
2012-07-26 02:06:38 +04:00
c . features . nsid = cpu_to_le32 ( nsid ) ;
2011-09-20 01:08:14 +04:00
c . features . prp1 = cpu_to_le64 ( dma_addr ) ;
c . features . fid = cpu_to_le32 ( fid ) ;
2012-09-21 20:52:13 +04:00
return nvme_submit_admin_cmd ( dev , & c , result ) ;
2012-01-11 18:29:56 +04:00
}
2013-03-05 05:40:58 +04:00
/*
 * Issue a Set Features admin command for feature @fid with @dword11 as the
 * feature-specific dword; @dma_addr is passed as PRP1 and the completion
 * result, if any, is stored through @result.  Returns the value of
 * nvme_submit_admin_cmd().
 */
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
					dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	return nvme_submit_admin_cmd(dev, &c, result);
}
2013-12-11 00:10:38 +04:00
/**
* nvme_abort_cmd - Attempt aborting a command
* @ cmdid : Command id of a timed out IO
* @ queue : The queue with timed out IO
*
* Schedule controller reset if the command was already aborted once before and
* still hasn ' t been returned to the driver , or if this is the admin queue .
*/
static void nvme_abort_cmd ( int cmdid , struct nvme_queue * nvmeq )
{
int a_cmdid ;
struct nvme_command cmd ;
struct nvme_dev * dev = nvmeq - > dev ;
struct nvme_cmd_info * info = nvme_cmd_info ( nvmeq ) ;
if ( ! nvmeq - > qid | | info [ cmdid ] . aborted ) {
if ( work_busy ( & dev - > reset_work ) )
return ;
list_del_init ( & dev - > node ) ;
dev_warn ( & dev - > pci_dev - > dev ,
" I/O %d QID %d timeout, reset controller \n " , cmdid ,
nvmeq - > qid ) ;
INIT_WORK ( & dev - > reset_work , nvme_reset_failed_dev ) ;
queue_work ( nvme_workq , & dev - > reset_work ) ;
return ;
}
if ( ! dev - > abort_limit )
return ;
a_cmdid = alloc_cmdid ( dev - > queues [ 0 ] , CMD_CTX_ABORT , special_completion ,
ADMIN_TIMEOUT ) ;
if ( a_cmdid < 0 )
return ;
memset ( & cmd , 0 , sizeof ( cmd ) ) ;
cmd . abort . opcode = nvme_admin_abort_cmd ;
cmd . abort . cid = cmdid ;
cmd . abort . sqid = cpu_to_le16 ( nvmeq - > qid ) ;
cmd . abort . command_id = a_cmdid ;
- - dev - > abort_limit ;
info [ cmdid ] . aborted = 1 ;
info [ cmdid ] . timeout = jiffies + ADMIN_TIMEOUT ;
dev_warn ( nvmeq - > q_dmadev , " Aborting I/O %d QID %d \n " , cmdid ,
nvmeq - > qid ) ;
nvme_submit_cmd ( dev - > queues [ 0 ] , & cmd ) ;
}
2012-08-07 23:56:23 +04:00
/**
 * nvme_cancel_ios - Cancel outstanding I/Os
 * @nvmeq: The queue to cancel I/Os on
 * @timeout: True to only cancel I/Os which have timed out
 *
 * Walks every allocated command id.  Already-cancelled commands are skipped.
 * When @timeout is set and the device is initialized, a timed out command is
 * handed to nvme_abort_cmd() instead of being completed here; otherwise its
 * completion callback is invoked with an NVME_SC_ABORT_REQ status.
 */
static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
{
	/* Synthetic completion entry handed to every cancelled command. */
	static struct nvme_completion cqe = {
		.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
	};
	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
	unsigned long now = jiffies;
	int depth = nvmeq->q_depth - 1;
	int cmdid;

	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
		nvme_completion_fn fn;
		void *ctx;

		if (timeout) {
			if (!time_after(now, info[cmdid].timeout))
				continue;
		}
		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
			continue;
		if (timeout && nvmeq->dev->initialized) {
			nvme_abort_cmd(cmdid, nvmeq);
			continue;
		}

		dev_warn(nvmeq->q_dmadev, " Cancelling I/O %d QID %d \n ",
			cmdid, nvmeq->qid);
		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
		fn(nvmeq->dev, ctx, &cqe);
	}
}
2013-07-16 01:02:20 +04:00
static void nvme_free_queue ( struct nvme_queue * nvmeq )
2012-08-03 21:55:56 +04:00
{
2013-07-16 01:02:20 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
while ( bio_list_peek ( & nvmeq - > sq_cong ) ) {
struct bio * bio = bio_list_pop ( & nvmeq - > sq_cong ) ;
bio_endio ( bio , - EIO ) ;
}
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2012-08-03 21:55:56 +04:00
dma_free_coherent ( nvmeq - > q_dmadev , CQ_SIZE ( nvmeq - > q_depth ) ,
( void * ) nvmeq - > cqes , nvmeq - > cq_dma_addr ) ;
dma_free_coherent ( nvmeq - > q_dmadev , SQ_SIZE ( nvmeq - > q_depth ) ,
nvmeq - > sq_cmds , nvmeq - > sq_dma_addr ) ;
kfree ( nvmeq ) ;
}
2013-12-16 22:50:00 +04:00
static void nvme_free_queues ( struct nvme_dev * dev , int lowest )
2013-07-16 01:02:20 +04:00
{
int i ;
2013-12-16 22:50:00 +04:00
for ( i = dev - > queue_count - 1 ; i > = lowest ; i - - ) {
2013-07-16 01:02:20 +04:00
nvme_free_queue ( dev - > queues [ i ] ) ;
dev - > queue_count - - ;
dev - > queues [ i ] = NULL ;
}
}
2013-12-11 00:10:40 +04:00
/**
* nvme_suspend_queue - put queue into suspended state
* @ nvmeq - queue to suspend
*
* Returns 1 if already suspended , 0 otherwise .
*/
static int nvme_suspend_queue ( struct nvme_queue * nvmeq )
2011-01-20 20:50:14 +03:00
{
2013-12-11 00:10:40 +04:00
int vector = nvmeq - > dev - > entry [ nvmeq - > cq_vector ] . vector ;
2011-01-20 20:50:14 +03:00
2012-08-07 23:56:23 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
if ( nvmeq - > q_suspended ) {
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2013-12-11 00:10:40 +04:00
return 1 ;
2012-08-21 00:57:49 +04:00
}
2013-07-16 01:02:20 +04:00
nvmeq - > q_suspended = 1 ;
2012-08-07 23:56:23 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-03-27 16:52:06 +04:00
irq_set_affinity_hint ( vector , NULL ) ;
free_irq ( vector , nvmeq ) ;
2011-01-20 20:50:14 +03:00
2013-12-11 00:10:40 +04:00
return 0 ;
}
static void nvme_clear_queue ( struct nvme_queue * nvmeq )
{
spin_lock_irq ( & nvmeq - > q_lock ) ;
nvme_process_cq ( nvmeq ) ;
nvme_cancel_ios ( nvmeq , false ) ;
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
static void nvme_disable_queue ( struct nvme_dev * dev , int qid )
{
struct nvme_queue * nvmeq = dev - > queues [ qid ] ;
if ( ! nvmeq )
return ;
if ( nvme_suspend_queue ( nvmeq ) )
return ;
2013-12-11 00:10:39 +04:00
/* Don't tell the adapter to delete the admin queue.
* Don ' t tell a removed adapter to delete IO queues . */
if ( qid & & readl ( & dev - > bar - > csts ) ! = - 1 ) {
2011-01-20 20:50:14 +03:00
adapter_delete_sq ( dev , qid ) ;
adapter_delete_cq ( dev , qid ) ;
}
2013-12-11 00:10:40 +04:00
nvme_clear_queue ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
}
static struct nvme_queue * nvme_alloc_queue ( struct nvme_dev * dev , int qid ,
int depth , int vector )
{
struct device * dmadev = & dev - > pci_dev - > dev ;
2013-07-16 01:02:20 +04:00
unsigned extra = nvme_queue_extra ( depth ) ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * nvmeq = kzalloc ( sizeof ( * nvmeq ) + extra , GFP_KERNEL ) ;
if ( ! nvmeq )
return NULL ;
nvmeq - > cqes = dma_alloc_coherent ( dmadev , CQ_SIZE ( depth ) ,
& nvmeq - > cq_dma_addr , GFP_KERNEL ) ;
if ( ! nvmeq - > cqes )
goto free_nvmeq ;
memset ( ( void * ) nvmeq - > cqes , 0 , CQ_SIZE ( depth ) ) ;
nvmeq - > sq_cmds = dma_alloc_coherent ( dmadev , SQ_SIZE ( depth ) ,
& nvmeq - > sq_dma_addr , GFP_KERNEL ) ;
if ( ! nvmeq - > sq_cmds )
goto free_cqdma ;
nvmeq - > q_dmadev = dmadev ;
2011-02-10 17:56:01 +03:00
nvmeq - > dev = dev ;
2011-01-20 20:50:14 +03:00
spin_lock_init ( & nvmeq - > q_lock ) ;
nvmeq - > cq_head = 0 ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = 1 ;
2011-01-20 20:50:14 +03:00
init_waitqueue_head ( & nvmeq - > sq_full ) ;
2011-03-03 02:37:18 +03:00
init_waitqueue_entry ( & nvmeq - > sq_cong_wait , nvme_thread ) ;
2011-01-20 20:50:14 +03:00
bio_list_init ( & nvmeq - > sq_cong ) ;
2013-09-10 07:25:37 +04:00
nvmeq - > q_db = & dev - > dbs [ qid * 2 * dev - > db_stride ] ;
2011-01-20 20:50:14 +03:00
nvmeq - > q_depth = depth ;
nvmeq - > cq_vector = vector ;
2013-12-11 00:10:38 +04:00
nvmeq - > qid = qid ;
2013-07-16 01:02:20 +04:00
nvmeq - > q_suspended = 1 ;
dev - > queue_count + + ;
2011-01-20 20:50:14 +03:00
return nvmeq ;
free_cqdma :
2013-05-01 23:07:47 +04:00
dma_free_coherent ( dmadev , CQ_SIZE ( depth ) , ( void * ) nvmeq - > cqes ,
2011-01-20 20:50:14 +03:00
nvmeq - > cq_dma_addr ) ;
free_nvmeq :
kfree ( nvmeq ) ;
return NULL ;
}
2011-01-20 17:10:15 +03:00
static int queue_request_irq ( struct nvme_dev * dev , struct nvme_queue * nvmeq ,
const char * name )
{
2011-02-06 15:28:06 +03:00
if ( use_threaded_interrupts )
return request_threaded_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector ,
2013-10-12 08:23:29 +04:00
nvme_irq_check , nvme_irq , IRQF_SHARED ,
2011-02-06 15:28:06 +03:00
name , nvmeq ) ;
2011-01-20 17:10:15 +03:00
return request_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector , nvme_irq ,
2013-10-12 08:23:29 +04:00
IRQF_SHARED , name , nvmeq ) ;
2011-01-20 17:10:15 +03:00
}
2013-07-16 01:02:20 +04:00
/*
 * Reset the software state of @nvmeq for (re)use as queue @qid: ring
 * positions, expected phase, doorbell pointer, command-id bookkeeping and
 * the completion ring contents.  Finally marks the queue as not suspended.
 * Both callers in this file hold q_lock around the call.
 */
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	unsigned cmd_bytes = nvme_queue_extra(nvmeq->q_depth);

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];

	memset(nvmeq->cmdid_data, 0, cmd_bytes);
	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));

	nvme_cancel_ios(nvmeq, false);
	nvmeq->q_suspended = 0;
}
static int nvme_create_queue ( struct nvme_queue * nvmeq , int qid )
{
struct nvme_dev * dev = nvmeq - > dev ;
int result ;
2011-02-01 16:39:04 +03:00
2011-01-20 20:50:14 +03:00
result = adapter_alloc_cq ( dev , qid , nvmeq ) ;
if ( result < 0 )
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
result = adapter_alloc_sq ( dev , qid , nvmeq ) ;
if ( result < 0 )
goto release_cq ;
2011-01-20 17:10:15 +03:00
result = queue_request_irq ( dev , nvmeq , " nvme " ) ;
2011-01-20 20:50:14 +03:00
if ( result < 0 )
goto release_sq ;
2013-10-15 23:01:10 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
nvme_init_queue ( nvmeq , qid ) ;
2013-10-15 23:01:10 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
release_sq :
adapter_delete_sq ( dev , qid ) ;
release_cq :
adapter_delete_cq ( dev , qid ) ;
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
}
2013-05-04 14:43:16 +04:00
/*
 * Poll CSTS every 100ms until the RDY bit is set (@enabled true) or clear
 * (@enabled false).  The deadline is derived from the timeout field of
 * @cap, in units of HZ/2.
 *
 * Returns 0 when the bit reaches the wanted state, -EINTR on a pending
 * fatal signal, or -ENODEV when the deadline passes.
 */
static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
{
	u32 wanted = enabled ? NVME_CSTS_RDY : 0;
	unsigned long deadline;

	deadline = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;

	for (;;) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == wanted)
			return 0;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, deadline)) {
			dev_err(&dev->pci_dev->dev,
				" Device not ready; aborting initialisation \n ");
			return -ENODEV;
		}
	}
}
/*
 * If the device has been passed off to us in an enabled state, just clear
 * the enabled bit.  The spec says we should set the 'shutdown notification
 * bits', but doing so may cause the device to complete commands to the
 * admin queue ... and we don't know what memory that might be pointing at!
 *
 * Returns the result of waiting for CSTS.RDY to clear.
 */
static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
{
	u32 cc = readl(&dev->bar->cc);

	if (cc & NVME_CC_ENABLE) {
		cc &= ~NVME_CC_ENABLE;
		writel(cc, &dev->bar->cc);
	}

	return nvme_wait_ready(dev, cap, false);
}
/*
 * Wait for the controller to report ready.  This function does not write
 * the CC register itself; it only polls for CSTS.RDY to become set.
 */
static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
{
	const bool want_ready = true;

	return nvme_wait_ready(dev, cap, want_ready);
}
2013-07-16 01:02:22 +04:00
static int nvme_shutdown_ctrl ( struct nvme_dev * dev )
{
unsigned long timeout ;
u32 cc ;
cc = ( readl ( & dev - > bar - > cc ) & ~ NVME_CC_SHN_MASK ) | NVME_CC_SHN_NORMAL ;
writel ( cc , & dev - > bar - > cc ) ;
timeout = 2 * HZ + jiffies ;
while ( ( readl ( & dev - > bar - > csts ) & NVME_CSTS_SHST_MASK ) ! =
NVME_CSTS_SHST_CMPLT ) {
msleep ( 100 ) ;
if ( fatal_signal_pending ( current ) )
return - EINTR ;
if ( time_after ( jiffies , timeout ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Device shutdown incomplete; abort shutdown \n " ) ;
return - ENODEV ;
}
}
return 0 ;
}
2012-12-22 03:13:49 +04:00
static int nvme_configure_admin_queue ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-05-04 14:43:16 +04:00
int result ;
2011-01-20 20:50:14 +03:00
u32 aqa ;
2013-05-04 14:43:16 +04:00
u64 cap = readq ( & dev - > bar - > cap ) ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * nvmeq ;
2013-05-04 14:43:16 +04:00
result = nvme_disable_ctrl ( dev , cap ) ;
if ( result < 0 )
return result ;
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:23 +04:00
nvmeq = dev - > queues [ 0 ] ;
if ( ! nvmeq ) {
nvmeq = nvme_alloc_queue ( dev , 0 , 64 , 0 ) ;
if ( ! nvmeq )
return - ENOMEM ;
dev - > queues [ 0 ] = nvmeq ;
}
2011-01-20 20:50:14 +03:00
aqa = nvmeq - > q_depth - 1 ;
aqa | = aqa < < 16 ;
dev - > ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM ;
dev - > ctrl_config | = ( PAGE_SHIFT - 12 ) < < NVME_CC_MPS_SHIFT ;
dev - > ctrl_config | = NVME_CC_ARB_RR | NVME_CC_SHN_NONE ;
2011-03-22 22:55:45 +03:00
dev - > ctrl_config | = NVME_CC_IOSQES | NVME_CC_IOCQES ;
2011-01-20 20:50:14 +03:00
writel ( aqa , & dev - > bar - > aqa ) ;
writeq ( nvmeq - > sq_dma_addr , & dev - > bar - > asq ) ;
writeq ( nvmeq - > cq_dma_addr , & dev - > bar - > acq ) ;
writel ( dev - > ctrl_config , & dev - > bar - > cc ) ;
2013-05-04 14:43:16 +04:00
result = nvme_enable_ctrl ( dev , cap ) ;
2013-05-01 23:07:51 +04:00
if ( result )
2013-07-16 01:02:23 +04:00
return result ;
2012-08-03 21:55:56 +04:00
2011-01-20 17:10:15 +03:00
result = queue_request_irq ( dev , nvmeq , " nvme admin " ) ;
2013-05-01 23:07:51 +04:00
if ( result )
2013-07-16 01:02:23 +04:00
return result ;
2013-05-01 23:07:51 +04:00
2013-10-15 23:01:10 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
nvme_init_queue ( nvmeq , 0 ) ;
2013-10-15 23:01:10 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-01-20 20:50:14 +03:00
return result ;
}
2013-03-05 05:40:58 +04:00
struct nvme_iod * nvme_map_user_pages ( struct nvme_dev * dev , int write ,
2011-12-20 22:34:52 +04:00
unsigned long addr , unsigned length )
2011-01-20 20:50:14 +03:00
{
2011-01-24 15:52:07 +03:00
int i , err , count , nents , offset ;
2011-01-27 01:05:50 +03:00
struct scatterlist * sg ;
struct page * * pages ;
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod ;
2011-01-24 15:52:07 +03:00
if ( addr & 3 )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2013-05-13 18:59:50 +04:00
if ( ! length | | length > INT_MAX - PAGE_SIZE )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2011-01-27 01:05:50 +03:00
2011-01-24 15:52:07 +03:00
offset = offset_in_page ( addr ) ;
2011-01-27 01:05:50 +03:00
count = DIV_ROUND_UP ( offset + length , PAGE_SIZE ) ;
pages = kcalloc ( count , sizeof ( * pages ) , GFP_KERNEL ) ;
2012-01-20 16:55:30 +04:00
if ( ! pages )
return ERR_PTR ( - ENOMEM ) ;
2011-01-24 15:52:07 +03:00
err = get_user_pages_fast ( addr , count , 1 , pages ) ;
if ( err < count ) {
count = err ;
err = - EFAULT ;
goto put_pages ;
}
2011-01-27 01:05:50 +03:00
2011-12-20 22:34:52 +04:00
iod = nvme_alloc_iod ( count , length , GFP_KERNEL ) ;
sg = iod - > sg ;
2011-01-24 15:52:07 +03:00
sg_init_table ( sg , count ) ;
2011-09-14 01:01:39 +04:00
for ( i = 0 ; i < count ; i + + ) {
sg_set_page ( & sg [ i ] , pages [ i ] ,
2013-05-13 18:59:50 +04:00
min_t ( unsigned , length , PAGE_SIZE - offset ) ,
offset ) ;
2011-09-14 01:01:39 +04:00
length - = ( PAGE_SIZE - offset ) ;
offset = 0 ;
2011-01-27 01:05:50 +03:00
}
2012-01-07 00:49:25 +04:00
sg_mark_end ( & sg [ i - 1 ] ) ;
2012-01-07 00:52:56 +04:00
iod - > nents = count ;
2011-01-27 01:05:50 +03:00
err = - ENOMEM ;
nents = dma_map_sg ( & dev - > pci_dev - > dev , sg , count ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-24 15:52:07 +03:00
if ( ! nents )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-01-20 20:50:14 +03:00
2011-01-27 01:05:50 +03:00
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return iod ;
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
free_iod :
kfree ( iod ) ;
2011-01-27 01:05:50 +03:00
put_pages :
for ( i = 0 ; i < count ; i + + )
put_page ( pages [ i ] ) ;
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return ERR_PTR ( err ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2013-03-05 05:40:58 +04:00
void nvme_unmap_user_pages ( struct nvme_dev * dev , int write ,
2012-01-07 00:52:56 +04:00
struct nvme_iod * iod )
2011-01-27 01:05:50 +03:00
{
2012-01-07 00:52:56 +04:00
int i ;
2011-01-20 20:50:14 +03:00
2012-01-07 00:52:56 +04:00
dma_unmap_sg ( & dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-27 01:05:50 +03:00
2012-01-07 00:52:56 +04:00
for ( i = 0 ; i < iod - > nents ; i + + )
put_page ( sg_page ( & iod - > sg [ i ] ) ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2011-02-02 00:13:29 +03:00
/*
 * NVME_IOCTL_SUBMIT_IO: execute a user-described read, write or compare.
 *
 * The user data buffer is pinned and mapped directly.  Metadata, when the
 * namespace has any (ns->ms non-zero), goes through a coherent bounce
 * buffer: it is copied in before submission for opcodes with bit 0 set and
 * copied back out after a successful command otherwise.
 *
 * Returns a negative errno, or the value of nvme_submit_sync_cmd().
 */
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_dev *dev = ns->dev;
	struct nvme_queue *nvmeq;
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	int status, i;
	struct nvme_iod *iod, *meta_iod = NULL;
	dma_addr_t meta_dma_addr;
	void *meta, *uninitialized_var(meta_mem);

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;

	length = (io.nblocks + 1) << ns->lba_shift;
	meta_len = (io.nblocks + 1) * ns->ms;

	/* Metadata, when present, needs a non-NULL, 4-byte aligned pointer. */
	if (meta_len && ((io.metadata & 3) || !io.metadata))
		return -EINVAL;

	if (io.opcode != nvme_cmd_write && io.opcode != nvme_cmd_read &&
					io.opcode != nvme_cmd_compare)
		return -EINVAL;

	iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
	if (IS_ERR(iod))
		return PTR_ERR(iod);

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.apptag = cpu_to_le16(io.apptag);
	c.rw.appmask = cpu_to_le16(io.appmask);

	if (meta_len) {
		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
								meta_len);
		if (IS_ERR(meta_iod)) {
			status = PTR_ERR(meta_iod);
			meta_iod = NULL;
			goto unmap;
		}

		meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
						&meta_dma_addr, GFP_KERNEL);
		if (!meta_mem) {
			status = -ENOMEM;
			goto unmap;
		}

		if (io.opcode & 1) {
			/* Gather the user metadata into the bounce buffer. */
			int meta_offset = 0;

			for (i = 0; i < meta_iod->nents; i++) {
				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
						meta_iod->sg[i].offset;
				memcpy(meta_mem + meta_offset, meta,
						meta_iod->sg[i].length);
				kunmap_atomic(meta);
				meta_offset += meta_iod->sg[i].length;
			}
		}

		c.rw.metadata = cpu_to_le64(meta_dma_addr);
	}

	length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);

	nvmeq = get_nvmeq(dev);
	/*
	 * Since nvme_submit_sync_cmd sleeps, we can't keep preemption
	 * disabled.  We may be preempted at any point, and be rescheduled
	 * to a different CPU.  That will cause cacheline bouncing, but no
	 * additional races since q_lock already protects against other CPUs.
	 */
	put_nvmeq(nvmeq);

	if (length != (io.nblocks + 1) << ns->lba_shift)
		status = -ENOMEM;
	else if (!nvmeq || nvmeq->q_suspended)
		status = -EBUSY;
	else
		status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);

	if (meta_len) {
		if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
			/* Scatter the bounce buffer back to the user pages. */
			int meta_offset = 0;

			for (i = 0; i < meta_iod->nents; i++) {
				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
						meta_iod->sg[i].offset;
				memcpy(meta, meta_mem + meta_offset,
						meta_iod->sg[i].length);
				kunmap_atomic(meta);
				meta_offset += meta_iod->sg[i].length;
			}
		}

		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
							meta_dma_addr);
	}

 unmap:
	nvme_unmap_user_pages(dev, io.opcode & 1, iod);
	nvme_free_iod(dev, iod);

	if (meta_iod) {
		nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
		nvme_free_iod(dev, meta_iod);
	}

	return status;
}
2012-07-26 02:07:55 +04:00
static int nvme_user_admin_cmd ( struct nvme_dev * dev ,
2011-05-20 21:03:42 +04:00
struct nvme_admin_cmd __user * ucmd )
2011-02-03 18:58:26 +03:00
{
2011-05-20 21:03:42 +04:00
struct nvme_admin_cmd cmd ;
2011-02-03 18:58:26 +03:00
struct nvme_command c ;
2011-12-20 22:34:52 +04:00
int status , length ;
2012-07-27 21:53:28 +04:00
struct nvme_iod * uninitialized_var ( iod ) ;
2013-05-10 00:01:38 +04:00
unsigned timeout ;
2011-02-03 18:58:26 +03:00
2011-05-20 21:03:42 +04:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EACCES ;
if ( copy_from_user ( & cmd , ucmd , sizeof ( cmd ) ) )
2011-02-03 18:58:26 +03:00
return - EFAULT ;
memset ( & c , 0 , sizeof ( c ) ) ;
2011-05-20 21:03:42 +04:00
c . common . opcode = cmd . opcode ;
c . common . flags = cmd . flags ;
c . common . nsid = cpu_to_le32 ( cmd . nsid ) ;
c . common . cdw2 [ 0 ] = cpu_to_le32 ( cmd . cdw2 ) ;
c . common . cdw2 [ 1 ] = cpu_to_le32 ( cmd . cdw3 ) ;
c . common . cdw10 [ 0 ] = cpu_to_le32 ( cmd . cdw10 ) ;
c . common . cdw10 [ 1 ] = cpu_to_le32 ( cmd . cdw11 ) ;
c . common . cdw10 [ 2 ] = cpu_to_le32 ( cmd . cdw12 ) ;
c . common . cdw10 [ 3 ] = cpu_to_le32 ( cmd . cdw13 ) ;
c . common . cdw10 [ 4 ] = cpu_to_le32 ( cmd . cdw14 ) ;
c . common . cdw10 [ 5 ] = cpu_to_le32 ( cmd . cdw15 ) ;
length = cmd . data_len ;
if ( cmd . data_len ) {
2012-01-07 00:42:45 +04:00
iod = nvme_map_user_pages ( dev , cmd . opcode & 1 , cmd . addr ,
length ) ;
2011-12-20 22:34:52 +04:00
if ( IS_ERR ( iod ) )
return PTR_ERR ( iod ) ;
length = nvme_setup_prps ( dev , & c . common , iod , length ,
GFP_KERNEL ) ;
2011-05-20 21:03:42 +04:00
}
2013-05-10 00:01:38 +04:00
timeout = cmd . timeout_ms ? msecs_to_jiffies ( cmd . timeout_ms ) :
ADMIN_TIMEOUT ;
2011-05-20 21:03:42 +04:00
if ( length ! = cmd . data_len )
2011-05-12 21:51:41 +04:00
status = - ENOMEM ;
else
2013-05-10 00:01:38 +04:00
status = nvme_submit_sync_cmd ( dev - > queues [ 0 ] , & c , & cmd . result ,
timeout ) ;
2011-12-20 22:34:52 +04:00
2011-05-20 21:03:42 +04:00
if ( cmd . data_len ) {
2012-01-07 00:52:56 +04:00
nvme_unmap_user_pages ( dev , cmd . opcode & 1 , iod ) ;
2011-12-20 22:34:52 +04:00
nvme_free_iod ( dev , iod ) ;
2011-05-20 21:03:42 +04:00
}
2012-09-21 20:49:05 +04:00
2013-05-23 02:34:49 +04:00
if ( ( status > = 0 ) & & copy_to_user ( & ucmd - > result , & cmd . result ,
2012-09-21 20:49:05 +04:00
sizeof ( cmd . result ) ) )
status = - EFAULT ;
2011-02-03 18:58:26 +03:00
return status ;
}
2011-01-20 20:50:14 +03:00
/*
 * Block device ioctl dispatcher.  Handles the NVMe-specific ioctls and the
 * two SG ioctls forwarded to the nvme_sg_* helpers; everything else yields
 * -ENOTTY.
 */
static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
							unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ID:
		/* The namespace id is returned as the syscall result. */
		force_successful_syscall_return();
		return ns->ns_id;
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_admin_cmd(ns->dev, uarg);
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, uarg);
	case SG_GET_VERSION_NUM:
		return nvme_sg_get_version_num(uarg);
	case SG_IO:
		return nvme_sg_io(ns, uarg);
	default:
		return -ENOTTY;
	}
}
2013-10-23 23:07:34 +04:00
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: SG_IO is routed to nvme_sg_io32(); every other
 * command is passed unchanged to nvme_ioctl().
 */
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
					unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;

	if (cmd == SG_IO)
		return nvme_sg_io32(ns, arg);

	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif
2011-01-20 20:50:14 +03:00
/*
 * Block device operations for NVMe namespaces.  Only the ioctl entry points
 * are provided here; compat_ioctl is NULL when CONFIG_COMPAT is not set.
 */
static const struct block_device_operations nvme_fops = {
. owner = THIS_MODULE ,
. ioctl = nvme_ioctl ,
2013-10-23 23:07:34 +04:00
. compat_ioctl = nvme_compat_ioctl ,
2011-01-20 20:50:14 +03:00
} ;
2011-03-03 02:37:18 +03:00
static void nvme_resubmit_bios ( struct nvme_queue * nvmeq )
{
while ( bio_list_peek ( & nvmeq - > sq_cong ) ) {
struct bio * bio = bio_list_pop ( & nvmeq - > sq_cong ) ;
struct nvme_ns * ns = bio - > bi_bdev - > bd_disk - > private_data ;
2013-04-09 21:59:32 +04:00
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
remove_wait_queue ( & nvmeq - > sq_full ,
& nvmeq - > sq_cong_wait ) ;
2011-03-03 02:37:18 +03:00
if ( nvme_submit_bio_queue ( nvmeq , ns , bio ) ) {
2013-04-09 21:59:32 +04:00
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
add_wait_queue ( & nvmeq - > sq_full ,
& nvmeq - > sq_cong_wait ) ;
2011-03-03 02:37:18 +03:00
bio_list_add_head ( & nvmeq - > sq_cong , bio ) ;
break ;
}
}
}
/*
 * Housekeeping thread.  Until asked to stop, it walks dev_list (under
 * dev_list_lock) roughly once per second and, for each device:
 *  - schedules a controller reset if CSTS reports a fatal status on an
 *    initialized device, or
 *  - for every non-suspended queue: reaps completions, handles timed out
 *    commands and retries bios parked on the congestion list.
 */
static int nvme_kthread ( void * data )
{
2013-12-11 00:10:37 +04:00
struct nvme_dev * dev , * next ;
2011-03-03 02:37:18 +03:00
while ( ! kthread_should_stop ( ) ) {
2013-05-02 00:38:23 +04:00
/* State is set before the scan; the actual sleep is schedule_timeout() below. */
set_current_state ( TASK_INTERRUPTIBLE ) ;
2011-03-03 02:37:18 +03:00
spin_lock ( & dev_list_lock ) ;
2013-12-11 00:10:37 +04:00
/* _safe iteration: a failed device is unlinked from the list in the loop. */
list_for_each_entry_safe ( dev , next , & dev_list , node ) {
2011-03-03 02:37:18 +03:00
int i ;
2013-12-11 00:10:37 +04:00
if ( readl ( & dev - > bar - > csts ) & NVME_CSTS_CFS & &
dev - > initialized ) {
/* A reset is already queued or running for this device. */
if ( work_busy ( & dev - > reset_work ) )
continue ;
list_del_init ( & dev - > node ) ;
dev_warn ( & dev - > pci_dev - > dev ,
" Failed status, reset controller \n " ) ;
INIT_WORK ( & dev - > reset_work ,
nvme_reset_failed_dev ) ;
queue_work ( nvme_workq , & dev - > reset_work ) ;
continue ;
}
2011-03-03 02:37:18 +03:00
for ( i = 0 ; i < dev - > queue_count ; i + + ) {
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2011-02-16 00:28:20 +03:00
if ( ! nvmeq )
continue ;
2011-03-03 02:37:18 +03:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:20 +04:00
/* Suspended queues are left untouched. */
if ( nvmeq - > q_suspended )
goto unlock ;
2013-06-24 19:56:42 +04:00
nvme_process_cq ( nvmeq ) ;
2012-08-07 23:56:23 +04:00
/* true: only act on commands whose timeout has expired. */
nvme_cancel_ios ( nvmeq , true ) ;
2011-03-03 02:37:18 +03:00
nvme_resubmit_bios ( nvmeq ) ;
2013-07-16 01:02:20 +04:00
unlock :
2011-03-03 02:37:18 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
}
spin_unlock ( & dev_list_lock ) ;
2013-02-05 02:44:33 +04:00
schedule_timeout ( round_jiffies_relative ( HZ ) ) ;
2011-03-03 02:37:18 +03:00
}
return 0 ;
}
2012-11-10 03:33:05 +04:00
static void nvme_config_discard ( struct nvme_ns * ns )
{
u32 logical_block_size = queue_logical_block_size ( ns - > queue ) ;
ns - > queue - > limits . discard_zeroes_data = 0 ;
ns - > queue - > limits . discard_alignment = logical_block_size ;
ns - > queue - > limits . discard_granularity = logical_block_size ;
ns - > queue - > limits . max_discard_sectors = 0xffffffff ;
queue_flag_set_unlocked ( QUEUE_FLAG_DISCARD , ns - > queue ) ;
}
2013-07-09 01:26:25 +04:00
static struct nvme_ns * nvme_alloc_ns ( struct nvme_dev * dev , unsigned nsid ,
2011-01-20 20:50:14 +03:00
struct nvme_id_ns * id , struct nvme_lba_range_type * rt )
{
struct nvme_ns * ns ;
struct gendisk * disk ;
int lbaf ;
if ( rt - > attributes & NVME_LBART_ATTRIB_HIDE )
return NULL ;
ns = kzalloc ( sizeof ( * ns ) , GFP_KERNEL ) ;
if ( ! ns )
return NULL ;
ns - > queue = blk_alloc_queue ( GFP_KERNEL ) ;
if ( ! ns - > queue )
goto out_free_ns ;
2012-01-11 01:35:08 +04:00
ns - > queue - > queue_flags = QUEUE_FLAG_DEFAULT ;
queue_flag_set_unlocked ( QUEUE_FLAG_NOMERGES , ns - > queue ) ;
queue_flag_set_unlocked ( QUEUE_FLAG_NONROT , ns - > queue ) ;
2011-01-20 20:50:14 +03:00
blk_queue_make_request ( ns - > queue , nvme_make_request ) ;
ns - > dev = dev ;
ns - > queue - > queuedata = ns ;
2013-12-09 21:58:46 +04:00
disk = alloc_disk ( 0 ) ;
2011-01-20 20:50:14 +03:00
if ( ! disk )
goto out_free_queue ;
2011-05-06 16:45:47 +04:00
ns - > ns_id = nsid ;
2011-01-20 20:50:14 +03:00
ns - > disk = disk ;
lbaf = id - > flbas & 0xf ;
ns - > lba_shift = id - > lbaf [ lbaf ] . ds ;
2013-04-24 03:23:59 +04:00
ns - > ms = le16_to_cpu ( id - > lbaf [ lbaf ] . ms ) ;
2012-07-25 01:01:04 +04:00
blk_queue_logical_block_size ( ns - > queue , 1 < < ns - > lba_shift ) ;
2012-07-26 21:29:57 +04:00
if ( dev - > max_hw_sectors )
blk_queue_max_hw_sectors ( ns - > queue , dev - > max_hw_sectors ) ;
2011-01-20 20:50:14 +03:00
disk - > major = nvme_major ;
2013-12-09 21:58:46 +04:00
disk - > first_minor = 0 ;
2011-01-20 20:50:14 +03:00
disk - > fops = & nvme_fops ;
disk - > private_data = ns ;
disk - > queue = ns - > queue ;
2011-02-01 20:49:38 +03:00
disk - > driverfs_dev = & dev - > pci_dev - > dev ;
2013-12-09 21:58:46 +04:00
disk - > flags = GENHD_FL_EXT_DEVT ;
2011-05-06 16:45:47 +04:00
sprintf ( disk - > disk_name , " nvme%dn%d " , dev - > instance , nsid ) ;
2011-01-20 20:50:14 +03:00
set_capacity ( disk , le64_to_cpup ( & id - > nsze ) < < ( ns - > lba_shift - 9 ) ) ;
2012-11-10 03:33:05 +04:00
if ( dev - > oncs & NVME_CTRL_ONCS_DSM )
nvme_config_discard ( ns ) ;
2011-01-20 20:50:14 +03:00
return ns ;
out_free_queue :
blk_cleanup_queue ( ns - > queue ) ;
out_free_ns :
kfree ( ns ) ;
return NULL ;
}
static void nvme_ns_free ( struct nvme_ns * ns )
{
put_disk ( ns - > disk ) ;
blk_cleanup_queue ( ns - > queue ) ;
kfree ( ns ) ;
}
2011-01-20 17:14:34 +03:00
static int set_queue_count ( struct nvme_dev * dev , int count )
2011-01-20 20:50:14 +03:00
{
int status ;
u32 result ;
2011-01-20 17:14:34 +03:00
u32 q_count = ( count - 1 ) | ( ( count - 1 ) < < 16 ) ;
2011-01-20 20:50:14 +03:00
2012-01-11 18:29:56 +04:00
status = nvme_set_features ( dev , NVME_FEAT_NUM_QUEUES , q_count , 0 ,
2011-09-20 01:08:14 +04:00
& result ) ;
2011-01-20 20:50:14 +03:00
if ( status )
2013-07-30 02:20:56 +04:00
return status < 0 ? - EIO : - EBUSY ;
2011-01-20 20:50:14 +03:00
return min ( result & 0xffff , result > > 16 ) + 1 ;
}
2013-07-16 01:02:24 +04:00
static size_t db_bar_size ( struct nvme_dev * dev , unsigned nr_io_queues )
{
2013-09-10 07:25:37 +04:00
return 4096 + ( ( nr_io_queues + 1 ) * 8 * dev - > db_stride ) ;
2013-07-16 01:02:24 +04:00
}
2012-12-22 03:13:49 +04:00
static int nvme_setup_io_queues ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-05-12 02:19:31 +04:00
struct pci_dev * pdev = dev - > pci_dev ;
2013-07-16 01:02:24 +04:00
int result , cpu , i , vecs , nr_io_queues , size , q_depth ;
2011-01-20 20:50:14 +03:00
2011-02-16 00:16:02 +03:00
nr_io_queues = num_online_cpus ( ) ;
result = set_queue_count ( dev , nr_io_queues ) ;
2011-01-20 21:01:49 +03:00
if ( result < 0 )
return result ;
2011-02-16 00:16:02 +03:00
if ( result < nr_io_queues )
nr_io_queues = result ;
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:24 +04:00
size = db_bar_size ( dev , nr_io_queues ) ;
if ( size > 8192 ) {
2011-10-21 01:00:41 +04:00
iounmap ( dev - > bar ) ;
2013-07-16 01:02:24 +04:00
do {
dev - > bar = ioremap ( pci_resource_start ( pdev , 0 ) , size ) ;
if ( dev - > bar )
break ;
if ( ! - - nr_io_queues )
return - ENOMEM ;
size = db_bar_size ( dev , nr_io_queues ) ;
} while ( 1 ) ;
2011-10-21 01:00:41 +04:00
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
dev - > queues [ 0 ] - > q_db = dev - > dbs ;
}
2013-07-16 01:02:24 +04:00
/* Deregister the admin queue's interrupt */
free_irq ( dev - > entry [ 0 ] . vector , dev - > queues [ 0 ] ) ;
2013-06-20 18:53:48 +04:00
vecs = nr_io_queues ;
for ( i = 0 ; i < vecs ; i + + )
2011-01-20 21:01:49 +03:00
dev - > entry [ i ] . entry = i ;
for ( ; ; ) {
2013-06-20 18:53:48 +04:00
result = pci_enable_msix ( pdev , dev - > entry , vecs ) ;
if ( result < = 0 )
2011-01-20 21:01:49 +03:00
break ;
2013-06-20 18:53:48 +04:00
vecs = result ;
2011-01-20 21:01:49 +03:00
}
2013-06-20 18:53:48 +04:00
if ( result < 0 ) {
vecs = nr_io_queues ;
if ( vecs > 32 )
vecs = 32 ;
2013-05-12 02:19:31 +04:00
for ( ; ; ) {
2013-06-20 18:53:48 +04:00
result = pci_enable_msi_block ( pdev , vecs ) ;
2013-05-12 02:19:31 +04:00
if ( result = = 0 ) {
2013-06-20 18:53:48 +04:00
for ( i = 0 ; i < vecs ; i + + )
2013-05-12 02:19:31 +04:00
dev - > entry [ i ] . vector = i + pdev - > irq ;
break ;
2013-06-20 18:53:48 +04:00
} else if ( result < 0 ) {
vecs = 1 ;
2013-05-12 02:19:31 +04:00
break ;
}
2013-06-20 18:53:48 +04:00
vecs = result ;
2013-05-12 02:19:31 +04:00
}
}
2013-06-20 18:53:48 +04:00
/*
* Should investigate if there ' s a performance win from allocating
* more queues than interrupt vectors ; it might allow the submission
* path to scale better , even if the receive path is limited by the
* number of interrupts .
*/
nr_io_queues = vecs ;
2011-01-20 21:01:49 +03:00
result = queue_request_irq ( dev , dev - > queues [ 0 ] , " nvme admin " ) ;
2013-07-16 01:02:24 +04:00
if ( result ) {
dev - > queues [ 0 ] - > q_suspended = 1 ;
2013-07-16 01:02:20 +04:00
goto free_queues ;
2013-07-16 01:02:24 +04:00
}
2011-01-20 21:01:49 +03:00
2013-07-16 01:02:23 +04:00
/* Free previously allocated queues that are no longer usable */
spin_lock ( & dev_list_lock ) ;
for ( i = dev - > queue_count - 1 ; i > nr_io_queues ; i - - ) {
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
2013-10-15 23:01:10 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:23 +04:00
nvme_cancel_ios ( nvmeq , false ) ;
2013-10-15 23:01:10 +04:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2013-07-16 01:02:23 +04:00
nvme_free_queue ( nvmeq ) ;
dev - > queue_count - - ;
dev - > queues [ i ] = NULL ;
}
spin_unlock ( & dev_list_lock ) ;
2011-01-20 21:01:49 +03:00
cpu = cpumask_first ( cpu_online_mask ) ;
2011-02-16 00:16:02 +03:00
for ( i = 0 ; i < nr_io_queues ; i + + ) {
2011-01-20 21:01:49 +03:00
irq_set_affinity_hint ( dev - > entry [ i ] . vector , get_cpu_mask ( cpu ) ) ;
cpu = cpumask_next ( cpu , cpu_online_mask ) ;
}
2012-07-27 21:57:23 +04:00
q_depth = min_t ( int , NVME_CAP_MQES ( readq ( & dev - > bar - > cap ) ) + 1 ,
NVME_Q_DEPTH ) ;
2013-07-16 01:02:23 +04:00
for ( i = dev - > queue_count - 1 ; i < nr_io_queues ; i + + ) {
2013-07-16 01:02:20 +04:00
dev - > queues [ i + 1 ] = nvme_alloc_queue ( dev , i + 1 , q_depth , i ) ;
if ( ! dev - > queues [ i + 1 ] ) {
result = - ENOMEM ;
goto free_queues ;
}
2011-01-20 21:01:49 +03:00
}
2011-01-20 20:50:14 +03:00
2011-03-16 23:52:19 +03:00
for ( ; i < num_possible_cpus ( ) ; i + + ) {
int target = i % rounddown_pow_of_two ( dev - > queue_count - 1 ) ;
dev - > queues [ i + 1 ] = dev - > queues [ target + 1 ] ;
}
2013-07-16 01:02:20 +04:00
for ( i = 1 ; i < dev - > queue_count ; i + + ) {
result = nvme_create_queue ( dev - > queues [ i ] , i ) ;
if ( result ) {
for ( - - i ; i > 0 ; i - - )
nvme_disable_queue ( dev , i ) ;
goto free_queues ;
}
}
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:20 +04:00
return 0 ;
2011-01-20 20:50:14 +03:00
2013-07-16 01:02:20 +04:00
free_queues :
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 1 ) ;
2013-07-16 01:02:20 +04:00
return result ;
2011-01-20 20:50:14 +03:00
}
2013-04-16 19:22:36 +04:00
/*
* Return : error value if an error occurred setting up the queues or calling
* Identify Device . 0 if these succeeded , even if adding some of the
* namespaces failed . At the moment , these failures are silent . TBD which
* failures should be reported .
*/
2012-12-22 03:13:49 +04:00
static int nvme_dev_add ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-06-21 22:36:34 +04:00
struct pci_dev * pdev = dev - > pci_dev ;
2013-07-09 01:26:25 +04:00
int res ;
unsigned nn , i ;
2013-05-01 23:07:49 +04:00
struct nvme_ns * ns ;
2011-02-02 00:18:08 +03:00
struct nvme_id_ctrl * ctrl ;
2011-09-20 01:08:14 +04:00
struct nvme_id_ns * id_ns ;
void * mem ;
2011-01-20 20:50:14 +03:00
dma_addr_t dma_addr ;
2013-04-10 03:13:20 +04:00
int shift = NVME_CAP_MPSMIN ( readq ( & dev - > bar - > cap ) ) + 12 ;
2011-01-20 20:50:14 +03:00
2013-06-21 22:36:34 +04:00
mem = dma_alloc_coherent ( & pdev - > dev , 8192 , & dma_addr , GFP_KERNEL ) ;
2013-05-01 23:07:48 +04:00
if ( ! mem )
return - ENOMEM ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , 0 , 1 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res ) {
res = - EIO ;
2013-05-01 23:07:49 +04:00
goto out ;
2011-01-20 20:50:14 +03:00
}
2011-09-20 01:08:14 +04:00
ctrl = mem ;
2011-02-02 00:18:08 +03:00
nn = le32_to_cpup ( & ctrl - > nn ) ;
2012-11-10 03:33:05 +04:00
dev - > oncs = le16_to_cpup ( & ctrl - > oncs ) ;
2013-12-11 00:10:38 +04:00
dev - > abort_limit = ctrl - > acl + 1 ;
2011-02-02 00:18:08 +03:00
memcpy ( dev - > serial , ctrl - > sn , sizeof ( ctrl - > sn ) ) ;
memcpy ( dev - > model , ctrl - > mn , sizeof ( ctrl - > mn ) ) ;
memcpy ( dev - > firmware_rev , ctrl - > fr , sizeof ( ctrl - > fr ) ) ;
2013-04-10 03:13:20 +04:00
if ( ctrl - > mdts )
2012-07-26 21:29:57 +04:00
dev - > max_hw_sectors = 1 < < ( ctrl - > mdts + shift - 9 ) ;
2013-06-21 22:36:34 +04:00
if ( ( pdev - > vendor = = PCI_VENDOR_ID_INTEL ) & &
( pdev - > device = = 0x0953 ) & & ctrl - > vs [ 3 ] )
2013-04-10 03:13:20 +04:00
dev - > stripe_size = 1 < < ( ctrl - > vs [ 3 ] + shift ) ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
id_ns = mem ;
2011-10-07 21:10:13 +04:00
for ( i = 1 ; i < = nn ; i + + ) {
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , i , 0 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res )
continue ;
2011-09-20 01:08:14 +04:00
if ( id_ns - > ncap = = 0 )
2011-01-20 20:50:14 +03:00
continue ;
2011-09-20 01:08:14 +04:00
res = nvme_get_features ( dev , NVME_FEAT_LBA_RANGE , i ,
2012-09-21 20:52:13 +04:00
dma_addr + 4096 , NULL ) ;
2011-01-20 20:50:14 +03:00
if ( res )
2013-02-01 01:40:38 +04:00
memset ( mem + 4096 , 0 , 4096 ) ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
ns = nvme_alloc_ns ( dev , i , mem , mem + 4096 ) ;
2011-01-20 20:50:14 +03:00
if ( ns )
list_add_tail ( & ns - > list , & dev - > namespaces ) ;
}
list_for_each_entry ( ns , & dev - > namespaces , list )
add_disk ( ns - > disk ) ;
2013-04-16 19:22:36 +04:00
res = 0 ;
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
out :
2011-09-20 01:14:53 +04:00
dma_free_coherent ( & dev - > pci_dev - > dev , 8192 , mem , dma_addr ) ;
2011-01-20 20:50:14 +03:00
return res ;
}
2013-07-16 01:02:19 +04:00
static int nvme_dev_map ( struct nvme_dev * dev )
{
int bars , result = - ENOMEM ;
struct pci_dev * pdev = dev - > pci_dev ;
if ( pci_enable_device_mem ( pdev ) )
return result ;
dev - > entry [ 0 ] . vector = pdev - > irq ;
pci_set_master ( pdev ) ;
bars = pci_select_bars ( pdev , IORESOURCE_MEM ) ;
if ( pci_request_selected_regions ( pdev , bars , " nvme " ) )
goto disable_pci ;
2013-06-27 02:49:11 +04:00
if ( dma_set_mask_and_coherent ( & pdev - > dev , DMA_BIT_MASK ( 64 ) ) & &
dma_set_mask_and_coherent ( & pdev - > dev , DMA_BIT_MASK ( 32 ) ) )
goto disable ;
2013-07-16 01:02:19 +04:00
dev - > bar = ioremap ( pci_resource_start ( pdev , 0 ) , 8192 ) ;
if ( ! dev - > bar )
goto disable ;
2013-12-11 00:10:39 +04:00
if ( readl ( & dev - > bar - > csts ) = = - 1 ) {
result = - ENODEV ;
goto unmap ;
}
2013-09-10 07:25:37 +04:00
dev - > db_stride = 1 < < NVME_CAP_STRIDE ( readq ( & dev - > bar - > cap ) ) ;
2013-07-16 01:02:19 +04:00
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
return 0 ;
2013-12-11 00:10:39 +04:00
unmap :
iounmap ( dev - > bar ) ;
dev - > bar = NULL ;
2013-07-16 01:02:19 +04:00
disable :
pci_release_regions ( pdev ) ;
disable_pci :
pci_disable_device ( pdev ) ;
return result ;
}
static void nvme_dev_unmap ( struct nvme_dev * dev )
{
if ( dev - > pci_dev - > msi_enabled )
pci_disable_msi ( dev - > pci_dev ) ;
else if ( dev - > pci_dev - > msix_enabled )
pci_disable_msix ( dev - > pci_dev ) ;
if ( dev - > bar ) {
iounmap ( dev - > bar ) ;
dev - > bar = NULL ;
2013-12-11 00:10:36 +04:00
pci_release_regions ( dev - > pci_dev ) ;
2013-07-16 01:02:19 +04:00
}
if ( pci_is_enabled ( dev - > pci_dev ) )
pci_disable_device ( dev - > pci_dev ) ;
}
2013-12-11 00:10:40 +04:00
/*
 * Context shared by the asynchronous queue-deletion work items started from
 * nvme_disable_io_queues().
 */
struct nvme_delq_ctx {
/* Task sleeping in nvme_wait_dq(); woken by nvme_put_dq() when set */
struct task_struct * waiter ;
/* kthread worker on which the deletion work items are queued */
struct kthread_worker * worker ;
/* Raised by nvme_get_dq(), dropped by nvme_put_dq() */
atomic_t refcount ;
} ;
/*
 * Sleep until dq->refcount reaches zero.
 *
 * If a wait of ADMIN_TIMEOUT expires, or a fatal signal is pending for the
 * caller, the controller and the admin queue are disabled, SIGKILL is sent
 * to the worker's task, the worker is flushed and the function returns
 * without waiting any further.
 */
static void nvme_wait_dq ( struct nvme_delq_ctx * dq , struct nvme_dev * dev )
{
/* Publish the waiter before the first refcount check */
dq - > waiter = current ;
mb ( ) ;
for ( ; ; ) {
/* Set the task state before testing the condition */
set_current_state ( TASK_KILLABLE ) ;
if ( ! atomic_read ( & dq - > refcount ) )
break ;
/* schedule_timeout() returning 0 is treated as a timeout here */
if ( ! schedule_timeout ( ADMIN_TIMEOUT ) | |
fatal_signal_pending ( current ) ) {
set_current_state ( TASK_RUNNING ) ;
nvme_disable_ctrl ( dev , readq ( & dev - > bar - > cap ) ) ;
nvme_disable_queue ( dev , 0 ) ;
send_sig ( SIGKILL , dq - > worker - > task , 1 ) ;
flush_kthread_worker ( dq - > worker ) ;
return ;
}
}
set_current_state ( TASK_RUNNING ) ;
}
static void nvme_put_dq ( struct nvme_delq_ctx * dq )
{
atomic_dec ( & dq - > refcount ) ;
if ( dq - > waiter )
wake_up_process ( dq - > waiter ) ;
}
static struct nvme_delq_ctx * nvme_get_dq ( struct nvme_delq_ctx * dq )
{
atomic_inc ( & dq - > refcount ) ;
return dq ;
}
static void nvme_del_queue_end ( struct nvme_queue * nvmeq )
{
struct nvme_delq_ctx * dq = nvmeq - > cmdinfo . ctx ;
nvme_clear_queue ( nvmeq ) ;
nvme_put_dq ( dq ) ;
}
/*
 * Submit an asynchronous admin delete-queue command for @nvmeq.
 *
 * @opcode: admin opcode placed in the command.
 * @fn:     work function installed on nvmeq->cmdinfo.work before the
 *          command is submitted.
 *
 * Returns whatever nvme_submit_admin_cmd_async() returns.
 */
static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
						kthread_work_func_t fn)
{
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);

	init_kthread_work(&nvmeq->cmdinfo.work, fn);

	return nvme_submit_admin_cmd_async(nvmeq->dev, &cmd, &nvmeq->cmdinfo);
}
/* Work function installed by nvme_delete_cq(): finishes the queue deletion. */
static void nvme_del_cq_work_handler(struct kthread_work *work)
{
	struct nvme_queue *nvmeq;

	nvmeq = container_of(work, struct nvme_queue, cmdinfo.work);
	nvme_del_queue_end(nvmeq);
}
/*
 * Submit an asynchronous Delete CQ command for @nvmeq, with
 * nvme_del_cq_work_handler installed as its work function.
 */
static int nvme_delete_cq(struct nvme_queue *nvmeq)
{
	return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
						nvme_del_cq_work_handler);
}
static void nvme_del_sq_work_handler ( struct kthread_work * work )
{
struct nvme_queue * nvmeq = container_of ( work , struct nvme_queue ,
cmdinfo . work ) ;
int status = nvmeq - > cmdinfo . status ;
if ( ! status )
status = nvme_delete_cq ( nvmeq ) ;
if ( status )
nvme_del_queue_end ( nvmeq ) ;
}
/*
 * Submit an asynchronous Delete SQ command for @nvmeq, with
 * nvme_del_sq_work_handler installed as its work function.
 */
static int nvme_delete_sq(struct nvme_queue *nvmeq)
{
	return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
						nvme_del_sq_work_handler);
}
/*
 * First work item of a queue deletion.  Allows SIGKILL for the running
 * task, then submits the submission queue deletion; if that submission
 * fails, the queue deletion is finished immediately.
 */
static void nvme_del_queue_start(struct kthread_work *work)
{
	struct nvme_queue *nvmeq;

	nvmeq = container_of(work, struct nvme_queue, cmdinfo.work);
	allow_signal(SIGKILL);
	if (nvme_delete_sq(nvmeq))
		nvme_del_queue_end(nvmeq);
}
static void nvme_disable_io_queues ( struct nvme_dev * dev )
{
int i ;
DEFINE_KTHREAD_WORKER_ONSTACK ( worker ) ;
struct nvme_delq_ctx dq ;
struct task_struct * kworker_task = kthread_run ( kthread_worker_fn ,
& worker , " nvme%d " , dev - > instance ) ;
if ( IS_ERR ( kworker_task ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Failed to create queue del task \n " ) ;
for ( i = dev - > queue_count - 1 ; i > 0 ; i - - )
nvme_disable_queue ( dev , i ) ;
return ;
}
dq . waiter = NULL ;
atomic_set ( & dq . refcount , 0 ) ;
dq . worker = & worker ;
for ( i = dev - > queue_count - 1 ; i > 0 ; i - - ) {
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
if ( nvme_suspend_queue ( nvmeq ) )
continue ;
nvmeq - > cmdinfo . ctx = nvme_get_dq ( & dq ) ;
nvmeq - > cmdinfo . worker = dq . worker ;
init_kthread_work ( & nvmeq - > cmdinfo . work , nvme_del_queue_start ) ;
queue_kthread_work ( dq . worker , & nvmeq - > cmdinfo . work ) ;
}
nvme_wait_dq ( & dq , dev ) ;
kthread_stop ( kworker_task ) ;
}
2013-07-16 01:02:21 +04:00
static void nvme_dev_shutdown ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2013-07-16 01:02:20 +04:00
int i ;
2013-12-11 00:10:37 +04:00
dev - > initialized = 0 ;
2011-01-20 20:50:14 +03:00
2011-03-03 02:37:18 +03:00
spin_lock ( & dev_list_lock ) ;
2013-07-16 01:02:21 +04:00
list_del_init ( & dev - > node ) ;
2011-03-03 02:37:18 +03:00
spin_unlock ( & dev_list_lock ) ;
2013-12-11 00:10:40 +04:00
if ( ! dev - > bar | | ( dev - > bar & & readl ( & dev - > bar - > csts ) = = - 1 ) ) {
for ( i = dev - > queue_count - 1 ; i > = 0 ; i - - ) {
struct nvme_queue * nvmeq = dev - > queues [ i ] ;
nvme_suspend_queue ( nvmeq ) ;
nvme_clear_queue ( nvmeq ) ;
}
} else {
nvme_disable_io_queues ( dev ) ;
2013-07-16 01:02:22 +04:00
nvme_shutdown_ctrl ( dev ) ;
2013-12-11 00:10:40 +04:00
nvme_disable_queue ( dev , 0 ) ;
}
2013-07-16 01:02:21 +04:00
nvme_dev_unmap ( dev ) ;
}
static void nvme_dev_remove ( struct nvme_dev * dev )
{
struct nvme_ns * ns , * next ;
2011-01-20 20:50:14 +03:00
list_for_each_entry_safe ( ns , next , & dev - > namespaces , list ) {
list_del ( & ns - > list ) ;
del_gendisk ( ns - > disk ) ;
nvme_ns_free ( ns ) ;
}
}
2011-02-10 17:56:01 +03:00
static int nvme_setup_prp_pools ( struct nvme_dev * dev )
{
struct device * dmadev = & dev - > pci_dev - > dev ;
dev - > prp_page_pool = dma_pool_create ( " prp list page " , dmadev ,
PAGE_SIZE , PAGE_SIZE , 0 ) ;
if ( ! dev - > prp_page_pool )
return - ENOMEM ;
2011-02-10 18:30:34 +03:00
/* Optimisation for I/Os between 4k and 128k */
dev - > prp_small_pool = dma_pool_create ( " prp list 256 " , dmadev ,
256 , 256 , 0 ) ;
if ( ! dev - > prp_small_pool ) {
dma_pool_destroy ( dev - > prp_page_pool ) ;
return - ENOMEM ;
}
2011-02-10 17:56:01 +03:00
return 0 ;
}
static void nvme_release_prp_pools ( struct nvme_dev * dev )
{
dma_pool_destroy ( dev - > prp_page_pool ) ;
2011-02-10 18:30:34 +03:00
dma_pool_destroy ( dev - > prp_small_pool ) ;
2011-02-10 17:56:01 +03:00
}
2012-02-22 03:50:53 +04:00
static DEFINE_IDA ( nvme_instance_ida ) ;
static int nvme_set_instance ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2012-02-22 03:50:53 +04:00
int instance , error ;
do {
if ( ! ida_pre_get ( & nvme_instance_ida , GFP_KERNEL ) )
return - ENODEV ;
spin_lock ( & dev_list_lock ) ;
error = ida_get_new ( & nvme_instance_ida , & instance ) ;
spin_unlock ( & dev_list_lock ) ;
} while ( error = = - EAGAIN ) ;
if ( error )
return - ENODEV ;
dev - > instance = instance ;
return 0 ;
2011-01-20 20:50:14 +03:00
}
static void nvme_release_instance ( struct nvme_dev * dev )
{
2012-02-22 03:50:53 +04:00
spin_lock ( & dev_list_lock ) ;
ida_remove ( & nvme_instance_ida , dev - > instance ) ;
spin_unlock ( & dev_list_lock ) ;
2011-01-20 20:50:14 +03:00
}
2013-02-19 21:17:58 +04:00
static void nvme_free_dev ( struct kref * kref )
{
struct nvme_dev * dev = container_of ( kref , struct nvme_dev , kref ) ;
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
}
static int nvme_dev_open ( struct inode * inode , struct file * f )
{
struct nvme_dev * dev = container_of ( f - > private_data , struct nvme_dev ,
miscdev ) ;
kref_get ( & dev - > kref ) ;
f - > private_data = dev ;
return 0 ;
}
static int nvme_dev_release ( struct inode * inode , struct file * f )
{
struct nvme_dev * dev = f - > private_data ;
kref_put ( & dev - > kref , nvme_free_dev ) ;
return 0 ;
}
static long nvme_dev_ioctl ( struct file * f , unsigned int cmd , unsigned long arg )
{
struct nvme_dev * dev = f - > private_data ;
switch ( cmd ) {
case NVME_IOCTL_ADMIN_CMD :
return nvme_user_admin_cmd ( dev , ( void __user * ) arg ) ;
default :
return - ENOTTY ;
}
}
/*
 * File operations of the controller character device.  The same handler
 * serves both native and compat ioctls.
 */
static const struct file_operations nvme_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= nvme_dev_open,
	.release	= nvme_dev_release,
	.unlocked_ioctl	= nvme_dev_ioctl,
	.compat_ioctl	= nvme_dev_ioctl,
};
2013-07-16 01:02:21 +04:00
static int nvme_dev_start ( struct nvme_dev * dev )
{
int result ;
result = nvme_dev_map ( dev ) ;
if ( result )
return result ;
result = nvme_configure_admin_queue ( dev ) ;
if ( result )
goto unmap ;
spin_lock ( & dev_list_lock ) ;
list_add ( & dev - > node , & dev_list ) ;
spin_unlock ( & dev_list_lock ) ;
result = nvme_setup_io_queues ( dev ) ;
2013-09-06 00:45:07 +04:00
if ( result & & result ! = - EBUSY )
2013-07-16 01:02:21 +04:00
goto disable ;
2013-09-06 00:45:07 +04:00
return result ;
2013-07-16 01:02:21 +04:00
disable :
2013-12-16 22:50:00 +04:00
nvme_disable_queue ( dev , 0 ) ;
2013-07-16 01:02:21 +04:00
spin_lock ( & dev_list_lock ) ;
list_del_init ( & dev - > node ) ;
spin_unlock ( & dev_list_lock ) ;
unmap :
nvme_dev_unmap ( dev ) ;
return result ;
}
2013-12-11 00:10:36 +04:00
static int nvme_remove_dead_ctrl ( void * arg )
{
struct nvme_dev * dev = ( struct nvme_dev * ) arg ;
struct pci_dev * pdev = dev - > pci_dev ;
if ( pci_get_drvdata ( pdev ) )
pci_stop_and_remove_bus_device ( pdev ) ;
kref_put ( & dev - > kref , nvme_free_dev ) ;
return 0 ;
}
static void nvme_remove_disks ( struct work_struct * ws )
{
int i ;
struct nvme_dev * dev = container_of ( ws , struct nvme_dev , reset_work ) ;
nvme_dev_remove ( dev ) ;
spin_lock ( & dev_list_lock ) ;
for ( i = dev - > queue_count - 1 ; i > 0 ; i - - ) {
BUG_ON ( ! dev - > queues [ i ] | | ! dev - > queues [ i ] - > q_suspended ) ;
nvme_free_queue ( dev - > queues [ i ] ) ;
dev - > queue_count - - ;
dev - > queues [ i ] = NULL ;
}
spin_unlock ( & dev_list_lock ) ;
}
static int nvme_dev_resume ( struct nvme_dev * dev )
{
int ret ;
ret = nvme_dev_start ( dev ) ;
if ( ret & & ret ! = - EBUSY )
return ret ;
if ( ret = = - EBUSY ) {
spin_lock ( & dev_list_lock ) ;
INIT_WORK ( & dev - > reset_work , nvme_remove_disks ) ;
queue_work ( nvme_workq , & dev - > reset_work ) ;
spin_unlock ( & dev_list_lock ) ;
}
2013-12-11 00:10:37 +04:00
dev - > initialized = 1 ;
2013-12-11 00:10:36 +04:00
return 0 ;
}
static void nvme_dev_reset ( struct nvme_dev * dev )
{
nvme_dev_shutdown ( dev ) ;
if ( nvme_dev_resume ( dev ) ) {
dev_err ( & dev - > pci_dev - > dev , " Device failed to resume \n " ) ;
kref_get ( & dev - > kref ) ;
if ( IS_ERR ( kthread_run ( nvme_remove_dead_ctrl , dev , " nvme%d " ,
dev - > instance ) ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Failed to start controller remove task \n " ) ;
kref_put ( & dev - > kref , nvme_free_dev ) ;
}
}
}
/* Work function: reset the device that owns this reset_work item. */
static void nvme_reset_failed_dev(struct work_struct *ws)
{
	struct nvme_dev *dev;

	dev = container_of(ws, struct nvme_dev, reset_work);
	nvme_dev_reset(dev);
}
2012-12-22 03:13:49 +04:00
static int nvme_probe ( struct pci_dev * pdev , const struct pci_device_id * id )
2011-01-20 20:50:14 +03:00
{
2013-07-16 01:02:19 +04:00
int result = - ENOMEM ;
2011-01-20 20:50:14 +03:00
struct nvme_dev * dev ;
dev = kzalloc ( sizeof ( * dev ) , GFP_KERNEL ) ;
if ( ! dev )
return - ENOMEM ;
dev - > entry = kcalloc ( num_possible_cpus ( ) , sizeof ( * dev - > entry ) ,
GFP_KERNEL ) ;
if ( ! dev - > entry )
goto free ;
2011-01-20 21:01:49 +03:00
dev - > queues = kcalloc ( num_possible_cpus ( ) + 1 , sizeof ( void * ) ,
GFP_KERNEL ) ;
2011-01-20 20:50:14 +03:00
if ( ! dev - > queues )
goto free ;
INIT_LIST_HEAD ( & dev - > namespaces ) ;
dev - > pci_dev = pdev ;
2013-12-11 00:10:36 +04:00
pci_set_drvdata ( pdev , dev ) ;
2012-02-22 03:50:53 +04:00
result = nvme_set_instance ( dev ) ;
if ( result )
2013-07-16 01:02:19 +04:00
goto free ;
2011-01-20 20:50:14 +03:00
2011-02-10 17:56:01 +03:00
result = nvme_setup_prp_pools ( dev ) ;
if ( result )
2013-07-16 01:02:19 +04:00
goto release ;
2011-02-10 17:56:01 +03:00
2013-07-16 01:02:21 +04:00
result = nvme_dev_start ( dev ) ;
2013-09-06 00:45:07 +04:00
if ( result ) {
if ( result = = - EBUSY )
goto create_cdev ;
2013-07-16 01:02:19 +04:00
goto release_pools ;
2013-09-06 00:45:07 +04:00
}
2011-01-20 20:50:14 +03:00
2011-02-16 00:28:20 +03:00
result = nvme_dev_add ( dev ) ;
2013-09-06 00:45:07 +04:00
if ( result )
2013-07-16 01:02:21 +04:00
goto shutdown ;
2011-02-16 00:28:20 +03:00
2013-09-06 00:45:07 +04:00
create_cdev :
2013-02-19 21:17:58 +04:00
scnprintf ( dev - > name , sizeof ( dev - > name ) , " nvme%d " , dev - > instance ) ;
dev - > miscdev . minor = MISC_DYNAMIC_MINOR ;
dev - > miscdev . parent = & pdev - > dev ;
dev - > miscdev . name = dev - > name ;
dev - > miscdev . fops = & nvme_dev_fops ;
result = misc_register ( & dev - > miscdev ) ;
if ( result )
goto remove ;
2013-12-11 00:10:37 +04:00
dev - > initialized = 1 ;
2013-02-19 21:17:58 +04:00
kref_init ( & dev - > kref ) ;
2011-01-20 20:50:14 +03:00
return 0 ;
2013-02-19 21:17:58 +04:00
remove :
nvme_dev_remove ( dev ) ;
2013-07-16 01:02:21 +04:00
shutdown :
nvme_dev_shutdown ( dev ) ;
2013-07-16 01:02:19 +04:00
release_pools :
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 0 ) ;
2011-02-10 17:56:01 +03:00
nvme_release_prp_pools ( dev ) ;
2013-07-16 01:02:19 +04:00
release :
nvme_release_instance ( dev ) ;
2011-01-20 20:50:14 +03:00
free :
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
return result ;
}
2012-12-22 03:13:49 +04:00
static void nvme_remove ( struct pci_dev * pdev )
2011-01-20 20:50:14 +03:00
{
struct nvme_dev * dev = pci_get_drvdata ( pdev ) ;
2013-12-11 00:10:36 +04:00
spin_lock ( & dev_list_lock ) ;
list_del_init ( & dev - > node ) ;
spin_unlock ( & dev_list_lock ) ;
pci_set_drvdata ( pdev , NULL ) ;
flush_work ( & dev - > reset_work ) ;
2013-02-19 21:17:58 +04:00
misc_deregister ( & dev - > miscdev ) ;
2013-12-11 00:10:36 +04:00
nvme_dev_remove ( dev ) ;
nvme_dev_shutdown ( dev ) ;
2013-12-16 22:50:00 +04:00
nvme_free_queues ( dev , 0 ) ;
2013-12-11 00:10:36 +04:00
nvme_release_instance ( dev ) ;
nvme_release_prp_pools ( dev ) ;
2013-02-19 21:17:58 +04:00
kref_put ( & dev - > kref , nvme_free_dev ) ;
2011-01-20 20:50:14 +03:00
}
/*
 * These functions are yet to be implemented.  Each name is defined as NULL
 * so that the corresponding nvme_err_handler member is left unset.
 */
# define nvme_error_detected NULL
# define nvme_dump_registers NULL
# define nvme_link_reset NULL
# define nvme_slot_reset NULL
# define nvme_error_resume NULL
2013-07-16 01:02:23 +04:00
/* PM suspend callback: shut the controller down.  Always returns 0. */
static int nvme_suspend(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));

	nvme_dev_shutdown(ndev);

	return 0;
}
static int nvme_resume ( struct device * dev )
{
struct pci_dev * pdev = to_pci_dev ( dev ) ;
struct nvme_dev * ndev = pci_get_drvdata ( pdev ) ;
2013-12-11 00:10:36 +04:00
if ( nvme_dev_resume ( ndev ) & & ! work_busy ( & ndev - > reset_work ) ) {
INIT_WORK ( & ndev - > reset_work , nvme_reset_failed_dev ) ;
queue_work ( nvme_workq , & ndev - > reset_work ) ;
}
return 0 ;
2013-07-16 01:02:23 +04:00
}
/* Power-management operations built from nvme_suspend and nvme_resume. */
static SIMPLE_DEV_PM_OPS ( nvme_dev_pm_ops , nvme_suspend , nvme_resume ) ;
2011-01-20 20:50:14 +03:00
2012-09-07 20:33:17 +04:00
static const struct pci_error_handlers nvme_err_handler = {
2011-01-20 20:50:14 +03:00
. error_detected = nvme_error_detected ,
. mmio_enabled = nvme_dump_registers ,
. link_reset = nvme_link_reset ,
. slot_reset = nvme_slot_reset ,
. resume = nvme_error_resume ,
} ;
/* PCI class code matched by this driver.  Move to pci_ids.h later. */
# define PCI_CLASS_STORAGE_EXPRESS 0x010802
/* Match any device by class code, using a full 24-bit class mask. */
static DEFINE_PCI_DEVICE_TABLE ( nvme_id_table ) = {
{ PCI_DEVICE_CLASS ( PCI_CLASS_STORAGE_EXPRESS , 0xffffff ) } ,
{ 0 , }
} ;
MODULE_DEVICE_TABLE ( pci , nvme_id_table ) ;
static struct pci_driver nvme_driver = {
. name = " nvme " ,
. id_table = nvme_id_table ,
. probe = nvme_probe ,
2012-12-22 03:13:49 +04:00
. remove = nvme_remove ,
2013-07-16 01:02:23 +04:00
. driver = {
. pm = & nvme_dev_pm_ops ,
} ,
2011-01-20 20:50:14 +03:00
. err_handler = & nvme_err_handler ,
} ;
/*
 * Module init: start the polling thread, create the driver workqueue,
 * register the block major and register the PCI driver.
 *
 * Returns 0 on success.  On failure, everything set up so far is undone in
 * reverse order and a negative errno is returned.
 */
static int __init nvme_init(void)
{
	int result;

	nvme_thread = kthread_run(nvme_kthread, NULL, " nvme ");
	if (IS_ERR(nvme_thread))
		return PTR_ERR(nvme_thread);

	nvme_workq = create_singlethread_workqueue(" nvme ");
	if (!nvme_workq) {
		result = -ENOMEM;
		goto kill_kthread;
	}

	result = register_blkdev(nvme_major, " nvme ");
	if (result < 0)
		goto kill_workq;
	/* A positive return is stored as the major number to use */
	if (result > 0)
		nvme_major = result;

	result = pci_register_driver(&nvme_driver);
	if (!result)
		return 0;

	unregister_blkdev(nvme_major, " nvme ");
 kill_workq:
	destroy_workqueue(nvme_workq);
 kill_kthread:
	kthread_stop(nvme_thread);
	return result;
}
/*
 * Module exit: unregister the PCI driver and the block major, destroy the
 * workqueue and stop the polling thread.
 */
static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);
	unregister_blkdev(nvme_major, " nvme ");
	destroy_workqueue(nvme_workq);
	kthread_stop(nvme_thread);
}
/* Module metadata and entry points. */
MODULE_AUTHOR(" Matthew Wilcox <willy@linux.intel.com> ");
MODULE_LICENSE(" GPL ");
MODULE_VERSION(" 0.8 ");
module_init(nvme_init);
module_exit(nvme_exit);