2011-01-20 20:50:14 +03:00
/*
* NVM Express device driver
* Copyright ( c ) 2011 , Intel Corporation .
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms and conditions of the GNU General Public License ,
* version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*
* You should have received a copy of the GNU General Public License along with
* this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin St - Fifth Floor , Boston , MA 02110 - 1301 USA .
*/
# include <linux/nvme.h>
# include <linux/bio.h>
2011-05-12 21:50:28 +04:00
# include <linux/bitops.h>
2011-01-20 20:50:14 +03:00
# include <linux/blkdev.h>
2011-05-06 16:37:54 +04:00
# include <linux/delay.h>
2011-01-20 20:50:14 +03:00
# include <linux/errno.h>
# include <linux/fs.h>
# include <linux/genhd.h>
2011-05-06 16:45:47 +04:00
# include <linux/idr.h>
2011-01-20 20:50:14 +03:00
# include <linux/init.h>
# include <linux/interrupt.h>
# include <linux/io.h>
# include <linux/kdev_t.h>
2011-03-03 02:37:18 +03:00
# include <linux/kthread.h>
2011-01-20 20:50:14 +03:00
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/module.h>
# include <linux/moduleparam.h>
# include <linux/pci.h>
2011-02-06 15:53:23 +03:00
# include <linux/poison.h>
2011-01-20 20:50:14 +03:00
# include <linux/sched.h>
# include <linux/slab.h>
# include <linux/types.h>
2012-02-07 06:45:33 +04:00
# include <asm-generic/io-64-nonatomic-lo-hi.h>
2011-01-20 20:50:14 +03:00
#define NVME_Q_DEPTH 1024

/*
 * Size in bytes of a submission / completion queue holding @depth entries.
 * The argument is parenthesised so that expressions such as
 * SQ_SIZE(a + b) expand correctly.
 */
#define SQ_SIZE(depth)		((depth) * sizeof(struct nvme_command))
#define CQ_SIZE(depth)		((depth) * sizeof(struct nvme_completion))

#define NVME_MINORS 64

/* Command timeouts, in jiffies (they are added to jiffies by alloc_cmdid()) */
#define NVME_IO_TIMEOUT	(5 * HZ)
#define ADMIN_TIMEOUT	(60 * HZ)
2011-01-20 20:50:14 +03:00
/* Module parameters; both are registered with permission mask 0. */
static int nvme_major;
module_param(nvme_major, int, 0);

static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);

/* Driver-wide list of devices and the lock that guards it. */
static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);

/* Kernel thread that is woken by requeue_bio() to service congested bios. */
static struct task_struct *nvme_thread;
2011-01-20 20:50:14 +03:00
/*
* Represents an NVM Express device . Each nvme_dev is a PCI function .
*/
struct nvme_dev {
2011-03-03 02:37:18 +03:00
struct list_head node ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * * queues ;
u32 __iomem * dbs ;
struct pci_dev * pci_dev ;
2011-02-10 17:56:01 +03:00
struct dma_pool * prp_page_pool ;
2011-02-10 18:30:34 +03:00
struct dma_pool * prp_small_pool ;
2011-01-20 20:50:14 +03:00
int instance ;
int queue_count ;
2011-10-21 01:00:41 +04:00
int db_stride ;
2011-01-20 20:50:14 +03:00
u32 ctrl_config ;
struct msix_entry * entry ;
struct nvme_bar __iomem * bar ;
struct list_head namespaces ;
2011-02-02 00:18:08 +03:00
char serial [ 20 ] ;
char model [ 40 ] ;
char firmware_rev [ 8 ] ;
2012-07-26 21:29:57 +04:00
u32 max_hw_sectors ;
2011-01-20 20:50:14 +03:00
} ;
/*
 * An NVM Express namespace is equivalent to a SCSI LUN
 */
struct nvme_ns {
	struct list_head list;
	struct nvme_dev *dev;		/* device this namespace belongs to */
	struct request_queue *queue;	/* block layer queue; queuedata points back here */
	struct gendisk *disk;
	int ns_id;			/* value placed in the nsid field of commands */
	int lba_shift;			/* shift converting bytes to logical blocks */
};
/*
* An NVM Express queue . Each device has at least two ( one for admin
* commands and one for I / O commands ) .
*/
struct nvme_queue {
struct device * q_dmadev ;
2011-02-10 17:56:01 +03:00
struct nvme_dev * dev ;
2011-01-20 20:50:14 +03:00
spinlock_t q_lock ;
struct nvme_command * sq_cmds ;
volatile struct nvme_completion * cqes ;
dma_addr_t sq_dma_addr ;
dma_addr_t cq_dma_addr ;
wait_queue_head_t sq_full ;
2011-03-03 02:37:18 +03:00
wait_queue_t sq_cong_wait ;
2011-01-20 20:50:14 +03:00
struct bio_list sq_cong ;
u32 __iomem * q_db ;
u16 q_depth ;
u16 cq_vector ;
u16 sq_head ;
u16 sq_tail ;
u16 cq_head ;
2011-01-20 21:24:06 +03:00
u16 cq_phase ;
2011-01-20 20:50:14 +03:00
unsigned long cmdid_data [ ] ;
} ;
/*
 * Check we didn't inadvertently grow the command structs: every build
 * fails if one of the on-the-wire layouts has an unexpected size.
 */
static inline void _nvme_check_size(void)
{
	/* 64-byte command layouts */
	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);

	/* 4096-byte identify data structures */
	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
}
2011-12-20 20:54:53 +04:00
/*
 * Completion callback.  It receives the device, the context pointer that
 * was registered together with the command ID, and the completion entry.
 */
typedef void (*nvme_completion_fn)(struct nvme_dev *, void *,
						struct nvme_completion *);
2011-02-07 02:30:16 +03:00
struct nvme_cmd_info {
2011-10-15 15:33:46 +04:00
nvme_completion_fn fn ;
void * ctx ;
2011-02-07 02:30:16 +03:00
unsigned long timeout ;
} ;
static struct nvme_cmd_info * nvme_cmd_info ( struct nvme_queue * nvmeq )
{
return ( void * ) & nvmeq - > cmdid_data [ BITS_TO_LONGS ( nvmeq - > q_depth ) ] ;
}
2011-01-20 20:50:14 +03:00
/**
2011-03-16 23:28:24 +03:00
* alloc_cmdid ( ) - Allocate a Command ID
* @ nvmeq : The queue that will be used for this command
* @ ctx : A pointer that will be passed to the handler
2011-10-15 15:33:46 +04:00
* @ handler : The function to call on completion
2011-01-20 20:50:14 +03:00
*
* Allocate a Command ID for a queue . The data passed in will
* be passed to the completion handler . This is implemented by using
* the bottom two bits of the ctx pointer to store the handler ID .
* Passing in a pointer that ' s not 4 - byte aligned will cause a BUG .
* We can change this if it becomes a problem .
2011-05-12 05:36:38 +04:00
*
* May be called with local interrupts disabled and the q_lock held ,
* or with interrupts enabled and no locks held .
2011-01-20 20:50:14 +03:00
*/
2011-10-15 15:33:46 +04:00
static int alloc_cmdid ( struct nvme_queue * nvmeq , void * ctx ,
nvme_completion_fn handler , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
2011-02-24 16:49:41 +03:00
int depth = nvmeq - > q_depth - 1 ;
2011-02-07 02:30:16 +03:00
struct nvme_cmd_info * info = nvme_cmd_info ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
int cmdid ;
do {
cmdid = find_first_zero_bit ( nvmeq - > cmdid_data , depth ) ;
if ( cmdid > = depth )
return - EBUSY ;
} while ( test_and_set_bit ( cmdid , nvmeq - > cmdid_data ) ) ;
2011-10-15 15:33:46 +04:00
info [ cmdid ] . fn = handler ;
info [ cmdid ] . ctx = ctx ;
2011-02-07 02:30:16 +03:00
info [ cmdid ] . timeout = jiffies + timeout ;
2011-01-20 20:50:14 +03:00
return cmdid ;
}
static int alloc_cmdid_killable ( struct nvme_queue * nvmeq , void * ctx ,
2011-10-15 15:33:46 +04:00
nvme_completion_fn handler , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
int cmdid ;
wait_event_killable ( nvmeq - > sq_full ,
2011-02-07 02:30:16 +03:00
( cmdid = alloc_cmdid ( nvmeq , ctx , handler , timeout ) ) > = 0 ) ;
2011-01-20 20:50:14 +03:00
return ( cmdid < 0 ) ? - EINTR : cmdid ;
}
2011-10-15 15:33:46 +04:00
/*
 * Sentinel context pointers understood by special_completion().
 * Special values must be less than 0x1000
 */
#define CMD_CTX_BASE		((void *)POISON_POINTER_DELTA)
#define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)	/* command was cancelled */
#define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)	/* command ID already completed */
#define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)	/* command ID out of range */
#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)	/* flush issued ahead of data */
2011-02-06 15:53:23 +03:00
2011-12-20 20:54:53 +04:00
static void special_completion ( struct nvme_dev * dev , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
if ( ctx = = CMD_CTX_CANCELLED )
return ;
if ( ctx = = CMD_CTX_FLUSH )
return ;
if ( ctx = = CMD_CTX_COMPLETED ) {
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev ,
2011-10-15 15:33:46 +04:00
" completed id %d twice on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
if ( ctx = = CMD_CTX_INVALID ) {
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev ,
2011-10-15 15:33:46 +04:00
" invalid id %d completed on queue %d \n " ,
cqe - > command_id , le16_to_cpup ( & cqe - > sq_id ) ) ;
return ;
}
2011-12-20 20:54:53 +04:00
dev_warn ( & dev - > pci_dev - > dev , " Unknown special completion %p \n " , ctx ) ;
2011-10-15 15:33:46 +04:00
}
2011-05-12 05:36:38 +04:00
/*
* Called with local interrupts disabled and the q_lock held . May not sleep .
*/
2011-10-15 15:33:46 +04:00
static void * free_cmdid ( struct nvme_queue * nvmeq , int cmdid ,
nvme_completion_fn * fn )
2011-01-20 20:50:14 +03:00
{
2011-10-15 15:33:46 +04:00
void * ctx ;
2011-02-07 02:30:16 +03:00
struct nvme_cmd_info * info = nvme_cmd_info ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
2011-10-15 15:33:46 +04:00
if ( cmdid > = nvmeq - > q_depth ) {
* fn = special_completion ;
2011-02-06 16:51:15 +03:00
return CMD_CTX_INVALID ;
2011-10-15 15:33:46 +04:00
}
* fn = info [ cmdid ] . fn ;
ctx = info [ cmdid ] . ctx ;
info [ cmdid ] . fn = special_completion ;
2011-02-07 02:30:16 +03:00
info [ cmdid ] . ctx = CMD_CTX_COMPLETED ;
2011-01-20 20:50:14 +03:00
clear_bit ( cmdid , nvmeq - > cmdid_data ) ;
wake_up ( & nvmeq - > sq_full ) ;
2011-10-15 15:33:46 +04:00
return ctx ;
2011-01-20 20:50:14 +03:00
}
2011-10-15 15:33:46 +04:00
/*
 * Mark @cmdid as cancelled without releasing it.  The previously
 * registered context is returned and, when @fn is non-NULL, the previous
 * handler is stored through it.  A later completion of this ID is routed
 * to special_completion() with CMD_CTX_CANCELLED.
 */
static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid,
						nvme_completion_fn *fn)
{
	struct nvme_cmd_info *slot = &nvme_cmd_info(nvmeq)[cmdid];
	void *old_ctx;

	if (fn)
		*fn = slot->fn;
	old_ctx = slot->ctx;

	slot->fn = special_completion;
	slot->ctx = CMD_CTX_CANCELLED;
	return old_ctx;
}
2011-12-20 20:04:12 +04:00
static struct nvme_queue * get_nvmeq ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2011-12-20 20:04:12 +04:00
return dev - > queues [ get_cpu ( ) + 1 ] ;
2011-01-20 20:50:14 +03:00
}
/* Counterpart of get_nvmeq(); @nvmeq itself is not touched. */
static void put_nvmeq(struct nvme_queue *nvmeq)
{
	put_cpu();
}
/**
2011-03-16 23:28:24 +03:00
* nvme_submit_cmd ( ) - Copy a command into a queue and ring the doorbell
2011-01-20 20:50:14 +03:00
* @ nvmeq : The queue to use
* @ cmd : The command to send
*
* Safe to use from interrupt context
*/
static int nvme_submit_cmd ( struct nvme_queue * nvmeq , struct nvme_command * cmd )
{
unsigned long flags ;
u16 tail ;
spin_lock_irqsave ( & nvmeq - > q_lock , flags ) ;
tail = nvmeq - > sq_tail ;
memcpy ( & nvmeq - > sq_cmds [ tail ] , cmd , sizeof ( * cmd ) ) ;
if ( + + tail = = nvmeq - > q_depth )
tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > sq_tail = tail ;
spin_unlock_irqrestore ( & nvmeq - > q_lock , flags ) ;
return 0 ;
}
2011-12-20 22:34:52 +04:00
/*
* The nvme_iod describes the data in an I / O , including the list of PRP
* entries . You can ' t see it in this data structure because C doesn ' t let
* me express that . Use nvme_alloc_iod to ensure there ' s enough space
* allocated to store the PRP list .
*/
struct nvme_iod {
void * private ; /* For the use of the submitter of the I/O */
int npages ; /* In the PRP list. 0 means small pool in use */
int offset ; /* Of PRP list */
int nents ; /* Used in scatterlist */
int length ; /* Of data, in bytes */
2011-02-10 16:51:24 +03:00
dma_addr_t first_dma ;
2011-12-20 22:34:52 +04:00
struct scatterlist sg [ 0 ] ;
2011-02-10 16:51:24 +03:00
} ;
2011-12-20 22:34:52 +04:00
static __le64 * * iod_list ( struct nvme_iod * iod )
2011-02-10 16:51:24 +03:00
{
2011-12-20 22:34:52 +04:00
return ( ( void * ) iod ) + iod - > offset ;
2011-02-10 16:51:24 +03:00
}
2011-12-20 22:34:52 +04:00
/*
 * Number of PRP list pages that may be needed for a transfer of @size
 * bytes.  Each PRP entry is 8 bytes, and the divisor is PAGE_SIZE - 8
 * rather than PAGE_SIZE.
 *
 * Will slightly overestimate the number of pages needed.  This is OK
 * as it only leads to a small amount of wasted memory for the lifetime of
 * the I/O.
 */
static int nvme_npages(unsigned size)
{
	unsigned prp_entries = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
	unsigned prp_bytes = 8 * prp_entries;

	return DIV_ROUND_UP(prp_bytes, PAGE_SIZE - 8);
}
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
/*
 * Allocate an nvme_iod with room for @nseg scatterlist entries and enough
 * PRP list page pointers for a transfer of @nbytes bytes.
 *
 * On success the iod has offset, npages (-1: no PRP list allocated),
 * length and nents initialised; private and first_dma are left for the
 * caller.  nents starts at 0 because kmalloc() does not zero the memory
 * and bio_completion() reads it even for requests that never went through
 * nvme_map_bio() (a flush without data), so it must not be garbage.
 *
 * Returns NULL if the allocation fails.  Free with nvme_free_iod().
 */
static struct nvme_iod *
nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
{
	struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
				sizeof(__le64 *) * nvme_npages(nbytes) +
				sizeof(struct scatterlist) * nseg, gfp);

	if (iod) {
		iod->offset = offsetof(struct nvme_iod, sg[nseg]);
		iod->npages = -1;
		iod->length = nbytes;
		iod->nents = 0;
	}

	return iod;
}
2011-12-20 22:34:52 +04:00
static void nvme_free_iod ( struct nvme_dev * dev , struct nvme_iod * iod )
2011-01-20 20:50:14 +03:00
{
2011-12-20 22:34:52 +04:00
const int last_prp = PAGE_SIZE / 8 - 1 ;
int i ;
__le64 * * list = iod_list ( iod ) ;
dma_addr_t prp_dma = iod - > first_dma ;
if ( iod - > npages = = 0 )
dma_pool_free ( dev - > prp_small_pool , list [ 0 ] , prp_dma ) ;
for ( i = 0 ; i < iod - > npages ; i + + ) {
__le64 * prp_list = list [ i ] ;
dma_addr_t next_prp_dma = le64_to_cpu ( prp_list [ last_prp ] ) ;
dma_pool_free ( dev - > prp_page_pool , prp_list , prp_dma ) ;
prp_dma = next_prp_dma ;
}
kfree ( iod ) ;
2011-01-20 20:50:14 +03:00
}
2011-12-20 20:54:53 +04:00
static void requeue_bio ( struct nvme_dev * dev , struct bio * bio )
{
struct nvme_queue * nvmeq = get_nvmeq ( dev ) ;
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
add_wait_queue ( & nvmeq - > sq_full , & nvmeq - > sq_cong_wait ) ;
bio_list_add ( & nvmeq - > sq_cong , bio ) ;
put_nvmeq ( nvmeq ) ;
wake_up_process ( nvme_thread ) ;
}
static void bio_completion ( struct nvme_dev * dev , void * ctx ,
2011-01-20 20:50:14 +03:00
struct nvme_completion * cqe )
{
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod = ctx ;
struct bio * bio = iod - > private ;
2011-01-20 20:50:14 +03:00
u16 status = le16_to_cpup ( & cqe - > status ) > > 1 ;
2011-12-20 22:34:52 +04:00
dma_unmap_sg ( & dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
2011-01-20 20:50:14 +03:00
bio_data_dir ( bio ) ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-12-20 22:34:52 +04:00
nvme_free_iod ( dev , iod ) ;
2011-04-29 10:09:09 +04:00
if ( status ) {
2011-02-23 23:20:00 +03:00
bio_endio ( bio , - EIO ) ;
2011-04-29 10:09:09 +04:00
} else if ( bio - > bi_vcnt > bio - > bi_idx ) {
2011-12-20 20:54:53 +04:00
requeue_bio ( dev , bio ) ;
2011-02-23 23:20:00 +03:00
} else {
bio_endio ( bio , 0 ) ;
}
2011-01-20 20:50:14 +03:00
}
2011-05-12 05:36:38 +04:00
/* length is in bytes. gfp flags indicates whether we may sleep. */
2011-12-20 22:34:52 +04:00
static int nvme_setup_prps ( struct nvme_dev * dev ,
struct nvme_common_command * cmd , struct nvme_iod * iod ,
int total_len , gfp_t gfp )
2011-01-26 18:02:29 +03:00
{
2011-02-10 18:30:34 +03:00
struct dma_pool * pool ;
2011-12-20 22:34:52 +04:00
int length = total_len ;
struct scatterlist * sg = iod - > sg ;
2011-01-26 18:02:29 +03:00
int dma_len = sg_dma_len ( sg ) ;
u64 dma_addr = sg_dma_address ( sg ) ;
int offset = offset_in_page ( dma_addr ) ;
2011-02-10 16:51:24 +03:00
__le64 * prp_list ;
2011-12-20 22:34:52 +04:00
__le64 * * list = iod_list ( iod ) ;
2011-02-10 16:51:24 +03:00
dma_addr_t prp_dma ;
2011-12-20 22:34:52 +04:00
int nprps , i ;
2011-01-26 18:02:29 +03:00
cmd - > prp1 = cpu_to_le64 ( dma_addr ) ;
length - = ( PAGE_SIZE - offset ) ;
if ( length < = 0 )
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
dma_len - = ( PAGE_SIZE - offset ) ;
if ( dma_len ) {
dma_addr + = ( PAGE_SIZE - offset ) ;
} else {
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
}
if ( length < = PAGE_SIZE ) {
cmd - > prp2 = cpu_to_le64 ( dma_addr ) ;
2011-12-20 22:34:52 +04:00
return total_len ;
2011-02-10 16:51:24 +03:00
}
nprps = DIV_ROUND_UP ( length , PAGE_SIZE ) ;
2011-02-10 18:30:34 +03:00
if ( nprps < = ( 256 / 8 ) ) {
pool = dev - > prp_small_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 0 ;
2011-02-10 18:30:34 +03:00
} else {
pool = dev - > prp_page_pool ;
2011-12-20 22:34:52 +04:00
iod - > npages = 1 ;
2011-02-10 18:30:34 +03:00
}
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
if ( ! prp_list ) {
cmd - > prp2 = cpu_to_le64 ( dma_addr ) ;
2011-12-20 22:34:52 +04:00
iod - > npages = - 1 ;
return ( total_len - length ) + PAGE_SIZE ;
2011-05-12 21:51:41 +04:00
}
2011-12-20 22:34:52 +04:00
list [ 0 ] = prp_list ;
iod - > first_dma = prp_dma ;
2011-02-10 16:51:24 +03:00
cmd - > prp2 = cpu_to_le64 ( prp_dma ) ;
i = 0 ;
for ( ; ; ) {
2011-03-16 23:43:40 +03:00
if ( i = = PAGE_SIZE / 8 ) {
2011-02-10 16:51:24 +03:00
__le64 * old_prp_list = prp_list ;
2011-05-12 21:51:41 +04:00
prp_list = dma_pool_alloc ( pool , gfp , & prp_dma ) ;
2011-12-20 22:34:52 +04:00
if ( ! prp_list )
return total_len - length ;
list [ iod - > npages + + ] = prp_list ;
2011-03-16 23:43:40 +03:00
prp_list [ 0 ] = old_prp_list [ i - 1 ] ;
old_prp_list [ i - 1 ] = cpu_to_le64 ( prp_dma ) ;
i = 1 ;
2011-02-10 16:51:24 +03:00
}
prp_list [ i + + ] = cpu_to_le64 ( dma_addr ) ;
dma_len - = PAGE_SIZE ;
dma_addr + = PAGE_SIZE ;
length - = PAGE_SIZE ;
if ( length < = 0 )
break ;
if ( dma_len > 0 )
continue ;
BUG_ON ( dma_len < 0 ) ;
sg = sg_next ( sg ) ;
dma_addr = sg_dma_address ( sg ) ;
dma_len = sg_dma_len ( sg ) ;
2011-01-26 18:02:29 +03:00
}
2011-12-20 22:34:52 +04:00
return total_len ;
2011-01-26 18:02:29 +03:00
}
2011-02-23 23:20:00 +03:00
/*
 * NVMe scatterlists require no holes in the virtual address: two adjacent
 * bio_vecs can only share a command if the second starts at offset 0 and
 * the first ends exactly on a page boundary.
 */
#define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2)			\
	((vec2)->bv_offset ||					\
	 (((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE))
2011-12-20 22:34:52 +04:00
static int nvme_map_bio ( struct device * dev , struct nvme_iod * iod ,
2011-01-20 20:50:14 +03:00
struct bio * bio , enum dma_data_direction dma_dir , int psegs )
{
2011-02-10 21:55:39 +03:00
struct bio_vec * bvec , * bvprv = NULL ;
struct scatterlist * sg = NULL ;
2011-02-23 23:20:00 +03:00
int i , old_idx , length = 0 , nsegs = 0 ;
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
sg_init_table ( iod - > sg , psegs ) ;
2011-02-23 23:20:00 +03:00
old_idx = bio - > bi_idx ;
2011-01-20 20:50:14 +03:00
bio_for_each_segment ( bvec , bio , i ) {
2011-02-10 21:55:39 +03:00
if ( bvprv & & BIOVEC_PHYS_MERGEABLE ( bvprv , bvec ) ) {
sg - > length + = bvec - > bv_len ;
} else {
2011-02-23 23:20:00 +03:00
if ( bvprv & & BIOVEC_NOT_VIRT_MERGEABLE ( bvprv , bvec ) )
break ;
2011-12-20 22:34:52 +04:00
sg = sg ? sg + 1 : iod - > sg ;
2011-02-10 21:55:39 +03:00
sg_set_page ( sg , bvec - > bv_page , bvec - > bv_len ,
bvec - > bv_offset ) ;
nsegs + + ;
}
2011-02-23 23:20:00 +03:00
length + = bvec - > bv_len ;
2011-02-10 21:55:39 +03:00
bvprv = bvec ;
2011-01-20 20:50:14 +03:00
}
2011-02-23 23:20:00 +03:00
bio - > bi_idx = i ;
2011-12-20 22:34:52 +04:00
iod - > nents = nsegs ;
2011-02-10 21:55:39 +03:00
sg_mark_end ( sg ) ;
2011-12-20 22:34:52 +04:00
if ( dma_map_sg ( dev , iod - > sg , iod - > nents , dma_dir ) = = 0 ) {
2011-02-23 23:20:00 +03:00
bio - > bi_idx = old_idx ;
return - ENOMEM ;
}
return length ;
2011-01-20 20:50:14 +03:00
}
2011-02-22 22:18:30 +03:00
static int nvme_submit_flush ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
int cmdid )
{
struct nvme_command * cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
cmnd - > common . opcode = nvme_cmd_flush ;
cmnd - > common . command_id = cmdid ;
cmnd - > common . nsid = cpu_to_le32 ( ns - > ns_id ) ;
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
return 0 ;
}
static int nvme_submit_flush_data ( struct nvme_queue * nvmeq , struct nvme_ns * ns )
{
int cmdid = alloc_cmdid ( nvmeq , ( void * ) CMD_CTX_FLUSH ,
2011-12-20 22:53:01 +04:00
special_completion , NVME_IO_TIMEOUT ) ;
2011-02-22 22:18:30 +03:00
if ( unlikely ( cmdid < 0 ) )
return cmdid ;
return nvme_submit_flush ( nvmeq , ns , cmdid ) ;
}
2011-05-12 05:36:38 +04:00
/*
* Called with local interrupts disabled and the q_lock held . May not sleep .
*/
2011-01-20 20:50:14 +03:00
static int nvme_submit_bio_queue ( struct nvme_queue * nvmeq , struct nvme_ns * ns ,
struct bio * bio )
{
2011-01-26 18:02:29 +03:00
struct nvme_command * cmnd ;
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod ;
2011-01-20 20:50:14 +03:00
enum dma_data_direction dma_dir ;
2011-02-23 23:20:00 +03:00
int cmdid , length , result = - ENOMEM ;
2011-01-20 20:50:14 +03:00
u16 control ;
u32 dsmgmt ;
int psegs = bio_phys_segments ( ns - > queue , bio ) ;
2011-02-22 22:18:30 +03:00
if ( ( bio - > bi_rw & REQ_FLUSH ) & & psegs ) {
result = nvme_submit_flush_data ( nvmeq , ns ) ;
if ( result )
return result ;
}
2011-12-20 22:34:52 +04:00
iod = nvme_alloc_iod ( psegs , bio - > bi_size , GFP_ATOMIC ) ;
if ( ! iod )
2011-02-14 23:55:33 +03:00
goto nomem ;
2011-12-20 22:34:52 +04:00
iod - > private = bio ;
2011-01-20 20:50:14 +03:00
2011-02-14 23:55:33 +03:00
result = - EBUSY ;
2011-12-20 22:53:01 +04:00
cmdid = alloc_cmdid ( nvmeq , iod , bio_completion , NVME_IO_TIMEOUT ) ;
2011-01-20 20:50:14 +03:00
if ( unlikely ( cmdid < 0 ) )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-01-20 20:50:14 +03:00
2011-02-22 22:18:30 +03:00
if ( ( bio - > bi_rw & REQ_FLUSH ) & & ! psegs )
return nvme_submit_flush ( nvmeq , ns , cmdid ) ;
2011-01-20 20:50:14 +03:00
control = 0 ;
if ( bio - > bi_rw & REQ_FUA )
control | = NVME_RW_FUA ;
if ( bio - > bi_rw & ( REQ_FAILFAST_DEV | REQ_RAHEAD ) )
control | = NVME_RW_LR ;
dsmgmt = 0 ;
if ( bio - > bi_rw & REQ_RAHEAD )
dsmgmt | = NVME_RW_DSM_FREQ_PREFETCH ;
2011-01-26 18:02:29 +03:00
cmnd = & nvmeq - > sq_cmds [ nvmeq - > sq_tail ] ;
2011-01-20 20:50:14 +03:00
2011-01-26 18:08:25 +03:00
memset ( cmnd , 0 , sizeof ( * cmnd ) ) ;
2011-01-20 20:50:14 +03:00
if ( bio_data_dir ( bio ) ) {
2011-01-26 18:02:29 +03:00
cmnd - > rw . opcode = nvme_cmd_write ;
2011-01-20 20:50:14 +03:00
dma_dir = DMA_TO_DEVICE ;
} else {
2011-01-26 18:02:29 +03:00
cmnd - > rw . opcode = nvme_cmd_read ;
2011-01-20 20:50:14 +03:00
dma_dir = DMA_FROM_DEVICE ;
}
2011-12-20 22:34:52 +04:00
result = nvme_map_bio ( nvmeq - > q_dmadev , iod , bio , dma_dir , psegs ) ;
2011-02-23 23:20:00 +03:00
if ( result < 0 )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-02-23 23:20:00 +03:00
length = result ;
2011-01-20 20:50:14 +03:00
2011-01-26 18:02:29 +03:00
cmnd - > rw . command_id = cmdid ;
cmnd - > rw . nsid = cpu_to_le32 ( ns - > ns_id ) ;
2011-12-20 22:34:52 +04:00
length = nvme_setup_prps ( nvmeq - > dev , & cmnd - > common , iod , length ,
GFP_ATOMIC ) ;
2011-01-26 18:02:29 +03:00
cmnd - > rw . slba = cpu_to_le64 ( bio - > bi_sector > > ( ns - > lba_shift - 9 ) ) ;
2011-02-23 23:20:00 +03:00
cmnd - > rw . length = cpu_to_le16 ( ( length > > ns - > lba_shift ) - 1 ) ;
2011-01-26 18:02:29 +03:00
cmnd - > rw . control = cpu_to_le16 ( control ) ;
cmnd - > rw . dsmgmt = cpu_to_le32 ( dsmgmt ) ;
2011-01-20 20:50:14 +03:00
2011-02-24 16:46:00 +03:00
bio - > bi_sector + = length > > 9 ;
2011-01-20 20:50:14 +03:00
if ( + + nvmeq - > sq_tail = = nvmeq - > q_depth )
nvmeq - > sq_tail = 0 ;
2011-02-16 17:59:59 +03:00
writel ( nvmeq - > sq_tail , nvmeq - > q_db ) ;
2011-01-20 20:50:14 +03:00
2011-02-10 20:01:09 +03:00
return 0 ;
2011-12-20 22:34:52 +04:00
free_iod :
nvme_free_iod ( nvmeq - > dev , iod ) ;
2011-02-14 23:55:33 +03:00
nomem :
return result ;
2011-01-20 20:50:14 +03:00
}
2012-01-19 03:41:27 +04:00
static void nvme_make_request ( struct request_queue * q , struct bio * bio )
2011-01-20 20:50:14 +03:00
{
struct nvme_ns * ns = q - > queuedata ;
2011-12-20 20:04:12 +04:00
struct nvme_queue * nvmeq = get_nvmeq ( ns - > dev ) ;
2011-02-14 23:55:33 +03:00
int result = - EBUSY ;
spin_lock_irq ( & nvmeq - > q_lock ) ;
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
result = nvme_submit_bio_queue ( nvmeq , ns , bio ) ;
if ( unlikely ( result ) ) {
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
add_wait_queue ( & nvmeq - > sq_full , & nvmeq - > sq_cong_wait ) ;
2011-01-20 20:50:14 +03:00
bio_list_add ( & nvmeq - > sq_cong , bio ) ;
}
2011-02-14 23:55:33 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-01-20 20:50:14 +03:00
put_nvmeq ( nvmeq ) ;
}
static irqreturn_t nvme_process_cq ( struct nvme_queue * nvmeq )
{
2011-01-20 21:24:06 +03:00
u16 head , phase ;
2011-01-20 20:50:14 +03:00
head = nvmeq - > cq_head ;
2011-01-20 21:24:06 +03:00
phase = nvmeq - > cq_phase ;
2011-01-20 20:50:14 +03:00
for ( ; ; ) {
2011-10-15 15:33:46 +04:00
void * ctx ;
nvme_completion_fn fn ;
2011-01-20 20:50:14 +03:00
struct nvme_completion cqe = nvmeq - > cqes [ head ] ;
2011-01-20 21:24:06 +03:00
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = phase )
2011-01-20 20:50:14 +03:00
break ;
nvmeq - > sq_head = le16_to_cpu ( cqe . sq_head ) ;
if ( + + head = = nvmeq - > q_depth ) {
head = 0 ;
2011-01-20 21:24:06 +03:00
phase = ! phase ;
2011-01-20 20:50:14 +03:00
}
2011-10-15 15:33:46 +04:00
ctx = free_cmdid ( nvmeq , cqe . command_id , & fn ) ;
2011-12-20 20:54:53 +04:00
fn ( nvmeq - > dev , ctx , & cqe ) ;
2011-01-20 20:50:14 +03:00
}
/* If the controller ignores the cq head doorbell and continuously
* writes to the queue , it is theoretically possible to wrap around
* the queue twice and mistakenly return IRQ_NONE . Linux only
* requires that 0.1 % of your interrupts are handled , so this isn ' t
* a big problem .
*/
2011-01-20 21:24:06 +03:00
if ( head = = nvmeq - > cq_head & & phase = = nvmeq - > cq_phase )
2011-01-20 20:50:14 +03:00
return IRQ_NONE ;
2011-10-21 01:00:41 +04:00
writel ( head , nvmeq - > q_db + ( 1 < < nvmeq - > dev - > db_stride ) ) ;
2011-01-20 20:50:14 +03:00
nvmeq - > cq_head = head ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = phase ;
2011-01-20 20:50:14 +03:00
return IRQ_HANDLED ;
}
static irqreturn_t nvme_irq ( int irq , void * data )
2011-02-06 15:28:06 +03:00
{
irqreturn_t result ;
struct nvme_queue * nvmeq = data ;
spin_lock ( & nvmeq - > q_lock ) ;
result = nvme_process_cq ( nvmeq ) ;
spin_unlock ( & nvmeq - > q_lock ) ;
return result ;
}
static irqreturn_t nvme_irq_check ( int irq , void * data )
{
struct nvme_queue * nvmeq = data ;
struct nvme_completion cqe = nvmeq - > cqes [ nvmeq - > cq_head ] ;
if ( ( le16_to_cpu ( cqe . status ) & 1 ) ! = nvmeq - > cq_phase )
return IRQ_NONE ;
return IRQ_WAKE_THREAD ;
}
2011-02-05 00:03:56 +03:00
static void nvme_abort_command ( struct nvme_queue * nvmeq , int cmdid )
{
spin_lock_irq ( & nvmeq - > q_lock ) ;
2011-10-15 15:33:46 +04:00
cancel_cmdid ( nvmeq , cmdid , NULL ) ;
2011-02-05 00:03:56 +03:00
spin_unlock_irq ( & nvmeq - > q_lock ) ;
}
2011-10-15 15:33:46 +04:00
/* Context shared between nvme_submit_sync_cmd() and sync_completion() */
struct sync_cmd_info {
	struct task_struct *task;	/* submitter, woken on completion */
	u32 result;			/* result field of the completion entry */
	int status;			/* NVMe status, or -EINTR until completed */
};
2011-12-20 20:54:53 +04:00
static void sync_completion ( struct nvme_dev * dev , void * ctx ,
2011-10-15 15:33:46 +04:00
struct nvme_completion * cqe )
{
struct sync_cmd_info * cmdinfo = ctx ;
cmdinfo - > result = le32_to_cpup ( & cqe - > result ) ;
cmdinfo - > status = le16_to_cpup ( & cqe - > status ) > > 1 ;
wake_up_process ( cmdinfo - > task ) ;
}
2011-01-20 20:50:14 +03:00
/*
* Returns 0 on success . If the result is negative , it ' s a Linux error code ;
* if the result is positive , it ' s an NVM Express status code
*/
2011-02-05 00:03:56 +03:00
static int nvme_submit_sync_cmd ( struct nvme_queue * nvmeq ,
2011-02-07 02:30:16 +03:00
struct nvme_command * cmd , u32 * result , unsigned timeout )
2011-01-20 20:50:14 +03:00
{
int cmdid ;
struct sync_cmd_info cmdinfo ;
cmdinfo . task = current ;
cmdinfo . status = - EINTR ;
2011-10-15 15:33:46 +04:00
cmdid = alloc_cmdid_killable ( nvmeq , & cmdinfo , sync_completion ,
2011-02-07 02:30:16 +03:00
timeout ) ;
2011-01-20 20:50:14 +03:00
if ( cmdid < 0 )
return cmdid ;
cmd - > common . command_id = cmdid ;
2011-02-05 00:03:56 +03:00
set_current_state ( TASK_KILLABLE ) ;
nvme_submit_cmd ( nvmeq , cmd ) ;
2011-01-20 20:50:14 +03:00
schedule ( ) ;
2011-02-05 00:03:56 +03:00
if ( cmdinfo . status = = - EINTR ) {
nvme_abort_command ( nvmeq , cmdid ) ;
return - EINTR ;
}
2011-01-20 20:50:14 +03:00
if ( result )
* result = cmdinfo . result ;
return cmdinfo . status ;
}
/*
 * Synchronously submit @cmd on the admin queue (queues[0]) using
 * ADMIN_TIMEOUT.  Return value as for nvme_submit_sync_cmd().
 */
static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
								u32 *result)
{
	struct nvme_queue *adminq = dev->queues[0];

	return nvme_submit_sync_cmd(adminq, cmd, result, ADMIN_TIMEOUT);
}
/*
 * Send the admin command @opcode to delete queue @id.
 * Returns 0 on success and -EIO on any failure.
 */
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL) ? -EIO : 0;
}
/*
 * Ask the controller to create completion queue @qid backed by @nvmeq's
 * completion ring, physically contiguous and with interrupts enabled on
 * the queue's vector.  Returns 0 on success and -EIO on any failure.
 */
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
	c.create_cq.cqid = cpu_to_le16(qid);
	/* The command carries the queue depth minus one */
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL) ? -EIO : 0;
}
/*
 * Ask the controller to create submission queue @qid backed by @nvmeq's
 * submission ring, physically contiguous, medium priority, and paired
 * with the completion queue of the same ID.  Returns 0 on success and
 * -EIO on any failure.
 */
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
	c.create_sq.sqid = cpu_to_le16(qid);
	/* The command carries the queue depth minus one */
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL) ? -EIO : 0;
}
/* Delete completion queue @cqid; returns 0 or -EIO. */
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	int rc = adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);

	return rc;
}
/* Delete submission queue @sqid; returns 0 or -EIO. */
static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	int rc = adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);

	return rc;
}
2011-09-20 01:08:14 +04:00
/*
 * Issue an Identify admin command for @nsid with the given @cns value;
 * the controller writes the data to @dma_addr.  Return value as for
 * nvme_submit_sync_cmd().
 */
static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
							dma_addr_t dma_addr)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = cpu_to_le32(cns);
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}
/*
 * Issue a Get Features admin command for feature @fid on namespace @nsid,
 * with @dma_addr as the data buffer.  The completion's result field is
 * not returned to the caller.  Return value as for nvme_submit_sync_cmd().
 */
static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
					unsigned nsid, dma_addr_t dma_addr)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.fid = cpu_to_le32(fid);
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}
/*
 * Issue a Set Features admin command for feature @fid with @dword11 as
 * its argument and @dma_addr as the data buffer.  If @result is non-NULL
 * it receives the completion's result field.  Return value as for
 * nvme_submit_sync_cmd().
 */
static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
			unsigned dword11, dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);
	c.features.prp1 = cpu_to_le64(dma_addr);

	return nvme_submit_admin_cmd(dev, &c, result);
}
2012-08-07 23:56:23 +04:00
/**
 * nvme_cancel_ios - Cancel outstanding I/Os
 * @nvmeq: The queue to cancel I/Os on
 * @timeout: True to only cancel I/Os which have timed out
 *
 * Every allocated command ID (optionally only those past their deadline)
 * is marked cancelled and its original handler is invoked with a
 * synthetic completion carrying NVME_SC_ABORT_REQ.  The command ID stays
 * allocated, so IDs that were already cancelled on an earlier pass are
 * skipped rather than logged and "completed" again.
 *
 * The caller in this chunk (nvme_free_queue) holds q_lock with
 * interrupts disabled.
 */
static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
{
	int depth = nvmeq->q_depth - 1;
	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
	unsigned long now = jiffies;
	int cmdid;

	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
		void *ctx;
		nvme_completion_fn fn;
		/*
		 * Handlers decode status as le16_to_cpu(status) >> 1, so the
		 * shift must be applied before the conversion to little
		 * endian, not after it.
		 */
		static struct nvme_completion cqe = {
			.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
		};

		if (timeout && !time_after(now, info[cmdid].timeout))
			continue;
		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
			continue;
		dev_warn(nvmeq->q_dmadev, " Cancelling I/O %d \n ", cmdid);
		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
		fn(nvmeq->dev, ctx, &cqe);
	}
}
2012-08-03 21:55:56 +04:00
static void nvme_free_queue_mem ( struct nvme_queue * nvmeq )
{
dma_free_coherent ( nvmeq - > q_dmadev , CQ_SIZE ( nvmeq - > q_depth ) ,
( void * ) nvmeq - > cqes , nvmeq - > cq_dma_addr ) ;
dma_free_coherent ( nvmeq - > q_dmadev , SQ_SIZE ( nvmeq - > q_depth ) ,
nvmeq - > sq_cmds , nvmeq - > sq_dma_addr ) ;
kfree ( nvmeq ) ;
}
2011-01-20 20:50:14 +03:00
static void nvme_free_queue ( struct nvme_dev * dev , int qid )
{
struct nvme_queue * nvmeq = dev - > queues [ qid ] ;
2011-03-27 16:52:06 +04:00
int vector = dev - > entry [ nvmeq - > cq_vector ] . vector ;
2011-01-20 20:50:14 +03:00
2012-08-07 23:56:23 +04:00
spin_lock_irq ( & nvmeq - > q_lock ) ;
nvme_cancel_ios ( nvmeq , false ) ;
spin_unlock_irq ( & nvmeq - > q_lock ) ;
2011-03-27 16:52:06 +04:00
irq_set_affinity_hint ( vector , NULL ) ;
free_irq ( vector , nvmeq ) ;
2011-01-20 20:50:14 +03:00
/* Don't tell the adapter to delete the admin queue */
if ( qid ) {
adapter_delete_sq ( dev , qid ) ;
adapter_delete_cq ( dev , qid ) ;
}
2012-08-03 21:55:56 +04:00
nvme_free_queue_mem ( nvmeq ) ;
2011-01-20 20:50:14 +03:00
}
static struct nvme_queue * nvme_alloc_queue ( struct nvme_dev * dev , int qid ,
int depth , int vector )
{
struct device * dmadev = & dev - > pci_dev - > dev ;
2012-07-27 21:57:23 +04:00
unsigned extra = DIV_ROUND_UP ( depth , 8 ) + ( depth *
sizeof ( struct nvme_cmd_info ) ) ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * nvmeq = kzalloc ( sizeof ( * nvmeq ) + extra , GFP_KERNEL ) ;
if ( ! nvmeq )
return NULL ;
nvmeq - > cqes = dma_alloc_coherent ( dmadev , CQ_SIZE ( depth ) ,
& nvmeq - > cq_dma_addr , GFP_KERNEL ) ;
if ( ! nvmeq - > cqes )
goto free_nvmeq ;
memset ( ( void * ) nvmeq - > cqes , 0 , CQ_SIZE ( depth ) ) ;
nvmeq - > sq_cmds = dma_alloc_coherent ( dmadev , SQ_SIZE ( depth ) ,
& nvmeq - > sq_dma_addr , GFP_KERNEL ) ;
if ( ! nvmeq - > sq_cmds )
goto free_cqdma ;
nvmeq - > q_dmadev = dmadev ;
2011-02-10 17:56:01 +03:00
nvmeq - > dev = dev ;
2011-01-20 20:50:14 +03:00
spin_lock_init ( & nvmeq - > q_lock ) ;
nvmeq - > cq_head = 0 ;
2011-01-20 21:24:06 +03:00
nvmeq - > cq_phase = 1 ;
2011-01-20 20:50:14 +03:00
init_waitqueue_head ( & nvmeq - > sq_full ) ;
2011-03-03 02:37:18 +03:00
init_waitqueue_entry ( & nvmeq - > sq_cong_wait , nvme_thread ) ;
2011-01-20 20:50:14 +03:00
bio_list_init ( & nvmeq - > sq_cong ) ;
2011-10-21 01:00:41 +04:00
nvmeq - > q_db = & dev - > dbs [ qid < < ( dev - > db_stride + 1 ) ] ;
2011-01-20 20:50:14 +03:00
nvmeq - > q_depth = depth ;
nvmeq - > cq_vector = vector ;
return nvmeq ;
free_cqdma :
dma_free_coherent ( dmadev , CQ_SIZE ( nvmeq - > q_depth ) , ( void * ) nvmeq - > cqes ,
nvmeq - > cq_dma_addr ) ;
free_nvmeq :
kfree ( nvmeq ) ;
return NULL ;
}
2011-01-20 17:10:15 +03:00
static int queue_request_irq ( struct nvme_dev * dev , struct nvme_queue * nvmeq ,
const char * name )
{
2011-02-06 15:28:06 +03:00
if ( use_threaded_interrupts )
return request_threaded_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector ,
2011-02-06 17:01:00 +03:00
nvme_irq_check , nvme_irq ,
2011-02-06 15:28:06 +03:00
IRQF_DISABLED | IRQF_SHARED ,
name , nvmeq ) ;
2011-01-20 17:10:15 +03:00
return request_irq ( dev - > entry [ nvmeq - > cq_vector ] . vector , nvme_irq ,
IRQF_DISABLED | IRQF_SHARED , name , nvmeq ) ;
}
2012-12-22 03:13:49 +04:00
static struct nvme_queue * nvme_create_queue ( struct nvme_dev * dev , int qid ,
int cq_size , int vector )
2011-01-20 20:50:14 +03:00
{
int result ;
struct nvme_queue * nvmeq = nvme_alloc_queue ( dev , qid , cq_size , vector ) ;
2011-02-01 16:39:04 +03:00
if ( ! nvmeq )
2011-05-12 00:30:59 +04:00
return ERR_PTR ( - ENOMEM ) ;
2011-02-01 16:39:04 +03:00
2011-01-20 20:50:14 +03:00
result = adapter_alloc_cq ( dev , qid , nvmeq ) ;
if ( result < 0 )
goto free_nvmeq ;
result = adapter_alloc_sq ( dev , qid , nvmeq ) ;
if ( result < 0 )
goto release_cq ;
2011-01-20 17:10:15 +03:00
result = queue_request_irq ( dev , nvmeq , " nvme " ) ;
2011-01-20 20:50:14 +03:00
if ( result < 0 )
goto release_sq ;
return nvmeq ;
release_sq :
adapter_delete_sq ( dev , qid ) ;
release_cq :
adapter_delete_cq ( dev , qid ) ;
free_nvmeq :
dma_free_coherent ( nvmeq - > q_dmadev , CQ_SIZE ( nvmeq - > q_depth ) ,
( void * ) nvmeq - > cqes , nvmeq - > cq_dma_addr ) ;
dma_free_coherent ( nvmeq - > q_dmadev , SQ_SIZE ( nvmeq - > q_depth ) ,
nvmeq - > sq_cmds , nvmeq - > sq_dma_addr ) ;
kfree ( nvmeq ) ;
2011-05-12 00:30:59 +04:00
return ERR_PTR ( result ) ;
2011-01-20 20:50:14 +03:00
}
2012-12-22 03:13:49 +04:00
static int nvme_configure_admin_queue ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2012-08-03 21:55:56 +04:00
int result = 0 ;
2011-01-20 20:50:14 +03:00
u32 aqa ;
2011-04-19 23:04:20 +04:00
u64 cap ;
unsigned long timeout ;
2011-01-20 20:50:14 +03:00
struct nvme_queue * nvmeq ;
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
nvmeq = nvme_alloc_queue ( dev , 0 , 64 , 0 ) ;
2011-02-01 16:39:04 +03:00
if ( ! nvmeq )
return - ENOMEM ;
2011-01-20 20:50:14 +03:00
aqa = nvmeq - > q_depth - 1 ;
aqa | = aqa < < 16 ;
dev - > ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM ;
dev - > ctrl_config | = ( PAGE_SHIFT - 12 ) < < NVME_CC_MPS_SHIFT ;
dev - > ctrl_config | = NVME_CC_ARB_RR | NVME_CC_SHN_NONE ;
2011-03-22 22:55:45 +03:00
dev - > ctrl_config | = NVME_CC_IOSQES | NVME_CC_IOCQES ;
2011-01-20 20:50:14 +03:00
2011-02-01 19:31:55 +03:00
writel ( 0 , & dev - > bar - > cc ) ;
2011-01-20 20:50:14 +03:00
writel ( aqa , & dev - > bar - > aqa ) ;
writeq ( nvmeq - > sq_dma_addr , & dev - > bar - > asq ) ;
writeq ( nvmeq - > cq_dma_addr , & dev - > bar - > acq ) ;
writel ( dev - > ctrl_config , & dev - > bar - > cc ) ;
2011-04-19 23:04:20 +04:00
cap = readq ( & dev - > bar - > cap ) ;
timeout = ( ( NVME_CAP_TIMEOUT ( cap ) + 1 ) * HZ / 2 ) + jiffies ;
2011-10-21 01:00:41 +04:00
dev - > db_stride = NVME_CAP_STRIDE ( cap ) ;
2011-04-19 23:04:20 +04:00
2012-08-03 21:55:56 +04:00
while ( ! result & & ! ( readl ( & dev - > bar - > csts ) & NVME_CSTS_RDY ) ) {
2011-01-20 20:50:14 +03:00
msleep ( 100 ) ;
if ( fatal_signal_pending ( current ) )
2012-08-03 21:55:56 +04:00
result = - EINTR ;
2011-04-19 23:04:20 +04:00
if ( time_after ( jiffies , timeout ) ) {
dev_err ( & dev - > pci_dev - > dev ,
" Device not ready; aborting initialisation \n " ) ;
2012-08-03 21:55:56 +04:00
result = - ENODEV ;
2011-04-19 23:04:20 +04:00
}
2011-01-20 20:50:14 +03:00
}
2012-08-03 21:55:56 +04:00
if ( result ) {
nvme_free_queue_mem ( nvmeq ) ;
return result ;
}
2011-01-20 17:10:15 +03:00
result = queue_request_irq ( dev , nvmeq , " nvme admin " ) ;
2011-01-20 20:50:14 +03:00
dev - > queues [ 0 ] = nvmeq ;
return result ;
}
2011-12-20 22:34:52 +04:00
static struct nvme_iod * nvme_map_user_pages ( struct nvme_dev * dev , int write ,
unsigned long addr , unsigned length )
2011-01-20 20:50:14 +03:00
{
2011-01-24 15:52:07 +03:00
int i , err , count , nents , offset ;
2011-01-27 01:05:50 +03:00
struct scatterlist * sg ;
struct page * * pages ;
2011-12-20 22:34:52 +04:00
struct nvme_iod * iod ;
2011-01-24 15:52:07 +03:00
if ( addr & 3 )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2011-01-27 01:05:50 +03:00
if ( ! length )
2011-12-20 22:34:52 +04:00
return ERR_PTR ( - EINVAL ) ;
2011-01-27 01:05:50 +03:00
2011-01-24 15:52:07 +03:00
offset = offset_in_page ( addr ) ;
2011-01-27 01:05:50 +03:00
count = DIV_ROUND_UP ( offset + length , PAGE_SIZE ) ;
pages = kcalloc ( count , sizeof ( * pages ) , GFP_KERNEL ) ;
2012-01-20 16:55:30 +04:00
if ( ! pages )
return ERR_PTR ( - ENOMEM ) ;
2011-01-24 15:52:07 +03:00
err = get_user_pages_fast ( addr , count , 1 , pages ) ;
if ( err < count ) {
count = err ;
err = - EFAULT ;
goto put_pages ;
}
2011-01-27 01:05:50 +03:00
2011-12-20 22:34:52 +04:00
iod = nvme_alloc_iod ( count , length , GFP_KERNEL ) ;
sg = iod - > sg ;
2011-01-24 15:52:07 +03:00
sg_init_table ( sg , count ) ;
2011-09-14 01:01:39 +04:00
for ( i = 0 ; i < count ; i + + ) {
sg_set_page ( & sg [ i ] , pages [ i ] ,
min_t ( int , length , PAGE_SIZE - offset ) , offset ) ;
length - = ( PAGE_SIZE - offset ) ;
offset = 0 ;
2011-01-27 01:05:50 +03:00
}
2012-01-07 00:49:25 +04:00
sg_mark_end ( & sg [ i - 1 ] ) ;
2012-01-07 00:52:56 +04:00
iod - > nents = count ;
2011-01-27 01:05:50 +03:00
err = - ENOMEM ;
nents = dma_map_sg ( & dev - > pci_dev - > dev , sg , count ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-24 15:52:07 +03:00
if ( ! nents )
2011-12-20 22:34:52 +04:00
goto free_iod ;
2011-01-20 20:50:14 +03:00
2011-01-27 01:05:50 +03:00
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return iod ;
2011-01-20 20:50:14 +03:00
2011-12-20 22:34:52 +04:00
free_iod :
kfree ( iod ) ;
2011-01-27 01:05:50 +03:00
put_pages :
for ( i = 0 ; i < count ; i + + )
put_page ( pages [ i ] ) ;
kfree ( pages ) ;
2011-12-20 22:34:52 +04:00
return ERR_PTR ( err ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2011-01-27 01:05:50 +03:00
static void nvme_unmap_user_pages ( struct nvme_dev * dev , int write ,
2012-01-07 00:52:56 +04:00
struct nvme_iod * iod )
2011-01-27 01:05:50 +03:00
{
2012-01-07 00:52:56 +04:00
int i ;
2011-01-20 20:50:14 +03:00
2012-01-07 00:52:56 +04:00
dma_unmap_sg ( & dev - > pci_dev - > dev , iod - > sg , iod - > nents ,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE ) ;
2011-01-27 01:05:50 +03:00
2012-01-07 00:52:56 +04:00
for ( i = 0 ; i < iod - > nents ; i + + )
put_page ( sg_page ( & iod - > sg [ i ] ) ) ;
2011-01-27 01:05:50 +03:00
}
2011-01-20 20:50:14 +03:00
2011-02-02 00:13:29 +03:00
/*
 * NVME_IOCTL_SUBMIT_IO: run one user-described read, write or compare
 * command synchronously.
 *
 * The request is copied from @uio, the user data buffer is pinned and
 * mapped, the command is built from the request fields and submitted with
 * nvme_submit_sync_cmd().  Any other opcode is rejected with -EINVAL.
 *
 * Returns -EFAULT/-EINVAL/-ENOMEM (or the mapping error) for local
 * failures, otherwise the value returned by nvme_submit_sync_cmd().
 */
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_dev *dev = ns->dev;
	struct nvme_queue *nvmeq;
	struct nvme_user_io io;
	struct nvme_command cmd;
	unsigned xfer_len, mapped_len;
	struct nvme_iod *iod;
	int status;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;

	if (io.opcode != nvme_cmd_write && io.opcode != nvme_cmd_read &&
	    io.opcode != nvme_cmd_compare)
		return -EINVAL;

	/* nblocks is zero-based */
	xfer_len = (io.nblocks + 1) << ns->lba_shift;

	/* Bit 0 of the opcode selects the DMA direction */
	iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, xfer_len);
	if (IS_ERR(iod))
		return PTR_ERR(iod);

	memset(&cmd, 0, sizeof(cmd));
	cmd.rw.opcode = io.opcode;
	cmd.rw.flags = io.flags;
	cmd.rw.nsid = cpu_to_le32(ns->ns_id);
	cmd.rw.slba = cpu_to_le64(io.slba);
	cmd.rw.length = cpu_to_le16(io.nblocks);
	cmd.rw.control = cpu_to_le16(io.control);
	cmd.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
	cmd.rw.reftag = io.reftag;
	cmd.rw.apptag = io.apptag;
	cmd.rw.appmask = io.appmask;
	/* XXX: metadata */
	mapped_len = nvme_setup_prps(dev, &cmd.common, iod, xfer_len,
								GFP_KERNEL);

	nvmeq = get_nvmeq(dev);
	/*
	 * nvme_submit_sync_cmd sleeps, so preemption cannot stay disabled.
	 * We may be preempted at any point and rescheduled onto a different
	 * CPU.  That causes cacheline bouncing, but no additional races,
	 * since q_lock already protects against other CPUs.
	 */
	put_nvmeq(nvmeq);

	if (mapped_len == xfer_len)
		status = nvme_submit_sync_cmd(nvmeq, &cmd, NULL,
							NVME_IO_TIMEOUT);
	else
		status = -ENOMEM;

	nvme_unmap_user_pages(dev, io.opcode & 1, iod);
	nvme_free_iod(dev, iod);
	return status;
}
2012-07-26 02:07:55 +04:00
static int nvme_user_admin_cmd ( struct nvme_dev * dev ,
2011-05-20 21:03:42 +04:00
struct nvme_admin_cmd __user * ucmd )
2011-02-03 18:58:26 +03:00
{
2011-05-20 21:03:42 +04:00
struct nvme_admin_cmd cmd ;
2011-02-03 18:58:26 +03:00
struct nvme_command c ;
2011-12-20 22:34:52 +04:00
int status , length ;
2012-07-27 21:53:28 +04:00
struct nvme_iod * uninitialized_var ( iod ) ;
2011-02-03 18:58:26 +03:00
2011-05-20 21:03:42 +04:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EACCES ;
if ( copy_from_user ( & cmd , ucmd , sizeof ( cmd ) ) )
2011-02-03 18:58:26 +03:00
return - EFAULT ;
memset ( & c , 0 , sizeof ( c ) ) ;
2011-05-20 21:03:42 +04:00
c . common . opcode = cmd . opcode ;
c . common . flags = cmd . flags ;
c . common . nsid = cpu_to_le32 ( cmd . nsid ) ;
c . common . cdw2 [ 0 ] = cpu_to_le32 ( cmd . cdw2 ) ;
c . common . cdw2 [ 1 ] = cpu_to_le32 ( cmd . cdw3 ) ;
c . common . cdw10 [ 0 ] = cpu_to_le32 ( cmd . cdw10 ) ;
c . common . cdw10 [ 1 ] = cpu_to_le32 ( cmd . cdw11 ) ;
c . common . cdw10 [ 2 ] = cpu_to_le32 ( cmd . cdw12 ) ;
c . common . cdw10 [ 3 ] = cpu_to_le32 ( cmd . cdw13 ) ;
c . common . cdw10 [ 4 ] = cpu_to_le32 ( cmd . cdw14 ) ;
c . common . cdw10 [ 5 ] = cpu_to_le32 ( cmd . cdw15 ) ;
length = cmd . data_len ;
if ( cmd . data_len ) {
2012-01-07 00:42:45 +04:00
iod = nvme_map_user_pages ( dev , cmd . opcode & 1 , cmd . addr ,
length ) ;
2011-12-20 22:34:52 +04:00
if ( IS_ERR ( iod ) )
return PTR_ERR ( iod ) ;
length = nvme_setup_prps ( dev , & c . common , iod , length ,
GFP_KERNEL ) ;
2011-05-20 21:03:42 +04:00
}
if ( length ! = cmd . data_len )
2011-05-12 21:51:41 +04:00
status = - ENOMEM ;
else
status = nvme_submit_admin_cmd ( dev , & c , NULL ) ;
2011-12-20 22:34:52 +04:00
2011-05-20 21:03:42 +04:00
if ( cmd . data_len ) {
2012-01-07 00:52:56 +04:00
nvme_unmap_user_pages ( dev , cmd . opcode & 1 , iod ) ;
2011-12-20 22:34:52 +04:00
nvme_free_iod ( dev , iod ) ;
2011-05-20 21:03:42 +04:00
}
2011-02-03 18:58:26 +03:00
return status ;
}
2011-01-20 20:50:14 +03:00
/*
 * Block-device ioctl entry point.  NVME_IOCTL_ID returns the namespace id,
 * NVME_IOCTL_ADMIN_CMD and NVME_IOCTL_SUBMIT_IO are forwarded to their
 * handlers with @arg as the user pointer.  Everything else is -ENOTTY.
 */
static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
							unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;

	if (cmd == NVME_IOCTL_ID)
		return ns->ns_id;
	if (cmd == NVME_IOCTL_ADMIN_CMD)
		return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
	if (cmd == NVME_IOCTL_SUBMIT_IO)
		return nvme_submit_io(ns, (void __user *)arg);

	return -ENOTTY;
}
static const struct block_device_operations nvme_fops = {
. owner = THIS_MODULE ,
. ioctl = nvme_ioctl ,
2011-03-19 21:55:38 +03:00
. compat_ioctl = nvme_ioctl ,
2011-01-20 20:50:14 +03:00
} ;
2011-03-03 02:37:18 +03:00
static void nvme_resubmit_bios ( struct nvme_queue * nvmeq )
{
while ( bio_list_peek ( & nvmeq - > sq_cong ) ) {
struct bio * bio = bio_list_pop ( & nvmeq - > sq_cong ) ;
struct nvme_ns * ns = bio - > bi_bdev - > bd_disk - > private_data ;
if ( nvme_submit_bio_queue ( nvmeq , ns , bio ) ) {
bio_list_add_head ( & nvmeq - > sq_cong , bio ) ;
break ;
}
2011-03-16 23:45:49 +03:00
if ( bio_list_empty ( & nvmeq - > sq_cong ) )
remove_wait_queue ( & nvmeq - > sq_full ,
& nvmeq - > sq_cong_wait ) ;
2011-03-03 02:37:18 +03:00
}
}
/*
 * Polling thread.  Until asked to stop, it walks every queue of every
 * device on dev_list and, under the queue lock, processes completions,
 * cancels commands that have timed out and retries parked bios.  It then
 * sleeps interruptibly for up to HZ jiffies before the next pass.
 */
static int nvme_kthread(void *data)
{
	struct nvme_dev *dev;

	while (!kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);

		spin_lock(&dev_list_lock);
		list_for_each_entry(dev, &dev_list, node) {
			int qid;

			for (qid = 0; qid < dev->queue_count; qid++) {
				struct nvme_queue *queue = dev->queues[qid];

				/* Slot not populated */
				if (queue == NULL)
					continue;

				spin_lock_irq(&queue->q_lock);
				if (nvme_process_cq(queue))
					printk(" process_cq did something \n ");
				nvme_cancel_ios(queue, true);
				nvme_resubmit_bios(queue);
				spin_unlock_irq(&queue->q_lock);
			}
		}
		spin_unlock(&dev_list_lock);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
	}

	return 0;
}
2011-05-06 16:45:47 +04:00
static DEFINE_IDA ( nvme_index_ida ) ;
static int nvme_get_ns_idx ( void )
{
int index , error ;
do {
if ( ! ida_pre_get ( & nvme_index_ida , GFP_KERNEL ) )
return - 1 ;
spin_lock ( & dev_list_lock ) ;
error = ida_get_new ( & nvme_index_ida , & index ) ;
spin_unlock ( & dev_list_lock ) ;
} while ( error = = - EAGAIN ) ;
if ( error )
index = - 1 ;
return index ;
}
/* Give @index back to nvme_index_ida, serialised by dev_list_lock. */
static void nvme_put_ns_idx(int index)
{
	struct ida *ida = &nvme_index_ida;

	spin_lock(&dev_list_lock);
	ida_remove(ida, index);
	spin_unlock(&dev_list_lock);
}
static struct nvme_ns * nvme_alloc_ns ( struct nvme_dev * dev , int nsid ,
2011-01-20 20:50:14 +03:00
struct nvme_id_ns * id , struct nvme_lba_range_type * rt )
{
struct nvme_ns * ns ;
struct gendisk * disk ;
int lbaf ;
if ( rt - > attributes & NVME_LBART_ATTRIB_HIDE )
return NULL ;
ns = kzalloc ( sizeof ( * ns ) , GFP_KERNEL ) ;
if ( ! ns )
return NULL ;
ns - > queue = blk_alloc_queue ( GFP_KERNEL ) ;
if ( ! ns - > queue )
goto out_free_ns ;
2012-01-11 01:35:08 +04:00
ns - > queue - > queue_flags = QUEUE_FLAG_DEFAULT ;
queue_flag_set_unlocked ( QUEUE_FLAG_NOMERGES , ns - > queue ) ;
queue_flag_set_unlocked ( QUEUE_FLAG_NONROT , ns - > queue ) ;
/* queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */
2011-01-20 20:50:14 +03:00
blk_queue_make_request ( ns - > queue , nvme_make_request ) ;
ns - > dev = dev ;
ns - > queue - > queuedata = ns ;
disk = alloc_disk ( NVME_MINORS ) ;
if ( ! disk )
goto out_free_queue ;
2011-05-06 16:45:47 +04:00
ns - > ns_id = nsid ;
2011-01-20 20:50:14 +03:00
ns - > disk = disk ;
lbaf = id - > flbas & 0xf ;
ns - > lba_shift = id - > lbaf [ lbaf ] . ds ;
2012-07-25 01:01:04 +04:00
blk_queue_logical_block_size ( ns - > queue , 1 < < ns - > lba_shift ) ;
2012-07-26 21:29:57 +04:00
if ( dev - > max_hw_sectors )
blk_queue_max_hw_sectors ( ns - > queue , dev - > max_hw_sectors ) ;
2011-01-20 20:50:14 +03:00
disk - > major = nvme_major ;
disk - > minors = NVME_MINORS ;
2011-05-06 16:45:47 +04:00
disk - > first_minor = NVME_MINORS * nvme_get_ns_idx ( ) ;
2011-01-20 20:50:14 +03:00
disk - > fops = & nvme_fops ;
disk - > private_data = ns ;
disk - > queue = ns - > queue ;
2011-02-01 20:49:38 +03:00
disk - > driverfs_dev = & dev - > pci_dev - > dev ;
2011-05-06 16:45:47 +04:00
sprintf ( disk - > disk_name , " nvme%dn%d " , dev - > instance , nsid ) ;
2011-01-20 20:50:14 +03:00
set_capacity ( disk , le64_to_cpup ( & id - > nsze ) < < ( ns - > lba_shift - 9 ) ) ;
return ns ;
out_free_queue :
blk_cleanup_queue ( ns - > queue ) ;
out_free_ns :
kfree ( ns ) ;
return NULL ;
}
static void nvme_ns_free ( struct nvme_ns * ns )
{
2011-05-06 16:45:47 +04:00
int index = ns - > disk - > first_minor / NVME_MINORS ;
2011-01-20 20:50:14 +03:00
put_disk ( ns - > disk ) ;
2011-05-06 16:45:47 +04:00
nvme_put_ns_idx ( index ) ;
2011-01-20 20:50:14 +03:00
blk_cleanup_queue ( ns - > queue ) ;
kfree ( ns ) ;
}
2011-01-20 17:14:34 +03:00
static int set_queue_count ( struct nvme_dev * dev , int count )
2011-01-20 20:50:14 +03:00
{
int status ;
u32 result ;
2011-01-20 17:14:34 +03:00
u32 q_count = ( count - 1 ) | ( ( count - 1 ) < < 16 ) ;
2011-01-20 20:50:14 +03:00
2012-01-11 18:29:56 +04:00
status = nvme_set_features ( dev , NVME_FEAT_NUM_QUEUES , q_count , 0 ,
2011-09-20 01:08:14 +04:00
& result ) ;
2011-01-20 20:50:14 +03:00
if ( status )
return - EIO ;
return min ( result & 0xffff , result > > 16 ) + 1 ;
}
2012-12-22 03:13:49 +04:00
static int nvme_setup_io_queues ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2012-07-27 21:57:23 +04:00
int result , cpu , i , nr_io_queues , db_bar_size , q_depth ;
2011-01-20 20:50:14 +03:00
2011-02-16 00:16:02 +03:00
nr_io_queues = num_online_cpus ( ) ;
result = set_queue_count ( dev , nr_io_queues ) ;
2011-01-20 21:01:49 +03:00
if ( result < 0 )
return result ;
2011-02-16 00:16:02 +03:00
if ( result < nr_io_queues )
nr_io_queues = result ;
2011-01-20 20:50:14 +03:00
2011-01-20 21:01:49 +03:00
/* Deregister the admin queue's interrupt */
free_irq ( dev - > entry [ 0 ] . vector , dev - > queues [ 0 ] ) ;
2011-10-21 01:00:41 +04:00
db_bar_size = 4096 + ( ( nr_io_queues + 1 ) < < ( dev - > db_stride + 3 ) ) ;
if ( db_bar_size > 8192 ) {
iounmap ( dev - > bar ) ;
dev - > bar = ioremap ( pci_resource_start ( dev - > pci_dev , 0 ) ,
db_bar_size ) ;
dev - > dbs = ( ( void __iomem * ) dev - > bar ) + 4096 ;
dev - > queues [ 0 ] - > q_db = dev - > dbs ;
}
2011-02-16 00:16:02 +03:00
for ( i = 0 ; i < nr_io_queues ; i + + )
2011-01-20 21:01:49 +03:00
dev - > entry [ i ] . entry = i ;
for ( ; ; ) {
2011-02-16 00:16:02 +03:00
result = pci_enable_msix ( dev - > pci_dev , dev - > entry ,
nr_io_queues ) ;
2011-01-20 21:01:49 +03:00
if ( result = = 0 ) {
break ;
} else if ( result > 0 ) {
2011-02-16 00:16:02 +03:00
nr_io_queues = result ;
2011-01-20 21:01:49 +03:00
continue ;
} else {
2011-02-16 00:16:02 +03:00
nr_io_queues = 1 ;
2011-01-20 21:01:49 +03:00
break ;
}
}
result = queue_request_irq ( dev , dev - > queues [ 0 ] , " nvme admin " ) ;
/* XXX: handle failure here */
cpu = cpumask_first ( cpu_online_mask ) ;
2011-02-16 00:16:02 +03:00
for ( i = 0 ; i < nr_io_queues ; i + + ) {
2011-01-20 21:01:49 +03:00
irq_set_affinity_hint ( dev - > entry [ i ] . vector , get_cpu_mask ( cpu ) ) ;
cpu = cpumask_next ( cpu , cpu_online_mask ) ;
}
2012-07-27 21:57:23 +04:00
q_depth = min_t ( int , NVME_CAP_MQES ( readq ( & dev - > bar - > cap ) ) + 1 ,
NVME_Q_DEPTH ) ;
2011-02-16 00:16:02 +03:00
for ( i = 0 ; i < nr_io_queues ; i + + ) {
2012-07-27 21:57:23 +04:00
dev - > queues [ i + 1 ] = nvme_create_queue ( dev , i + 1 , q_depth , i ) ;
2011-05-12 00:30:59 +04:00
if ( IS_ERR ( dev - > queues [ i + 1 ] ) )
return PTR_ERR ( dev - > queues [ i + 1 ] ) ;
2011-01-20 21:01:49 +03:00
dev - > queue_count + + ;
}
2011-01-20 20:50:14 +03:00
2011-03-16 23:52:19 +03:00
for ( ; i < num_possible_cpus ( ) ; i + + ) {
int target = i % rounddown_pow_of_two ( dev - > queue_count - 1 ) ;
dev - > queues [ i + 1 ] = dev - > queues [ target + 1 ] ;
}
2011-01-20 20:50:14 +03:00
return 0 ;
}
static void nvme_free_queues ( struct nvme_dev * dev )
{
int i ;
for ( i = dev - > queue_count - 1 ; i > = 0 ; i - - )
nvme_free_queue ( dev , i ) ;
}
2012-12-22 03:13:49 +04:00
static int nvme_dev_add ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
int res , nn , i ;
struct nvme_ns * ns , * next ;
2011-02-02 00:18:08 +03:00
struct nvme_id_ctrl * ctrl ;
2011-09-20 01:08:14 +04:00
struct nvme_id_ns * id_ns ;
void * mem ;
2011-01-20 20:50:14 +03:00
dma_addr_t dma_addr ;
res = nvme_setup_io_queues ( dev ) ;
if ( res )
return res ;
2011-09-20 01:08:14 +04:00
mem = dma_alloc_coherent ( & dev - > pci_dev - > dev , 8192 , & dma_addr ,
2011-01-20 20:50:14 +03:00
GFP_KERNEL ) ;
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , 0 , 1 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res ) {
res = - EIO ;
goto out_free ;
}
2011-09-20 01:08:14 +04:00
ctrl = mem ;
2011-02-02 00:18:08 +03:00
nn = le32_to_cpup ( & ctrl - > nn ) ;
memcpy ( dev - > serial , ctrl - > sn , sizeof ( ctrl - > sn ) ) ;
memcpy ( dev - > model , ctrl - > mn , sizeof ( ctrl - > mn ) ) ;
memcpy ( dev - > firmware_rev , ctrl - > fr , sizeof ( ctrl - > fr ) ) ;
2012-07-26 21:29:57 +04:00
if ( ctrl - > mdts ) {
int shift = NVME_CAP_MPSMIN ( readq ( & dev - > bar - > cap ) ) + 12 ;
dev - > max_hw_sectors = 1 < < ( ctrl - > mdts + shift - 9 ) ;
}
2011-01-20 20:50:14 +03:00
2011-09-20 01:08:14 +04:00
id_ns = mem ;
2011-10-07 21:10:13 +04:00
for ( i = 1 ; i < = nn ; i + + ) {
2011-09-20 01:08:14 +04:00
res = nvme_identify ( dev , i , 0 , dma_addr ) ;
2011-01-20 20:50:14 +03:00
if ( res )
continue ;
2011-09-20 01:08:14 +04:00
if ( id_ns - > ncap = = 0 )
2011-01-20 20:50:14 +03:00
continue ;
2011-09-20 01:08:14 +04:00
res = nvme_get_features ( dev , NVME_FEAT_LBA_RANGE , i ,
2012-01-11 18:29:56 +04:00
dma_addr + 4096 ) ;
2011-01-20 20:50:14 +03:00
if ( res )
continue ;
2011-09-20 01:08:14 +04:00
ns = nvme_alloc_ns ( dev , i , mem , mem + 4096 ) ;
2011-01-20 20:50:14 +03:00
if ( ns )
list_add_tail ( & ns - > list , & dev - > namespaces ) ;
}
list_for_each_entry ( ns , & dev - > namespaces , list )
add_disk ( ns - > disk ) ;
2011-09-20 01:08:14 +04:00
goto out ;
2011-01-20 20:50:14 +03:00
out_free :
list_for_each_entry_safe ( ns , next , & dev - > namespaces , list ) {
list_del ( & ns - > list ) ;
nvme_ns_free ( ns ) ;
}
2011-09-20 01:08:14 +04:00
out :
2011-09-20 01:14:53 +04:00
dma_free_coherent ( & dev - > pci_dev - > dev , 8192 , mem , dma_addr ) ;
2011-01-20 20:50:14 +03:00
return res ;
}
static int nvme_dev_remove ( struct nvme_dev * dev )
{
struct nvme_ns * ns , * next ;
2011-03-03 02:37:18 +03:00
spin_lock ( & dev_list_lock ) ;
list_del ( & dev - > node ) ;
spin_unlock ( & dev_list_lock ) ;
2011-01-20 20:50:14 +03:00
list_for_each_entry_safe ( ns , next , & dev - > namespaces , list ) {
list_del ( & ns - > list ) ;
del_gendisk ( ns - > disk ) ;
nvme_ns_free ( ns ) ;
}
nvme_free_queues ( dev ) ;
return 0 ;
}
2011-02-10 17:56:01 +03:00
static int nvme_setup_prp_pools ( struct nvme_dev * dev )
{
struct device * dmadev = & dev - > pci_dev - > dev ;
dev - > prp_page_pool = dma_pool_create ( " prp list page " , dmadev ,
PAGE_SIZE , PAGE_SIZE , 0 ) ;
if ( ! dev - > prp_page_pool )
return - ENOMEM ;
2011-02-10 18:30:34 +03:00
/* Optimisation for I/Os between 4k and 128k */
dev - > prp_small_pool = dma_pool_create ( " prp list 256 " , dmadev ,
256 , 256 , 0 ) ;
if ( ! dev - > prp_small_pool ) {
dma_pool_destroy ( dev - > prp_page_pool ) ;
return - ENOMEM ;
}
2011-02-10 17:56:01 +03:00
return 0 ;
}
static void nvme_release_prp_pools ( struct nvme_dev * dev )
{
dma_pool_destroy ( dev - > prp_page_pool ) ;
2011-02-10 18:30:34 +03:00
dma_pool_destroy ( dev - > prp_small_pool ) ;
2011-02-10 17:56:01 +03:00
}
2012-02-22 03:50:53 +04:00
static DEFINE_IDA ( nvme_instance_ida ) ;
static int nvme_set_instance ( struct nvme_dev * dev )
2011-01-20 20:50:14 +03:00
{
2012-02-22 03:50:53 +04:00
int instance , error ;
do {
if ( ! ida_pre_get ( & nvme_instance_ida , GFP_KERNEL ) )
return - ENODEV ;
spin_lock ( & dev_list_lock ) ;
error = ida_get_new ( & nvme_instance_ida , & instance ) ;
spin_unlock ( & dev_list_lock ) ;
} while ( error = = - EAGAIN ) ;
if ( error )
return - ENODEV ;
dev - > instance = instance ;
return 0 ;
2011-01-20 20:50:14 +03:00
}
static void nvme_release_instance ( struct nvme_dev * dev )
{
2012-02-22 03:50:53 +04:00
spin_lock ( & dev_list_lock ) ;
ida_remove ( & nvme_instance_ida , dev - > instance ) ;
spin_unlock ( & dev_list_lock ) ;
2011-01-20 20:50:14 +03:00
}
2012-12-22 03:13:49 +04:00
static int nvme_probe ( struct pci_dev * pdev , const struct pci_device_id * id )
2011-01-20 20:50:14 +03:00
{
2011-02-02 00:24:35 +03:00
int bars , result = - ENOMEM ;
2011-01-20 20:50:14 +03:00
struct nvme_dev * dev ;
dev = kzalloc ( sizeof ( * dev ) , GFP_KERNEL ) ;
if ( ! dev )
return - ENOMEM ;
dev - > entry = kcalloc ( num_possible_cpus ( ) , sizeof ( * dev - > entry ) ,
GFP_KERNEL ) ;
if ( ! dev - > entry )
goto free ;
2011-01-20 21:01:49 +03:00
dev - > queues = kcalloc ( num_possible_cpus ( ) + 1 , sizeof ( void * ) ,
GFP_KERNEL ) ;
2011-01-20 20:50:14 +03:00
if ( ! dev - > queues )
goto free ;
2011-02-01 16:49:30 +03:00
if ( pci_enable_device_mem ( pdev ) )
goto free ;
2011-02-01 17:01:59 +03:00
pci_set_master ( pdev ) ;
2011-02-02 00:24:35 +03:00
bars = pci_select_bars ( pdev , IORESOURCE_MEM ) ;
if ( pci_request_selected_regions ( pdev , bars , " nvme " ) )
goto disable ;
2011-02-01 16:49:30 +03:00
2011-01-20 20:50:14 +03:00
INIT_LIST_HEAD ( & dev - > namespaces ) ;
dev - > pci_dev = pdev ;
pci_set_drvdata ( pdev , dev ) ;
2011-02-02 00:23:39 +03:00
dma_set_mask ( & pdev - > dev , DMA_BIT_MASK ( 64 ) ) ;
dma_set_coherent_mask ( & pdev - > dev , DMA_BIT_MASK ( 64 ) ) ;
2012-02-22 03:50:53 +04:00
result = nvme_set_instance ( dev ) ;
if ( result )
goto disable ;
2011-01-20 21:42:34 +03:00
dev - > entry [ 0 ] . vector = pdev - > irq ;
2011-01-20 20:50:14 +03:00
2011-02-10 17:56:01 +03:00
result = nvme_setup_prp_pools ( dev ) ;
if ( result )
goto disable_msix ;
2011-01-20 20:50:14 +03:00
dev - > bar = ioremap ( pci_resource_start ( pdev , 0 ) , 8192 ) ;
if ( ! dev - > bar ) {
result = - ENOMEM ;
2011-02-02 00:24:35 +03:00
goto disable_msix ;
2011-01-20 20:50:14 +03:00
}
result = nvme_configure_admin_queue ( dev ) ;
if ( result )
goto unmap ;
dev - > queue_count + + ;
2011-03-03 02:37:18 +03:00
spin_lock ( & dev_list_lock ) ;
list_add ( & dev - > node , & dev_list ) ;
spin_unlock ( & dev_list_lock ) ;
2011-02-16 00:28:20 +03:00
result = nvme_dev_add ( dev ) ;
if ( result )
goto delete ;
2011-01-20 20:50:14 +03:00
return 0 ;
delete :
2011-02-16 00:28:20 +03:00
spin_lock ( & dev_list_lock ) ;
list_del ( & dev - > node ) ;
spin_unlock ( & dev_list_lock ) ;
2011-01-20 20:50:14 +03:00
nvme_free_queues ( dev ) ;
unmap :
iounmap ( dev - > bar ) ;
2011-02-02 00:24:35 +03:00
disable_msix :
2011-01-20 20:50:14 +03:00
pci_disable_msix ( pdev ) ;
nvme_release_instance ( dev ) ;
2011-02-10 17:56:01 +03:00
nvme_release_prp_pools ( dev ) ;
2011-02-02 00:24:35 +03:00
disable :
2011-02-01 16:49:30 +03:00
pci_disable_device ( pdev ) ;
2011-02-02 00:24:35 +03:00
pci_release_regions ( pdev ) ;
2011-01-20 20:50:14 +03:00
free :
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
return result ;
}
2012-12-22 03:13:49 +04:00
static void nvme_remove ( struct pci_dev * pdev )
2011-01-20 20:50:14 +03:00
{
struct nvme_dev * dev = pci_get_drvdata ( pdev ) ;
nvme_dev_remove ( dev ) ;
pci_disable_msix ( pdev ) ;
iounmap ( dev - > bar ) ;
nvme_release_instance ( dev ) ;
2011-02-10 17:56:01 +03:00
nvme_release_prp_pools ( dev ) ;
2011-02-01 16:49:30 +03:00
pci_disable_device ( pdev ) ;
2011-02-02 00:24:35 +03:00
pci_release_regions ( pdev ) ;
2011-01-20 20:50:14 +03:00
kfree ( dev - > queues ) ;
kfree ( dev - > entry ) ;
kfree ( dev ) ;
}
/*
 * These functions are yet to be implemented.  Each name expands to NULL,
 * so the error-handler and driver tables that reference it leave the
 * corresponding callback unset.
 */
# define nvme_error_detected NULL
# define nvme_dump_registers NULL
# define nvme_link_reset NULL
# define nvme_slot_reset NULL
# define nvme_error_resume NULL
# define nvme_suspend NULL
# define nvme_resume NULL
2012-09-07 20:33:17 +04:00
static const struct pci_error_handlers nvme_err_handler = {
2011-01-20 20:50:14 +03:00
. error_detected = nvme_error_detected ,
. mmio_enabled = nvme_dump_registers ,
. link_reset = nvme_link_reset ,
. slot_reset = nvme_slot_reset ,
. resume = nvme_error_resume ,
} ;
/* Move to pci_ids.h later */
# define PCI_CLASS_STORAGE_EXPRESS 0x010802
/*
 * Match devices by PCI class code alone: the 0xffffff mask compares all
 * 24 bits of the class against PCI_CLASS_STORAGE_EXPRESS.  The zeroed
 * entry terminates the table.
 */
static DEFINE_PCI_DEVICE_TABLE ( nvme_id_table ) = {
{ PCI_DEVICE_CLASS ( PCI_CLASS_STORAGE_EXPRESS , 0xffffff ) } ,
{ 0 , }
} ;
/* Export the table so the module can be matched to devices */
MODULE_DEVICE_TABLE ( pci , nvme_id_table ) ;
static struct pci_driver nvme_driver = {
. name = " nvme " ,
. id_table = nvme_id_table ,
. probe = nvme_probe ,
2012-12-22 03:13:49 +04:00
. remove = nvme_remove ,
2011-01-20 20:50:14 +03:00
. suspend = nvme_suspend ,
. resume = nvme_resume ,
. err_handler = & nvme_err_handler ,
} ;
/*
 * Module init: start the polling thread, register the block major (adopting
 * a dynamically assigned one when register_blkdev() returns it) and
 * register the PCI driver.  Each failure undoes the earlier steps.
 */
static int __init nvme_init(void)
{
	int result;

	nvme_thread = kthread_run(nvme_kthread, NULL, " nvme ");
	if (IS_ERR(nvme_thread))
		return PTR_ERR(nvme_thread);

	result = register_blkdev(nvme_major, " nvme ");
	if (result < 0)
		goto kill_kthread;
	if (result > 0)
		nvme_major = result;

	result = pci_register_driver(&nvme_driver);
	if (!result)
		return 0;

	unregister_blkdev(nvme_major, " nvme ");
 kill_kthread:
	kthread_stop(nvme_thread);
	return result;
}
/*
 * Module exit: undo nvme_init() in reverse order: PCI driver, block
 * major, then the polling thread.
 */
static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);

	unregister_blkdev(nvme_major, " nvme ");

	kthread_stop(nvme_thread);
}
/* Module metadata and entry points */
MODULE_LICENSE(" GPL ");
MODULE_AUTHOR(" Matthew Wilcox <willy@linux.intel.com> ");
MODULE_VERSION(" 0.8 ");
module_init(nvme_init);
module_exit(nvme_exit);