2019-02-18 09:34:21 +01:00
// SPDX-License-Identifier: GPL-2.0
2016-07-06 21:55:52 +09:00
/*
* NVMe over Fabrics RDMA host code .
* Copyright ( c ) 2015 - 2016 HGST , a Western Digital Company .
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/module.h>
# include <linux/init.h>
# include <linux/slab.h>
2017-11-26 10:40:55 +00:00
# include <rdma/mr_pool.h>
2016-07-06 21:55:52 +09:00
# include <linux/err.h>
# include <linux/string.h>
# include <linux/atomic.h>
# include <linux/blk-mq.h>
2017-07-13 11:09:44 +03:00
# include <linux/blk-mq-rdma.h>
2021-09-20 14:33:27 +02:00
# include <linux/blk-integrity.h>
2016-07-06 21:55:52 +09:00
# include <linux/types.h>
# include <linux/list.h>
# include <linux/mutex.h>
# include <linux/scatterlist.h>
# include <linux/nvme.h>
# include <asm/unaligned.h>
# include <rdma/ib_verbs.h>
# include <rdma/rdma_cm.h>
# include <linux/nvme-rdma.h>
# include "nvme.h"
# include "fabrics.h"
2017-03-21 16:32:38 +02:00
# define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */
2016-07-06 21:55:52 +09:00
# define NVME_RDMA_MAX_SEGMENTS 256
2018-06-20 07:15:05 -07:00
# define NVME_RDMA_MAX_INLINE_SEGMENTS 4
2016-07-06 21:55:52 +09:00
2020-05-19 17:05:56 +03:00
# define NVME_RDMA_DATA_SGL_SIZE \
( sizeof ( struct scatterlist ) * NVME_INLINE_SG_CNT )
# define NVME_RDMA_METADATA_SGL_SIZE \
( sizeof ( struct scatterlist ) * NVME_INLINE_METADATA_SG_CNT )
2016-07-06 21:55:52 +09:00
struct nvme_rdma_device {
2017-10-23 12:59:27 +03:00
struct ib_device * dev ;
struct ib_pd * pd ;
2016-07-06 21:55:52 +09:00
struct kref ref ;
struct list_head entry ;
2018-06-20 07:15:05 -07:00
unsigned int num_inline_segments ;
2016-07-06 21:55:52 +09:00
} ;
struct nvme_rdma_qe {
struct ib_cqe cqe ;
void * data ;
u64 dma ;
} ;
2020-05-19 17:05:55 +03:00
struct nvme_rdma_sgl {
int nents ;
struct sg_table sg_table ;
} ;
2016-07-06 21:55:52 +09:00
struct nvme_rdma_queue ;
struct nvme_rdma_request {
2016-11-10 07:32:33 -08:00
struct nvme_request req ;
2016-07-06 21:55:52 +09:00
struct ib_mr * mr ;
struct nvme_rdma_qe sqe ;
2017-11-23 17:35:22 +02:00
union nvme_result result ;
__le16 status ;
refcount_t ref ;
2016-07-06 21:55:52 +09:00
struct ib_sge sge [ 1 + NVME_RDMA_MAX_INLINE_SEGMENTS ] ;
u32 num_sge ;
struct ib_reg_wr reg_wr ;
struct ib_cqe reg_cqe ;
struct nvme_rdma_queue * queue ;
2020-05-19 17:05:55 +03:00
struct nvme_rdma_sgl data_sgl ;
2020-05-19 17:05:56 +03:00
struct nvme_rdma_sgl * metadata_sgl ;
bool use_sig_mr ;
2016-07-06 21:55:52 +09:00
} ;
enum nvme_rdma_queue_flags {
2017-10-11 15:29:12 +03:00
NVME_RDMA_Q_ALLOCATED = 0 ,
NVME_RDMA_Q_LIVE = 1 ,
2017-11-28 18:28:44 +02:00
NVME_RDMA_Q_TR_READY = 2 ,
2016-07-06 21:55:52 +09:00
} ;
struct nvme_rdma_queue {
struct nvme_rdma_qe * rsp_ring ;
int queue_size ;
size_t cmnd_capsule_len ;
struct nvme_rdma_ctrl * ctrl ;
struct nvme_rdma_device * device ;
struct ib_cq * ib_cq ;
struct ib_qp * qp ;
unsigned long flags ;
struct rdma_cm_id * cm_id ;
int cm_error ;
struct completion cm_done ;
2020-05-19 17:05:56 +03:00
bool pi_support ;
2020-07-13 11:53:29 +03:00
int cq_size ;
2021-01-14 17:09:25 +08:00
struct mutex queue_lock ;
2016-07-06 21:55:52 +09:00
} ;
struct nvme_rdma_ctrl {
/* read only in the hot path */
struct nvme_rdma_queue * queues ;
/* other member variables */
struct blk_mq_tag_set tag_set ;
struct work_struct err_work ;
struct nvme_rdma_qe async_event_sqe ;
struct delayed_work reconnect_work ;
struct list_head list ;
struct blk_mq_tag_set admin_tag_set ;
struct nvme_rdma_device * device ;
u32 max_fr_pages ;
2017-02-05 21:49:32 +02:00
struct sockaddr_storage addr ;
struct sockaddr_storage src_addr ;
2016-07-06 21:55:52 +09:00
struct nvme_ctrl ctrl ;
2018-06-20 07:15:05 -07:00
bool use_inline_data ;
2019-01-18 16:43:24 -08:00
u32 io_queues [ HCTX_MAX_TYPES ] ;
2016-07-06 21:55:52 +09:00
} ;
static inline struct nvme_rdma_ctrl * to_rdma_ctrl ( struct nvme_ctrl * ctrl )
{
return container_of ( ctrl , struct nvme_rdma_ctrl , ctrl ) ;
}
static LIST_HEAD ( device_list ) ;
static DEFINE_MUTEX ( device_list_mutex ) ;
static LIST_HEAD ( nvme_rdma_ctrl_list ) ;
static DEFINE_MUTEX ( nvme_rdma_ctrl_mutex ) ;
/*
* Disabling this option makes small I / O goes faster , but is fundamentally
* unsafe . With it turned off we will have to register a global rkey that
* allows read and write access to all physical memory .
*/
static bool register_always = true ;
module_param ( register_always , bool , 0444 ) ;
MODULE_PARM_DESC ( register_always ,
" Use memory registration even for contiguous memory regions " ) ;
static int nvme_rdma_cm_handler ( struct rdma_cm_id * cm_id ,
struct rdma_cm_event * event ) ;
static void nvme_rdma_recv_done ( struct ib_cq * cq , struct ib_wc * wc ) ;
2020-06-11 08:44:52 +02:00
static void nvme_rdma_complete_rq ( struct request * rq ) ;
2016-07-06 21:55:52 +09:00
2017-07-10 09:22:28 +03:00
static const struct blk_mq_ops nvme_rdma_mq_ops ;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops ;
2016-07-06 21:55:52 +09:00
static inline int nvme_rdma_queue_idx ( struct nvme_rdma_queue * queue )
{
return queue - queue - > ctrl - > queues ;
}
2018-12-14 11:06:10 -08:00
static bool nvme_rdma_poll_queue ( struct nvme_rdma_queue * queue )
{
return nvme_rdma_queue_idx ( queue ) >
2019-01-18 16:43:24 -08:00
queue - > ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] +
queue - > ctrl - > io_queues [ HCTX_TYPE_READ ] ;
2018-12-14 11:06:10 -08:00
}
2016-07-06 21:55:52 +09:00
static inline size_t nvme_rdma_inline_data_size ( struct nvme_rdma_queue * queue )
{
return queue - > cmnd_capsule_len - sizeof ( struct nvme_command ) ;
}
static void nvme_rdma_free_qe ( struct ib_device * ibdev , struct nvme_rdma_qe * qe ,
size_t capsule_size , enum dma_data_direction dir )
{
ib_dma_unmap_single ( ibdev , qe - > dma , capsule_size , dir ) ;
kfree ( qe - > data ) ;
}
static int nvme_rdma_alloc_qe ( struct ib_device * ibdev , struct nvme_rdma_qe * qe ,
size_t capsule_size , enum dma_data_direction dir )
{
qe - > data = kzalloc ( capsule_size , GFP_KERNEL ) ;
if ( ! qe - > data )
return - ENOMEM ;
qe - > dma = ib_dma_map_single ( ibdev , qe - > data , capsule_size , dir ) ;
if ( ib_dma_mapping_error ( ibdev , qe - > dma ) ) {
kfree ( qe - > data ) ;
2018-11-28 11:11:29 -07:00
qe - > data = NULL ;
2016-07-06 21:55:52 +09:00
return - ENOMEM ;
}
return 0 ;
}
static void nvme_rdma_free_ring ( struct ib_device * ibdev ,
struct nvme_rdma_qe * ring , size_t ib_queue_size ,
size_t capsule_size , enum dma_data_direction dir )
{
int i ;
for ( i = 0 ; i < ib_queue_size ; i + + )
nvme_rdma_free_qe ( ibdev , & ring [ i ] , capsule_size , dir ) ;
kfree ( ring ) ;
}
static struct nvme_rdma_qe * nvme_rdma_alloc_ring ( struct ib_device * ibdev ,
size_t ib_queue_size , size_t capsule_size ,
enum dma_data_direction dir )
{
struct nvme_rdma_qe * ring ;
int i ;
ring = kcalloc ( ib_queue_size , sizeof ( struct nvme_rdma_qe ) , GFP_KERNEL ) ;
if ( ! ring )
return NULL ;
2019-06-06 12:27:36 +03:00
/*
* Bind the CQEs ( post recv buffers ) DMA mapping to the RDMA queue
* lifetime . It ' s safe , since any chage in the underlying RDMA device
* will issue error recovery and queue re - creation .
*/
2016-07-06 21:55:52 +09:00
for ( i = 0 ; i < ib_queue_size ; i + + ) {
if ( nvme_rdma_alloc_qe ( ibdev , & ring [ i ] , capsule_size , dir ) )
goto out_free_ring ;
}
return ring ;
out_free_ring :
nvme_rdma_free_ring ( ibdev , ring , i , capsule_size , dir ) ;
return NULL ;
}
static void nvme_rdma_qp_event ( struct ib_event * event , void * context )
{
2016-11-23 11:38:48 +02:00
pr_debug ( " QP event %s (%d) \n " ,
ib_event_msg ( event - > event ) , event - > event ) ;
2016-07-06 21:55:52 +09:00
}
static int nvme_rdma_wait_for_cm ( struct nvme_rdma_queue * queue )
{
2018-10-08 14:28:54 -07:00
int ret ;
ret = wait_for_completion_interruptible_timeout ( & queue - > cm_done ,
2016-07-06 21:55:52 +09:00
msecs_to_jiffies ( NVME_RDMA_CONNECT_TIMEOUT_MS ) + 1 ) ;
2018-10-08 14:28:54 -07:00
if ( ret < 0 )
return ret ;
if ( ret = = 0 )
return - ETIMEDOUT ;
WARN_ON_ONCE ( queue - > cm_error > 0 ) ;
2016-07-06 21:55:52 +09:00
return queue - > cm_error ;
}
static int nvme_rdma_create_qp ( struct nvme_rdma_queue * queue , const int factor )
{
struct nvme_rdma_device * dev = queue - > device ;
struct ib_qp_init_attr init_attr ;
int ret ;
memset ( & init_attr , 0 , sizeof ( init_attr ) ) ;
init_attr . event_handler = nvme_rdma_qp_event ;
/* +1 for drain */
init_attr . cap . max_send_wr = factor * queue - > queue_size + 1 ;
/* +1 for drain */
init_attr . cap . max_recv_wr = queue - > queue_size + 1 ;
init_attr . cap . max_recv_sge = 1 ;
2018-06-20 07:15:05 -07:00
init_attr . cap . max_send_sge = 1 + dev - > num_inline_segments ;
2016-07-06 21:55:52 +09:00
init_attr . sq_sig_type = IB_SIGNAL_REQ_WR ;
init_attr . qp_type = IB_QPT_RC ;
init_attr . send_cq = queue - > ib_cq ;
init_attr . recv_cq = queue - > ib_cq ;
2020-05-19 17:05:56 +03:00
if ( queue - > pi_support )
init_attr . create_flags | = IB_QP_CREATE_INTEGRITY_EN ;
2020-07-13 11:53:29 +03:00
init_attr . qp_context = queue ;
2016-07-06 21:55:52 +09:00
ret = rdma_create_qp ( queue - > cm_id , dev - > pd , & init_attr ) ;
queue - > qp = queue - > cm_id - > qp ;
return ret ;
}
2017-06-13 09:15:19 +02:00
static void nvme_rdma_exit_request ( struct blk_mq_tag_set * set ,
struct request * rq , unsigned int hctx_idx )
2016-07-06 21:55:52 +09:00
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
2019-06-06 12:27:36 +03:00
kfree ( req - > sqe . data ) ;
2016-07-06 21:55:52 +09:00
}
2017-06-13 09:15:19 +02:00
static int nvme_rdma_init_request ( struct blk_mq_tag_set * set ,
struct request * rq , unsigned int hctx_idx ,
unsigned int numa_node )
2016-07-06 21:55:52 +09:00
{
2017-06-13 09:15:19 +02:00
struct nvme_rdma_ctrl * ctrl = set - > driver_data ;
2016-07-06 21:55:52 +09:00
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
2017-06-13 09:15:19 +02:00
int queue_idx = ( set = = & ctrl - > tag_set ) ? hctx_idx + 1 : 0 ;
2016-07-06 21:55:52 +09:00
struct nvme_rdma_queue * queue = & ctrl - > queues [ queue_idx ] ;
2018-06-29 16:50:00 -06:00
nvme_req ( rq ) - > ctrl = & ctrl - > ctrl ;
2019-06-06 12:27:36 +03:00
req - > sqe . data = kzalloc ( sizeof ( struct nvme_command ) , GFP_KERNEL ) ;
if ( ! req - > sqe . data )
return - ENOMEM ;
2016-07-06 21:55:52 +09:00
2020-05-19 17:05:56 +03:00
/* metadata nvme_rdma_sgl struct is located after command's data SGL */
if ( queue - > pi_support )
req - > metadata_sgl = ( void * ) nvme_req ( rq ) +
sizeof ( struct nvme_rdma_request ) +
NVME_RDMA_DATA_SGL_SIZE ;
2016-07-06 21:55:52 +09:00
req - > queue = queue ;
2021-03-17 13:37:03 -07:00
nvme_req ( rq ) - > cmd = req - > sqe . data ;
2016-07-06 21:55:52 +09:00
return 0 ;
}
static int nvme_rdma_init_hctx ( struct blk_mq_hw_ctx * hctx , void * data ,
unsigned int hctx_idx )
{
struct nvme_rdma_ctrl * ctrl = data ;
struct nvme_rdma_queue * queue = & ctrl - > queues [ hctx_idx + 1 ] ;
2017-04-24 10:58:29 +03:00
BUG_ON ( hctx_idx > = ctrl - > ctrl . queue_count ) ;
2016-07-06 21:55:52 +09:00
hctx - > driver_data = queue ;
return 0 ;
}
static int nvme_rdma_init_admin_hctx ( struct blk_mq_hw_ctx * hctx , void * data ,
unsigned int hctx_idx )
{
struct nvme_rdma_ctrl * ctrl = data ;
struct nvme_rdma_queue * queue = & ctrl - > queues [ 0 ] ;
BUG_ON ( hctx_idx ! = 0 ) ;
hctx - > driver_data = queue ;
return 0 ;
}
static void nvme_rdma_free_dev ( struct kref * ref )
{
struct nvme_rdma_device * ndev =
container_of ( ref , struct nvme_rdma_device , ref ) ;
mutex_lock ( & device_list_mutex ) ;
list_del ( & ndev - > entry ) ;
mutex_unlock ( & device_list_mutex ) ;
ib_dealloc_pd ( ndev - > pd ) ;
kfree ( ndev ) ;
}
static void nvme_rdma_dev_put ( struct nvme_rdma_device * dev )
{
kref_put ( & dev - > ref , nvme_rdma_free_dev ) ;
}
static int nvme_rdma_dev_get ( struct nvme_rdma_device * dev )
{
return kref_get_unless_zero ( & dev - > ref ) ;
}
static struct nvme_rdma_device *
nvme_rdma_find_get_device ( struct rdma_cm_id * cm_id )
{
struct nvme_rdma_device * ndev ;
mutex_lock ( & device_list_mutex ) ;
list_for_each_entry ( ndev , & device_list , entry ) {
if ( ndev - > dev - > node_guid = = cm_id - > device - > node_guid & &
nvme_rdma_dev_get ( ndev ) )
goto out_unlock ;
}
ndev = kzalloc ( sizeof ( * ndev ) , GFP_KERNEL ) ;
if ( ! ndev )
goto out_err ;
ndev - > dev = cm_id - > device ;
kref_init ( & ndev - > ref ) ;
2016-09-05 12:56:20 +02:00
ndev - > pd = ib_alloc_pd ( ndev - > dev ,
register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY ) ;
2016-07-06 21:55:52 +09:00
if ( IS_ERR ( ndev - > pd ) )
goto out_free_dev ;
if ( ! ( ndev - > dev - > attrs . device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS ) ) {
dev_err ( & ndev - > dev - > dev ,
" Memory registrations not supported. \n " ) ;
2016-09-05 12:56:20 +02:00
goto out_free_pd ;
2016-07-06 21:55:52 +09:00
}
2018-06-20 07:15:05 -07:00
ndev - > num_inline_segments = min ( NVME_RDMA_MAX_INLINE_SEGMENTS ,
2018-08-16 14:13:03 -06:00
ndev - > dev - > attrs . max_send_sge - 1 ) ;
2016-07-06 21:55:52 +09:00
list_add ( & ndev - > entry , & device_list ) ;
out_unlock :
mutex_unlock ( & device_list_mutex ) ;
return ndev ;
out_free_pd :
ib_dealloc_pd ( ndev - > pd ) ;
out_free_dev :
kfree ( ndev ) ;
out_err :
mutex_unlock ( & device_list_mutex ) ;
return NULL ;
}
2020-07-13 11:53:29 +03:00
static void nvme_rdma_free_cq ( struct nvme_rdma_queue * queue )
{
if ( nvme_rdma_poll_queue ( queue ) )
ib_free_cq ( queue - > ib_cq ) ;
else
ib_cq_pool_put ( queue - > ib_cq , queue - > cq_size ) ;
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_destroy_queue_ib ( struct nvme_rdma_queue * queue )
{
2017-11-28 18:28:44 +02:00
struct nvme_rdma_device * dev ;
struct ib_device * ibdev ;
if ( ! test_and_clear_bit ( NVME_RDMA_Q_TR_READY , & queue - > flags ) )
return ;
dev = queue - > device ;
ibdev = dev - > dev ;
2016-07-06 21:55:52 +09:00
2020-05-19 17:05:56 +03:00
if ( queue - > pi_support )
ib_mr_pool_destroy ( queue - > qp , & queue - > qp - > sig_mrs ) ;
2017-11-26 10:40:55 +00:00
ib_mr_pool_destroy ( queue - > qp , & queue - > qp - > rdma_mrs ) ;
2017-11-28 18:28:44 +02:00
/*
* The cm_id object might have been destroyed during RDMA connection
* establishment error flow to avoid getting other cma events , thus
* the destruction of the QP shouldn ' t use rdma_cm API .
*/
ib_destroy_qp ( queue - > qp ) ;
2020-07-13 11:53:29 +03:00
nvme_rdma_free_cq ( queue ) ;
2016-07-06 21:55:52 +09:00
nvme_rdma_free_ring ( ibdev , queue - > rsp_ring , queue - > queue_size ,
sizeof ( struct nvme_completion ) , DMA_FROM_DEVICE ) ;
nvme_rdma_dev_put ( dev ) ;
}
2020-05-19 17:05:56 +03:00
static int nvme_rdma_get_max_fr_pages ( struct ib_device * ibdev , bool pi_support )
2017-11-26 10:40:55 +00:00
{
2020-05-19 17:05:56 +03:00
u32 max_page_list_len ;
if ( pi_support )
max_page_list_len = ibdev - > attrs . max_pi_fast_reg_page_list_len ;
else
max_page_list_len = ibdev - > attrs . max_fast_reg_page_list_len ;
return min_t ( u32 , NVME_RDMA_MAX_SEGMENTS , max_page_list_len - 1 ) ;
2017-11-26 10:40:55 +00:00
}
2020-07-13 11:53:29 +03:00
static int nvme_rdma_create_cq ( struct ib_device * ibdev ,
struct nvme_rdma_queue * queue )
{
int ret , comp_vector , idx = nvme_rdma_queue_idx ( queue ) ;
enum ib_poll_context poll_ctx ;
/*
* Spread I / O queues completion vectors according their queue index .
* Admin queues can always go on completion vector 0.
*/
comp_vector = ( idx = = 0 ? idx : idx - 1 ) % ibdev - > num_comp_vectors ;
/* Polling queues need direct cq polling context */
if ( nvme_rdma_poll_queue ( queue ) ) {
poll_ctx = IB_POLL_DIRECT ;
queue - > ib_cq = ib_alloc_cq ( ibdev , queue , queue - > cq_size ,
comp_vector , poll_ctx ) ;
} else {
poll_ctx = IB_POLL_SOFTIRQ ;
queue - > ib_cq = ib_cq_pool_get ( ibdev , queue - > cq_size ,
comp_vector , poll_ctx ) ;
}
if ( IS_ERR ( queue - > ib_cq ) ) {
ret = PTR_ERR ( queue - > ib_cq ) ;
return ret ;
}
return 0 ;
}
2017-05-04 13:33:09 +03:00
static int nvme_rdma_create_queue_ib ( struct nvme_rdma_queue * queue )
2016-07-06 21:55:52 +09:00
{
2017-05-04 13:33:09 +03:00
struct ib_device * ibdev ;
2016-07-06 21:55:52 +09:00
const int send_wr_factor = 3 ; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1 ; /* + RECV */
2019-09-21 23:58:19 +03:00
int ret , pages_per_mr ;
2016-07-06 21:55:52 +09:00
2017-05-04 13:33:09 +03:00
queue - > device = nvme_rdma_find_get_device ( queue - > cm_id ) ;
if ( ! queue - > device ) {
dev_err ( queue - > cm_id - > device - > dev . parent ,
" no client data found! \n " ) ;
return - ECONNREFUSED ;
}
ibdev = queue - > device - > dev ;
2016-07-06 21:55:52 +09:00
/* +1 for ib_stop_cq */
2020-07-13 11:53:29 +03:00
queue - > cq_size = cq_factor * queue - > queue_size + 1 ;
ret = nvme_rdma_create_cq ( ibdev , queue ) ;
if ( ret )
2017-05-04 13:33:09 +03:00
goto out_put_dev ;
2016-07-06 21:55:52 +09:00
ret = nvme_rdma_create_qp ( queue , send_wr_factor ) ;
if ( ret )
goto out_destroy_ib_cq ;
queue - > rsp_ring = nvme_rdma_alloc_ring ( ibdev , queue - > queue_size ,
sizeof ( struct nvme_completion ) , DMA_FROM_DEVICE ) ;
if ( ! queue - > rsp_ring ) {
ret = - ENOMEM ;
goto out_destroy_qp ;
}
2019-09-21 23:58:19 +03:00
/*
* Currently we don ' t use SG_GAPS MR ' s so if the first entry is
* misaligned we ' ll end up using two entries for a single data page ,
* so one additional entry is required .
*/
2020-05-19 17:05:56 +03:00
pages_per_mr = nvme_rdma_get_max_fr_pages ( ibdev , queue - > pi_support ) + 1 ;
2017-11-26 10:40:55 +00:00
ret = ib_mr_pool_init ( queue - > qp , & queue - > qp - > rdma_mrs ,
queue - > queue_size ,
IB_MR_TYPE_MEM_REG ,
2019-09-21 23:58:19 +03:00
pages_per_mr , 0 ) ;
2017-11-26 10:40:55 +00:00
if ( ret ) {
dev_err ( queue - > ctrl - > ctrl . device ,
" failed to initialize MR pool sized %d for QID %d \n " ,
2020-07-13 11:53:29 +03:00
queue - > queue_size , nvme_rdma_queue_idx ( queue ) ) ;
2017-11-26 10:40:55 +00:00
goto out_destroy_ring ;
}
2020-05-19 17:05:56 +03:00
if ( queue - > pi_support ) {
ret = ib_mr_pool_init ( queue - > qp , & queue - > qp - > sig_mrs ,
queue - > queue_size , IB_MR_TYPE_INTEGRITY ,
pages_per_mr , pages_per_mr ) ;
if ( ret ) {
dev_err ( queue - > ctrl - > ctrl . device ,
" failed to initialize PI MR pool sized %d for QID %d \n " ,
2020-07-13 11:53:29 +03:00
queue - > queue_size , nvme_rdma_queue_idx ( queue ) ) ;
2020-05-19 17:05:56 +03:00
goto out_destroy_mr_pool ;
}
}
2017-11-28 18:28:44 +02:00
set_bit ( NVME_RDMA_Q_TR_READY , & queue - > flags ) ;
2016-07-06 21:55:52 +09:00
return 0 ;
2020-05-19 17:05:56 +03:00
out_destroy_mr_pool :
ib_mr_pool_destroy ( queue - > qp , & queue - > qp - > rdma_mrs ) ;
2017-11-26 10:40:55 +00:00
out_destroy_ring :
nvme_rdma_free_ring ( ibdev , queue - > rsp_ring , queue - > queue_size ,
sizeof ( struct nvme_completion ) , DMA_FROM_DEVICE ) ;
2016-07-06 21:55:52 +09:00
out_destroy_qp :
2017-11-06 16:18:51 +02:00
rdma_destroy_qp ( queue - > cm_id ) ;
2016-07-06 21:55:52 +09:00
out_destroy_ib_cq :
2020-07-13 11:53:29 +03:00
nvme_rdma_free_cq ( queue ) ;
2017-05-04 13:33:09 +03:00
out_put_dev :
nvme_rdma_dev_put ( queue - > device ) ;
2016-07-06 21:55:52 +09:00
return ret ;
}
2017-07-10 09:22:36 +03:00
static int nvme_rdma_alloc_queue ( struct nvme_rdma_ctrl * ctrl ,
2016-07-06 21:55:52 +09:00
int idx , size_t queue_size )
{
struct nvme_rdma_queue * queue ;
2017-02-19 20:08:03 +02:00
struct sockaddr * src_addr = NULL ;
2016-07-06 21:55:52 +09:00
int ret ;
queue = & ctrl - > queues [ idx ] ;
2021-01-14 17:09:25 +08:00
mutex_init ( & queue - > queue_lock ) ;
2016-07-06 21:55:52 +09:00
queue - > ctrl = ctrl ;
2020-05-19 17:05:56 +03:00
if ( idx & & ctrl - > ctrl . max_integrity_segments )
queue - > pi_support = true ;
else
queue - > pi_support = false ;
2016-07-06 21:55:52 +09:00
init_completion ( & queue - > cm_done ) ;
if ( idx > 0 )
queue - > cmnd_capsule_len = ctrl - > ctrl . ioccsz * 16 ;
else
queue - > cmnd_capsule_len = sizeof ( struct nvme_command ) ;
queue - > queue_size = queue_size ;
queue - > cm_id = rdma_create_id ( & init_net , nvme_rdma_cm_handler , queue ,
RDMA_PS_TCP , IB_QPT_RC ) ;
if ( IS_ERR ( queue - > cm_id ) ) {
dev_info ( ctrl - > ctrl . device ,
" failed to create CM ID: %ld \n " , PTR_ERR ( queue - > cm_id ) ) ;
2021-01-14 17:09:25 +08:00
ret = PTR_ERR ( queue - > cm_id ) ;
goto out_destroy_mutex ;
2016-07-06 21:55:52 +09:00
}
2017-02-19 20:08:03 +02:00
if ( ctrl - > ctrl . opts - > mask & NVMF_OPT_HOST_TRADDR )
2017-02-05 21:49:32 +02:00
src_addr = ( struct sockaddr * ) & ctrl - > src_addr ;
2017-02-19 20:08:03 +02:00
2017-02-05 21:49:32 +02:00
queue - > cm_error = - ETIMEDOUT ;
ret = rdma_resolve_addr ( queue - > cm_id , src_addr ,
( struct sockaddr * ) & ctrl - > addr ,
2016-07-06 21:55:52 +09:00
NVME_RDMA_CONNECT_TIMEOUT_MS ) ;
if ( ret ) {
dev_info ( ctrl - > ctrl . device ,
" rdma_resolve_addr failed (%d). \n " , ret ) ;
goto out_destroy_cm_id ;
}
ret = nvme_rdma_wait_for_cm ( queue ) ;
if ( ret ) {
dev_info ( ctrl - > ctrl . device ,
2017-10-11 15:29:07 +03:00
" rdma connection establishment failed (%d) \n " , ret ) ;
2016-07-06 21:55:52 +09:00
goto out_destroy_cm_id ;
}
2017-10-11 15:29:12 +03:00
set_bit ( NVME_RDMA_Q_ALLOCATED , & queue - > flags ) ;
2016-07-06 21:55:52 +09:00
return 0 ;
out_destroy_cm_id :
rdma_destroy_id ( queue - > cm_id ) ;
2017-11-28 18:28:44 +02:00
nvme_rdma_destroy_queue_ib ( queue ) ;
2021-01-14 17:09:25 +08:00
out_destroy_mutex :
mutex_destroy ( & queue - > queue_lock ) ;
2016-07-06 21:55:52 +09:00
return ret ;
}
2019-07-26 10:29:49 -07:00
static void __nvme_rdma_stop_queue ( struct nvme_rdma_queue * queue )
{
rdma_disconnect ( queue - > cm_id ) ;
ib_drain_qp ( queue - > qp ) ;
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_stop_queue ( struct nvme_rdma_queue * queue )
{
2021-01-14 17:09:25 +08:00
mutex_lock ( & queue - > queue_lock ) ;
if ( test_and_clear_bit ( NVME_RDMA_Q_LIVE , & queue - > flags ) )
__nvme_rdma_stop_queue ( queue ) ;
mutex_unlock ( & queue - > queue_lock ) ;
2016-07-06 21:55:52 +09:00
}
static void nvme_rdma_free_queue ( struct nvme_rdma_queue * queue )
{
2017-10-11 15:29:12 +03:00
if ( ! test_and_clear_bit ( NVME_RDMA_Q_ALLOCATED , & queue - > flags ) )
2017-08-28 21:41:10 +02:00
return ;
2016-07-06 21:55:52 +09:00
rdma_destroy_id ( queue - > cm_id ) ;
2021-09-06 11:51:34 +08:00
nvme_rdma_destroy_queue_ib ( queue ) ;
2021-01-14 17:09:25 +08:00
mutex_destroy ( & queue - > queue_lock ) ;
2016-07-06 21:55:52 +09:00
}
2017-08-28 21:41:10 +02:00
static void nvme_rdma_free_io_queues ( struct nvme_rdma_ctrl * ctrl )
2016-07-06 21:55:52 +09:00
{
2017-08-28 21:41:10 +02:00
int i ;
for ( i = 1 ; i < ctrl - > ctrl . queue_count ; i + + )
nvme_rdma_free_queue ( & ctrl - > queues [ i ] ) ;
2016-07-06 21:55:52 +09:00
}
2017-08-28 21:41:10 +02:00
static void nvme_rdma_stop_io_queues ( struct nvme_rdma_ctrl * ctrl )
2016-07-06 21:55:52 +09:00
{
int i ;
2017-04-24 10:58:29 +03:00
for ( i = 1 ; i < ctrl - > ctrl . queue_count ; i + + )
2017-08-28 21:41:10 +02:00
nvme_rdma_stop_queue ( & ctrl - > queues [ i ] ) ;
2016-07-06 21:55:52 +09:00
}
2017-07-10 09:22:37 +03:00
static int nvme_rdma_start_queue ( struct nvme_rdma_ctrl * ctrl , int idx )
{
2018-12-14 11:06:10 -08:00
struct nvme_rdma_queue * queue = & ctrl - > queues [ idx ] ;
2017-07-10 09:22:37 +03:00
int ret ;
if ( idx )
2021-06-10 14:44:35 -07:00
ret = nvmf_connect_io_queue ( & ctrl - > ctrl , idx ) ;
2017-07-10 09:22:37 +03:00
else
ret = nvmf_connect_admin_queue ( & ctrl - > ctrl ) ;
2019-07-26 10:29:49 -07:00
if ( ! ret ) {
2018-12-14 11:06:10 -08:00
set_bit ( NVME_RDMA_Q_LIVE , & queue - > flags ) ;
2019-07-26 10:29:49 -07:00
} else {
2019-09-24 11:27:05 -07:00
if ( test_bit ( NVME_RDMA_Q_ALLOCATED , & queue - > flags ) )
__nvme_rdma_stop_queue ( queue ) ;
2017-07-10 09:22:37 +03:00
dev_info ( ctrl - > ctrl . device ,
" failed to connect queue: %d ret=%d \n " , idx , ret ) ;
2019-07-26 10:29:49 -07:00
}
2017-07-10 09:22:37 +03:00
return ret ;
}
static int nvme_rdma_start_io_queues ( struct nvme_rdma_ctrl * ctrl )
2016-07-06 21:55:52 +09:00
{
int i , ret = 0 ;
2017-04-24 10:58:29 +03:00
for ( i = 1 ; i < ctrl - > ctrl . queue_count ; i + + ) {
2017-07-10 09:22:37 +03:00
ret = nvme_rdma_start_queue ( ctrl , i ) ;
if ( ret )
2017-08-28 21:41:10 +02:00
goto out_stop_queues ;
2016-07-06 21:55:52 +09:00
}
2016-11-08 09:16:02 -08:00
return 0 ;
2017-08-28 21:41:10 +02:00
out_stop_queues :
2017-07-10 09:22:37 +03:00
for ( i - - ; i > = 1 ; i - - )
nvme_rdma_stop_queue ( & ctrl - > queues [ i ] ) ;
2016-07-06 21:55:52 +09:00
return ret ;
}
2017-07-10 09:22:36 +03:00
static int nvme_rdma_alloc_io_queues ( struct nvme_rdma_ctrl * ctrl )
2016-07-06 21:55:52 +09:00
{
2017-03-09 13:26:07 +02:00
struct nvmf_ctrl_options * opts = ctrl - > ctrl . opts ;
2017-07-13 11:09:44 +03:00
struct ib_device * ibdev = ctrl - > device - > dev ;
2019-05-28 22:49:04 -07:00
unsigned int nr_io_queues , nr_default_queues ;
unsigned int nr_read_queues , nr_poll_queues ;
2016-07-06 21:55:52 +09:00
int i , ret ;
2019-05-28 22:49:04 -07:00
nr_read_queues = min_t ( unsigned int , ibdev - > num_comp_vectors ,
min ( opts - > nr_io_queues , num_online_cpus ( ) ) ) ;
nr_default_queues = min_t ( unsigned int , ibdev - > num_comp_vectors ,
min ( opts - > nr_write_queues , num_online_cpus ( ) ) ) ;
nr_poll_queues = min ( opts - > nr_poll_queues , num_online_cpus ( ) ) ;
nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues ;
2018-12-11 23:38:58 -08:00
2017-03-09 13:26:07 +02:00
ret = nvme_set_queue_count ( & ctrl - > ctrl , & nr_io_queues ) ;
if ( ret )
return ret ;
2021-07-28 17:41:20 +08:00
if ( nr_io_queues = = 0 ) {
2021-03-15 14:04:27 -07:00
dev_err ( ctrl - > ctrl . device ,
" unable to set any I/O queues \n " ) ;
return - ENOMEM ;
}
2017-03-09 13:26:07 +02:00
2021-07-28 17:41:20 +08:00
ctrl - > ctrl . queue_count = nr_io_queues + 1 ;
2017-03-09 13:26:07 +02:00
dev_info ( ctrl - > ctrl . device ,
" creating %d I/O queues. \n " , nr_io_queues ) ;
2019-05-28 22:49:04 -07:00
if ( opts - > nr_write_queues & & nr_read_queues < nr_io_queues ) {
/*
* separate read / write queues
* hand out dedicated default queues only after we have
* sufficient read queues .
*/
ctrl - > io_queues [ HCTX_TYPE_READ ] = nr_read_queues ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_READ ] ;
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] =
min ( nr_default_queues , nr_io_queues ) ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
} else {
/*
* shared read / write queues
* either no write queues were requested , or we don ' t have
* sufficient queue count to have dedicated default queues .
*/
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] =
min ( nr_read_queues , nr_io_queues ) ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
}
if ( opts - > nr_poll_queues & & nr_io_queues ) {
/* map dedicated poll queues only if we have queues left */
ctrl - > io_queues [ HCTX_TYPE_POLL ] =
min ( nr_poll_queues , nr_io_queues ) ;
}
2017-04-24 10:58:29 +03:00
for ( i = 1 ; i < ctrl - > ctrl . queue_count ; i + + ) {
2017-07-10 09:22:36 +03:00
ret = nvme_rdma_alloc_queue ( ctrl , i ,
ctrl - > ctrl . sqsize + 1 ) ;
if ( ret )
2016-07-06 21:55:52 +09:00
goto out_free_queues ;
}
return 0 ;
out_free_queues :
2016-09-02 09:01:27 -07:00
for ( i - - ; i > = 1 ; i - - )
2017-08-28 21:41:10 +02:00
nvme_rdma_free_queue ( & ctrl - > queues [ i ] ) ;
2016-07-06 21:55:52 +09:00
return ret ;
}
2017-07-10 09:22:30 +03:00
static struct blk_mq_tag_set * nvme_rdma_alloc_tagset ( struct nvme_ctrl * nctrl ,
bool admin )
{
struct nvme_rdma_ctrl * ctrl = to_rdma_ctrl ( nctrl ) ;
struct blk_mq_tag_set * set ;
int ret ;
if ( admin ) {
set = & ctrl - > admin_tag_set ;
memset ( set , 0 , sizeof ( * set ) ) ;
set - > ops = & nvme_rdma_admin_mq_ops ;
2017-11-07 15:13:10 -07:00
set - > queue_depth = NVME_AQ_MQ_TAG_DEPTH ;
2021-03-03 13:28:22 +01:00
set - > reserved_tags = NVMF_RESERVED_TAGS ;
2018-11-16 09:22:29 +01:00
set - > numa_node = nctrl - > numa_node ;
2017-07-10 09:22:30 +03:00
set - > cmd_size = sizeof ( struct nvme_rdma_request ) +
2020-05-19 17:05:56 +03:00
NVME_RDMA_DATA_SGL_SIZE ;
2017-07-10 09:22:30 +03:00
set - > driver_data = ctrl ;
set - > nr_hw_queues = 1 ;
2020-11-09 16:33:45 -08:00
set - > timeout = NVME_ADMIN_TIMEOUT ;
2017-10-18 12:38:24 +00:00
set - > flags = BLK_MQ_F_NO_SCHED ;
2017-07-10 09:22:30 +03:00
} else {
set = & ctrl - > tag_set ;
memset ( set , 0 , sizeof ( * set ) ) ;
set - > ops = & nvme_rdma_mq_ops ;
2018-06-19 15:34:13 +03:00
set - > queue_depth = nctrl - > sqsize + 1 ;
2021-03-03 13:28:22 +01:00
set - > reserved_tags = NVMF_RESERVED_TAGS ;
2018-11-16 09:22:29 +01:00
set - > numa_node = nctrl - > numa_node ;
2017-07-10 09:22:30 +03:00
set - > flags = BLK_MQ_F_SHOULD_MERGE ;
set - > cmd_size = sizeof ( struct nvme_rdma_request ) +
2020-05-19 17:05:56 +03:00
NVME_RDMA_DATA_SGL_SIZE ;
if ( nctrl - > max_integrity_segments )
set - > cmd_size + = sizeof ( struct nvme_rdma_sgl ) +
NVME_RDMA_METADATA_SGL_SIZE ;
2017-07-10 09:22:30 +03:00
set - > driver_data = ctrl ;
set - > nr_hw_queues = nctrl - > queue_count - 1 ;
set - > timeout = NVME_IO_TIMEOUT ;
2018-12-14 11:06:10 -08:00
set - > nr_maps = nctrl - > opts - > nr_poll_queues ? HCTX_MAX_TYPES : 2 ;
2017-07-10 09:22:30 +03:00
}
ret = blk_mq_alloc_tag_set ( set ) ;
if ( ret )
2019-05-06 13:47:55 +03:00
return ERR_PTR ( ret ) ;
2017-07-10 09:22:30 +03:00
return set ;
}
2017-07-10 09:22:32 +03:00
static void nvme_rdma_destroy_admin_queue ( struct nvme_rdma_ctrl * ctrl ,
bool remove )
2016-07-06 21:55:52 +09:00
{
2017-07-10 09:22:32 +03:00
if ( remove ) {
blk_cleanup_queue ( ctrl - > ctrl . admin_q ) ;
2019-08-02 19:33:59 -07:00
blk_cleanup_queue ( ctrl - > ctrl . fabrics_q ) ;
2019-05-06 13:47:55 +03:00
blk_mq_free_tag_set ( ctrl - > ctrl . admin_tagset ) ;
2017-07-10 09:22:32 +03:00
}
2018-06-25 20:58:17 +03:00
if ( ctrl - > async_event_sqe . data ) {
2020-09-02 17:42:52 -05:00
cancel_work_sync ( & ctrl - > ctrl . async_event_work ) ;
2018-06-25 20:58:17 +03:00
nvme_rdma_free_qe ( ctrl - > device - > dev , & ctrl - > async_event_sqe ,
sizeof ( struct nvme_command ) , DMA_TO_DEVICE ) ;
ctrl - > async_event_sqe . data = NULL ;
}
2017-08-28 21:41:10 +02:00
nvme_rdma_free_queue ( & ctrl - > queues [ 0 ] ) ;
2016-07-06 21:55:52 +09:00
}
2017-07-10 09:22:32 +03:00
static int nvme_rdma_configure_admin_queue ( struct nvme_rdma_ctrl * ctrl ,
bool new )
2017-07-10 09:22:28 +03:00
{
2020-05-19 17:05:56 +03:00
bool pi_capable = false ;
2017-07-10 09:22:28 +03:00
int error ;
2017-07-10 09:22:36 +03:00
error = nvme_rdma_alloc_queue ( ctrl , 0 , NVME_AQ_DEPTH ) ;
2017-07-10 09:22:28 +03:00
if ( error )
return error ;
ctrl - > device = ctrl - > queues [ 0 ] . device ;
2020-11-06 19:19:35 +01:00
ctrl - > ctrl . numa_node = ibdev_to_node ( ctrl - > device - > dev ) ;
2017-07-10 09:22:28 +03:00
2020-05-19 17:05:56 +03:00
/* T10-PI support */
if ( ctrl - > device - > dev - > attrs . device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER )
pi_capable = true ;
ctrl - > max_fr_pages = nvme_rdma_get_max_fr_pages ( ctrl - > device - > dev ,
pi_capable ) ;
2017-07-10 09:22:28 +03:00
2019-06-06 12:27:36 +03:00
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime .
* It ' s safe , since any chage in the underlying RDMA device will issue
* error recovery and queue re - creation .
*/
2018-06-19 15:34:10 +03:00
error = nvme_rdma_alloc_qe ( ctrl - > device - > dev , & ctrl - > async_event_sqe ,
sizeof ( struct nvme_command ) , DMA_TO_DEVICE ) ;
if ( error )
goto out_free_queue ;
2017-07-10 09:22:32 +03:00
if ( new ) {
ctrl - > ctrl . admin_tagset = nvme_rdma_alloc_tagset ( & ctrl - > ctrl , true ) ;
2017-10-19 18:10:53 +03:00
if ( IS_ERR ( ctrl - > ctrl . admin_tagset ) ) {
error = PTR_ERR ( ctrl - > ctrl . admin_tagset ) ;
2018-06-19 15:34:10 +03:00
goto out_free_async_qe ;
2017-10-19 18:10:53 +03:00
}
2017-07-10 09:22:28 +03:00
2019-08-02 19:33:59 -07:00
ctrl - > ctrl . fabrics_q = blk_mq_init_queue ( & ctrl - > admin_tag_set ) ;
if ( IS_ERR ( ctrl - > ctrl . fabrics_q ) ) {
error = PTR_ERR ( ctrl - > ctrl . fabrics_q ) ;
goto out_free_tagset ;
}
2017-07-10 09:22:32 +03:00
ctrl - > ctrl . admin_q = blk_mq_init_queue ( & ctrl - > admin_tag_set ) ;
if ( IS_ERR ( ctrl - > ctrl . admin_q ) ) {
error = PTR_ERR ( ctrl - > ctrl . admin_q ) ;
2019-08-02 19:33:59 -07:00
goto out_cleanup_fabrics_q ;
2017-07-10 09:22:32 +03:00
}
2017-07-10 09:22:28 +03:00
}
2017-07-10 09:22:37 +03:00
error = nvme_rdma_start_queue ( ctrl , 0 ) ;
2017-07-10 09:22:28 +03:00
if ( error )
goto out_cleanup_queue ;
2019-07-22 17:06:53 -07:00
error = nvme_enable_ctrl ( & ctrl - > ctrl ) ;
2017-07-10 09:22:28 +03:00
if ( error )
2018-05-24 09:27:38 +08:00
goto out_stop_queue ;
2017-07-10 09:22:28 +03:00
2019-09-21 23:58:19 +03:00
ctrl - > ctrl . max_segments = ctrl - > max_fr_pages ;
ctrl - > ctrl . max_hw_sectors = ctrl - > max_fr_pages < < ( ilog2 ( SZ_4K ) - 9 ) ;
2020-05-19 17:05:56 +03:00
if ( pi_capable )
ctrl - > ctrl . max_integrity_segments = ctrl - > max_fr_pages ;
else
ctrl - > ctrl . max_integrity_segments = 0 ;
2017-07-10 09:22:28 +03:00
2021-10-14 16:17:06 +08:00
nvme_start_admin_queue ( & ctrl - > ctrl ) ;
2019-08-02 19:33:59 -07:00
2021-02-28 18:06:04 -08:00
error = nvme_init_ctrl_finish ( & ctrl - > ctrl ) ;
2017-07-10 09:22:28 +03:00
if ( error )
2021-01-21 11:32:37 +08:00
goto out_quiesce_queue ;
2017-07-10 09:22:28 +03:00
return 0 ;
2021-01-21 11:32:37 +08:00
out_quiesce_queue :
2021-10-14 16:17:06 +08:00
nvme_stop_admin_queue ( & ctrl - > ctrl ) ;
2021-01-21 11:32:37 +08:00
blk_sync_queue ( ctrl - > ctrl . admin_q ) ;
2018-05-24 09:27:38 +08:00
out_stop_queue :
nvme_rdma_stop_queue ( & ctrl - > queues [ 0 ] ) ;
2021-01-21 11:32:37 +08:00
nvme_cancel_admin_tagset ( & ctrl - > ctrl ) ;
2017-07-10 09:22:28 +03:00
out_cleanup_queue :
2017-07-10 09:22:32 +03:00
if ( new )
blk_cleanup_queue ( ctrl - > ctrl . admin_q ) ;
2019-08-02 19:33:59 -07:00
out_cleanup_fabrics_q :
if ( new )
blk_cleanup_queue ( ctrl - > ctrl . fabrics_q ) ;
2017-07-10 09:22:28 +03:00
out_free_tagset :
2017-07-10 09:22:32 +03:00
if ( new )
2019-05-06 13:47:55 +03:00
blk_mq_free_tag_set ( ctrl - > ctrl . admin_tagset ) ;
2018-06-19 15:34:10 +03:00
out_free_async_qe :
2020-03-09 15:07:53 -06:00
if ( ctrl - > async_event_sqe . data ) {
nvme_rdma_free_qe ( ctrl - > device - > dev , & ctrl - > async_event_sqe ,
sizeof ( struct nvme_command ) , DMA_TO_DEVICE ) ;
ctrl - > async_event_sqe . data = NULL ;
}
2017-07-10 09:22:28 +03:00
out_free_queue :
nvme_rdma_free_queue ( & ctrl - > queues [ 0 ] ) ;
return error ;
}
2017-08-28 21:41:10 +02:00
static void nvme_rdma_destroy_io_queues ( struct nvme_rdma_ctrl * ctrl ,
bool remove )
{
if ( remove ) {
blk_cleanup_queue ( ctrl - > ctrl . connect_q ) ;
2019-05-06 13:47:55 +03:00
blk_mq_free_tag_set ( ctrl - > ctrl . tagset ) ;
2017-08-28 21:41:10 +02:00
}
nvme_rdma_free_io_queues ( ctrl ) ;
}
static int nvme_rdma_configure_io_queues ( struct nvme_rdma_ctrl * ctrl , bool new )
{
int ret ;
2017-07-10 09:22:36 +03:00
ret = nvme_rdma_alloc_io_queues ( ctrl ) ;
2017-08-28 21:41:10 +02:00
if ( ret )
return ret ;
if ( new ) {
ctrl - > ctrl . tagset = nvme_rdma_alloc_tagset ( & ctrl - > ctrl , false ) ;
2017-10-19 18:10:53 +03:00
if ( IS_ERR ( ctrl - > ctrl . tagset ) ) {
ret = PTR_ERR ( ctrl - > ctrl . tagset ) ;
2017-08-28 21:41:10 +02:00
goto out_free_io_queues ;
2017-10-19 18:10:53 +03:00
}
2017-08-28 21:41:10 +02:00
2022-02-10 11:12:36 -08:00
ret = nvme_ctrl_init_connect_q ( & ( ctrl - > ctrl ) ) ;
if ( ret )
2017-08-28 21:41:10 +02:00
goto out_free_tag_set ;
}
2017-07-10 09:22:37 +03:00
ret = nvme_rdma_start_io_queues ( ctrl ) ;
2017-08-28 21:41:10 +02:00
if ( ret )
goto out_cleanup_connect_q ;
2020-07-27 17:32:09 -07:00
if ( ! new ) {
nvme_start_queues ( & ctrl - > ctrl ) ;
2020-07-30 13:42:42 -07:00
if ( ! nvme_wait_freeze_timeout ( & ctrl - > ctrl , NVME_IO_TIMEOUT ) ) {
/*
* If we timed out waiting for freeze we are likely to
* be stuck . Fail the controller initialization just
* to be safe .
*/
ret = - ENODEV ;
goto out_wait_freeze_timed_out ;
}
2020-07-27 17:32:09 -07:00
blk_mq_update_nr_hw_queues ( ctrl - > ctrl . tagset ,
ctrl - > ctrl . queue_count - 1 ) ;
nvme_unfreeze ( & ctrl - > ctrl ) ;
}
2017-08-28 21:41:10 +02:00
return 0 ;
2020-07-30 13:42:42 -07:00
out_wait_freeze_timed_out :
nvme_stop_queues ( & ctrl - > ctrl ) ;
2021-01-21 11:32:37 +08:00
nvme_sync_io_queues ( & ctrl - > ctrl ) ;
2020-07-30 13:42:42 -07:00
nvme_rdma_stop_io_queues ( ctrl ) ;
2017-08-28 21:41:10 +02:00
out_cleanup_connect_q :
2021-01-21 11:32:37 +08:00
nvme_cancel_tagset ( & ctrl - > ctrl ) ;
2017-08-28 21:41:10 +02:00
if ( new )
blk_cleanup_queue ( ctrl - > ctrl . connect_q ) ;
out_free_tag_set :
if ( new )
2019-05-06 13:47:55 +03:00
blk_mq_free_tag_set ( ctrl - > ctrl . tagset ) ;
2017-08-28 21:41:10 +02:00
out_free_io_queues :
nvme_rdma_free_io_queues ( ctrl ) ;
return ret ;
2016-07-06 21:55:52 +09:00
}
2018-07-09 12:49:07 +03:00
static void nvme_rdma_teardown_admin_queue ( struct nvme_rdma_ctrl * ctrl ,
bool remove )
{
2021-10-14 16:17:06 +08:00
nvme_stop_admin_queue ( & ctrl - > ctrl ) ;
2020-10-22 10:15:08 +08:00
blk_sync_queue ( ctrl - > ctrl . admin_q ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_stop_queue ( & ctrl - > queues [ 0 ] ) ;
2021-01-21 11:32:39 +08:00
nvme_cancel_admin_tagset ( & ctrl - > ctrl ) ;
2019-08-02 19:33:59 -07:00
if ( remove )
2021-10-14 16:17:06 +08:00
nvme_start_admin_queue ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_destroy_admin_queue ( ctrl , remove ) ;
}
static void nvme_rdma_teardown_io_queues ( struct nvme_rdma_ctrl * ctrl ,
bool remove )
{
if ( ctrl - > ctrl . queue_count > 1 ) {
2020-07-27 17:32:09 -07:00
nvme_start_freeze ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_stop_queues ( & ctrl - > ctrl ) ;
2020-10-22 10:15:08 +08:00
nvme_sync_io_queues ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_stop_io_queues ( ctrl ) ;
2021-01-21 11:32:39 +08:00
nvme_cancel_tagset ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
if ( remove )
nvme_start_queues ( & ctrl - > ctrl ) ;
nvme_rdma_destroy_io_queues ( ctrl , remove ) ;
}
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_free_ctrl ( struct nvme_ctrl * nctrl )
{
struct nvme_rdma_ctrl * ctrl = to_rdma_ctrl ( nctrl ) ;
if ( list_empty ( & ctrl - > list ) )
goto free_ctrl ;
mutex_lock ( & nvme_rdma_ctrl_mutex ) ;
list_del ( & ctrl - > list ) ;
mutex_unlock ( & nvme_rdma_ctrl_mutex ) ;
nvmf_free_options ( nctrl - > opts ) ;
free_ctrl :
2018-06-19 15:34:09 +03:00
kfree ( ctrl - > queues ) ;
2016-07-06 21:55:52 +09:00
kfree ( ctrl ) ;
}
2017-03-18 20:58:29 +02:00
static void nvme_rdma_reconnect_or_remove ( struct nvme_rdma_ctrl * ctrl )
{
/* If we are resetting/deleting then do nothing */
2018-01-31 18:31:24 +02:00
if ( ctrl - > ctrl . state ! = NVME_CTRL_CONNECTING ) {
2017-03-18 20:58:29 +02:00
WARN_ON_ONCE ( ctrl - > ctrl . state = = NVME_CTRL_NEW | |
ctrl - > ctrl . state = = NVME_CTRL_LIVE ) ;
return ;
}
if ( nvmf_should_reconnect ( & ctrl - > ctrl ) ) {
dev_info ( ctrl - > ctrl . device , " Reconnecting in %d seconds... \n " ,
ctrl - > ctrl . opts - > reconnect_delay ) ;
2017-06-07 20:31:55 +02:00
queue_delayed_work ( nvme_wq , & ctrl - > reconnect_work ,
2017-03-18 20:58:29 +02:00
ctrl - > ctrl . opts - > reconnect_delay * HZ ) ;
} else {
2017-10-29 14:21:01 +02:00
nvme_delete_ctrl ( & ctrl - > ctrl ) ;
2017-03-18 20:58:29 +02:00
}
}
2018-07-09 12:49:06 +03:00
static int nvme_rdma_setup_ctrl ( struct nvme_rdma_ctrl * ctrl , bool new )
2016-07-06 21:55:52 +09:00
{
2021-05-13 12:59:52 +01:00
int ret ;
2016-07-06 21:55:52 +09:00
bool changed ;
2018-07-09 12:49:06 +03:00
ret = nvme_rdma_configure_admin_queue ( ctrl , new ) ;
2016-07-06 21:55:52 +09:00
if ( ret )
2018-07-09 12:49:06 +03:00
return ret ;
if ( ctrl - > ctrl . icdoff ) {
2021-10-17 11:58:16 +03:00
ret = - EOPNOTSUPP ;
2018-07-09 12:49:06 +03:00
dev_err ( ctrl - > ctrl . device , " icdoff is not supported! \n " ) ;
goto destroy_admin ;
}
if ( ! ( ctrl - > ctrl . sgls & ( 1 < < 2 ) ) ) {
2021-10-17 11:58:16 +03:00
ret = - EOPNOTSUPP ;
2018-07-09 12:49:06 +03:00
dev_err ( ctrl - > ctrl . device ,
" Mandatory keyed sgls are not supported! \n " ) ;
goto destroy_admin ;
}
if ( ctrl - > ctrl . opts - > queue_size > ctrl - > ctrl . sqsize + 1 ) {
dev_warn ( ctrl - > ctrl . device ,
" queue_size %zu > ctrl sqsize %u, clamping down \n " ,
ctrl - > ctrl . opts - > queue_size , ctrl - > ctrl . sqsize + 1 ) ;
}
2021-09-23 00:55:35 +03:00
if ( ctrl - > ctrl . sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE ) {
dev_warn ( ctrl - > ctrl . device ,
" ctrl sqsize %u > max queue size %u, clamping down \n " ,
ctrl - > ctrl . sqsize + 1 , NVME_RDMA_MAX_QUEUE_SIZE ) ;
ctrl - > ctrl . sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1 ;
}
2018-07-09 12:49:06 +03:00
if ( ctrl - > ctrl . sqsize + 1 > ctrl - > ctrl . maxcmd ) {
dev_warn ( ctrl - > ctrl . device ,
" sqsize %u > ctrl maxcmd %u, clamping down \n " ,
ctrl - > ctrl . sqsize + 1 , ctrl - > ctrl . maxcmd ) ;
ctrl - > ctrl . sqsize = ctrl - > ctrl . maxcmd - 1 ;
}
2016-07-06 21:55:52 +09:00
2018-06-20 07:15:05 -07:00
if ( ctrl - > ctrl . sgls & ( 1 < < 20 ) )
ctrl - > use_inline_data = true ;
2016-07-06 21:55:52 +09:00
2017-04-24 10:58:29 +03:00
if ( ctrl - > ctrl . queue_count > 1 ) {
2018-07-09 12:49:06 +03:00
ret = nvme_rdma_configure_io_queues ( ctrl , new ) ;
2016-07-06 21:55:52 +09:00
if ( ret )
2017-10-11 15:29:11 +03:00
goto destroy_admin ;
2016-07-06 21:55:52 +09:00
}
changed = nvme_change_ctrl_state ( & ctrl - > ctrl , NVME_CTRL_LIVE ) ;
2017-09-21 17:01:37 +03:00
if ( ! changed ) {
2020-03-24 17:29:44 +02:00
/*
2020-07-22 16:32:19 -07:00
* state change failure is ok if we started ctrl delete ,
2020-03-24 17:29:44 +02:00
* unless we ' re during creation of a new controller to
* avoid races with teardown flow .
*/
2020-07-22 16:32:19 -07:00
WARN_ON_ONCE ( ctrl - > ctrl . state ! = NVME_CTRL_DELETING & &
ctrl - > ctrl . state ! = NVME_CTRL_DELETING_NOIO ) ;
2020-03-24 17:29:44 +02:00
WARN_ON_ONCE ( new ) ;
2018-07-09 12:49:06 +03:00
ret = - EINVAL ;
goto destroy_io ;
2017-09-21 17:01:37 +03:00
}
2017-07-02 10:56:43 +03:00
nvme_start_ctrl ( & ctrl - > ctrl ) ;
2018-07-09 12:49:06 +03:00
return 0 ;
destroy_io :
2021-01-21 11:32:37 +08:00
if ( ctrl - > ctrl . queue_count > 1 ) {
nvme_stop_queues ( & ctrl - > ctrl ) ;
nvme_sync_io_queues ( & ctrl - > ctrl ) ;
nvme_rdma_stop_io_queues ( ctrl ) ;
nvme_cancel_tagset ( & ctrl - > ctrl ) ;
2018-07-09 12:49:06 +03:00
nvme_rdma_destroy_io_queues ( ctrl , new ) ;
2021-01-21 11:32:37 +08:00
}
2018-07-09 12:49:06 +03:00
destroy_admin :
2021-10-14 16:17:06 +08:00
nvme_stop_admin_queue ( & ctrl - > ctrl ) ;
2021-01-21 11:32:37 +08:00
blk_sync_queue ( ctrl - > ctrl . admin_q ) ;
2018-07-09 12:49:06 +03:00
nvme_rdma_stop_queue ( & ctrl - > queues [ 0 ] ) ;
2021-01-21 11:32:37 +08:00
nvme_cancel_admin_tagset ( & ctrl - > ctrl ) ;
2018-07-09 12:49:06 +03:00
nvme_rdma_destroy_admin_queue ( ctrl , new ) ;
return ret ;
}
static void nvme_rdma_reconnect_ctrl_work ( struct work_struct * work )
{
struct nvme_rdma_ctrl * ctrl = container_of ( to_delayed_work ( work ) ,
struct nvme_rdma_ctrl , reconnect_work ) ;
+ + ctrl - > ctrl . nr_reconnects ;
if ( nvme_rdma_setup_ctrl ( ctrl , false ) )
goto requeue ;
2016-07-06 21:55:52 +09:00
2017-10-11 15:29:11 +03:00
dev_info ( ctrl - > ctrl . device , " Successfully reconnected (%d attempts) \n " ,
ctrl - > ctrl . nr_reconnects ) ;
ctrl - > ctrl . nr_reconnects = 0 ;
2016-07-06 21:55:52 +09:00
return ;
requeue :
2017-03-18 20:58:29 +02:00
dev_info ( ctrl - > ctrl . device , " Failed reconnect attempt %d \n " ,
2017-05-04 13:33:15 +03:00
ctrl - > ctrl . nr_reconnects ) ;
2017-03-18 20:58:29 +02:00
nvme_rdma_reconnect_or_remove ( ctrl ) ;
2016-07-06 21:55:52 +09:00
}
static void nvme_rdma_error_recovery_work ( struct work_struct * work )
{
struct nvme_rdma_ctrl * ctrl = container_of ( work ,
struct nvme_rdma_ctrl , err_work ) ;
2017-09-21 17:01:38 +03:00
nvme_stop_keep_alive ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_teardown_io_queues ( ctrl , false ) ;
2017-06-05 20:35:56 +03:00
nvme_start_queues ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_teardown_admin_queue ( ctrl , false ) ;
2021-10-14 16:17:06 +08:00
nvme_start_admin_queue ( & ctrl - > ctrl ) ;
2017-06-05 20:35:56 +03:00
2018-01-31 18:31:24 +02:00
if ( ! nvme_change_ctrl_state ( & ctrl - > ctrl , NVME_CTRL_CONNECTING ) ) {
2020-07-22 16:32:19 -07:00
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE ( ctrl - > ctrl . state ! = NVME_CTRL_DELETING & &
ctrl - > ctrl . state ! = NVME_CTRL_DELETING_NOIO ) ;
2017-12-21 14:54:15 +02:00
return ;
}
2017-03-18 20:58:29 +02:00
nvme_rdma_reconnect_or_remove ( ctrl ) ;
2016-07-06 21:55:52 +09:00
}
static void nvme_rdma_error_recovery ( struct nvme_rdma_ctrl * ctrl )
{
2017-12-21 14:54:15 +02:00
if ( ! nvme_change_ctrl_state ( & ctrl - > ctrl , NVME_CTRL_RESETTING ) )
2016-07-06 21:55:52 +09:00
return ;
2020-07-29 02:36:03 -07:00
dev_warn ( ctrl - > ctrl . device , " starting error recovery \n " ) ;
2020-02-10 16:01:45 -08:00
queue_work ( nvme_reset_wq , & ctrl - > err_work ) ;
2016-07-06 21:55:52 +09:00
}
2020-06-11 08:44:51 +02:00
static void nvme_rdma_end_request ( struct nvme_rdma_request * req )
{
struct request * rq = blk_mq_rq_from_pdu ( req ) ;
if ( ! refcount_dec_and_test ( & req - > ref ) )
return ;
2020-08-18 09:11:29 +02:00
if ( ! nvme_try_complete_req ( rq , req - > status , req - > result ) )
2020-06-11 08:44:52 +02:00
nvme_rdma_complete_rq ( rq ) ;
2020-06-11 08:44:51 +02:00
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_wr_error ( struct ib_cq * cq , struct ib_wc * wc ,
const char * op )
{
2020-07-13 11:53:29 +03:00
struct nvme_rdma_queue * queue = wc - > qp - > qp_context ;
2016-07-06 21:55:52 +09:00
struct nvme_rdma_ctrl * ctrl = queue - > ctrl ;
if ( ctrl - > ctrl . state = = NVME_CTRL_LIVE )
dev_info ( ctrl - > ctrl . device ,
" %s for CQE 0x%p failed with status %s (%d) \n " ,
op , wc - > wr_cqe ,
ib_wc_status_msg ( wc - > status ) , wc - > status ) ;
nvme_rdma_error_recovery ( ctrl ) ;
}
static void nvme_rdma_memreg_done ( struct ib_cq * cq , struct ib_wc * wc )
{
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) )
nvme_rdma_wr_error ( cq , wc , " MEMREG " ) ;
}
static void nvme_rdma_inv_rkey_done ( struct ib_cq * cq , struct ib_wc * wc )
{
2017-11-23 17:35:23 +02:00
struct nvme_rdma_request * req =
container_of ( wc - > wr_cqe , struct nvme_rdma_request , reg_cqe ) ;
2020-06-11 08:44:51 +02:00
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) )
2016-07-06 21:55:52 +09:00
nvme_rdma_wr_error ( cq , wc , " LOCAL_INV " ) ;
2020-06-11 08:44:51 +02:00
else
nvme_rdma_end_request ( req ) ;
2016-07-06 21:55:52 +09:00
}
static int nvme_rdma_inv_rkey ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_request * req )
{
struct ib_send_wr wr = {
. opcode = IB_WR_LOCAL_INV ,
. next = NULL ,
. num_sge = 0 ,
2017-11-23 17:35:23 +02:00
. send_flags = IB_SEND_SIGNALED ,
2016-07-06 21:55:52 +09:00
. ex . invalidate_rkey = req - > mr - > rkey ,
} ;
req - > reg_cqe . done = nvme_rdma_inv_rkey_done ;
wr . wr_cqe = & req - > reg_cqe ;
2018-07-18 09:25:23 -07:00
return ib_post_send ( queue - > qp , & wr , NULL ) ;
2016-07-06 21:55:52 +09:00
}
2022-02-09 10:54:49 +02:00
static void nvme_rdma_dma_unmap_req ( struct ib_device * ibdev , struct request * rq )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
if ( blk_integrity_rq ( rq ) ) {
ib_dma_unmap_sg ( ibdev , req - > metadata_sgl - > sg_table . sgl ,
req - > metadata_sgl - > nents , rq_dma_dir ( rq ) ) ;
sg_free_table_chained ( & req - > metadata_sgl - > sg_table ,
NVME_INLINE_METADATA_SG_CNT ) ;
}
ib_dma_unmap_sg ( ibdev , req - > data_sgl . sg_table . sgl , req - > data_sgl . nents ,
rq_dma_dir ( rq ) ) ;
sg_free_table_chained ( & req - > data_sgl . sg_table , NVME_INLINE_SG_CNT ) ;
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_unmap_data ( struct nvme_rdma_queue * queue ,
struct request * rq )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
struct nvme_rdma_device * dev = queue - > device ;
struct ib_device * ibdev = dev - > dev ;
2020-05-19 17:05:56 +03:00
struct list_head * pool = & queue - > qp - > rdma_mrs ;
2016-07-06 21:55:52 +09:00
2019-02-20 20:13:34 -08:00
if ( ! blk_rq_nr_phys_segments ( rq ) )
2016-07-06 21:55:52 +09:00
return ;
2020-05-19 17:05:56 +03:00
if ( req - > use_sig_mr )
pool = & queue - > qp - > sig_mrs ;
2017-11-26 10:40:55 +00:00
if ( req - > mr ) {
2020-05-19 17:05:56 +03:00
ib_mr_pool_put ( queue - > qp , pool , req - > mr ) ;
2017-11-26 10:40:55 +00:00
req - > mr = NULL ;
}
2022-02-09 10:54:49 +02:00
nvme_rdma_dma_unmap_req ( ibdev , rq ) ;
2016-07-06 21:55:52 +09:00
}
static int nvme_rdma_set_sg_null ( struct nvme_command * c )
{
struct nvme_keyed_sgl_desc * sg = & c - > common . dptr . ksgl ;
sg - > addr = 0 ;
put_unaligned_le24 ( 0 , sg - > length ) ;
put_unaligned_le32 ( 0 , sg - > key ) ;
sg - > type = NVME_KEY_SGL_FMT_DATA_DESC < < 4 ;
return 0 ;
}
static int nvme_rdma_map_sg_inline ( struct nvme_rdma_queue * queue ,
2018-06-20 07:15:05 -07:00
struct nvme_rdma_request * req , struct nvme_command * c ,
int count )
2016-07-06 21:55:52 +09:00
{
struct nvme_sgl_desc * sg = & c - > common . dptr . sgl ;
2018-06-20 07:15:05 -07:00
struct ib_sge * sge = & req - > sge [ 1 ] ;
2021-05-27 18:16:38 -07:00
struct scatterlist * sgl ;
2018-06-20 07:15:05 -07:00
u32 len = 0 ;
int i ;
2016-07-06 21:55:52 +09:00
2021-05-27 18:16:38 -07:00
for_each_sg ( req - > data_sgl . sg_table . sgl , sgl , count , i ) {
2018-06-20 07:15:05 -07:00
sge - > addr = sg_dma_address ( sgl ) ;
sge - > length = sg_dma_len ( sgl ) ;
sge - > lkey = queue - > device - > pd - > local_dma_lkey ;
len + = sge - > length ;
2021-05-27 18:16:38 -07:00
sge + + ;
2018-06-20 07:15:05 -07:00
}
2016-07-06 21:55:52 +09:00
sg - > addr = cpu_to_le64 ( queue - > ctrl - > ctrl . icdoff ) ;
2018-06-20 07:15:05 -07:00
sg - > length = cpu_to_le32 ( len ) ;
2016-07-06 21:55:52 +09:00
sg - > type = ( NVME_SGL_FMT_DATA_DESC < < 4 ) | NVME_SGL_FMT_OFFSET ;
2018-06-20 07:15:05 -07:00
req - > num_sge + = count ;
2016-07-06 21:55:52 +09:00
return 0 ;
}
static int nvme_rdma_map_sg_single ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_request * req , struct nvme_command * c )
{
struct nvme_keyed_sgl_desc * sg = & c - > common . dptr . ksgl ;
2020-05-19 17:05:55 +03:00
sg - > addr = cpu_to_le64 ( sg_dma_address ( req - > data_sgl . sg_table . sgl ) ) ;
put_unaligned_le24 ( sg_dma_len ( req - > data_sgl . sg_table . sgl ) , sg - > length ) ;
2016-09-05 12:56:20 +02:00
put_unaligned_le32 ( queue - > device - > pd - > unsafe_global_rkey , sg - > key ) ;
2016-07-06 21:55:52 +09:00
sg - > type = NVME_KEY_SGL_FMT_DATA_DESC < < 4 ;
return 0 ;
}
static int nvme_rdma_map_sg_fr ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_request * req , struct nvme_command * c ,
int count )
{
struct nvme_keyed_sgl_desc * sg = & c - > common . dptr . ksgl ;
int nr ;
2017-11-26 10:40:55 +00:00
req - > mr = ib_mr_pool_get ( queue - > qp , & queue - > qp - > rdma_mrs ) ;
if ( WARN_ON_ONCE ( ! req - > mr ) )
return - EAGAIN ;
2017-08-28 12:52:27 +03:00
/*
* Align the MR to a 4 K page size to match the ctrl page size and
* the block virtual boundary .
*/
2020-05-19 17:05:55 +03:00
nr = ib_map_mr_sg ( req - > mr , req - > data_sgl . sg_table . sgl , count , NULL ,
SZ_4K ) ;
2017-08-14 15:29:26 +03:00
if ( unlikely ( nr < count ) ) {
2017-11-26 10:40:55 +00:00
ib_mr_pool_put ( queue - > qp , & queue - > qp - > rdma_mrs , req - > mr ) ;
req - > mr = NULL ;
2016-07-06 21:55:52 +09:00
if ( nr < 0 )
return nr ;
return - EINVAL ;
}
ib_update_fast_reg_key ( req - > mr , ib_inc_rkey ( req - > mr - > rkey ) ) ;
req - > reg_cqe . done = nvme_rdma_memreg_done ;
memset ( & req - > reg_wr , 0 , sizeof ( req - > reg_wr ) ) ;
req - > reg_wr . wr . opcode = IB_WR_REG_MR ;
req - > reg_wr . wr . wr_cqe = & req - > reg_cqe ;
req - > reg_wr . wr . num_sge = 0 ;
req - > reg_wr . mr = req - > mr ;
req - > reg_wr . key = req - > mr - > rkey ;
req - > reg_wr . access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE ;
sg - > addr = cpu_to_le64 ( req - > mr - > iova ) ;
put_unaligned_le24 ( req - > mr - > length , sg - > length ) ;
put_unaligned_le32 ( req - > mr - > rkey , sg - > key ) ;
sg - > type = ( NVME_KEY_SGL_FMT_DATA_DESC < < 4 ) |
NVME_SGL_FMT_INVALIDATE ;
return 0 ;
}
2020-05-19 17:05:56 +03:00
static void nvme_rdma_set_sig_domain ( struct blk_integrity * bi ,
struct nvme_command * cmd , struct ib_sig_domain * domain ,
u16 control , u8 pi_type )
{
domain - > sig_type = IB_SIG_TYPE_T10_DIF ;
domain - > sig . dif . bg_type = IB_T10DIF_CRC ;
domain - > sig . dif . pi_interval = 1 < < bi - > interval_exp ;
domain - > sig . dif . ref_tag = le32_to_cpu ( cmd - > rw . reftag ) ;
if ( control & NVME_RW_PRINFO_PRCHK_REF )
domain - > sig . dif . ref_remap = true ;
domain - > sig . dif . app_tag = le16_to_cpu ( cmd - > rw . apptag ) ;
domain - > sig . dif . apptag_check_mask = le16_to_cpu ( cmd - > rw . appmask ) ;
domain - > sig . dif . app_escape = true ;
if ( pi_type = = NVME_NS_DPS_PI_TYPE3 )
domain - > sig . dif . ref_escape = true ;
}
static void nvme_rdma_set_sig_attrs ( struct blk_integrity * bi ,
struct nvme_command * cmd , struct ib_sig_attrs * sig_attrs ,
u8 pi_type )
{
u16 control = le16_to_cpu ( cmd - > rw . control ) ;
memset ( sig_attrs , 0 , sizeof ( * sig_attrs ) ) ;
if ( control & NVME_RW_PRINFO_PRACT ) {
/* for WRITE_INSERT/READ_STRIP no memory domain */
sig_attrs - > mem . sig_type = IB_SIG_TYPE_NONE ;
nvme_rdma_set_sig_domain ( bi , cmd , & sig_attrs - > wire , control ,
pi_type ) ;
/* Clear the PRACT bit since HCA will generate/verify the PI */
control & = ~ NVME_RW_PRINFO_PRACT ;
cmd - > rw . control = cpu_to_le16 ( control ) ;
} else {
/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
nvme_rdma_set_sig_domain ( bi , cmd , & sig_attrs - > wire , control ,
pi_type ) ;
nvme_rdma_set_sig_domain ( bi , cmd , & sig_attrs - > mem , control ,
pi_type ) ;
}
}
static void nvme_rdma_set_prot_checks ( struct nvme_command * cmd , u8 * mask )
{
* mask = 0 ;
if ( le16_to_cpu ( cmd - > rw . control ) & NVME_RW_PRINFO_PRCHK_REF )
* mask | = IB_SIG_CHECK_REFTAG ;
if ( le16_to_cpu ( cmd - > rw . control ) & NVME_RW_PRINFO_PRCHK_GUARD )
* mask | = IB_SIG_CHECK_GUARD ;
}
static void nvme_rdma_sig_done ( struct ib_cq * cq , struct ib_wc * wc )
{
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) )
nvme_rdma_wr_error ( cq , wc , " SIG " ) ;
}
static int nvme_rdma_map_sg_pi ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_request * req , struct nvme_command * c ,
int count , int pi_count )
{
struct nvme_rdma_sgl * sgl = & req - > data_sgl ;
struct ib_reg_wr * wr = & req - > reg_wr ;
struct request * rq = blk_mq_rq_from_pdu ( req ) ;
struct nvme_ns * ns = rq - > q - > queuedata ;
struct bio * bio = rq - > bio ;
struct nvme_keyed_sgl_desc * sg = & c - > common . dptr . ksgl ;
int nr ;
req - > mr = ib_mr_pool_get ( queue - > qp , & queue - > qp - > sig_mrs ) ;
if ( WARN_ON_ONCE ( ! req - > mr ) )
return - EAGAIN ;
nr = ib_map_mr_sg_pi ( req - > mr , sgl - > sg_table . sgl , count , NULL ,
req - > metadata_sgl - > sg_table . sgl , pi_count , NULL ,
SZ_4K ) ;
if ( unlikely ( nr ) )
goto mr_put ;
2021-01-24 11:02:34 +01:00
nvme_rdma_set_sig_attrs ( blk_get_integrity ( bio - > bi_bdev - > bd_disk ) , c ,
2020-05-19 17:05:56 +03:00
req - > mr - > sig_attrs , ns - > pi_type ) ;
nvme_rdma_set_prot_checks ( c , & req - > mr - > sig_attrs - > check_mask ) ;
ib_update_fast_reg_key ( req - > mr , ib_inc_rkey ( req - > mr - > rkey ) ) ;
req - > reg_cqe . done = nvme_rdma_sig_done ;
memset ( wr , 0 , sizeof ( * wr ) ) ;
wr - > wr . opcode = IB_WR_REG_MR_INTEGRITY ;
wr - > wr . wr_cqe = & req - > reg_cqe ;
wr - > wr . num_sge = 0 ;
wr - > wr . send_flags = 0 ;
wr - > mr = req - > mr ;
wr - > key = req - > mr - > rkey ;
wr - > access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE ;
sg - > addr = cpu_to_le64 ( req - > mr - > iova ) ;
put_unaligned_le24 ( req - > mr - > length , sg - > length ) ;
put_unaligned_le32 ( req - > mr - > rkey , sg - > key ) ;
sg - > type = NVME_KEY_SGL_FMT_DATA_DESC < < 4 ;
return 0 ;
mr_put :
ib_mr_pool_put ( queue - > qp , & queue - > qp - > sig_mrs , req - > mr ) ;
req - > mr = NULL ;
if ( nr < 0 )
return nr ;
return - EINVAL ;
}
2022-02-09 10:54:49 +02:00
static int nvme_rdma_dma_map_req ( struct ib_device * ibdev , struct request * rq ,
int * count , int * pi_count )
2016-07-06 21:55:52 +09:00
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
2022-02-09 10:54:49 +02:00
int ret ;
2016-07-06 21:55:52 +09:00
2020-05-19 17:05:55 +03:00
req - > data_sgl . sg_table . sgl = ( struct scatterlist * ) ( req + 1 ) ;
ret = sg_alloc_table_chained ( & req - > data_sgl . sg_table ,
blk_rq_nr_phys_segments ( rq ) , req - > data_sgl . sg_table . sgl ,
2019-11-24 18:38:30 +02:00
NVME_INLINE_SG_CNT ) ;
2016-07-06 21:55:52 +09:00
if ( ret )
return - ENOMEM ;
2020-05-19 17:05:55 +03:00
req - > data_sgl . nents = blk_rq_map_sg ( rq - > q , rq ,
req - > data_sgl . sg_table . sgl ) ;
2016-07-06 21:55:52 +09:00
2022-02-09 10:54:49 +02:00
* count = ib_dma_map_sg ( ibdev , req - > data_sgl . sg_table . sgl ,
req - > data_sgl . nents , rq_dma_dir ( rq ) ) ;
if ( unlikely ( * count < = 0 ) ) {
2018-06-10 16:58:29 +03:00
ret = - EIO ;
goto out_free_table ;
2016-07-06 21:55:52 +09:00
}
2020-05-19 17:05:56 +03:00
if ( blk_integrity_rq ( rq ) ) {
req - > metadata_sgl - > sg_table . sgl =
( struct scatterlist * ) ( req - > metadata_sgl + 1 ) ;
ret = sg_alloc_table_chained ( & req - > metadata_sgl - > sg_table ,
blk_rq_count_integrity_sg ( rq - > q , rq - > bio ) ,
req - > metadata_sgl - > sg_table . sgl ,
NVME_INLINE_METADATA_SG_CNT ) ;
if ( unlikely ( ret ) ) {
ret = - ENOMEM ;
goto out_unmap_sg ;
}
req - > metadata_sgl - > nents = blk_rq_map_integrity_sg ( rq - > q ,
rq - > bio , req - > metadata_sgl - > sg_table . sgl ) ;
2022-02-09 10:54:49 +02:00
* pi_count = ib_dma_map_sg ( ibdev ,
req - > metadata_sgl - > sg_table . sgl ,
req - > metadata_sgl - > nents ,
rq_dma_dir ( rq ) ) ;
if ( unlikely ( * pi_count < = 0 ) ) {
2020-05-19 17:05:56 +03:00
ret = - EIO ;
goto out_free_pi_table ;
}
}
2022-02-09 10:54:49 +02:00
return 0 ;
out_free_pi_table :
sg_free_table_chained ( & req - > metadata_sgl - > sg_table ,
NVME_INLINE_METADATA_SG_CNT ) ;
out_unmap_sg :
ib_dma_unmap_sg ( ibdev , req - > data_sgl . sg_table . sgl , req - > data_sgl . nents ,
rq_dma_dir ( rq ) ) ;
out_free_table :
sg_free_table_chained ( & req - > data_sgl . sg_table , NVME_INLINE_SG_CNT ) ;
return ret ;
}
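/*
* Decide how the payload is described to the target:
* - no physical segments: NULL keyed SGL (nvme_rdma_set_sg_null)
* - PI-protected I/O: signature MR (nvme_rdma_map_sg_pi)
* - small writes on I/O queues with inline data enabled: in-capsule data
*   (nvme_rdma_map_sg_inline)
* - a single segment with an unsafe global rkey: single SGL entry
*   (nvme_rdma_map_sg_single)
* - otherwise: memory registration through a fast-reg MR
*   (nvme_rdma_map_sg_fr)
*/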
static int nvme_rdma_map_data ( struct nvme_rdma_queue * queue ,
struct request * rq , struct nvme_command * c )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
struct nvme_rdma_device * dev = queue - > device ;
struct ib_device * ibdev = dev - > dev ;
int pi_count = 0 ;
int count , ret ;
req - > num_sge = 1 ;
refcount_set ( & req - > ref , 2 ) ; /* send and recv completions */
c - > common . flags | = NVME_CMD_SGL_METABUF ;
if ( ! blk_rq_nr_phys_segments ( rq ) )
return nvme_rdma_set_sg_null ( c ) ;
ret = nvme_rdma_dma_map_req ( ibdev , rq , & count , & pi_count ) ;
if ( unlikely ( ret ) )
return ret ;
2020-05-19 17:05:56 +03:00
if ( req - > use_sig_mr ) {
ret = nvme_rdma_map_sg_pi ( queue , req , c , count , pi_count ) ;
goto out ;
}
2018-06-20 07:15:05 -07:00
if ( count < = dev - > num_inline_segments ) {
2017-01-13 12:29:12 +01:00
if ( rq_data_dir ( rq ) = = WRITE & & nvme_rdma_queue_idx ( queue ) & &
2018-06-20 07:15:05 -07:00
queue - > ctrl - > use_inline_data & &
2017-01-13 12:29:12 +01:00
blk_rq_payload_bytes ( rq ) < =
2018-06-10 16:58:29 +03:00
nvme_rdma_inline_data_size ( queue ) ) {
2018-06-20 07:15:05 -07:00
ret = nvme_rdma_map_sg_inline ( queue , req , c , count ) ;
2018-06-10 16:58:29 +03:00
goto out ;
}
2016-07-06 21:55:52 +09:00
2018-06-20 07:15:05 -07:00
if ( count = = 1 & & dev - > pd - > flags & IB_PD_UNSAFE_GLOBAL_RKEY ) {
2018-06-10 16:58:29 +03:00
ret = nvme_rdma_map_sg_single ( queue , req , c ) ;
goto out ;
}
2016-07-06 21:55:52 +09:00
}
2018-06-10 16:58:29 +03:00
ret = nvme_rdma_map_sg_fr ( queue , req , c , count ) ;
out :
if ( unlikely ( ret ) )
2022-02-09 10:54:49 +02:00
goto out_dma_unmap_req ;
2018-06-10 16:58:29 +03:00
return 0 ;
2022-02-09 10:54:49 +02:00
out_dma_unmap_req :
nvme_rdma_dma_unmap_req ( ibdev , rq ) ;
2018-06-10 16:58:29 +03:00
return ret ;
2016-07-06 21:55:52 +09:00
}
static void nvme_rdma_send_done ( struct ib_cq * cq , struct ib_wc * wc )
{
2017-11-23 17:35:22 +02:00
struct nvme_rdma_qe * qe =
container_of ( wc - > wr_cqe , struct nvme_rdma_qe , cqe ) ;
struct nvme_rdma_request * req =
container_of ( qe , struct nvme_rdma_request , sqe ) ;
2020-06-11 08:44:51 +02:00
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) )
2016-07-06 21:55:52 +09:00
nvme_rdma_wr_error ( cq , wc , " SEND " ) ;
2020-06-11 08:44:51 +02:00
else
nvme_rdma_end_request ( req ) ;
2016-07-06 21:55:52 +09:00
}
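/*
* Post the command capsule as an IB_WR_SEND. If @first is given (e.g. an
* MR registration WR), it is chained in front of the send so that both are
* posted with a single ib_post_send() call, as in the queue_rq path:
*
* nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
*                     req->mr ? &req->reg_wr.wr : NULL);
*/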
static int nvme_rdma_post_send ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_qe * qe , struct ib_sge * sge , u32 num_sge ,
2017-11-23 17:35:21 +02:00
struct ib_send_wr * first )
2016-07-06 21:55:52 +09:00
{
2018-07-18 09:25:23 -07:00
struct ib_send_wr wr ;
2016-07-06 21:55:52 +09:00
int ret ;
sge - > addr = qe - > dma ;
2020-03-31 15:46:33 +03:00
sge - > length = sizeof ( struct nvme_command ) ;
2016-07-06 21:55:52 +09:00
sge - > lkey = queue - > device - > pd - > local_dma_lkey ;
wr . next = NULL ;
wr . wr_cqe = & qe - > cqe ;
wr . sg_list = sge ;
wr . num_sge = num_sge ;
wr . opcode = IB_WR_SEND ;
2017-11-23 17:35:21 +02:00
wr . send_flags = IB_SEND_SIGNALED ;
2016-07-06 21:55:52 +09:00
if ( first )
first - > next = & wr ;
else
first = & wr ;
2018-07-18 09:25:23 -07:00
ret = ib_post_send ( queue - > qp , first , NULL ) ;
2017-08-14 15:29:26 +03:00
if ( unlikely ( ret ) ) {
2016-07-06 21:55:52 +09:00
dev_err ( queue - > ctrl - > ctrl . device ,
" %s failed with error code %d \n " , __func__ , ret ) ;
}
return ret ;
}
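/*
* Post a receive buffer sized for a single nvme_completion so the queue is
* ready for the next response capsule.
*/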
static int nvme_rdma_post_recv ( struct nvme_rdma_queue * queue ,
struct nvme_rdma_qe * qe )
{
2018-07-18 09:25:23 -07:00
struct ib_recv_wr wr ;
2016-07-06 21:55:52 +09:00
struct ib_sge list ;
int ret ;
list . addr = qe - > dma ;
list . length = sizeof ( struct nvme_completion ) ;
list . lkey = queue - > device - > pd - > local_dma_lkey ;
qe - > cqe . done = nvme_rdma_recv_done ;
wr . next = NULL ;
wr . wr_cqe = & qe - > cqe ;
wr . sg_list = & list ;
wr . num_sge = 1 ;
2018-07-18 09:25:23 -07:00
ret = ib_post_recv ( queue - > qp , & wr , NULL ) ;
2017-08-14 15:29:26 +03:00
if ( unlikely ( ret ) ) {
2016-07-06 21:55:52 +09:00
dev_err ( queue - > ctrl - > ctrl . device ,
" %s failed with error code %d \n " , __func__ , ret ) ;
}
return ret ;
}
static struct blk_mq_tags * nvme_rdma_tagset ( struct nvme_rdma_queue * queue )
{
u32 queue_idx = nvme_rdma_queue_idx ( queue ) ;
if ( queue_idx = = 0 )
return queue - > ctrl - > admin_tag_set . tags [ queue_idx ] ;
return queue - > ctrl - > tag_set . tags [ queue_idx - 1 ] ;
}
2017-11-23 17:35:21 +02:00
static void nvme_rdma_async_done ( struct ib_cq * cq , struct ib_wc * wc )
{
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) )
nvme_rdma_wr_error ( cq , wc , " ASYNC " ) ;
}
2017-11-07 15:13:12 -07:00
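/*
* Submit the Asynchronous Event Request on the admin queue. The AER uses
* the controller's dedicated async_event_sqe (no struct request is
* allocated for it) and a NULL data SGL.
*/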
static void nvme_rdma_submit_async_event ( struct nvme_ctrl * arg )
2016-07-06 21:55:52 +09:00
{
struct nvme_rdma_ctrl * ctrl = to_rdma_ctrl ( arg ) ;
struct nvme_rdma_queue * queue = & ctrl - > queues [ 0 ] ;
struct ib_device * dev = queue - > device - > dev ;
struct nvme_rdma_qe * sqe = & ctrl - > async_event_sqe ;
struct nvme_command * cmd = sqe - > data ;
struct ib_sge sge ;
int ret ;
ib_dma_sync_single_for_cpu ( dev , sqe - > dma , sizeof ( * cmd ) , DMA_TO_DEVICE ) ;
memset ( cmd , 0 , sizeof ( * cmd ) ) ;
cmd - > common . opcode = nvme_admin_async_event ;
2017-11-07 15:13:10 -07:00
cmd - > common . command_id = NVME_AQ_BLK_MQ_DEPTH ;
2016-07-06 21:55:52 +09:00
cmd - > common . flags | = NVME_CMD_SGL_METABUF ;
nvme_rdma_set_sg_null ( cmd ) ;
2017-11-23 17:35:21 +02:00
sqe - > cqe . done = nvme_rdma_async_done ;
2016-07-06 21:55:52 +09:00
ib_dma_sync_single_for_device ( dev , sqe - > dma , sizeof ( * cmd ) ,
DMA_TO_DEVICE ) ;
2017-11-23 17:35:21 +02:00
ret = nvme_rdma_post_send ( queue , sqe , & sge , 1 , NULL ) ;
2016-07-06 21:55:52 +09:00
WARN_ON_ONCE ( ret ) ;
}
2018-11-26 08:21:49 -07:00
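/*
* Match a received completion to its request by command_id, record the
* status and result, and deal with rkey invalidation: either verify the
* remote invalidation reported in the WC or queue a local invalidate WR,
* in which case that WR's completion ends the request instead of this one.
*/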
static void nvme_rdma_process_nvme_rsp ( struct nvme_rdma_queue * queue ,
struct nvme_completion * cqe , struct ib_wc * wc )
2016-07-06 21:55:52 +09:00
{
struct request * rq ;
struct nvme_rdma_request * req ;
2021-06-16 14:19:36 -07:00
rq = nvme_find_rq ( nvme_rdma_tagset ( queue ) , cqe - > command_id ) ;
2016-07-06 21:55:52 +09:00
if ( ! rq ) {
dev_err ( queue - > ctrl - > ctrl . device ,
2021-06-16 14:19:36 -07:00
" got bad command_id %#x on QP %#x \n " ,
2016-07-06 21:55:52 +09:00
cqe - > command_id , queue - > qp - > qp_num ) ;
nvme_rdma_error_recovery ( queue - > ctrl ) ;
2018-11-26 08:21:49 -07:00
return ;
2016-07-06 21:55:52 +09:00
}
req = blk_mq_rq_to_pdu ( rq ) ;
2017-11-23 17:35:22 +02:00
req - > status = cqe - > status ;
req - > result = cqe - > result ;
2016-07-06 21:55:52 +09:00
2017-11-23 17:35:24 +02:00
if ( wc - > wc_flags & IB_WC_WITH_INVALIDATE ) {
2020-10-12 16:55:37 +08:00
if ( unlikely ( ! req - > mr | |
wc - > ex . invalidate_rkey ! = req - > mr - > rkey ) ) {
2017-11-23 17:35:24 +02:00
dev_err ( queue - > ctrl - > ctrl . device ,
" Bogus remote invalidation for rkey %#x \n " ,
2020-10-12 16:55:37 +08:00
req - > mr ? req - > mr - > rkey : 0 ) ;
2017-11-23 17:35:24 +02:00
nvme_rdma_error_recovery ( queue - > ctrl ) ;
}
2017-11-26 10:40:55 +00:00
} else if ( req - > mr ) {
2018-11-26 08:21:49 -07:00
int ret ;
2017-11-23 17:35:23 +02:00
ret = nvme_rdma_inv_rkey ( queue , req ) ;
if ( unlikely ( ret < 0 ) ) {
dev_err ( queue - > ctrl - > ctrl . device ,
" Queueing INV WR for rkey %#x failed (%d) \n " ,
req - > mr - > rkey , ret ) ;
nvme_rdma_error_recovery ( queue - > ctrl ) ;
}
/* the local invalidation completion will end the request */
2020-06-23 18:22:39 +02:00
return ;
2017-11-23 17:35:23 +02:00
}
2020-06-23 18:22:39 +02:00
nvme_rdma_end_request ( req ) ;
2016-07-06 21:55:52 +09:00
}
2018-11-26 08:21:49 -07:00
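/*
* Receive completion handler: validate the WC and the completion length,
* sync the buffer for the CPU, hand AEN completions to the core and
* everything else to nvme_rdma_process_nvme_rsp(), then repost the receive.
*/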
static void nvme_rdma_recv_done ( struct ib_cq * cq , struct ib_wc * wc )
2016-07-06 21:55:52 +09:00
{
struct nvme_rdma_qe * qe =
container_of ( wc - > wr_cqe , struct nvme_rdma_qe , cqe ) ;
2020-07-13 11:53:29 +03:00
struct nvme_rdma_queue * queue = wc - > qp - > qp_context ;
2016-07-06 21:55:52 +09:00
struct ib_device * ibdev = queue - > device - > dev ;
struct nvme_completion * cqe = qe - > data ;
const size_t len = sizeof ( struct nvme_completion ) ;
if ( unlikely ( wc - > status ! = IB_WC_SUCCESS ) ) {
nvme_rdma_wr_error ( cq , wc , " RECV " ) ;
2018-11-26 08:21:49 -07:00
return ;
2016-07-06 21:55:52 +09:00
}
2020-10-25 19:51:24 +08:00
/* sanity checking for received data length */
if ( unlikely ( wc - > byte_len < len ) ) {
dev_err ( queue - > ctrl - > ctrl . device ,
" Unexpected nvme completion length(%d) \n " , wc - > byte_len ) ;
nvme_rdma_error_recovery ( queue - > ctrl ) ;
return ;
}
2016-07-06 21:55:52 +09:00
ib_dma_sync_single_for_cpu ( ibdev , qe - > dma , len , DMA_FROM_DEVICE ) ;
/*
* AEN requests are special as they don't time out and can
* survive any kind of queue freeze and often don't respond to
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
2019-10-13 19:57:31 +03:00
if ( unlikely ( nvme_is_aen_req ( nvme_rdma_queue_idx ( queue ) ,
cqe - > command_id ) ) )
2016-11-10 07:32:34 -08:00
nvme_complete_async_event ( & queue - > ctrl - > ctrl , cqe - > status ,
& cqe - > result ) ;
2016-07-06 21:55:52 +09:00
else
2018-11-26 08:21:49 -07:00
nvme_rdma_process_nvme_rsp ( queue , cqe , wc ) ;
2016-07-06 21:55:52 +09:00
ib_dma_sync_single_for_device ( ibdev , qe - > dma , len , DMA_FROM_DEVICE ) ;
nvme_rdma_post_recv ( queue , qe ) ;
}
static int nvme_rdma_conn_established ( struct nvme_rdma_queue * queue )
{
int ret , i ;
for ( i = 0 ; i < queue - > queue_size ; i + + ) {
ret = nvme_rdma_post_recv ( queue , & queue - > rsp_ring [ i ] ) ;
if ( ret )
2021-09-06 11:51:34 +08:00
return ret ;
2016-07-06 21:55:52 +09:00
}
return 0 ;
}
static int nvme_rdma_conn_rejected ( struct nvme_rdma_queue * queue ,
struct rdma_cm_event * ev )
{
2016-10-26 12:36:47 -07:00
struct rdma_cm_id * cm_id = queue - > cm_id ;
int status = ev - > status ;
const char * rej_msg ;
const struct nvme_rdma_cm_rej * rej_data ;
u8 rej_data_len ;
rej_msg = rdma_reject_msg ( cm_id , status ) ;
rej_data = rdma_consumer_reject_data ( cm_id , ev , & rej_data_len ) ;
if ( rej_data & & rej_data_len > = sizeof ( u16 ) ) {
u16 sts = le16_to_cpu ( rej_data - > sts ) ;
2016-07-06 21:55:52 +09:00
dev_err ( queue - > ctrl - > ctrl . device ,
2016-10-26 12:36:47 -07:00
" Connect rejected: status %d (%s) nvme status %d (%s). \n " ,
status , rej_msg , sts , nvme_rdma_cm_msg ( sts ) ) ;
2016-07-06 21:55:52 +09:00
} else {
dev_err ( queue - > ctrl - > ctrl . device ,
2016-10-26 12:36:47 -07:00
" Connect rejected: status %d (%s). \n " , status , rej_msg ) ;
2016-07-06 21:55:52 +09:00
}
return - ECONNRESET ;
}
static int nvme_rdma_addr_resolved ( struct nvme_rdma_queue * queue )
{
2019-08-18 12:08:52 +03:00
struct nvme_ctrl * ctrl = & queue - > ctrl - > ctrl ;
2016-07-06 21:55:52 +09:00
int ret ;
2017-05-04 13:33:09 +03:00
ret = nvme_rdma_create_queue_ib ( queue ) ;
if ( ret )
return ret ;
2016-07-06 21:55:52 +09:00
2019-08-18 12:08:52 +03:00
if ( ctrl - > opts - > tos > = 0 )
rdma_set_service_type ( queue - > cm_id , ctrl - > opts - > tos ) ;
2016-07-06 21:55:52 +09:00
ret = rdma_resolve_route ( queue - > cm_id , NVME_RDMA_CONNECT_TIMEOUT_MS ) ;
if ( ret ) {
2019-08-18 12:08:52 +03:00
dev_err ( ctrl - > device , " rdma_resolve_route failed (%d). \n " ,
2016-07-06 21:55:52 +09:00
ret ) ;
goto out_destroy_queue ;
}
return 0 ;
out_destroy_queue :
nvme_rdma_destroy_queue_ib ( queue ) ;
return ret ;
}
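/*
* Route resolution completed: fill in the RDMA connection parameters and
* the NVMe/RDMA CM request private data (record format, queue id, host
* receive/send queue sizes) and start the connection with
* rdma_connect_locked().
*/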
static int nvme_rdma_route_resolved ( struct nvme_rdma_queue * queue )
{
struct nvme_rdma_ctrl * ctrl = queue - > ctrl ;
struct rdma_conn_param param = { } ;
2016-07-31 00:27:39 -07:00
struct nvme_rdma_cm_req priv = { } ;
2016-07-06 21:55:52 +09:00
int ret ;
param . qp_num = queue - > qp - > qp_num ;
param . flow_control = 1 ;
param . responder_resources = queue - > device - > dev - > attrs . max_qp_rd_atom ;
2016-06-22 15:06:00 +03:00
/* maximum retry count */
param . retry_count = 7 ;
2016-07-06 21:55:52 +09:00
param . rnr_retry_count = 7 ;
param . private_data = & priv ;
param . private_data_len = sizeof ( priv ) ;
priv . recfmt = cpu_to_le16 ( NVME_RDMA_CM_FMT_1_0 ) ;
priv . qid = cpu_to_le16 ( nvme_rdma_queue_idx ( queue ) ) ;
2016-08-17 15:00:26 -07:00
/*
* set the admin queue depth to the minimum size
* specified by the Fabrics standard .
*/
if ( priv . qid = = 0 ) {
2017-06-18 16:15:59 +03:00
priv . hrqsize = cpu_to_le16 ( NVME_AQ_DEPTH ) ;
priv . hsqsize = cpu_to_le16 ( NVME_AQ_DEPTH - 1 ) ;
2016-08-17 15:00:26 -07:00
} else {
2016-08-17 15:00:27 -07:00
/*
* The current interpretation of the fabrics spec is that hrqsize
* must be at least sqsize + 1, i.e. the 1's-based representation
* of sqsize.
*/
2016-08-17 15:00:26 -07:00
priv . hrqsize = cpu_to_le16 ( queue - > queue_size ) ;
2016-08-17 15:00:27 -07:00
priv . hsqsize = cpu_to_le16 ( queue - > ctrl - > ctrl . sqsize ) ;
2016-08-17 15:00:26 -07:00
}
2016-07-06 21:55:52 +09:00
2020-10-26 11:25:49 -03:00
ret = rdma_connect_locked ( queue - > cm_id , & param ) ;
2016-07-06 21:55:52 +09:00
if ( ret ) {
dev_err ( ctrl - > ctrl . device ,
2020-10-26 11:25:49 -03:00
" rdma_connect_locked failed (%d). \n " , ret ) ;
2021-09-06 11:51:34 +08:00
return ret ;
2016-07-06 21:55:52 +09:00
}
return 0 ;
}
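/*
* RDMA CM event handler: drive address/route resolution and connection
* establishment, translate rejections and transport errors into cm_error,
* and trigger error recovery on disconnects and unexpected events.
*/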
static int nvme_rdma_cm_handler ( struct rdma_cm_id * cm_id ,
struct rdma_cm_event * ev )
{
struct nvme_rdma_queue * queue = cm_id - > context ;
int cm_error = 0 ;
dev_dbg ( queue - > ctrl - > ctrl . device , " %s (%d): status %d id %p \n " ,
rdma_event_msg ( ev - > event ) , ev - > event ,
ev - > status , cm_id ) ;
switch ( ev - > event ) {
case RDMA_CM_EVENT_ADDR_RESOLVED :
cm_error = nvme_rdma_addr_resolved ( queue ) ;
break ;
case RDMA_CM_EVENT_ROUTE_RESOLVED :
cm_error = nvme_rdma_route_resolved ( queue ) ;
break ;
case RDMA_CM_EVENT_ESTABLISHED :
queue - > cm_error = nvme_rdma_conn_established ( queue ) ;
/* complete cm_done regardless of success/failure */
complete ( & queue - > cm_done ) ;
return 0 ;
case RDMA_CM_EVENT_REJECTED :
cm_error = nvme_rdma_conn_rejected ( queue , ev ) ;
break ;
case RDMA_CM_EVENT_ROUTE_ERROR :
case RDMA_CM_EVENT_CONNECT_ERROR :
case RDMA_CM_EVENT_UNREACHABLE :
2017-05-04 13:33:10 +03:00
case RDMA_CM_EVENT_ADDR_ERROR :
2016-07-06 21:55:52 +09:00
dev_dbg ( queue - > ctrl - > ctrl . device ,
" CM error event %d \n " , ev - > event ) ;
cm_error = - ECONNRESET ;
break ;
case RDMA_CM_EVENT_DISCONNECTED :
case RDMA_CM_EVENT_ADDR_CHANGE :
case RDMA_CM_EVENT_TIMEWAIT_EXIT :
dev_dbg ( queue - > ctrl - > ctrl . device ,
" disconnect received - connection closed \n " ) ;
nvme_rdma_error_recovery ( queue - > ctrl ) ;
break ;
case RDMA_CM_EVENT_DEVICE_REMOVAL :
2016-09-02 09:01:54 -07:00
/* device removal is handled via the ib_client API */
break ;
2016-07-06 21:55:52 +09:00
default :
dev_err ( queue - > ctrl - > ctrl . device ,
" Unexpected RDMA CM event (%d) \n " , ev - > event ) ;
nvme_rdma_error_recovery ( queue - > ctrl ) ;
break ;
}
if ( cm_error ) {
queue - > cm_error = cm_error ;
complete ( & queue - > cm_done ) ;
}
return 0 ;
}
2020-07-29 02:36:03 -07:00
static void nvme_rdma_complete_timed_out ( struct request * rq )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
struct nvme_rdma_queue * queue = req - > queue ;
nvme_rdma_stop_queue ( queue ) ;
2020-10-22 10:15:23 +08:00
if ( blk_mq_request_started ( rq ) & & ! blk_mq_request_completed ( rq ) ) {
2020-07-29 02:36:03 -07:00
nvme_req ( rq ) - > status = NVME_SC_HOST_ABORTED_CMD ;
blk_mq_complete_request ( rq ) ;
}
}
2016-07-06 21:55:52 +09:00
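/*
* Block layer timeout handler: if the controller is not LIVE the request is
* completed right away with NVME_SC_HOST_ABORTED_CMD (see
* nvme_rdma_complete_timed_out() above); otherwise error recovery is
* scheduled and the timer is reset.
*/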
static enum blk_eh_timer_return
nvme_rdma_timeout ( struct request * rq , bool reserved )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
2019-01-08 00:53:22 -08:00
struct nvme_rdma_queue * queue = req - > queue ;
struct nvme_rdma_ctrl * ctrl = queue - > ctrl ;
2016-07-06 21:55:52 +09:00
2019-01-08 00:53:22 -08:00
dev_warn ( ctrl - > ctrl . device , " I/O %d QID %d timeout \n " ,
rq - > tag , nvme_rdma_queue_idx ( queue ) ) ;
2017-10-22 09:37:04 +00:00
2019-01-08 00:53:22 -08:00
if ( ctrl - > ctrl . state ! = NVME_CTRL_LIVE ) {
/*
2020-07-29 02:36:03 -07:00
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller
* teardown or setup sequence:
* - ctrl disable/shutdown fabrics requests
* - connect requests
* - initialization admin requests
* - I/O requests that entered after unquiescing and
*   the controller stopped responding
*
* All other requests should be cancelled by the error
* recovery work, so it's fine that we fail it here.
2019-01-08 00:53:22 -08:00
*/
2020-07-29 02:36:03 -07:00
nvme_rdma_complete_timed_out ( rq ) ;
2019-01-08 00:53:22 -08:00
return BLK_EH_DONE ;
}
2016-07-06 21:55:52 +09:00
2020-07-29 02:36:03 -07:00
/*
* LIVE state should trigger the normal error recovery which will
* handle completing this request .
*/
2019-01-08 00:53:22 -08:00
nvme_rdma_error_recovery ( ctrl ) ;
return BLK_EH_RESET_TIMER ;
2016-07-06 21:55:52 +09:00
}
2017-06-03 09:38:05 +02:00
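/*
* Queue a request: check queue readiness, DMA-map the command SQE, build
* the NVMe command, decide whether a signature MR is needed for PI, map the
* data and post the send WR, chaining the MR registration WR when one was
* used.
*/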
static blk_status_t nvme_rdma_queue_rq ( struct blk_mq_hw_ctx * hctx ,
2016-07-06 21:55:52 +09:00
const struct blk_mq_queue_data * bd )
{
struct nvme_ns * ns = hctx - > queue - > queuedata ;
struct nvme_rdma_queue * queue = hctx - > driver_data ;
struct request * rq = bd - > rq ;
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
struct nvme_rdma_qe * sqe = & req - > sqe ;
2021-03-17 13:37:03 -07:00
struct nvme_command * c = nvme_req ( rq ) - > cmd ;
2016-07-06 21:55:52 +09:00
struct ib_device * dev ;
2018-06-11 17:34:06 +02:00
bool queue_ready = test_bit ( NVME_RDMA_Q_LIVE , & queue - > flags ) ;
2017-06-03 09:38:05 +02:00
blk_status_t ret ;
int err ;
2016-07-06 21:55:52 +09:00
WARN_ON_ONCE ( rq - > tag < 0 ) ;
2021-04-26 10:53:10 +08:00
if ( ! nvme_check_ready ( & queue - > ctrl - > ctrl , rq , queue_ready ) )
return nvme_fail_nonready_command ( & queue - > ctrl - > ctrl , rq ) ;
2016-11-02 08:49:18 -06:00
2016-07-06 21:55:52 +09:00
dev = queue - > device - > dev ;
2019-06-06 12:27:36 +03:00
req - > sqe . dma = ib_dma_map_single ( dev , req - > sqe . data ,
sizeof ( struct nvme_command ) ,
DMA_TO_DEVICE ) ;
err = ib_dma_mapping_error ( dev , req - > sqe . dma ) ;
if ( unlikely ( err ) )
return BLK_STS_RESOURCE ;
2016-07-06 21:55:52 +09:00
ib_dma_sync_single_for_cpu ( dev , sqe - > dma ,
sizeof ( struct nvme_command ) , DMA_TO_DEVICE ) ;
2021-03-17 13:37:03 -07:00
ret = nvme_setup_cmd ( ns , rq ) ;
2017-06-03 09:38:05 +02:00
if ( ret )
2019-06-06 12:27:36 +03:00
goto unmap_qe ;
2016-07-06 21:55:52 +09:00
blk_mq_start_request ( rq ) ;
2020-05-19 17:05:56 +03:00
if ( IS_ENABLED ( CONFIG_BLK_DEV_INTEGRITY ) & &
queue - > pi_support & &
( c - > common . opcode = = nvme_cmd_write | |
c - > common . opcode = = nvme_cmd_read ) & &
nvme_ns_has_pi ( ns ) )
req - > use_sig_mr = true ;
else
req - > use_sig_mr = false ;
2017-06-03 09:38:05 +02:00
err = nvme_rdma_map_data ( queue , rq , c ) ;
2017-08-14 15:29:26 +03:00
if ( unlikely ( err < 0 ) ) {
2016-07-06 21:55:52 +09:00
dev_err ( queue - > ctrl - > ctrl . device ,
2017-06-03 09:38:05 +02:00
" Failed to map data (%d) \n " , err ) ;
2016-07-06 21:55:52 +09:00
goto err ;
}
2017-11-23 17:35:21 +02:00
sqe - > cqe . done = nvme_rdma_send_done ;
2016-07-06 21:55:52 +09:00
ib_dma_sync_single_for_device ( dev , sqe - > dma ,
sizeof ( struct nvme_command ) , DMA_TO_DEVICE ) ;
2017-06-03 09:38:05 +02:00
err = nvme_rdma_post_send ( queue , sqe , req - > sge , req - > num_sge ,
2017-11-26 10:40:55 +00:00
req - > mr ? & req - > reg_wr . wr : NULL ) ;
2019-10-13 19:57:36 +03:00
if ( unlikely ( err ) )
goto err_unmap ;
2016-07-06 21:55:52 +09:00
2017-06-03 09:38:05 +02:00
return BLK_STS_OK ;
2019-06-06 12:27:36 +03:00
2019-10-13 19:57:36 +03:00
err_unmap :
nvme_rdma_unmap_data ( queue , rq ) ;
2016-07-06 21:55:52 +09:00
err :
2021-02-01 11:49:40 +08:00
if ( err = = - EIO )
ret = nvme_host_path_error ( rq ) ;
else if ( err = = - ENOMEM | | err = = - EAGAIN )
2019-06-06 12:27:36 +03:00
ret = BLK_STS_RESOURCE ;
else
ret = BLK_STS_IOERR ;
2019-10-13 19:57:36 +03:00
nvme_cleanup_cmd ( rq ) ;
2019-06-06 12:27:36 +03:00
unmap_qe :
ib_dma_unmap_single ( dev , req - > sqe . dma , sizeof ( struct nvme_command ) ,
DMA_TO_DEVICE ) ;
return ret ;
2016-07-06 21:55:52 +09:00
}
2021-10-12 09:24:29 -06:00
static int nvme_rdma_poll ( struct blk_mq_hw_ctx * hctx , struct io_comp_batch * iob )
2018-12-14 11:06:10 -08:00
{
struct nvme_rdma_queue * queue = hctx - > driver_data ;
return ib_process_cq_direct ( queue - > ib_cq , - 1 ) ;
}
2020-05-19 17:05:56 +03:00
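/*
* After a PI-protected request completes, query the signature MR status and
* translate any guard/reftag/apptag error into the corresponding NVMe
* status code so it is reported back through the core.
*/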
static void nvme_rdma_check_pi_status ( struct nvme_rdma_request * req )
{
struct request * rq = blk_mq_rq_from_pdu ( req ) ;
struct ib_mr_status mr_status ;
int ret ;
ret = ib_check_mr_status ( req - > mr , IB_MR_CHECK_SIG_STATUS , & mr_status ) ;
if ( ret ) {
pr_err ( " ib_check_mr_status failed, ret %d \n " , ret ) ;
nvme_req ( rq ) - > status = NVME_SC_INVALID_PI ;
return ;
}
if ( mr_status . fail_status & IB_MR_CHECK_SIG_STATUS ) {
switch ( mr_status . sig_err . err_type ) {
case IB_SIG_BAD_GUARD :
nvme_req ( rq ) - > status = NVME_SC_GUARD_CHECK ;
break ;
case IB_SIG_BAD_REFTAG :
nvme_req ( rq ) - > status = NVME_SC_REFTAG_CHECK ;
break ;
case IB_SIG_BAD_APPTAG :
nvme_req ( rq ) - > status = NVME_SC_APPTAG_CHECK ;
break ;
}
pr_err ( " PI error found type %d expected 0x%x vs actual 0x%x \n " ,
mr_status . sig_err . err_type , mr_status . sig_err . expected ,
mr_status . sig_err . actual ) ;
}
}
2016-07-06 21:55:52 +09:00
static void nvme_rdma_complete_rq ( struct request * rq )
{
struct nvme_rdma_request * req = blk_mq_rq_to_pdu ( rq ) ;
2019-06-06 12:27:36 +03:00
struct nvme_rdma_queue * queue = req - > queue ;
struct ib_device * ibdev = queue - > device - > dev ;
2016-07-06 21:55:52 +09:00
2020-05-19 17:05:56 +03:00
if ( req - > use_sig_mr )
nvme_rdma_check_pi_status ( req ) ;
2019-06-06 12:27:36 +03:00
nvme_rdma_unmap_data ( queue , rq ) ;
ib_dma_unmap_single ( ibdev , req - > sqe . dma , sizeof ( struct nvme_command ) ,
DMA_TO_DEVICE ) ;
2017-03-30 13:41:32 +02:00
nvme_complete_rq ( rq ) ;
2016-07-06 21:55:52 +09:00
}
2017-07-13 11:09:44 +03:00
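/*
* Map blk-mq hardware contexts to the controller's I/O queues: default and
* read queues are either split or shared depending on nr_write_queues, both
* are spread with blk_mq_rdma_map_queues(), and any dedicated poll queues
* are mapped after them with blk_mq_map_queues().
*/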
static int nvme_rdma_map_queues ( struct blk_mq_tag_set * set )
{
struct nvme_rdma_ctrl * ctrl = set - > driver_data ;
2019-05-28 22:49:04 -07:00
struct nvmf_ctrl_options * opts = ctrl - > ctrl . opts ;
2017-07-13 11:09:44 +03:00
2019-05-28 22:49:04 -07:00
if ( opts - > nr_write_queues & & ctrl - > io_queues [ HCTX_TYPE_READ ] ) {
2018-12-11 23:38:58 -08:00
/* separate read/write queues */
2019-05-28 22:49:04 -07:00
set - > map [ HCTX_TYPE_DEFAULT ] . nr_queues =
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
set - > map [ HCTX_TYPE_DEFAULT ] . queue_offset = 0 ;
set - > map [ HCTX_TYPE_READ ] . nr_queues =
ctrl - > io_queues [ HCTX_TYPE_READ ] ;
2018-12-11 23:38:58 -08:00
set - > map [ HCTX_TYPE_READ ] . queue_offset =
2019-05-28 22:49:04 -07:00
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
2018-12-11 23:38:58 -08:00
} else {
2019-05-28 22:49:04 -07:00
/* shared read/write queues */
set - > map [ HCTX_TYPE_DEFAULT ] . nr_queues =
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
set - > map [ HCTX_TYPE_DEFAULT ] . queue_offset = 0 ;
set - > map [ HCTX_TYPE_READ ] . nr_queues =
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
2018-12-11 23:38:58 -08:00
set - > map [ HCTX_TYPE_READ ] . queue_offset = 0 ;
}
blk_mq_rdma_map_queues ( & set - > map [ HCTX_TYPE_DEFAULT ] ,
ctrl - > device - > dev , 0 ) ;
blk_mq_rdma_map_queues ( & set - > map [ HCTX_TYPE_READ ] ,
ctrl - > device - > dev , 0 ) ;
2018-12-14 11:06:10 -08:00
2019-05-28 22:49:04 -07:00
if ( opts - > nr_poll_queues & & ctrl - > io_queues [ HCTX_TYPE_POLL ] ) {
/* map dedicated poll queues only if we have queues left */
2018-12-14 11:06:10 -08:00
set - > map [ HCTX_TYPE_POLL ] . nr_queues =
2019-01-18 16:43:24 -08:00
ctrl - > io_queues [ HCTX_TYPE_POLL ] ;
2018-12-14 11:06:10 -08:00
set - > map [ HCTX_TYPE_POLL ] . queue_offset =
2019-05-28 22:49:04 -07:00
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] +
ctrl - > io_queues [ HCTX_TYPE_READ ] ;
2018-12-14 11:06:10 -08:00
blk_mq_map_queues ( & set - > map [ HCTX_TYPE_POLL ] ) ;
}
2019-05-28 22:49:04 -07:00
dev_info ( ctrl - > ctrl . device ,
" mapped %d/%d/%d default/read/poll queues. \n " ,
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ,
ctrl - > io_queues [ HCTX_TYPE_READ ] ,
ctrl - > io_queues [ HCTX_TYPE_POLL ] ) ;
2018-12-11 23:38:58 -08:00
return 0 ;
2017-07-13 11:09:44 +03:00
}
2017-03-30 13:39:16 -07:00
static const struct blk_mq_ops nvme_rdma_mq_ops = {
2016-07-06 21:55:52 +09:00
. queue_rq = nvme_rdma_queue_rq ,
. complete = nvme_rdma_complete_rq ,
. init_request = nvme_rdma_init_request ,
. exit_request = nvme_rdma_exit_request ,
. init_hctx = nvme_rdma_init_hctx ,
. timeout = nvme_rdma_timeout ,
2017-07-13 11:09:44 +03:00
. map_queues = nvme_rdma_map_queues ,
2018-12-14 11:06:10 -08:00
. poll = nvme_rdma_poll ,
2016-07-06 21:55:52 +09:00
} ;
2017-03-30 13:39:16 -07:00
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
2016-07-06 21:55:52 +09:00
. queue_rq = nvme_rdma_queue_rq ,
. complete = nvme_rdma_complete_rq ,
2017-06-13 09:15:19 +02:00
. init_request = nvme_rdma_init_request ,
. exit_request = nvme_rdma_exit_request ,
2016-07-06 21:55:52 +09:00
. init_hctx = nvme_rdma_init_admin_hctx ,
. timeout = nvme_rdma_timeout ,
} ;
2017-07-10 09:22:31 +03:00
static void nvme_rdma_shutdown_ctrl ( struct nvme_rdma_ctrl * ctrl , bool shutdown )
2016-07-06 21:55:52 +09:00
{
2019-01-01 00:19:30 -08:00
cancel_work_sync ( & ctrl - > err_work ) ;
cancel_delayed_work_sync ( & ctrl - > reconnect_work ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_teardown_io_queues ( ctrl , shutdown ) ;
2021-10-14 16:17:06 +08:00
nvme_stop_admin_queue ( & ctrl - > ctrl ) ;
2017-07-10 09:22:31 +03:00
if ( shutdown )
2016-07-06 21:55:52 +09:00
nvme_shutdown_ctrl ( & ctrl - > ctrl ) ;
2017-07-10 09:22:31 +03:00
else
2019-07-22 17:06:54 -07:00
nvme_disable_ctrl ( & ctrl - > ctrl ) ;
2018-07-09 12:49:07 +03:00
nvme_rdma_teardown_admin_queue ( ctrl , shutdown ) ;
2016-07-06 21:55:52 +09:00
}
2017-10-29 10:44:29 +02:00
static void nvme_rdma_delete_ctrl ( struct nvme_ctrl * ctrl )
2016-07-24 09:29:51 +03:00
{
2017-10-29 10:44:30 +02:00
nvme_rdma_shutdown_ctrl ( to_rdma_ctrl ( ctrl ) , true ) ;
2016-07-06 21:55:52 +09:00
}
static void nvme_rdma_reset_ctrl_work ( struct work_struct * work )
{
2017-06-15 15:41:08 +02:00
struct nvme_rdma_ctrl * ctrl =
container_of ( work , struct nvme_rdma_ctrl , ctrl . reset_work ) ;
2016-07-06 21:55:52 +09:00
2017-07-02 10:56:43 +03:00
nvme_stop_ctrl ( & ctrl - > ctrl ) ;
2017-07-10 09:22:31 +03:00
nvme_rdma_shutdown_ctrl ( ctrl , false ) ;
2016-07-06 21:55:52 +09:00
2018-01-31 18:31:24 +02:00
if ( ! nvme_change_ctrl_state ( & ctrl - > ctrl , NVME_CTRL_CONNECTING ) ) {
2017-12-21 14:54:15 +02:00
/* state change failure should never happen */
WARN_ON_ONCE ( 1 ) ;
return ;
}
2018-07-09 12:49:06 +03:00
if ( nvme_rdma_setup_ctrl ( ctrl , false ) )
2017-07-10 09:22:38 +03:00
goto out_fail ;
2016-07-06 21:55:52 +09:00
return ;
2017-07-10 09:22:38 +03:00
out_fail :
2018-01-17 11:01:14 +00:00
+ + ctrl - > ctrl . nr_reconnects ;
nvme_rdma_reconnect_or_remove ( ctrl ) ;
2016-07-06 21:55:52 +09:00
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
. name = " rdma " ,
. module = THIS_MODULE ,
2020-05-19 17:05:56 +03:00
. flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED ,
2016-07-06 21:55:52 +09:00
. reg_read32 = nvmf_reg_read32 ,
. reg_read64 = nvmf_reg_read64 ,
. reg_write32 = nvmf_reg_write32 ,
. free_ctrl = nvme_rdma_free_ctrl ,
. submit_async_event = nvme_rdma_submit_async_event ,
2017-10-29 10:44:29 +02:00
. delete_ctrl = nvme_rdma_delete_ctrl ,
2016-07-06 21:55:52 +09:00
. get_address = nvmf_get_address ,
} ;
2017-10-20 16:17:09 -07:00
/*
* Fails a connection request if it matches an existing controller
* (association) with the same tuple:
* <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
*
* If the local address is not specified in the request, it will match an
* existing controller with all the other parameters the same and no
* local port address specified as well.
*
* The ports don't need to be compared as they are intrinsically
* already matched by the port pointers supplied.
*/
static bool
nvme_rdma_existing_controller ( struct nvmf_ctrl_options * opts )
{
struct nvme_rdma_ctrl * ctrl ;
bool found = false ;
mutex_lock ( & nvme_rdma_ctrl_mutex ) ;
list_for_each_entry ( ctrl , & nvme_rdma_ctrl_list , list ) {
2018-10-18 17:40:40 -07:00
found = nvmf_ip_options_match ( & ctrl - > ctrl , opts ) ;
2017-10-20 16:17:09 -07:00
if ( found )
break ;
}
mutex_unlock ( & nvme_rdma_ctrl_mutex ) ;
return found ;
}
2016-07-06 21:55:52 +09:00
static struct nvme_ctrl * nvme_rdma_create_ctrl ( struct device * dev ,
struct nvmf_ctrl_options * opts )
{
struct nvme_rdma_ctrl * ctrl ;
int ret ;
bool changed ;
ctrl = kzalloc ( sizeof ( * ctrl ) , GFP_KERNEL ) ;
if ( ! ctrl )
return ERR_PTR ( - ENOMEM ) ;
ctrl - > ctrl . opts = opts ;
INIT_LIST_HEAD ( & ctrl - > list ) ;
2018-10-19 00:50:29 -07:00
if ( ! ( opts - > mask & NVMF_OPT_TRSVCID ) ) {
opts - > trsvcid =
kstrdup ( __stringify ( NVME_RDMA_IP_PORT ) , GFP_KERNEL ) ;
if ( ! opts - > trsvcid ) {
ret = - ENOMEM ;
goto out_free_ctrl ;
}
opts - > mask | = NVMF_OPT_TRSVCID ;
}
2017-02-05 21:49:32 +02:00
ret = inet_pton_with_scope ( & init_net , AF_UNSPEC ,
2018-10-19 00:50:29 -07:00
opts - > traddr , opts - > trsvcid , & ctrl - > addr ) ;
2016-07-06 21:55:52 +09:00
if ( ret ) {
2018-10-19 00:50:29 -07:00
pr_err ( " malformed address passed: %s:%s \n " ,
opts - > traddr , opts - > trsvcid ) ;
2016-07-06 21:55:52 +09:00
goto out_free_ctrl ;
}
2017-02-19 20:08:03 +02:00
if ( opts - > mask & NVMF_OPT_HOST_TRADDR ) {
2017-02-05 21:49:32 +02:00
ret = inet_pton_with_scope ( & init_net , AF_UNSPEC ,
opts - > host_traddr , NULL , & ctrl - > src_addr ) ;
2017-02-19 20:08:03 +02:00
if ( ret ) {
2017-02-05 21:49:32 +02:00
pr_err ( " malformed src address passed: %s \n " ,
2017-02-19 20:08:03 +02:00
opts - > host_traddr ) ;
goto out_free_ctrl ;
}
}
2017-10-20 16:17:09 -07:00
if ( ! opts - > duplicate_connect & & nvme_rdma_existing_controller ( opts ) ) {
ret = - EALREADY ;
goto out_free_ctrl ;
}
2016-07-06 21:55:52 +09:00
INIT_DELAYED_WORK ( & ctrl - > reconnect_work ,
nvme_rdma_reconnect_ctrl_work ) ;
INIT_WORK ( & ctrl - > err_work , nvme_rdma_error_recovery_work ) ;
2017-06-15 15:41:08 +02:00
INIT_WORK ( & ctrl - > ctrl . reset_work , nvme_rdma_reset_ctrl_work ) ;
2016-07-06 21:55:52 +09:00
2018-12-14 11:06:10 -08:00
ctrl - > ctrl . queue_count = opts - > nr_io_queues + opts - > nr_write_queues +
opts - > nr_poll_queues + 1 ;
2016-08-17 15:00:27 -07:00
ctrl - > ctrl . sqsize = opts - > queue_size - 1 ;
2016-07-06 21:55:52 +09:00
ctrl - > ctrl . kato = opts - > kato ;
ret = - ENOMEM ;
2017-04-24 10:58:29 +03:00
ctrl - > queues = kcalloc ( ctrl - > ctrl . queue_count , sizeof ( * ctrl - > queues ) ,
2016-07-06 21:55:52 +09:00
GFP_KERNEL ) ;
if ( ! ctrl - > queues )
2018-06-19 15:34:09 +03:00
goto out_free_ctrl ;
ret = nvme_init_ctrl ( & ctrl - > ctrl , dev , & nvme_rdma_ctrl_ops ,
0 /* no quirks, we're perfect! */ ) ;
if ( ret )
goto out_kfree_queues ;
2016-07-06 21:55:52 +09:00
2018-01-31 18:31:25 +02:00
changed = nvme_change_ctrl_state ( & ctrl - > ctrl , NVME_CTRL_CONNECTING ) ;
WARN_ON_ONCE ( ! changed ) ;
2018-07-09 12:49:06 +03:00
ret = nvme_rdma_setup_ctrl ( ctrl , true ) ;
2016-07-06 21:55:52 +09:00
if ( ret )
2018-06-19 15:34:09 +03:00
goto out_uninit_ctrl ;
2016-07-06 21:55:52 +09:00
2017-02-05 21:49:32 +02:00
dev_info ( ctrl - > ctrl . device , " new ctrl: NQN \" %s \" , addr %pISpcs \n " ,
2021-09-22 08:35:25 +02:00
nvmf_ctrl_subsysnqn ( & ctrl - > ctrl ) , & ctrl - > addr ) ;
2016-07-06 21:55:52 +09:00
mutex_lock ( & nvme_rdma_ctrl_mutex ) ;
list_add_tail ( & ctrl - > list , & nvme_rdma_ctrl_list ) ;
mutex_unlock ( & nvme_rdma_ctrl_mutex ) ;
return & ctrl - > ctrl ;
out_uninit_ctrl :
nvme_uninit_ctrl ( & ctrl - > ctrl ) ;
nvme_put_ctrl ( & ctrl - > ctrl ) ;
if ( ret > 0 )
ret = - EIO ;
return ERR_PTR ( ret ) ;
2018-06-19 15:34:09 +03:00
out_kfree_queues :
kfree ( ctrl - > queues ) ;
2016-07-06 21:55:52 +09:00
out_free_ctrl :
kfree ( ctrl ) ;
return ERR_PTR ( ret ) ;
}
static struct nvmf_transport_ops nvme_rdma_transport = {
. name = " rdma " ,
2017-12-25 14:18:30 +02:00
. module = THIS_MODULE ,
2016-07-06 21:55:52 +09:00
. required_opts = NVMF_OPT_TRADDR ,
2017-02-19 20:08:03 +02:00
. allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
2018-12-11 23:38:58 -08:00
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
2019-08-18 12:08:52 +03:00
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
NVMF_OPT_TOS ,
2016-07-06 21:55:52 +09:00
. create_ctrl = nvme_rdma_create_ctrl ,
} ;
2016-09-02 09:01:54 -07:00
static void nvme_rdma_remove_one ( struct ib_device * ib_device , void * client_data )
{
struct nvme_rdma_ctrl * ctrl ;
2018-02-28 13:12:39 +02:00
struct nvme_rdma_device * ndev ;
bool found = false ;
mutex_lock ( & device_list_mutex ) ;
list_for_each_entry ( ndev , & device_list , entry ) {
if ( ndev - > dev = = ib_device ) {
found = true ;
break ;
}
}
mutex_unlock ( & device_list_mutex ) ;
if ( ! found )
return ;
2016-09-02 09:01:54 -07:00
/* Delete all controllers using this device */
mutex_lock ( & nvme_rdma_ctrl_mutex ) ;
list_for_each_entry ( ctrl , & nvme_rdma_ctrl_list , list ) {
if ( ctrl - > device - > dev ! = ib_device )
continue ;
2017-10-29 10:44:29 +02:00
nvme_delete_ctrl ( & ctrl - > ctrl ) ;
2016-09-02 09:01:54 -07:00
}
mutex_unlock ( & nvme_rdma_ctrl_mutex ) ;
2018-01-14 12:39:02 +02:00
flush_workqueue ( nvme_delete_wq ) ;
2016-09-02 09:01:54 -07:00
}
static struct ib_client nvme_rdma_ib_client = {
. name = " nvme_rdma " ,
. remove = nvme_rdma_remove_one
} ;
2016-07-06 21:55:52 +09:00
static int __init nvme_rdma_init_module ( void )
{
2016-09-02 09:01:54 -07:00
int ret ;
ret = ib_register_client ( & nvme_rdma_ib_client ) ;
2017-03-19 06:21:42 +02:00
if ( ret )
2017-06-07 20:31:55 +02:00
return ret ;
2017-03-19 06:21:42 +02:00
ret = nvmf_register_transport ( & nvme_rdma_transport ) ;
if ( ret )
goto err_unreg_client ;
2016-09-02 09:01:54 -07:00
2017-03-19 06:21:42 +02:00
return 0 ;
2016-09-02 09:01:54 -07:00
2017-03-19 06:21:42 +02:00
err_unreg_client :
ib_unregister_client ( & nvme_rdma_ib_client ) ;
return ret ;
2016-07-06 21:55:52 +09:00
}
static void __exit nvme_rdma_cleanup_module ( void )
{
2019-10-29 16:42:27 +02:00
struct nvme_rdma_ctrl * ctrl ;
2016-07-06 21:55:52 +09:00
nvmf_unregister_transport ( & nvme_rdma_transport ) ;
2016-09-02 09:01:54 -07:00
ib_unregister_client ( & nvme_rdma_ib_client ) ;
2019-10-29 16:42:27 +02:00
mutex_lock ( & nvme_rdma_ctrl_mutex ) ;
list_for_each_entry ( ctrl , & nvme_rdma_ctrl_list , list )
nvme_delete_ctrl ( & ctrl - > ctrl ) ;
mutex_unlock ( & nvme_rdma_ctrl_mutex ) ;
flush_workqueue ( nvme_delete_wq ) ;
2016-07-06 21:55:52 +09:00
}
module_init ( nvme_rdma_init_module ) ;
module_exit ( nvme_rdma_cleanup_module ) ;
MODULE_LICENSE ( " GPL v2 " ) ;