// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP host.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/busy_poll.h>

#include "nvme.h"
#include "fabrics.h"

struct nvme_tcp_queue;

/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value being sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique for some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU = 0,
	NVME_TCP_SEND_H2C_PDU,
	NVME_TCP_SEND_DATA,
	NVME_TCP_SEND_DDGST,
};

struct nvme_tcp_request {
	struct nvme_request req;
	void *pdu;
	struct nvme_tcp_queue *queue;
	u32 data_len;
	u32 pdu_len;
	u32 pdu_sent;
	u16 ttag;
	struct list_head entry;
	struct llist_node lentry;
	__le32 ddgst;

	struct bio *curr_bio;
	struct iov_iter iter;

	/* send state */
	size_t offset;
	size_t data_sent;
	enum nvme_tcp_send_state state;
};

enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED = 0,
	NVME_TCP_Q_LIVE = 1,
	NVME_TCP_Q_POLLING = 2,
};

enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU = 0,
	NVME_TCP_RECV_DATA,
	NVME_TCP_RECV_DDGST,
};

struct nvme_tcp_ctrl;
struct nvme_tcp_queue {
	struct socket *sock;
	struct work_struct io_work;
	int io_cpu;

	struct mutex send_mutex;
	struct llist_head req_list;
	struct list_head send_list;
	bool more_requests;

	/* recv state */
	void *pdu;
	int pdu_remaining;
	int pdu_offset;
	size_t data_remaining;
	size_t ddgst_remaining;
	unsigned int nr_cqe;

	/* send state */
	struct nvme_tcp_request *request;

	int queue_size;
	size_t cmnd_capsule_len;
	struct nvme_tcp_ctrl *ctrl;
	unsigned long flags;
	bool rd_enabled;

	bool hdr_digest;
	bool data_digest;
	struct ahash_request *rcv_hash;
	struct ahash_request *snd_hash;
	__le32 exp_ddgst;
	__le32 recv_ddgst;

	struct page_frag_cache pf_cache;

	void (*state_change)(struct sock *);
	void (*data_ready)(struct sock *);
	void (*write_space)(struct sock *);
};
struct nvme_tcp_ctrl {
	/* read only in the hot path */
	struct nvme_tcp_queue *queues;
	struct blk_mq_tag_set tag_set;

	/* other member variables */
	struct list_head list;
	struct blk_mq_tag_set admin_tag_set;
	struct sockaddr_storage addr;
	struct sockaddr_storage src_addr;
	struct nvme_ctrl ctrl;

	struct work_struct err_work;
	struct delayed_work connect_work;
	struct nvme_tcp_request async_req;
	u32 io_queues[HCTX_MAX_TYPES];
};

static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);

static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
}

static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
{
	return queue - queue->ctrl->queues;
}

static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
{
	u32 queue_idx = nvme_tcp_queue_id(queue);

	if (queue_idx == 0)
		return queue->ctrl->admin_tag_set.tags[queue_idx];
	return queue->ctrl->tag_set.tags[queue_idx - 1];
}

static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
{
	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
{
	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
{
	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
{
	return req == &req->queue->ctrl->async_req;
}

static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
{
	struct request *rq;

	if (unlikely(nvme_tcp_async_req(req)))
		return false; /* async events don't have a request */

	rq = blk_mq_rq_from_pdu(req);

	return rq_data_dir(rq) == WRITE && req->data_len &&
		req->data_len <= nvme_tcp_inline_data_size(req->queue);
}

static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_page;
}

static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_offset + req->iter.iov_offset;
}

static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
{
	return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset,
			req->pdu_len - req->pdu_sent);
}

static inline size_t nvme_tcp_req_offset(struct nvme_tcp_request *req)
{
	return req->iter.iov_offset;
}

static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
{
	return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
			req->pdu_len - req->pdu_sent : 0;
}

static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
		int len)
{
	return nvme_tcp_pdu_data_left(req) <= len;
}
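
/*
 * Set up req->iter as a bvec iterator over the request payload: either the
 * single special_vec for requests with a special payload, or the bio vectors
 * of the current bio, starting wherever that bio has already advanced to.
 */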
static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
		unsigned int dir)
{
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct bio_vec *vec;
	unsigned int size;
	int nsegs;
	size_t offset;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		vec = &rq->special_vec;
		nsegs = 1;
		size = blk_rq_payload_bytes(rq);
		offset = 0;
	} else {
		struct bio *bio = req->curr_bio;

		vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
		nsegs = bio_segments(bio);
		size = bio->bi_iter.bi_size;
		offset = bio->bi_iter.bi_bvec_done;
	}

	iov_iter_bvec(&req->iter, dir, vec, nsegs, size);
	req->iter.iov_offset = offset;
}
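
/*
 * Account @len bytes as sent and advance the data iterator; when the current
 * bio is fully consumed but the request still has data left, move on to the
 * next bio in the chain and re-initialize the iterator for it.
 */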
static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
		int len)
{
	req->data_sent += len;
	req->pdu_sent += len;
	iov_iter_advance(&req->iter, len);
	if (!iov_iter_count(&req->iter) &&
	    req->data_sent < req->data_len) {
		req->curr_bio = req->curr_bio->bi_next;
		nvme_tcp_init_iter(req, WRITE);
	}
}

static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	int ret;

	/* drain the send queue as much as we can... */
	do {
		ret = nvme_tcp_try_send(queue);
	} while (ret > 0);
}
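
/*
 * Queue a request for transmission. Requests are staged on the lockless
 * req_list; if the queue was otherwise idle, the caller is running on the
 * queue's io_cpu, and send_mutex is available, the request is sent inline
 * from this context, otherwise io_work is scheduled to pick it up.
 */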
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
		bool sync, bool last)
{
	struct nvme_tcp_queue *queue = req->queue;
	bool empty;

	empty = llist_add(&req->lentry, &queue->req_list) &&
		list_empty(&queue->send_list) && !queue->request;

	/*
	 * If we are the first on the send_list, try to send directly;
	 * otherwise queue io_work. Also, only do that if we are on the
	 * same cpu, so we don't introduce contention.
	 */
	if (queue->io_cpu == __smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		queue->more_requests = !last;
		nvme_tcp_send_all(queue);
		queue->more_requests = false;
		mutex_unlock(&queue->send_mutex);
	} else if (last) {
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
}
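
/*
 * Splice everything that has accumulated on the lockless req_list onto the
 * io_work-private send_list, from which requests are actually fetched for
 * transmission.
 */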
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	struct llist_node *node;

	for (node = llist_del_all(&queue->req_list); node; node = node->next) {
		req = llist_entry(node, struct nvme_tcp_request, lentry);
		list_add(&req->entry, &queue->send_list);
	}
}

static inline struct nvme_tcp_request *
nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;

	req = list_first_entry_or_null(&queue->send_list,
			struct nvme_tcp_request, entry);
	if (!req) {
		nvme_tcp_process_req_list(queue);
		req = list_first_entry_or_null(&queue->send_list,
				struct nvme_tcp_request, entry);
		if (unlikely(!req))
			return NULL;
	}

	list_del(&req->entry);
	return req;
}
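
/*
 * NVMe/TCP header and data digests are CRC32C checksums, computed here via
 * the kernel crypto API ("crc32c" ahash). The helpers below update and
 * finalize the running data digest and verify the header digest trailing a
 * received PDU header.
 */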
static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
		__le32 *dgst)
{
	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
	crypto_ahash_final(hash);
}

static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
		struct page *page, off_t off, size_t len)
{
	struct scatterlist sg;

	sg_init_marker(&sg, 1);
	sg_set_page(&sg, page, len, off);
	ahash_request_set_crypt(hash, &sg, NULL, len);
	crypto_ahash_update(hash);
}

static inline void nvme_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
}

static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
		void *pdu, size_t pdu_len)
{
	struct nvme_tcp_hdr *hdr = pdu;
	__le32 recv_digest;
	__le32 exp_digest;

	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: header digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	if (recv_digest != exp_digest) {
		dev_err(queue->ctrl->ctrl.device,
			"header digest error: recv %#x expected %#x\n",
			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
		return -EIO;
	}

	return 0;
}

static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
{
	struct nvme_tcp_hdr *hdr = pdu;
	u8 digest_len = nvme_tcp_hdgst_len(queue);
	u32 len;

	len = le32_to_cpu(hdr->plen) - hdr->hlen -
		((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);

	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: data digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}
	crypto_ahash_init(queue->rcv_hash);

	return 0;
}

static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

	page_frag_free(req->pdu);
}

static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx,
		unsigned int numa_node)
{
	struct nvme_tcp_ctrl *ctrl = set->driver_data;
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	req->pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
			GFP_KERNEL | __GFP_ZERO);
	if (!req->pdu)
		return -ENOMEM;

	req->queue = queue;
	nvme_req(rq)->ctrl = &ctrl->ctrl;

	return 0;
}

static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = data;
	struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];

	hctx->driver_data = queue;
	return 0;
}

static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = data;
	struct nvme_tcp_queue *queue = &ctrl->queues[0];

	hctx->driver_data = queue;
	return 0;
}
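
/*
 * Receive-side state machine: while pdu_remaining is non-zero we are still
 * assembling a PDU header (RECV_PDU); once the header is in, data_remaining
 * tracks in-capsule data (RECV_DATA) and ddgst_remaining tracks the trailing
 * data digest (RECV_DDGST), after which the context is re-armed for the next
 * PDU.
 */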
static enum nvme_tcp_recv_state
nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
{
	return  (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
		(queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
		NVME_TCP_RECV_DATA;
}

static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
{
	queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
				nvme_tcp_hdgst_len(queue);
	queue->pdu_offset = 0;
	queue->data_remaining = -1;
	queue->ddgst_remaining = 0;
}

static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
		return;

	dev_warn(ctrl->device, "starting error recovery\n");
	queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}

static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
		struct nvme_completion *cqe)
{
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag 0x%x not found\n",
			nvme_tcp_queue_id(queue), cqe->command_id);
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EINVAL;
	}

	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
		nvme_complete_rq(rq);
	queue->nr_cqe++;

	return 0;
}

static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
		struct nvme_tcp_data_pdu *pdu)
{
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}

	if (!blk_rq_payload_bytes(rq)) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x unexpected data\n",
			nvme_tcp_queue_id(queue), rq->tag);
		return -EIO;
	}

	queue->data_remaining = le32_to_cpu(pdu->data_length);

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
	    unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x SUCCESS set but not last PDU\n",
			nvme_tcp_queue_id(queue), rq->tag);
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EPROTO;
	}

	return 0;
}

static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
		struct nvme_tcp_rsp_pdu *pdu)
{
	struct nvme_completion *cqe = &pdu->cqe;
	int ret = 0;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts. We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
				     cqe->command_id)))
		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
				&cqe->result);
	else
		ret = nvme_tcp_process_nvme_cqe(queue, cqe);

	return ret;
}
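
/*
 * An R2T PDU is the controller asking for (part of) the host's write data.
 * Build the matching H2C data PDU header in the request's preallocated pdu
 * buffer, after sanity checking that the requested range neither exceeds the
 * data we have nor goes backwards past what was already sent.
 */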
static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
		struct nvme_tcp_r2t_pdu *pdu)
{
	struct nvme_tcp_data_pdu *data = req->pdu;
	struct nvme_tcp_queue *queue = req->queue;
	struct request *rq = blk_mq_rq_from_pdu(req);
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	u8 ddgst = nvme_tcp_ddgst_len(queue);

	req->pdu_len = le32_to_cpu(pdu->r2t_length);
	req->pdu_sent = 0;

	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
			rq->tag, req->pdu_len, req->data_len,
			req->data_sent);
		return -EPROTO;
	}

	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d unexpected r2t offset %u (expected %zu)\n",
			rq->tag, le32_to_cpu(pdu->r2t_offset),
			req->data_sent);
		return -EPROTO;
	}

	memset(data, 0, sizeof(*data));
	data->hdr.type = nvme_tcp_h2c_data;
	data->hdr.flags = NVME_TCP_F_DATA_LAST;
	if (queue->hdr_digest)
		data->hdr.flags |= NVME_TCP_F_HDGST;
	if (queue->data_digest)
		data->hdr.flags |= NVME_TCP_F_DDGST;
	data->hdr.hlen = sizeof(*data);
	data->hdr.pdo = data->hdr.hlen + hdgst;
	data->hdr.plen =
		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
	data->ttag = pdu->ttag;
	data->command_id = rq->tag;
	data->data_offset = cpu_to_le32(req->data_sent);
	data->data_length = cpu_to_le32(req->pdu_len);
	return 0;
}

static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
		struct nvme_tcp_r2t_pdu *pdu)
{
	struct nvme_tcp_request *req;
	struct request *rq;
	int ret;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}
	req = blk_mq_rq_to_pdu(rq);

	ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
	if (unlikely(ret))
		return ret;

	req->state = NVME_TCP_SEND_H2C_PDU;
	req->offset = 0;

	nvme_tcp_queue_request(req, false, true);

	return 0;
}
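
/*
 * Copy as much of the PDU header as this skb fragment provides into the
 * queue's pdu buffer. Only once the header is complete do we verify the
 * header digest, prime the data digest, and dispatch on the PDU type
 * (C2H data, response capsule, or R2T).
 */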
static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_hdr *hdr;
	char *pdu = queue->pdu;
	size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
	int ret;

	ret = skb_copy_bits(skb, *offset,
		&pdu[queue->pdu_offset], rcv_len);
	if (unlikely(ret))
		return ret;

	queue->pdu_remaining -= rcv_len;
	queue->pdu_offset += rcv_len;
	*offset += rcv_len;
	*len -= rcv_len;
	if (queue->pdu_remaining)
		return 0;

	hdr = queue->pdu;
	if (queue->hdr_digest) {
		ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
		if (unlikely(ret))
			return ret;
	}

	if (queue->data_digest) {
		ret = nvme_tcp_check_ddgst(queue, queue->pdu);
		if (unlikely(ret))
			return ret;
	}

	switch (hdr->type) {
	case nvme_tcp_c2h_data:
		return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
	case nvme_tcp_rsp:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
	case nvme_tcp_r2t:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
	default:
		dev_err(queue->ctrl->ctrl.device,
			"unsupported pdu type (%d)\n", hdr->type);
		return -EINVAL;
	}
}

static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{
	union nvme_result res = {};

	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
		nvme_complete_rq(rq);
}
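
/*
 * Copy C2H data from the skb into the request's bio pages via req->iter,
 * feeding the receive data digest on the fly when data digest is enabled.
 * If the SUCCESS flag is set on the last data PDU the controller will not
 * send a separate response capsule, so the request is completed here.
 */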
static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	struct nvme_tcp_request *req;
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}
	req = blk_mq_rq_to_pdu(rq);

	while (true) {
		int recv_len, ret;

		recv_len = min_t(size_t, *len, queue->data_remaining);
		if (!recv_len)
			break;

		if (!iov_iter_count(&req->iter)) {
			req->curr_bio = req->curr_bio->bi_next;

			/*
			 * If we don't have any bios it means that controller
			 * sent more data than we requested, hence error
			 */
			if (!req->curr_bio) {
				dev_err(queue->ctrl->ctrl.device,
					"queue %d no space in request %#x",
					nvme_tcp_queue_id(queue), rq->tag);
				nvme_tcp_init_recv_ctx(queue);
				return -EIO;
			}
			nvme_tcp_init_iter(req, READ);
		}

		/* we can read only from what is left in this bio */
		recv_len = min_t(size_t, recv_len,
				iov_iter_count(&req->iter));
		if (queue->data_digest)
			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
				&req->iter, recv_len, queue->rcv_hash);
		else
			ret = skb_copy_datagram_iter(skb, *offset,
					&req->iter, recv_len);
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"queue %d failed to copy request %#x data",
				nvme_tcp_queue_id(queue), rq->tag);
			return ret;
		}

		*len -= recv_len;
		*offset += recv_len;
		queue->data_remaining -= recv_len;
	}

	if (!queue->data_remaining) {
		if (queue->data_digest) {
			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
		} else {
			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
				nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
				queue->nr_cqe++;
			}
			nvme_tcp_init_recv_ctx(queue);
		}
	}

	return 0;
}

static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
		struct sk_buff *skb, unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	char *ddgst = (char *)&queue->recv_ddgst;
	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
	int ret;

	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
	if (unlikely(ret))
		return ret;

	queue->ddgst_remaining -= recv_len;
	*offset += recv_len;
	*len -= recv_len;
	if (queue->ddgst_remaining)
		return 0;

	if (queue->recv_ddgst != queue->exp_ddgst) {
		dev_err(queue->ctrl->ctrl.device,
			"data digest error: recv %#x expected %#x\n",
			le32_to_cpu(queue->recv_ddgst),
			le32_to_cpu(queue->exp_ddgst));
		return -EIO;
	}

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
						pdu->command_id);

		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
		queue->nr_cqe++;
	}

	nvme_tcp_init_recv_ctx(queue);
	return 0;
}

static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct nvme_tcp_queue *queue = desc->arg.data;
	size_t consumed = len;
	int result;

	while (len) {
		switch (nvme_tcp_recv_state(queue)) {
		case NVME_TCP_RECV_PDU:
			result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DATA:
			result = nvme_tcp_recv_data(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DDGST:
			result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
			break;
		default:
			result = -EFAULT;
		}
		if (result) {
			dev_err(queue->ctrl->ctrl.device,
				"receive failed: %d\n", result);
			queue->rd_enabled = false;
			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
			return result;
		}
	}

	return consumed;
}

static void nvme_tcp_data_ready(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && queue->rd_enabled) &&
	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_write_space(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && sk_stream_is_writeable(sk))) {
		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_state_change(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (!queue)
		goto done;

	switch (sk->sk_state) {
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
	case TCP_LAST_ACK:
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		break;
	default:
		dev_info(queue->ctrl->ctrl.device,
			"queue %d socket state %d\n",
			nvme_tcp_queue_id(queue), sk->sk_state);
	}

	queue->state_change(sk);
done:
	read_unlock(&sk->sk_callback_lock);
}

static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
	return !list_empty(&queue->send_list) ||
		!llist_empty(&queue->req_list) || queue->more_requests;
}

static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{
	queue->request = NULL;
}

static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
	nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
}
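
/*
 * Push the request's payload pages onto the socket with kernel_sendpage()
 * when the page is safe for zero-copy sendpage, falling back to
 * sock_no_sendpage() otherwise. MSG_MORE/MSG_SENDPAGE_NOTLAST keep the TCP
 * stack from flushing until the last fragment, which is marked MSG_EOR when
 * nothing else is pending and no data digest follows.
 */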
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;

	while (true) {
		struct page *page = nvme_tcp_req_cur_page(req);
		size_t offset = nvme_tcp_req_cur_offset(req);
		size_t len = nvme_tcp_req_cur_length(req);
		bool last = nvme_tcp_pdu_last_send(req, len);
		int ret, flags = MSG_DONTWAIT;

		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
			flags |= MSG_EOR;
		else
			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;

		if (sendpage_ok(page)) {
			ret = kernel_sendpage(queue->sock, page, offset, len,
					flags);
		} else {
			ret = sock_no_sendpage(queue->sock, page, offset, len,
					flags);
		}
		if (ret <= 0)
			return ret;

		nvme_tcp_advance_req(req, ret);
		if (queue->data_digest)
			nvme_tcp_ddgst_update(queue->snd_hash, page,
					offset, ret);

		/* fully successful last write */
		if (last && ret == len) {
			if (queue->data_digest) {
				nvme_tcp_ddgst_final(queue->snd_hash,
					&req->ddgst);
				req->state = NVME_TCP_SEND_DDGST;
				req->offset = 0;
			} else {
				nvme_tcp_done_send_req(queue);
			}
			return 1;
		}
	}
	return -EAGAIN;
}

static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
	bool inline_data = nvme_tcp_has_inline_data(req);
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) + hdgst - req->offset;
	int flags = MSG_DONTWAIT;
	int ret;

	if (inline_data || nvme_tcp_queue_more(queue))
		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
	else
		flags |= MSG_EOR;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
			offset_in_page(pdu) + req->offset, len, flags);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		if (inline_data) {
			req->state = NVME_TCP_SEND_DATA;
			if (queue->data_digest)
				crypto_ahash_init(queue->snd_hash);
			nvme_tcp_init_iter(req, WRITE);
		} else {
			nvme_tcp_done_send_req(queue);
		}
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_data_pdu *pdu = req->pdu;
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) - req->offset + hdgst;
	int ret;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
			offset_in_page(pdu) + req->offset, len,
			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		req->state = NVME_TCP_SEND_DATA;
		if (queue->data_digest)
			crypto_ahash_init(queue->snd_hash);
		if (!req->data_sent)
			nvme_tcp_init_iter(req, WRITE);
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	int ret;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = &req->ddgst + req->offset,
		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
	};

	if (nvme_tcp_queue_more(queue))
		msg.msg_flags |= MSG_MORE;
	else
		msg.msg_flags |= MSG_EOR;

	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (unlikely(ret <= 0))
		return ret;

	if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
		nvme_tcp_done_send_req(queue);
		return 1;
	}

	req->offset += ret;
	return -EAGAIN;
}
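
/*
 * Drive one request through its send state machine: command capsule PDU
 * first, then (for R2T-driven writes) the H2C data PDU, then the payload
 * data, and finally the data digest. A return of 1 means progress was made,
 * 0 means there was nothing to send or the socket would block, and a
 * negative value is a fatal send error.
 */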
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	int ret = 1;

	if (!queue->request) {
		queue->request = nvme_tcp_fetch_request(queue);
		if (!queue->request)
			return 0;
	}
	req = queue->request;

	if (req->state == NVME_TCP_SEND_CMD_PDU) {
		ret = nvme_tcp_try_send_cmd_pdu(req);
		if (ret <= 0)
			goto done;
		if (!nvme_tcp_has_inline_data(req))
			return ret;
	}

	if (req->state == NVME_TCP_SEND_H2C_PDU) {
		ret = nvme_tcp_try_send_data_pdu(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DATA) {
		ret = nvme_tcp_try_send_data(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DDGST)
		ret = nvme_tcp_try_send_ddgst(req);
done:
	if (ret == -EAGAIN) {
		ret = 0;
	} else if (ret < 0) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to send request %d\n", ret);
		if (ret != -EPIPE && ret != -ECONNRESET)
			nvme_tcp_fail_request(queue->request);
		nvme_tcp_done_send_req(queue);
	}
	return ret;
}

static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
{
	struct socket *sock = queue->sock;
	struct sock *sk = sock->sk;
	read_descriptor_t rd_desc;
	int consumed;

	rd_desc.arg.data = queue;
	rd_desc.count = 1;
	lock_sock(sk);
	queue->nr_cqe = 0;
	consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
	release_sock(sk);
	return consumed;
}
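
/*
 * Per-queue worker: alternate between sending (under send_mutex, which it
 * shares with the direct-send path) and receiving, and keep going for at
 * most ~1ms per invocation before re-queueing itself if work is still
 * pending, so a single busy queue cannot monopolize the CPU.
 */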
static void nvme_tcp_io_work(struct work_struct *w)
{
	struct nvme_tcp_queue *queue =
		container_of(w, struct nvme_tcp_queue, io_work);
	unsigned long deadline = jiffies + msecs_to_jiffies(1);

	do {
		bool pending = false;
		int result;

		if (mutex_trylock(&queue->send_mutex)) {
			result = nvme_tcp_try_send(queue);
			mutex_unlock(&queue->send_mutex);
			if (result > 0)
				pending = true;
			else if (unlikely(result < 0))
				break;
		}

		result = nvme_tcp_try_recv(queue);
		if (result > 0)
			pending = true;
		else if (unlikely(result < 0))
			return;

		if (!pending)
			return;

	} while (!time_after(jiffies, deadline)); /* quota is exhausted */

	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

	ahash_request_free(queue->rcv_hash);
	ahash_request_free(queue->snd_hash);
	crypto_free_ahash(tfm);
}

static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm;

	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->snd_hash)
		goto free_tfm;
	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->rcv_hash)
		goto free_snd_hash;
	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

	return 0;
free_snd_hash:
	ahash_request_free(queue->snd_hash);
free_tfm:
	crypto_free_ahash(tfm);
	return -ENOMEM;
}

static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_request *async = &ctrl->async_req;

	page_frag_free(async->pdu);
}

static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_queue *queue = &ctrl->queues[0];
	struct nvme_tcp_request *async = &ctrl->async_req;
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	async->pdu = page_frag_alloc(&queue->pf_cache,
		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
		GFP_KERNEL | __GFP_ZERO);
	if (!async->pdu)
		return -ENOMEM;

	async->queue = &ctrl->queues[0];
	return 0;
}

static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];

	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
		return;

	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);

	sock_release(queue->sock);
	kfree(queue->pdu);
}
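
/*
 * NVMe/TCP connection initialization handshake: send an ICReq PDU announcing
 * PFV 1.0, a single outstanding R2T, no alignment constraint and the
 * negotiated digest settings, then validate that the ICResp echoes back a
 * compatible PFV, PDU length, CPDA and digest configuration.
 */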
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_icreq_pdu *icreq;
	struct nvme_tcp_icresp_pdu *icresp;
	struct msghdr msg = {};
	struct kvec iov;
	bool ctrl_hdgst, ctrl_ddgst;
	int ret;

	icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
	if (!icreq)
		return -ENOMEM;

	icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
	if (!icresp) {
		ret = -ENOMEM;
		goto free_icreq;
	}

	icreq->hdr.type = nvme_tcp_icreq;
	icreq->hdr.hlen = sizeof(*icreq);
	icreq->hdr.pdo = 0;
	icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
	icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
	icreq->maxr2t = 0; /* single inflight r2t supported */
	icreq->hpda = 0; /* no alignment constraint */
	if (queue->hdr_digest)
		icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
	if (queue->data_digest)
		icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;

	iov.iov_base = icreq;
	iov.iov_len = sizeof(*icreq);
	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (ret < 0)
		goto free_icresp;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = icresp;
	iov.iov_len = sizeof(*icresp);
	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
			iov.iov_len, msg.msg_flags);
	if (ret < 0)
		goto free_icresp;

	ret = -EINVAL;
	if (icresp->hdr.type != nvme_tcp_icresp) {
		pr_err("queue %d: bad type returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.type);
		goto free_icresp;
	}

	if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
		pr_err("queue %d: bad pdu length returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.plen);
		goto free_icresp;
	}

	if (icresp->pfv != NVME_TCP_PFV_1_0) {
		pr_err("queue %d: bad pfv returned %d\n",
			nvme_tcp_queue_id(queue), icresp->pfv);
		goto free_icresp;
	}

	ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
	if ((queue->data_digest && !ctrl_ddgst) ||
	    (!queue->data_digest && ctrl_ddgst)) {
		pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->data_digest ? "enabled" : "disabled",
			ctrl_ddgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
	if ((queue->hdr_digest && !ctrl_hdgst) ||
	    (!queue->hdr_digest && ctrl_hdgst)) {
		pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->hdr_digest ? "enabled" : "disabled",
			ctrl_hdgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	if (icresp->cpda != 0) {
		pr_err("queue %d: unsupported cpda returned %d\n",
			nvme_tcp_queue_id(queue), icresp->cpda);
		goto free_icresp;
	}

	ret = 0;
free_icresp:
	kfree(icresp);
free_icreq:
	kfree(icreq);
	return ret;
}
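
/*
 * IO queues are laid out per controller as contiguous ranges by type:
 * queue 0 is the admin queue, followed by the default (write) queues, then
 * the read queues, then the poll queues. The helpers below classify a queue
 * by where its qid falls in that layout.
 */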
static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
{
	return nvme_tcp_queue_id(queue) == 0;
}

static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
}

static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		!nvme_tcp_default_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
				ctrl->io_queues[HCTX_TYPE_READ];
}

static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		!nvme_tcp_default_queue(queue) &&
		!nvme_tcp_read_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
				ctrl->io_queues[HCTX_TYPE_READ] +
				ctrl->io_queues[HCTX_TYPE_POLL];
}
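
/*
 * Pin each queue's io_work to a CPU: queues of the same type are spread
 * round-robin over the online CPUs, with the index counted from the start
 * of that type's qid range (so default, read and poll queues each start
 * over from the first online CPU).
 */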
static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);
	int n = 0;

	if (nvme_tcp_default_queue(queue))
		n = qid - 1;
	else if (nvme_tcp_read_queue(queue))
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
	else if (nvme_tcp_poll_queue(queue))
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
				ctrl->io_queues[HCTX_TYPE_READ] - 1;
	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}

static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
		int qid, size_t queue_size)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
	int ret, rcv_pdu_size;

	queue->ctrl = ctrl;
	init_llist_head(&queue->req_list);
	INIT_LIST_HEAD(&queue->send_list);
	mutex_init(&queue->send_mutex);
	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
	queue->queue_size = queue_size;

	if (qid > 0)
		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command) +
						NVME_TCP_ADMIN_CCSZ;

	ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
			IPPROTO_TCP, &queue->sock);
	if (ret) {
		dev_err(nctrl->device,
			"failed to create socket: %d\n", ret);
		return ret;
	}

	/* Single syn retry */
	tcp_sock_set_syncnt(queue->sock->sk, 1);

	/* Set TCP no delay */
	tcp_sock_set_nodelay(queue->sock->sk);

	/*
	 * Cleanup whatever is sitting in the TCP transmit queue on socket
	 * close. This is done to prevent stale data from being sent should
	 * the network connection be restored before TCP times out.
	 */
	sock_no_linger(queue->sock->sk);

	if (so_priority > 0)
		sock_set_priority(queue->sock->sk, so_priority);

	/* Set socket type of service */
	if (nctrl->opts->tos >= 0)
		ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);

	/* Set 10 seconds timeout for icresp recvmsg */
	queue->sock->sk->sk_rcvtimeo = 10 * HZ;

	queue->sock->sk->sk_allocation = GFP_ATOMIC;
	nvme_tcp_set_queue_io_cpu(queue);
	queue->request = NULL;
	queue->data_remaining = 0;
	queue->ddgst_remaining = 0;
	queue->pdu_remaining = 0;
	queue->pdu_offset = 0;
	sk_set_memalloc(queue->sock->sk);

	if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
		ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
			sizeof(ctrl->src_addr));
		if (ret) {
			dev_err(nctrl->device,
				"failed to bind queue %d socket %d\n",
				qid, ret);
			goto err_sock;
		}
	}

	queue->hdr_digest = nctrl->opts->hdr_digest;
	queue->data_digest = nctrl->opts->data_digest;
	if (queue->hdr_digest || queue->data_digest) {
		ret = nvme_tcp_alloc_crypto(queue);
		if (ret) {
			dev_err(nctrl->device,
				"failed to allocate queue %d crypto\n", qid);
			goto err_sock;
		}
	}

	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
			nvme_tcp_hdgst_len(queue);
	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
	if (!queue->pdu) {
		ret = -ENOMEM;
		goto err_crypto;
	}

	dev_dbg(nctrl->device, "connecting queue %d\n",
			nvme_tcp_queue_id(queue));

	ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
		sizeof(ctrl->addr), 0);
	if (ret) {
		dev_err(nctrl->device,
			"failed to connect socket: %d\n", ret);
		goto err_rcv_pdu;
	}

	ret = nvme_tcp_init_connection(queue);
	if (ret)
		goto err_init_connect;

	queue->rd_enabled = true;
	set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
	nvme_tcp_init_recv_ctx(queue);

	write_lock_bh(&queue->sock->sk->sk_callback_lock);
	queue->sock->sk->sk_user_data = queue;
	queue->state_change = queue->sock->sk->sk_state_change;
	queue->data_ready = queue->sock->sk->sk_data_ready;
	queue->write_space = queue->sock->sk->sk_write_space;
	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
#ifdef CONFIG_NET_RX_BUSY_POLL
	queue->sock->sk->sk_ll_usec = 1;
#endif
	write_unlock_bh(&queue->sock->sk->sk_callback_lock);

	return 0;

err_init_connect:
	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
err_rcv_pdu:
	kfree(queue->pdu);
err_crypto:
	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);
err_sock:
	sock_release(queue->sock);
	queue->sock = NULL;
	return ret;
}

static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
{
	struct socket *sock = queue->sock;

	write_lock_bh(&sock->sk->sk_callback_lock);
	sock->sk->sk_user_data = NULL;
	sock->sk->sk_data_ready = queue->data_ready;
	sock->sk->sk_state_change = queue->state_change;
	sock->sk->sk_write_space = queue->write_space;
	write_unlock_bh(&sock->sk->sk_callback_lock);
}

static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	nvme_tcp_restore_sock_calls(queue);
	cancel_work_sync(&queue->io_work);
}

static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];

	if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
		return;
	__nvme_tcp_stop_queue(queue);
}

static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	int ret;

	if (idx)
		ret = nvmf_connect_io_queue(nctrl, idx, false);
	else
		ret = nvmf_connect_admin_queue(nctrl);

	if (!ret) {
		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
	} else {
		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
		dev_err(nctrl->device,
			"failed to connect queue: %d ret=%d\n", idx, ret);
	}
	return ret;
}

static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
		bool admin)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct blk_mq_tag_set *set;
	int ret;

	if (admin) {
		set = &ctrl->admin_tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_admin_mq_ops;
		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
		set->driver_data = ctrl;
		set->nr_hw_queues = 1;
		set->timeout = NVME_ADMIN_TIMEOUT;
	} else {
		set = &ctrl->tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_mq_ops;
		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
		set->driver_data = ctrl;
		set->nr_hw_queues = nctrl->queue_count - 1;
		set->timeout = NVME_IO_TIMEOUT;
		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
	}

	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ERR_PTR(ret);

	return set;
}

static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
{
	if (to_tcp_ctrl(ctrl)->async_req.pdu) {
		cancel_work_sync(&ctrl->async_event_work);
		nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
		to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
	}

	nvme_tcp_free_queue(ctrl, 0);
}

static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->queue_count; i++)
		nvme_tcp_free_queue(ctrl, i);
}

static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->queue_count; i++)
		nvme_tcp_stop_queue(ctrl, i);
}

static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvme_tcp_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
	}

	return 0;

out_stop_queues:
	for (i--; i >= 1; i--)
		nvme_tcp_stop_queue(ctrl, i);
	return ret;
}

static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
{
	int ret;

	ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
	if (ret)
		return ret;

	ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
	if (ret)
		goto out_free_queue;

	return 0;

out_free_queue:
	nvme_tcp_free_queue(ctrl, 0);
	return ret;
}

static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
	int i, ret;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvme_tcp_alloc_queue(ctrl, i,
				ctrl->sqsize + 1);
		if (ret)
			goto out_free_queues;
	}

	return 0;

out_free_queues:
	for (i--; i >= 1; i--)
		nvme_tcp_free_queue(ctrl, i);

	return ret;
}

static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
{
	unsigned int nr_io_queues;

	nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
	nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
	nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());

	return nr_io_queues;
}
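
/*
 * Distribute the granted IO queues among the HCTX types. Illustrative
 * example (assumed numbers, not taken from any particular setup): with
 * nr_io_queues=4 and nr_write_queues=2 requested and 6 queues granted by
 * the controller, this yields 4 READ and 2 DEFAULT (write) queues; with
 * only 4 granted, reads and writes share 4 DEFAULT queues. Poll queues are
 * carved out of whatever remains afterwards.
 */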
static void nvme_tcp_set_io_queues ( struct nvme_ctrl * nctrl ,
unsigned int nr_io_queues )
{
struct nvme_tcp_ctrl * ctrl = to_tcp_ctrl ( nctrl ) ;
struct nvmf_ctrl_options * opts = nctrl - > opts ;
if ( opts - > nr_write_queues & & opts - > nr_io_queues < nr_io_queues ) {
/*
* separate read / write queues
* hand out dedicated default queues only after we have
* sufficient read queues .
*/
ctrl - > io_queues [ HCTX_TYPE_READ ] = opts - > nr_io_queues ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_READ ] ;
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] =
min ( opts - > nr_write_queues , nr_io_queues ) ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
} else {
/*
* shared read / write queues
* either no write queues were requested , or we don ' t have
* sufficient queue count to have dedicated default queues .
*/
ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] =
min ( opts - > nr_io_queues , nr_io_queues ) ;
nr_io_queues - = ctrl - > io_queues [ HCTX_TYPE_DEFAULT ] ;
}
2019-07-04 00:08:04 +03:00
if ( opts - > nr_poll_queues & & nr_io_queues ) {
/* map dedicated poll queues only if we have queues left */
ctrl - > io_queues [ HCTX_TYPE_POLL ] =
min ( opts - > nr_poll_queues , nr_io_queues ) ;
}
2019-05-29 08:49:05 +03:00
}
2019-04-24 21:53:19 +03:00
static int nvme_tcp_alloc_io_queues ( struct nvme_ctrl * ctrl )
2018-12-04 04:52:17 +03:00
{
unsigned int nr_io_queues ;
int ret ;
nr_io_queues = nvme_tcp_nr_io_queues ( ctrl ) ;
ret = nvme_set_queue_count ( ctrl , & nr_io_queues ) ;
if ( ret )
return ret ;
ctrl - > queue_count = nr_io_queues + 1 ;
if ( ctrl - > queue_count < 2 )
return 0 ;
dev_info ( ctrl - > device ,
" creating %d I/O queues. \n " , nr_io_queues ) ;
2019-05-29 08:49:05 +03:00
nvme_tcp_set_io_queues ( ctrl , nr_io_queues ) ;
2019-04-24 21:53:19 +03:00
return __nvme_tcp_alloc_io_queues ( ctrl ) ;
2018-12-04 04:52:17 +03:00
}
static void nvme_tcp_destroy_io_queues ( struct nvme_ctrl * ctrl , bool remove )
{
nvme_tcp_stop_io_queues ( ctrl ) ;
if ( remove ) {
2019-01-01 10:58:30 +03:00
blk_cleanup_queue ( ctrl - > connect_q ) ;
2018-12-04 04:52:17 +03:00
blk_mq_free_tag_set ( ctrl - > tagset ) ;
}
nvme_tcp_free_io_queues ( ctrl ) ;
}
static int nvme_tcp_configure_io_queues ( struct nvme_ctrl * ctrl , bool new )
{
int ret ;
2019-04-24 21:53:19 +03:00
ret = nvme_tcp_alloc_io_queues ( ctrl ) ;
2018-12-04 04:52:17 +03:00
if ( ret )
return ret ;
if ( new ) {
ctrl - > tagset = nvme_tcp_alloc_tagset ( ctrl , false ) ;
if ( IS_ERR ( ctrl - > tagset ) ) {
ret = PTR_ERR ( ctrl - > tagset ) ;
goto out_free_io_queues ;
}
2019-01-01 10:58:30 +03:00
ctrl - > connect_q = blk_mq_init_queue ( ctrl - > tagset ) ;
if ( IS_ERR ( ctrl - > connect_q ) ) {
ret = PTR_ERR ( ctrl - > connect_q ) ;
goto out_free_tag_set ;
2018-12-04 04:52:17 +03:00
}
}
ret = nvme_tcp_start_io_queues ( ctrl ) ;
if ( ret )
goto out_cleanup_connect_q ;
2020-07-25 01:10:12 +03:00
if ( ! new ) {
nvme_start_queues ( ctrl ) ;
2020-07-30 23:25:34 +03:00
if ( ! nvme_wait_freeze_timeout ( ctrl , NVME_IO_TIMEOUT ) ) {
/*
* If we timed out waiting for freeze we are likely to
* be stuck . Fail the controller initialization just
* to be safe .
*/
ret = - ENODEV ;
goto out_wait_freeze_timed_out ;
}
2020-07-25 01:10:12 +03:00
blk_mq_update_nr_hw_queues ( ctrl - > tagset ,
ctrl - > queue_count - 1 ) ;
nvme_unfreeze ( ctrl ) ;
}
2018-12-04 04:52:17 +03:00
return 0 ;
2020-07-30 23:25:34 +03:00
out_wait_freeze_timed_out :
nvme_stop_queues ( ctrl ) ;
nvme_tcp_stop_io_queues ( ctrl ) ;
2018-12-04 04:52:17 +03:00
out_cleanup_connect_q :
2019-01-01 10:58:30 +03:00
if ( new )
2018-12-04 04:52:17 +03:00
blk_cleanup_queue ( ctrl - > connect_q ) ;
out_free_tag_set :
if ( new )
blk_mq_free_tag_set ( ctrl - > tagset ) ;
out_free_io_queues :
nvme_tcp_free_io_queues ( ctrl ) ;
return ret ;
}
static void nvme_tcp_destroy_admin_queue ( struct nvme_ctrl * ctrl , bool remove )
{
nvme_tcp_stop_queue ( ctrl , 0 ) ;
if ( remove ) {
blk_cleanup_queue ( ctrl - > admin_q ) ;
2019-08-03 05:33:59 +03:00
blk_cleanup_queue ( ctrl - > fabrics_q ) ;
2018-12-04 04:52:17 +03:00
blk_mq_free_tag_set ( ctrl - > admin_tagset ) ;
}
nvme_tcp_free_admin_queue ( ctrl ) ;
}
static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
{
	int error;

	error = nvme_tcp_alloc_admin_queue(ctrl);
	if (error)
		return error;

	if (new) {
		ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
		if (IS_ERR(ctrl->admin_tagset)) {
			error = PTR_ERR(ctrl->admin_tagset);
			goto out_free_queue;
		}

		ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
		if (IS_ERR(ctrl->fabrics_q)) {
			error = PTR_ERR(ctrl->fabrics_q);
			goto out_free_tagset;
		}

		ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
		if (IS_ERR(ctrl->admin_q)) {
			error = PTR_ERR(ctrl->admin_q);
			goto out_cleanup_fabrics_q;
		}
	}

	error = nvme_tcp_start_queue(ctrl, 0);
	if (error)
		goto out_cleanup_queue;

	error = nvme_enable_ctrl(ctrl);
	if (error)
		goto out_stop_queue;

	blk_mq_unquiesce_queue(ctrl->admin_q);

	error = nvme_init_identify(ctrl);
	if (error)
		goto out_stop_queue;

	return 0;

out_stop_queue:
	nvme_tcp_stop_queue(ctrl, 0);
out_cleanup_queue:
	if (new)
		blk_cleanup_queue(ctrl->admin_q);
out_cleanup_fabrics_q:
	if (new)
		blk_cleanup_queue(ctrl->fabrics_q);
out_free_tagset:
	if (new)
		blk_mq_free_tag_set(ctrl->admin_tagset);
out_free_queue:
	nvme_tcp_free_admin_queue(ctrl);
	return error;
}

static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
		bool remove)
{
	blk_mq_quiesce_queue(ctrl->admin_q);
	blk_sync_queue(ctrl->admin_q);
	nvme_tcp_stop_queue(ctrl, 0);
	if (ctrl->admin_tagset) {
		blk_mq_tagset_busy_iter(ctrl->admin_tagset,
			nvme_cancel_request, ctrl);
		blk_mq_tagset_wait_completed_request(ctrl->admin_tagset);
	}
	if (remove)
		blk_mq_unquiesce_queue(ctrl->admin_q);
	nvme_tcp_destroy_admin_queue(ctrl, remove);
}

static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
		bool remove)
{
	if (ctrl->queue_count <= 1)
		return;
	blk_mq_quiesce_queue(ctrl->admin_q);
	nvme_start_freeze(ctrl);
	nvme_stop_queues(ctrl);
	nvme_sync_io_queues(ctrl);
	nvme_tcp_stop_io_queues(ctrl);
	if (ctrl->tagset) {
		blk_mq_tagset_busy_iter(ctrl->tagset,
			nvme_cancel_request, ctrl);
		blk_mq_tagset_wait_completed_request(ctrl->tagset);
	}
	if (remove)
		nvme_start_queues(ctrl);
	nvme_tcp_destroy_io_queues(ctrl, remove);
}
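
/*
 * Either schedule another reconnect attempt (if the reconnect policy
 * still allows it) or delete the controller.  Only meaningful while the
 * controller is in the CONNECTING state.
 */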
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
{
	/* If we are resetting/deleting then do nothing */
	if (ctrl->state != NVME_CTRL_CONNECTING) {
		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
			ctrl->state == NVME_CTRL_LIVE);
		return;
	}

	if (nvmf_should_reconnect(ctrl)) {
		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
			ctrl->opts->reconnect_delay);
		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
				ctrl->opts->reconnect_delay * HZ);
	} else {
		dev_info(ctrl->device, "Removing controller...\n");
		nvme_delete_ctrl(ctrl);
	}
}
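
/*
 * Bring up (or re-establish) a controller: configure the admin queue,
 * validate the controller capabilities against the connect options,
 * configure the I/O queues and move the controller to LIVE.
 */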
static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
	struct nvmf_ctrl_options *opts = ctrl->opts;
	int ret;

	ret = nvme_tcp_configure_admin_queue(ctrl, new);
	if (ret)
		return ret;

	if (ctrl->icdoff) {
		ret = -EOPNOTSUPP;
		dev_err(ctrl->device, "icdoff is not supported!\n");
		goto destroy_admin;
	}

	if (opts->queue_size > ctrl->sqsize + 1)
		dev_warn(ctrl->device,
			"queue_size %zu > ctrl sqsize %u, clamping down\n",
			opts->queue_size, ctrl->sqsize + 1);

	if (ctrl->sqsize + 1 > ctrl->maxcmd) {
		dev_warn(ctrl->device,
			"sqsize %u > ctrl maxcmd %u, clamping down\n",
			ctrl->sqsize + 1, ctrl->maxcmd);
		ctrl->sqsize = ctrl->maxcmd - 1;
	}

	if (ctrl->queue_count > 1) {
		ret = nvme_tcp_configure_io_queues(ctrl, new);
		if (ret)
			goto destroy_admin;
	}

	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
		/*
		 * state change failure is ok if we started ctrl delete,
		 * unless we're during creation of a new controller to
		 * avoid races with teardown flow.
		 */
		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
			     ctrl->state != NVME_CTRL_DELETING_NOIO);
		WARN_ON_ONCE(new);
		ret = -EINVAL;
		goto destroy_io;
	}

	nvme_start_ctrl(ctrl);
	return 0;

destroy_io:
	if (ctrl->queue_count > 1)
		nvme_tcp_destroy_io_queues(ctrl, new);
destroy_admin:
	nvme_tcp_stop_queue(ctrl, 0);
	nvme_tcp_destroy_admin_queue(ctrl, new);
	return ret;
}

static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
{
	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
			struct nvme_tcp_ctrl, connect_work);
	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;

	++ctrl->nr_reconnects;

	if (nvme_tcp_setup_ctrl(ctrl, false))
		goto requeue;

	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
			ctrl->nr_reconnects);

	ctrl->nr_reconnects = 0;

	return;

requeue:
	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
			ctrl->nr_reconnects);
	nvme_tcp_reconnect_or_remove(ctrl);
}
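
/*
 * Error recovery: tear down all queues, fail-fast any pending requests
 * and move the controller to CONNECTING so a reconnect can be scheduled.
 */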
static void nvme_tcp_error_recovery_work(struct work_struct *work)
{
	struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
				struct nvme_tcp_ctrl, err_work);
	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;

	nvme_stop_keep_alive(ctrl);
	nvme_tcp_teardown_io_queues(ctrl, false);
	/* unquiesce to fail fast pending requests */
	nvme_start_queues(ctrl);
	nvme_tcp_teardown_admin_queue(ctrl, false);
	blk_mq_unquiesce_queue(ctrl->admin_q);

	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
		/* state change failure is ok if we started ctrl delete */
		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
			     ctrl->state != NVME_CTRL_DELETING_NOIO);
		return;
	}

	nvme_tcp_reconnect_or_remove(ctrl);
}

static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);

	nvme_tcp_teardown_io_queues(ctrl, shutdown);
	blk_mq_quiesce_queue(ctrl->admin_q);
	if (shutdown)
		nvme_shutdown_ctrl(ctrl);
	else
		nvme_disable_ctrl(ctrl);
	nvme_tcp_teardown_admin_queue(ctrl, shutdown);
}

static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
{
	nvme_tcp_teardown_ctrl(ctrl, true);
}

static void nvme_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl =
		container_of(work, struct nvme_ctrl, reset_work);

	nvme_stop_ctrl(ctrl);
	nvme_tcp_teardown_ctrl(ctrl, false);

	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
		/* state change failure is ok if we started ctrl delete */
		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
			     ctrl->state != NVME_CTRL_DELETING_NOIO);
		return;
	}

	if (nvme_tcp_setup_ctrl(ctrl, false))
		goto out_fail;

	return;

out_fail:
	++ctrl->nr_reconnects;
	nvme_tcp_reconnect_or_remove(ctrl);
}

static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);

	if (list_empty(&ctrl->list))
		goto free_ctrl;

	mutex_lock(&nvme_tcp_ctrl_mutex);
	list_del(&ctrl->list);
	mutex_unlock(&nvme_tcp_ctrl_mutex);

	nvmf_free_options(nctrl->opts);
free_ctrl:
	kfree(ctrl->queues);
	kfree(ctrl);
}

static void nvme_tcp_set_sg_null(struct nvme_command *c)
{
	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;

	sg->addr = 0;
	sg->length = 0;
	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
			NVME_SGL_FMT_TRANSPORT_A;
}

static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
		struct nvme_command *c, u32 data_len)
{
	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;

	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
	sg->length = cpu_to_le32(data_len);
	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
}

static void nvme_tcp_set_sg_host_data(struct nvme_command *c,
		u32 data_len)
{
	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;

	sg->addr = 0;
	sg->length = cpu_to_le32(data_len);
	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
			NVME_SGL_FMT_TRANSPORT_A;
}
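
/*
 * Build and queue the AER (asynchronous event request) command PDU on
 * the admin queue.
 */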
static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
	struct nvme_tcp_queue *queue = &ctrl->queues[0];
	struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
	struct nvme_command *cmd = &pdu->cmd;
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	memset(pdu, 0, sizeof(*pdu));
	pdu->hdr.type = nvme_tcp_cmd;
	if (queue->hdr_digest)
		pdu->hdr.flags |= NVME_TCP_F_HDGST;
	pdu->hdr.hlen = sizeof(*pdu);
	pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);

	cmd->common.opcode = nvme_admin_async_event;
	cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
	cmd->common.flags |= NVME_CMD_SGL_METABUF;
	nvme_tcp_set_sg_null(cmd);

	ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
	ctrl->async_req.offset = 0;
	ctrl->async_req.curr_bio = NULL;
	ctrl->async_req.data_len = 0;

	nvme_tcp_queue_request(&ctrl->async_req, true, true);
}
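
/*
 * Complete a timed out request after stopping its queue so that no
 * further completions can race with us.
 */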
static void nvme_tcp_complete_timed_out(struct request *rq)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;

	nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
	if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
		blk_mq_complete_request(rq);
	}
}

static enum blk_eh_timer_return
nvme_tcp_timeout(struct request *rq, bool reserved)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
	struct nvme_tcp_cmd_pdu *pdu = req->pdu;

	dev_warn(ctrl->device,
		"queue %d: timeout request %#x type %d\n",
		nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);

	if (ctrl->state != NVME_CTRL_LIVE) {
		/*
		 * If we are resetting, connecting or deleting we should
		 * complete immediately because we may block controller
		 * teardown or setup sequence
		 * - ctrl disable/shutdown fabrics requests
		 * - connect requests
		 * - initialization admin requests
		 * - I/O requests that entered after unquiescing and
		 *   the controller stopped responding
		 *
		 * All other requests should be cancelled by the error
		 * recovery work, so it's fine that we fail it here.
		 */
		nvme_tcp_complete_timed_out(rq);
		return BLK_EH_DONE;
	}

	/*
	 * LIVE state should trigger the normal error recovery which will
	 * handle completing this request.
	 */
	nvme_tcp_error_recovery(ctrl);
	return BLK_EH_RESET_TIMER;
}
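
/*
 * Set up the command SGL: no data, inline (in-capsule) data for small
 * writes, or a transport SGL indicating that the host holds the data.
 */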
static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
			struct request *rq)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
	struct nvme_command *c = &pdu->cmd;

	c->common.flags |= NVME_CMD_SGL_METABUF;

	if (!blk_rq_nr_phys_segments(rq))
		nvme_tcp_set_sg_null(c);
	else if (rq_data_dir(rq) == WRITE &&
	    req->data_len <= nvme_tcp_inline_data_size(queue))
		nvme_tcp_set_sg_inline(queue, c, req->data_len);
	else
		nvme_tcp_set_sg_host_data(c, req->data_len);

	return 0;
}
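
/*
 * Translate a block layer request into an NVMe/TCP command PDU:
 * initialize the send state, compute header/data digest lengths and
 * fill in the PDU header fields.
 */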
static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
		struct request *rq)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
	struct nvme_tcp_queue *queue = req->queue;
	u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
	blk_status_t ret;

	ret = nvme_setup_cmd(ns, rq, &pdu->cmd);
	if (ret)
		return ret;

	req->state = NVME_TCP_SEND_CMD_PDU;
	req->offset = 0;
	req->data_sent = 0;
	req->pdu_len = 0;
	req->pdu_sent = 0;
	req->data_len = blk_rq_nr_phys_segments(rq) ?
				blk_rq_payload_bytes(rq) : 0;
	req->curr_bio = rq->bio;

	if (rq_data_dir(rq) == WRITE &&
	    req->data_len <= nvme_tcp_inline_data_size(queue))
		req->pdu_len = req->data_len;
	else if (req->curr_bio)
		nvme_tcp_init_iter(req, READ);

	pdu->hdr.type = nvme_tcp_cmd;
	pdu->hdr.flags = 0;
	if (queue->hdr_digest)
		pdu->hdr.flags |= NVME_TCP_F_HDGST;
	if (queue->data_digest && req->pdu_len) {
		pdu->hdr.flags |= NVME_TCP_F_DDGST;
		ddgst = nvme_tcp_ddgst_len(queue);
	}
	pdu->hdr.hlen = sizeof(*pdu);
	pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
	pdu->hdr.plen =
		cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);

	ret = nvme_tcp_map_data(queue, rq);
	if (unlikely(ret)) {
		nvme_cleanup_cmd(rq);
		dev_err(queue->ctrl->ctrl.device,
			"Failed to map data (%d)\n", ret);
		return ret;
	}

	return 0;
}

static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_tcp_queue *queue = hctx->driver_data;

	if (!llist_empty(&queue->req_list))
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_tcp_queue *queue = hctx->driver_data;
	struct request *rq = bd->rq;
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
	blk_status_t ret;

	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);

	ret = nvme_tcp_setup_cmd_pdu(ns, rq);
	if (unlikely(ret))
		return ret;

	blk_mq_start_request(rq);

	nvme_tcp_queue_request(req, true, bd->last);

	return BLK_STS_OK;
}
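
/*
 * Map blk-mq hardware contexts to the controller I/O queues, honoring
 * separate default/read/poll queue sets when they were requested on
 * connect.
 */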
static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_tcp_ctrl *ctrl = set->driver_data;
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;

	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
		/* separate read/write queues */
		set->map[HCTX_TYPE_DEFAULT].nr_queues =
			ctrl->io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
		set->map[HCTX_TYPE_READ].nr_queues =
			ctrl->io_queues[HCTX_TYPE_READ];
		set->map[HCTX_TYPE_READ].queue_offset =
			ctrl->io_queues[HCTX_TYPE_DEFAULT];
	} else {
		/* shared read/write queues */
		set->map[HCTX_TYPE_DEFAULT].nr_queues =
			ctrl->io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
		set->map[HCTX_TYPE_READ].nr_queues =
			ctrl->io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_READ].queue_offset = 0;
	}
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);

	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
		/* map dedicated poll queues only if we have queues left */
		set->map[HCTX_TYPE_POLL].nr_queues =
				ctrl->io_queues[HCTX_TYPE_POLL];
		set->map[HCTX_TYPE_POLL].queue_offset =
			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
			ctrl->io_queues[HCTX_TYPE_READ];
		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
	}

	dev_info(ctrl->ctrl.device,
		"mapped %d/%d/%d default/read/poll queues.\n",
		ctrl->io_queues[HCTX_TYPE_DEFAULT],
		ctrl->io_queues[HCTX_TYPE_READ],
		ctrl->io_queues[HCTX_TYPE_POLL]);

	return 0;
}
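
/*
 * Polling callback: busy-poll the socket (if allowed) and reap any
 * received completions, returning the number of completions seen.
 */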
static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_tcp_queue *queue = hctx->driver_data;
	struct sock *sk = queue->sock->sk;

	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
		return 0;

	set_bit(NVME_TCP_Q_POLLING, &queue->flags);
	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
		sk_busy_loop(sk, true);
	nvme_tcp_try_recv(queue);
	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
	return queue->nr_cqe;
}

static const struct blk_mq_ops nvme_tcp_mq_ops = {
	.queue_rq	= nvme_tcp_queue_rq,
	.commit_rqs	= nvme_tcp_commit_rqs,
	.complete	= nvme_complete_rq,
	.init_request	= nvme_tcp_init_request,
	.exit_request	= nvme_tcp_exit_request,
	.init_hctx	= nvme_tcp_init_hctx,
	.timeout	= nvme_tcp_timeout,
	.map_queues	= nvme_tcp_map_queues,
	.poll		= nvme_tcp_poll,
};

static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
	.queue_rq	= nvme_tcp_queue_rq,
	.complete	= nvme_complete_rq,
	.init_request	= nvme_tcp_init_request,
	.exit_request	= nvme_tcp_exit_request,
	.init_hctx	= nvme_tcp_init_admin_hctx,
	.timeout	= nvme_tcp_timeout,
};

static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
	.name			= "tcp",
	.module			= THIS_MODULE,
	.flags			= NVME_F_FABRICS,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.free_ctrl		= nvme_tcp_free_ctrl,
	.submit_async_event	= nvme_tcp_submit_async_event,
	.delete_ctrl		= nvme_tcp_delete_ctrl,
	.get_address		= nvmf_get_address,
};

static bool
nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
{
	struct nvme_tcp_ctrl *ctrl;
	bool found = false;

	mutex_lock(&nvme_tcp_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
		found = nvmf_ip_options_match(&ctrl->ctrl, opts);
		if (found)
			break;
	}
	mutex_unlock(&nvme_tcp_ctrl_mutex);

	return found;
}
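
/*
 * Create a new controller instance from the fabrics connect options:
 * parse the target and host addresses, reject duplicate connections,
 * register the controller and run the initial setup sequence.
 */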
static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts)
{
	struct nvme_tcp_ctrl *ctrl;
	int ret;

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ctrl->list);
	ctrl->ctrl.opts = opts;
	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
				opts->nr_poll_queues + 1;
	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	INIT_DELAYED_WORK(&ctrl->connect_work,
			nvme_tcp_reconnect_ctrl_work);
	INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);

	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
		opts->trsvcid =
			kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
		if (!opts->trsvcid) {
			ret = -ENOMEM;
			goto out_free_ctrl;
		}
		opts->mask |= NVMF_OPT_TRSVCID;
	}

	ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->traddr, opts->trsvcid, &ctrl->addr);
	if (ret) {
		pr_err("malformed address passed: %s:%s\n",
			opts->traddr, opts->trsvcid);
		goto out_free_ctrl;
	}

	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
		ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->host_traddr, NULL, &ctrl->src_addr);
		if (ret) {
			pr_err("malformed src address passed: %s\n",
			       opts->host_traddr);
			goto out_free_ctrl;
		}
	}

	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
		ret = -EALREADY;
		goto out_free_ctrl;
	}

	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
				GFP_KERNEL);
	if (!ctrl->queues) {
		ret = -ENOMEM;
		goto out_free_ctrl;
	}

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
	if (ret)
		goto out_kfree_queues;

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		WARN_ON_ONCE(1);
		ret = -EINTR;
		goto out_uninit_ctrl;
	}

	ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
	if (ret)
		goto out_uninit_ctrl;

	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);

	mutex_lock(&nvme_tcp_ctrl_mutex);
	list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
	mutex_unlock(&nvme_tcp_ctrl_mutex);

	return &ctrl->ctrl;

out_uninit_ctrl:
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
	if (ret > 0)
		ret = -EIO;
	return ERR_PTR(ret);
out_kfree_queues:
	kfree(ctrl->queues);
out_free_ctrl:
	kfree(ctrl);
	return ERR_PTR(ret);
}

static struct nvmf_transport_ops nvme_tcp_transport = {
	.name		= "tcp",
	.module		= THIS_MODULE,
	.required_opts	= NVMF_OPT_TRADDR,
	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
			  NVMF_OPT_TOS,
	.create_ctrl	= nvme_tcp_create_ctrl,
};

static int __init nvme_tcp_init_module(void)
{
	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!nvme_tcp_wq)
		return -ENOMEM;

	nvmf_register_transport(&nvme_tcp_transport);
	return 0;
}

static void __exit nvme_tcp_cleanup_module(void)
{
	struct nvme_tcp_ctrl *ctrl;

	nvmf_unregister_transport(&nvme_tcp_transport);

	mutex_lock(&nvme_tcp_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
		nvme_delete_ctrl(&ctrl->ctrl);
	mutex_unlock(&nvme_tcp_ctrl_mutex);
	flush_workqueue(nvme_delete_wq);

	destroy_workqueue(nvme_tcp_wq);
}

module_init(nvme_tcp_init_module);
module_exit(nvme_tcp_cleanup_module);

MODULE_LICENSE("GPL v2");