2017-11-07 11:54:56 +03:00
/*
* Copyright ( C ) 2017 , Microsoft Corporation .
*
* Author ( s ) : Long Li < longli @ microsoft . com >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See
* the GNU General Public License for more details .
*/
2017-11-05 04:17:24 +03:00
# include <linux/module.h>
2017-11-23 03:38:40 +03:00
# include <linux/highmem.h>
2017-11-07 11:54:56 +03:00
# include "smbdirect.h"
2017-11-05 04:17:24 +03:00
# include "cifs_debug.h"
2018-05-30 22:48:00 +03:00
# include "cifsproto.h"
2018-06-15 16:22:44 +03:00
# include "smb2proto.h"
2017-11-05 04:17:24 +03:00
/* Forward declarations: receive buffer pools and the reassembly queue */
static struct smbd_response *get_empty_queue_buffer(
		struct smbd_connection *info);
static struct smbd_response *get_receive_buffer(
		struct smbd_connection *info);
static void put_receive_buffer(struct smbd_connection *info,
		struct smbd_response *response);
static int allocate_receive_buffers(struct smbd_connection *info,
		int num_buf);
static void destroy_receive_buffers(struct smbd_connection *info);
static void put_empty_packet(struct smbd_connection *info,
		struct smbd_response *response);
static void enqueue_reassembly(struct smbd_connection *info,
		struct smbd_response *response, int data_length);
static struct smbd_response *_get_first_reassembly(
		struct smbd_connection *info);

/* Forward declarations: posting receive and send work requests */
static int smbd_post_recv(struct smbd_connection *info,
		struct smbd_response *response);
static int smbd_post_send_empty(struct smbd_connection *info);
static int smbd_post_send_data(struct smbd_connection *info,
		struct kvec *iov, int n_vec, int remaining_data_length);
static int smbd_post_send_page(struct smbd_connection *info,
		struct page *page, unsigned long offset,
		size_t size, int remaining_data_length);

/* Forward declarations: memory registration (MR) list management */
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);
2017-11-07 11:54:56 +03:00
/* SMBD version number */
# define SMBD_V1 0x0100
/* Port numbers for SMBD transport */
# define SMB_PORT 445
# define SMBD_PORT 5445
/* Address lookup and resolve timeout in ms */
# define RDMA_RESOLVE_TIMEOUT 5000
/* SMBD negotiation timeout in seconds */
# define SMBD_NEGOTIATE_TIMEOUT 120
/*
 * SMBD minimum receive size and minimum fragmented size, as defined in
 * [MS-SMBD] (131072 bytes = 128 KiB)
 */
# define SMBD_MIN_RECEIVE_SIZE 128
# define SMBD_MIN_FRAGMENTED_SIZE 131072
/*
 * Default maximum number of RDMA reads/writes outstanding on this connection.
 * This value may be decreased during QP creation to fit the hardware limit.
 */
# define SMBD_CM_RESPONDER_RESOURCES 32
/* Maximum number of retries on data transfer operations */
# define SMBD_CM_RETRY 6
/* No need to retry on Receiver Not Ready since SMBD manages credits */
# define SMBD_CM_RNR_RETRY 0
/*
 * User configurable initial values per SMBD transport connection,
 * as defined in [MS-SMBD] 3.1.1.1.
 * These may change after an SMBD negotiation.
 */
/* The local peer's maximum number of credits to grant to the peer */
int smbd_receive_credit_max = 255 ;
/* The remote peer's credit request of local peer */
int smbd_send_credit_target = 255 ;
/* The maximum size of a single message that can be sent to the remote peer */
int smbd_max_send_size = 1364 ;
/* The maximum fragmented upper-layer payload receive size supported (1 MiB) */
int smbd_max_fragmented_recv_size = 1024 * 1024 ;
/* The maximum single-message size which can be received */
int smbd_max_receive_size = 8192 ;
/*
 * The idle timeout after which a keepalive message is sent
 * (presumably in seconds -- TODO confirm against the code arming the timer)
 */
int smbd_keep_alive_interval = 120 ;
/*
 * User configurable initial values for RDMA transport.
 * The actual values used may be lower and are limited to hardware capabilities.
 */
/* Default maximum number of SGEs in an RDMA write/read */
int smbd_max_frmr_depth = 2048 ;
/*
 * If the payload is smaller than this many bytes, use RDMA send/recv
 * instead of RDMA read/write
 */
int rdma_readwrite_threshold = 4096 ;
2017-11-05 04:17:24 +03:00
/*
 * Transport logging functions
 * Log messages are grouped into classes. Classes can be OR'ed together to
 * select what is logged via the module parameter smbd_logging_class,
 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
 * log_rdma_event() messages (LOG_RDMA_RECV | LOG_RDMA_EVENT).
 */
# define LOG_OUTGOING 0x1
# define LOG_INCOMING 0x2
# define LOG_READ 0x4
# define LOG_WRITE 0x8
# define LOG_RDMA_SEND 0x10
# define LOG_RDMA_RECV 0x20
# define LOG_KEEP_ALIVE 0x40
# define LOG_RDMA_EVENT 0x80
# define LOG_RDMA_MR 0x100
/* Bitmask of the LOG_* classes above that are always logged */
static unsigned int smbd_logging_class ;
module_param ( smbd_logging_class , uint , 0644 ) ;
MODULE_PARM_DESC ( smbd_logging_class ,
" Logging class for SMBD transport 0x0 to 0x100 " ) ;
/* Logging levels: a message is logged when its level <= smbd_logging_level */
# define ERR 0x0
# define INFO 0x1
static unsigned int smbd_logging_level = ERR ;
module_param ( smbd_logging_level , uint , 0644 ) ;
MODULE_PARM_DESC ( smbd_logging_level ,
" Logging level for SMBD transport, 0 (default): error, 1: info " ) ;
/*
 * Core logging macro: emit the message through cifs_dbg(VFS, ...), prefixed
 * with the calling function name and line number, if either the message
 * level is enabled by smbd_logging_level or the message class bit is set in
 * smbd_logging_class.
 */
# define log_rdma(level, class, fmt, args...) \
do { \
if ( level < = smbd_logging_level | | class & smbd_logging_class ) \
cifs_dbg ( VFS , " %s:%d " fmt , __func__ , __LINE__ , # # args ) ; \
} while ( 0 )
/* Per-class convenience wrappers around log_rdma() */
# define log_outgoing(level, fmt, args...) \
log_rdma ( level , LOG_OUTGOING , fmt , # # args )
# define log_incoming(level, fmt, args...) \
log_rdma ( level , LOG_INCOMING , fmt , # # args )
# define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args)
# define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args)
# define log_rdma_send(level, fmt, args...) \
log_rdma ( level , LOG_RDMA_SEND , fmt , # # args )
# define log_rdma_recv(level, fmt, args...) \
log_rdma ( level , LOG_RDMA_RECV , fmt , # # args )
# define log_keep_alive(level, fmt, args...) \
log_rdma ( level , LOG_KEEP_ALIVE , fmt , # # args )
# define log_rdma_event(level, fmt, args...) \
log_rdma ( level , LOG_RDMA_EVENT , fmt , # # args )
# define log_rdma_mr(level, fmt, args...) \
log_rdma ( level , LOG_RDMA_MR , fmt , # # args )
/*
* Destroy the transport and related RDMA and memory resources
* Need to go through all the pending counters and make sure on one is using
* the transport while it is destroyed
*/
static void smbd_destroy_rdma_work ( struct work_struct * work )
{
struct smbd_response * response ;
struct smbd_connection * info =
container_of ( work , struct smbd_connection , destroy_work ) ;
unsigned long flags ;
log_rdma_event ( INFO , " destroying qp \n " ) ;
ib_drain_qp ( info - > id - > qp ) ;
rdma_destroy_qp ( info - > id ) ;
/* Unblock all I/O waiting on the send queue */
wake_up_interruptible_all ( & info - > wait_send_queue ) ;
log_rdma_event ( INFO , " cancelling idle timer \n " ) ;
cancel_delayed_work_sync ( & info - > idle_timer_work ) ;
log_rdma_event ( INFO , " cancelling send immediate work \n " ) ;
cancel_delayed_work_sync ( & info - > send_immediate_work ) ;
2017-11-23 03:38:42 +03:00
log_rdma_event ( INFO , " wait for all send to finish \n " ) ;
wait_event ( info - > wait_smbd_send_pending ,
info - > smbd_send_pending = = 0 ) ;
2017-11-05 04:17:24 +03:00
log_rdma_event ( INFO , " wait for all recv to finish \n " ) ;
wake_up_interruptible ( & info - > wait_reassembly_queue ) ;
2017-11-23 03:38:40 +03:00
wait_event ( info - > wait_smbd_recv_pending ,
info - > smbd_recv_pending = = 0 ) ;
2017-11-05 04:17:24 +03:00
log_rdma_event ( INFO , " wait for all send posted to IB to finish \n " ) ;
wait_event ( info - > wait_send_pending ,
atomic_read ( & info - > send_pending ) = = 0 ) ;
wait_event ( info - > wait_send_payload_pending ,
atomic_read ( & info - > send_payload_pending ) = = 0 ) ;
2017-11-23 03:38:44 +03:00
log_rdma_event ( INFO , " freeing mr list \n " ) ;
wake_up_interruptible_all ( & info - > wait_mr ) ;
wait_event ( info - > wait_for_mr_cleanup ,
atomic_read ( & info - > mr_used_count ) = = 0 ) ;
destroy_mr_list ( info ) ;
2017-11-05 04:17:24 +03:00
/* It's not posssible for upper layer to get to reassembly */
log_rdma_event ( INFO , " drain the reassembly queue \n " ) ;
do {
spin_lock_irqsave ( & info - > reassembly_queue_lock , flags ) ;
response = _get_first_reassembly ( info ) ;
if ( response ) {
list_del ( & response - > list ) ;
spin_unlock_irqrestore (
& info - > reassembly_queue_lock , flags ) ;
put_receive_buffer ( info , response ) ;
2018-02-04 04:45:07 +03:00
} else
spin_unlock_irqrestore ( & info - > reassembly_queue_lock , flags ) ;
2017-11-05 04:17:24 +03:00
} while ( response ) ;
2018-02-04 04:45:07 +03:00
2017-11-05 04:17:24 +03:00
info - > reassembly_data_length = 0 ;
log_rdma_event ( INFO , " free receive buffers \n " ) ;
wait_event ( info - > wait_receive_queues ,
info - > count_receive_queue + info - > count_empty_packet_queue
= = info - > receive_credit_max ) ;
destroy_receive_buffers ( info ) ;
ib_free_cq ( info - > send_cq ) ;
ib_free_cq ( info - > recv_cq ) ;
ib_dealloc_pd ( info - > pd ) ;
rdma_destroy_id ( info - > id ) ;
/* free mempools */
mempool_destroy ( info - > request_mempool ) ;
kmem_cache_destroy ( info - > request_cache ) ;
mempool_destroy ( info - > response_mempool ) ;
kmem_cache_destroy ( info - > response_cache ) ;
info - > transport_status = SMBD_DESTROYED ;
wake_up_all ( & info - > wait_destroy ) ;
}
static int smbd_process_disconnected ( struct smbd_connection * info )
{
schedule_work ( & info - > destroy_work ) ;
return 0 ;
}
static void smbd_disconnect_rdma_work ( struct work_struct * work )
{
struct smbd_connection * info =
container_of ( work , struct smbd_connection , disconnect_work ) ;
if ( info - > transport_status = = SMBD_CONNECTED ) {
info - > transport_status = SMBD_DISCONNECTING ;
rdma_disconnect ( info - > id ) ;
}
}
static void smbd_disconnect_rdma_connection ( struct smbd_connection * info )
{
queue_work ( info - > workqueue , & info - > disconnect_work ) ;
}
/* Upcall from RDMA CM */
static int smbd_conn_upcall (
struct rdma_cm_id * id , struct rdma_cm_event * event )
{
struct smbd_connection * info = id - > context ;
log_rdma_event ( INFO , " event=%d status=%d \n " ,
event - > event , event - > status ) ;
switch ( event - > event ) {
case RDMA_CM_EVENT_ADDR_RESOLVED :
case RDMA_CM_EVENT_ROUTE_RESOLVED :
info - > ri_rc = 0 ;
complete ( & info - > ri_done ) ;
break ;
case RDMA_CM_EVENT_ADDR_ERROR :
info - > ri_rc = - EHOSTUNREACH ;
complete ( & info - > ri_done ) ;
break ;
case RDMA_CM_EVENT_ROUTE_ERROR :
info - > ri_rc = - ENETUNREACH ;
complete ( & info - > ri_done ) ;
break ;
case RDMA_CM_EVENT_ESTABLISHED :
log_rdma_event ( INFO , " connected event=%d \n " , event - > event ) ;
info - > transport_status = SMBD_CONNECTED ;
wake_up_interruptible ( & info - > conn_wait ) ;
break ;
case RDMA_CM_EVENT_CONNECT_ERROR :
case RDMA_CM_EVENT_UNREACHABLE :
case RDMA_CM_EVENT_REJECTED :
log_rdma_event ( INFO , " connecting failed event=%d \n " , event - > event ) ;
info - > transport_status = SMBD_DISCONNECTED ;
wake_up_interruptible ( & info - > conn_wait ) ;
break ;
case RDMA_CM_EVENT_DEVICE_REMOVAL :
case RDMA_CM_EVENT_DISCONNECTED :
/* This happenes when we fail the negotiation */
if ( info - > transport_status = = SMBD_NEGOTIATE_FAILED ) {
info - > transport_status = SMBD_DISCONNECTED ;
wake_up ( & info - > conn_wait ) ;
break ;
}
info - > transport_status = SMBD_DISCONNECTED ;
smbd_process_disconnected ( info ) ;
break ;
default :
break ;
}
return 0 ;
}
/* Upcall from RDMA QP */
static void
smbd_qp_async_error_upcall ( struct ib_event * event , void * context )
{
struct smbd_connection * info = context ;
log_rdma_event ( ERR , " %s on device %s info %p \n " ,
ib_event_msg ( event - > event ) , event - > device - > name , info ) ;
switch ( event - > event ) {
case IB_EVENT_CQ_ERR :
case IB_EVENT_QP_FATAL :
smbd_disconnect_rdma_connection ( info ) ;
default :
break ;
}
}
static inline void * smbd_request_payload ( struct smbd_request * request )
{
return ( void * ) request - > packet ;
}
static inline void * smbd_response_payload ( struct smbd_response * response )
{
return ( void * ) response - > packet ;
}
/* Called when a RDMA send is done */
static void send_done ( struct ib_cq * cq , struct ib_wc * wc )
{
int i ;
struct smbd_request * request =
container_of ( wc - > wr_cqe , struct smbd_request , cqe ) ;
log_rdma_send ( INFO , " smbd_request %p completed wc->status=%d \n " ,
request , wc - > status ) ;
if ( wc - > status ! = IB_WC_SUCCESS | | wc - > opcode ! = IB_WC_SEND ) {
log_rdma_send ( ERR , " wc->status=%d wc->opcode=%d \n " ,
wc - > status , wc - > opcode ) ;
smbd_disconnect_rdma_connection ( request - > info ) ;
}
for ( i = 0 ; i < request - > num_sge ; i + + )
ib_dma_unmap_single ( request - > info - > id - > device ,
request - > sge [ i ] . addr ,
request - > sge [ i ] . length ,
DMA_TO_DEVICE ) ;
if ( request - > has_payload ) {
if ( atomic_dec_and_test ( & request - > info - > send_payload_pending ) )
wake_up ( & request - > info - > wait_send_payload_pending ) ;
} else {
if ( atomic_dec_and_test ( & request - > info - > send_pending ) )
wake_up ( & request - > info - > wait_send_pending ) ;
}
mempool_free ( request , request - > info - > request_mempool ) ;
}
static void dump_smbd_negotiate_resp ( struct smbd_negotiate_resp * resp )
{
log_rdma_event ( INFO , " resp message min_version %u max_version %u "
" negotiated_version %u credits_requested %u "
" credits_granted %u status %u max_readwrite_size %u "
" preferred_send_size %u max_receive_size %u "
" max_fragmented_size %u \n " ,
resp - > min_version , resp - > max_version , resp - > negotiated_version ,
resp - > credits_requested , resp - > credits_granted , resp - > status ,
resp - > max_readwrite_size , resp - > preferred_send_size ,
resp - > max_receive_size , resp - > max_fragmented_size ) ;
}
/*
* Process a negotiation response message , according to [ MS - SMBD ] 3.1 .5 .7
* response , packet_length : the negotiation response message
* return value : true if negotiation is a success , false if failed
*/
static bool process_negotiation_response (
struct smbd_response * response , int packet_length )
{
struct smbd_connection * info = response - > info ;
struct smbd_negotiate_resp * packet = smbd_response_payload ( response ) ;
if ( packet_length < sizeof ( struct smbd_negotiate_resp ) ) {
log_rdma_event ( ERR ,
" error: packet_length=%d \n " , packet_length ) ;
return false ;
}
if ( le16_to_cpu ( packet - > negotiated_version ) ! = SMBD_V1 ) {
log_rdma_event ( ERR , " error: negotiated_version=%x \n " ,
le16_to_cpu ( packet - > negotiated_version ) ) ;
return false ;
}
info - > protocol = le16_to_cpu ( packet - > negotiated_version ) ;
if ( packet - > credits_requested = = 0 ) {
log_rdma_event ( ERR , " error: credits_requested==0 \n " ) ;
return false ;
}
info - > receive_credit_target = le16_to_cpu ( packet - > credits_requested ) ;
if ( packet - > credits_granted = = 0 ) {
log_rdma_event ( ERR , " error: credits_granted==0 \n " ) ;
return false ;
}
atomic_set ( & info - > send_credits , le16_to_cpu ( packet - > credits_granted ) ) ;
atomic_set ( & info - > receive_credits , 0 ) ;
if ( le32_to_cpu ( packet - > preferred_send_size ) > info - > max_receive_size ) {
log_rdma_event ( ERR , " error: preferred_send_size=%d \n " ,
le32_to_cpu ( packet - > preferred_send_size ) ) ;
return false ;
}
info - > max_receive_size = le32_to_cpu ( packet - > preferred_send_size ) ;
if ( le32_to_cpu ( packet - > max_receive_size ) < SMBD_MIN_RECEIVE_SIZE ) {
log_rdma_event ( ERR , " error: max_receive_size=%d \n " ,
le32_to_cpu ( packet - > max_receive_size ) ) ;
return false ;
}
info - > max_send_size = min_t ( int , info - > max_send_size ,
le32_to_cpu ( packet - > max_receive_size ) ) ;
if ( le32_to_cpu ( packet - > max_fragmented_size ) <
SMBD_MIN_FRAGMENTED_SIZE ) {
log_rdma_event ( ERR , " error: max_fragmented_size=%d \n " ,
le32_to_cpu ( packet - > max_fragmented_size ) ) ;
return false ;
}
info - > max_fragmented_send_size =
le32_to_cpu ( packet - > max_fragmented_size ) ;
2017-11-23 03:38:44 +03:00
info - > rdma_readwrite_threshold =
rdma_readwrite_threshold > info - > max_fragmented_send_size ?
info - > max_fragmented_send_size :
rdma_readwrite_threshold ;
info - > max_readwrite_size = min_t ( u32 ,
le32_to_cpu ( packet - > max_readwrite_size ) ,
info - > max_frmr_depth * PAGE_SIZE ) ;
info - > max_frmr_depth = info - > max_readwrite_size / PAGE_SIZE ;
2017-11-05 04:17:24 +03:00
return true ;
}
/*
* Check and schedule to send an immediate packet
* This is used to extend credtis to remote peer to keep the transport busy
*/
static void check_and_send_immediate ( struct smbd_connection * info )
{
if ( info - > transport_status ! = SMBD_CONNECTED )
return ;
info - > send_immediate = true ;
/*
* Promptly send a packet if our peer is running low on receive
* credits
*/
if ( atomic_read ( & info - > receive_credits ) <
info - > receive_credit_target - 1 )
queue_delayed_work (
info - > workqueue , & info - > send_immediate_work , 0 ) ;
}
static void smbd_post_send_credits ( struct work_struct * work )
{
int ret = 0 ;
int use_receive_queue = 1 ;
int rc ;
struct smbd_response * response ;
struct smbd_connection * info =
container_of ( work , struct smbd_connection ,
post_send_credits_work ) ;
if ( info - > transport_status ! = SMBD_CONNECTED ) {
wake_up ( & info - > wait_receive_queues ) ;
return ;
}
if ( info - > receive_credit_target >
atomic_read ( & info - > receive_credits ) ) {
while ( true ) {
if ( use_receive_queue )
response = get_receive_buffer ( info ) ;
else
response = get_empty_queue_buffer ( info ) ;
if ( ! response ) {
/* now switch to emtpy packet queue */
if ( use_receive_queue ) {
use_receive_queue = 0 ;
continue ;
} else
break ;
}
response - > type = SMBD_TRANSFER_DATA ;
response - > first_segment = false ;
rc = smbd_post_recv ( info , response ) ;
if ( rc ) {
log_rdma_recv ( ERR ,
" post_recv failed rc=%d \n " , rc ) ;
put_receive_buffer ( info , response ) ;
break ;
}
ret + + ;
}
}
spin_lock ( & info - > lock_new_credits_offered ) ;
info - > new_credits_offered + = ret ;
spin_unlock ( & info - > lock_new_credits_offered ) ;
atomic_add ( ret , & info - > receive_credits ) ;
/* Check if we can post new receive and grant credits to peer */
check_and_send_immediate ( info ) ;
}
static void smbd_recv_done_work ( struct work_struct * work )
{
struct smbd_connection * info =
container_of ( work , struct smbd_connection , recv_done_work ) ;
/*
* We may have new send credits granted from remote peer
* If any sender is blcoked on lack of credets , unblock it
*/
if ( atomic_read ( & info - > send_credits ) )
wake_up_interruptible ( & info - > wait_send_queue ) ;
/*
* Check if we need to send something to remote peer to
* grant more credits or respond to KEEP_ALIVE packet
*/
check_and_send_immediate ( info ) ;
}
/* Called from softirq, when recv is done */
static void recv_done ( struct ib_cq * cq , struct ib_wc * wc )
{
struct smbd_data_transfer * data_transfer ;
struct smbd_response * response =
container_of ( wc - > wr_cqe , struct smbd_response , cqe ) ;
struct smbd_connection * info = response - > info ;
int data_length = 0 ;
log_rdma_recv ( INFO , " response=%p type=%d wc status=%d wc opcode %d "
" byte_len=%d pkey_index=%x \n " ,
response , response - > type , wc - > status , wc - > opcode ,
wc - > byte_len , wc - > pkey_index ) ;
if ( wc - > status ! = IB_WC_SUCCESS | | wc - > opcode ! = IB_WC_RECV ) {
log_rdma_recv ( INFO , " wc->status=%d opcode=%d \n " ,
wc - > status , wc - > opcode ) ;
smbd_disconnect_rdma_connection ( info ) ;
goto error ;
}
ib_dma_sync_single_for_cpu (
wc - > qp - > device ,
response - > sge . addr ,
response - > sge . length ,
DMA_FROM_DEVICE ) ;
switch ( response - > type ) {
/* SMBD negotiation response */
case SMBD_NEGOTIATE_RESP :
dump_smbd_negotiate_resp ( smbd_response_payload ( response ) ) ;
info - > full_packet_received = true ;
info - > negotiate_done =
process_negotiation_response ( response , wc - > byte_len ) ;
complete ( & info - > negotiate_completion ) ;
break ;
/* SMBD data transfer packet */
case SMBD_TRANSFER_DATA :
data_transfer = smbd_response_payload ( response ) ;
data_length = le32_to_cpu ( data_transfer - > data_length ) ;
/*
* If this is a packet with data playload place the data in
* reassembly queue and wake up the reading thread
*/
if ( data_length ) {
if ( info - > full_packet_received )
response - > first_segment = true ;
if ( le32_to_cpu ( data_transfer - > remaining_data_length ) )
info - > full_packet_received = false ;
else
info - > full_packet_received = true ;
enqueue_reassembly (
info ,
response ,
data_length ) ;
} else
put_empty_packet ( info , response ) ;
if ( data_length )
wake_up_interruptible ( & info - > wait_reassembly_queue ) ;
atomic_dec ( & info - > receive_credits ) ;
info - > receive_credit_target =
le16_to_cpu ( data_transfer - > credits_requested ) ;
atomic_add ( le16_to_cpu ( data_transfer - > credits_granted ) ,
& info - > send_credits ) ;
log_incoming ( INFO , " data flags %d data_offset %d "
" data_length %d remaining_data_length %d \n " ,
le16_to_cpu ( data_transfer - > flags ) ,
le32_to_cpu ( data_transfer - > data_offset ) ,
le32_to_cpu ( data_transfer - > data_length ) ,
le32_to_cpu ( data_transfer - > remaining_data_length ) ) ;
/* Send a KEEP_ALIVE response right away if requested */
info - > keep_alive_requested = KEEP_ALIVE_NONE ;
if ( le16_to_cpu ( data_transfer - > flags ) &
SMB_DIRECT_RESPONSE_REQUESTED ) {
info - > keep_alive_requested = KEEP_ALIVE_PENDING ;
}
queue_work ( info - > workqueue , & info - > recv_done_work ) ;
return ;
default :
log_rdma_recv ( ERR ,
" unexpected response type=%d \n " , response - > type ) ;
}
error :
put_receive_buffer ( info , response ) ;
}
static struct rdma_cm_id * smbd_create_id (
struct smbd_connection * info ,
struct sockaddr * dstaddr , int port )
{
struct rdma_cm_id * id ;
int rc ;
__be16 * sport ;
id = rdma_create_id ( & init_net , smbd_conn_upcall , info ,
RDMA_PS_TCP , IB_QPT_RC ) ;
if ( IS_ERR ( id ) ) {
rc = PTR_ERR ( id ) ;
log_rdma_event ( ERR , " rdma_create_id() failed %i \n " , rc ) ;
return id ;
}
if ( dstaddr - > sa_family = = AF_INET6 )
sport = & ( ( struct sockaddr_in6 * ) dstaddr ) - > sin6_port ;
else
sport = & ( ( struct sockaddr_in * ) dstaddr ) - > sin_port ;
* sport = htons ( port ) ;
init_completion ( & info - > ri_done ) ;
info - > ri_rc = - ETIMEDOUT ;
rc = rdma_resolve_addr ( id , NULL , ( struct sockaddr * ) dstaddr ,
RDMA_RESOLVE_TIMEOUT ) ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_resolve_addr() failed %i \n " , rc ) ;
goto out ;
}
wait_for_completion_interruptible_timeout (
& info - > ri_done , msecs_to_jiffies ( RDMA_RESOLVE_TIMEOUT ) ) ;
rc = info - > ri_rc ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_resolve_addr() completed %i \n " , rc ) ;
goto out ;
}
info - > ri_rc = - ETIMEDOUT ;
rc = rdma_resolve_route ( id , RDMA_RESOLVE_TIMEOUT ) ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_resolve_route() failed %i \n " , rc ) ;
goto out ;
}
wait_for_completion_interruptible_timeout (
& info - > ri_done , msecs_to_jiffies ( RDMA_RESOLVE_TIMEOUT ) ) ;
rc = info - > ri_rc ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_resolve_route() completed %i \n " , rc ) ;
goto out ;
}
return id ;
out :
rdma_destroy_id ( id ) ;
return ERR_PTR ( rc ) ;
}
/*
* Test if FRWR ( Fast Registration Work Requests ) is supported on the device
* This implementation requries FRWR on RDMA read / write
* return value : true if it is supported
*/
static bool frwr_is_supported ( struct ib_device_attr * attrs )
{
if ( ! ( attrs - > device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS ) )
return false ;
if ( attrs - > max_fast_reg_page_list_len = = 0 )
return false ;
return true ;
}
static int smbd_ia_open (
struct smbd_connection * info ,
struct sockaddr * dstaddr , int port )
{
int rc ;
info - > id = smbd_create_id ( info , dstaddr , port ) ;
if ( IS_ERR ( info - > id ) ) {
rc = PTR_ERR ( info - > id ) ;
goto out1 ;
}
if ( ! frwr_is_supported ( & info - > id - > device - > attrs ) ) {
log_rdma_event ( ERR ,
" Fast Registration Work Requests "
" (FRWR) is not supported \n " ) ;
log_rdma_event ( ERR ,
" Device capability flags = %llx "
" max_fast_reg_page_list_len = %u \n " ,
info - > id - > device - > attrs . device_cap_flags ,
info - > id - > device - > attrs . max_fast_reg_page_list_len ) ;
rc = - EPROTONOSUPPORT ;
goto out2 ;
}
2017-11-23 03:38:44 +03:00
info - > max_frmr_depth = min_t ( int ,
smbd_max_frmr_depth ,
info - > id - > device - > attrs . max_fast_reg_page_list_len ) ;
info - > mr_type = IB_MR_TYPE_MEM_REG ;
if ( info - > id - > device - > attrs . device_cap_flags & IB_DEVICE_SG_GAPS_REG )
info - > mr_type = IB_MR_TYPE_SG_GAPS ;
2017-11-05 04:17:24 +03:00
info - > pd = ib_alloc_pd ( info - > id - > device , 0 ) ;
if ( IS_ERR ( info - > pd ) ) {
rc = PTR_ERR ( info - > pd ) ;
log_rdma_event ( ERR , " ib_alloc_pd() returned %d \n " , rc ) ;
goto out2 ;
}
return 0 ;
out2 :
rdma_destroy_id ( info - > id ) ;
info - > id = NULL ;
out1 :
return rc ;
}
/*
* Send a negotiation request message to the peer
* The negotiation procedure is in [ MS - SMBD ] 3.1 .5 .2 and 3.1 .5 .3
* After negotiation , the transport is connected and ready for
* carrying upper layer SMB payload
*/
static int smbd_post_send_negotiate_req ( struct smbd_connection * info )
{
2018-07-18 19:25:25 +03:00
struct ib_send_wr send_wr ;
2017-11-05 04:17:24 +03:00
int rc = - ENOMEM ;
struct smbd_request * request ;
struct smbd_negotiate_req * packet ;
request = mempool_alloc ( info - > request_mempool , GFP_KERNEL ) ;
if ( ! request )
return rc ;
request - > info = info ;
packet = smbd_request_payload ( request ) ;
packet - > min_version = cpu_to_le16 ( SMBD_V1 ) ;
packet - > max_version = cpu_to_le16 ( SMBD_V1 ) ;
packet - > reserved = 0 ;
packet - > credits_requested = cpu_to_le16 ( info - > send_credit_target ) ;
packet - > preferred_send_size = cpu_to_le32 ( info - > max_send_size ) ;
packet - > max_receive_size = cpu_to_le32 ( info - > max_receive_size ) ;
packet - > max_fragmented_size =
cpu_to_le32 ( info - > max_fragmented_recv_size ) ;
request - > num_sge = 1 ;
request - > sge [ 0 ] . addr = ib_dma_map_single (
info - > id - > device , ( void * ) packet ,
sizeof ( * packet ) , DMA_TO_DEVICE ) ;
if ( ib_dma_mapping_error ( info - > id - > device , request - > sge [ 0 ] . addr ) ) {
rc = - EIO ;
goto dma_mapping_failed ;
}
request - > sge [ 0 ] . length = sizeof ( * packet ) ;
request - > sge [ 0 ] . lkey = info - > pd - > local_dma_lkey ;
ib_dma_sync_single_for_device (
info - > id - > device , request - > sge [ 0 ] . addr ,
request - > sge [ 0 ] . length , DMA_TO_DEVICE ) ;
request - > cqe . done = send_done ;
send_wr . next = NULL ;
send_wr . wr_cqe = & request - > cqe ;
send_wr . sg_list = request - > sge ;
send_wr . num_sge = request - > num_sge ;
send_wr . opcode = IB_WR_SEND ;
send_wr . send_flags = IB_SEND_SIGNALED ;
log_rdma_send ( INFO , " sge addr=%llx length=%x lkey=%x \n " ,
request - > sge [ 0 ] . addr ,
request - > sge [ 0 ] . length , request - > sge [ 0 ] . lkey ) ;
request - > has_payload = false ;
atomic_inc ( & info - > send_pending ) ;
2018-07-18 19:25:25 +03:00
rc = ib_post_send ( info - > id - > qp , & send_wr , NULL ) ;
2017-11-05 04:17:24 +03:00
if ( ! rc )
return 0 ;
/* if we reach here, post send failed */
log_rdma_send ( ERR , " ib_post_send failed rc=%d \n " , rc ) ;
atomic_dec ( & info - > send_pending ) ;
ib_dma_unmap_single ( info - > id - > device , request - > sge [ 0 ] . addr ,
request - > sge [ 0 ] . length , DMA_TO_DEVICE ) ;
2018-03-31 01:16:36 +03:00
smbd_disconnect_rdma_connection ( info ) ;
2017-11-05 04:17:24 +03:00
dma_mapping_failed :
mempool_free ( request , info - > request_mempool ) ;
return rc ;
}
/*
* Extend the credits to remote peer
* This implements [ MS - SMBD ] 3.1 .5 .9
* The idea is that we should extend credits to remote peer as quickly as
* it ' s allowed , to maintain data flow . We allocate as much receive
* buffer as possible , and extend the receive credits to remote peer
* return value : the new credtis being granted .
*/
static int manage_credits_prior_sending ( struct smbd_connection * info )
{
int new_credits ;
spin_lock ( & info - > lock_new_credits_offered ) ;
new_credits = info - > new_credits_offered ;
info - > new_credits_offered = 0 ;
spin_unlock ( & info - > lock_new_credits_offered ) ;
return new_credits ;
}
/*
* Check if we need to send a KEEP_ALIVE message
* The idle connection timer triggers a KEEP_ALIVE message when expires
* SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send
* back a response .
* return value :
* 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
* 0 : otherwise
*/
static int manage_keep_alive_before_sending ( struct smbd_connection * info )
{
if ( info - > keep_alive_requested = = KEEP_ALIVE_PENDING ) {
info - > keep_alive_requested = KEEP_ALIVE_SENT ;
return 1 ;
}
return 0 ;
}
/*
* Build and prepare the SMBD packet header
* This function waits for avaialbe send credits and build a SMBD packet
* header . The caller then optional append payload to the packet after
* the header
* intput values
* size : the size of the payload
* remaining_data_length : remaining data to send if this is part of a
* fragmented packet
* output values
* request_out : the request allocated from this function
* return values : 0 on success , otherwise actual error code returned
*/
static int smbd_create_header ( struct smbd_connection * info ,
int size , int remaining_data_length ,
struct smbd_request * * request_out )
{
struct smbd_request * request ;
struct smbd_data_transfer * packet ;
int header_length ;
int rc ;
/* Wait for send credits. A SMBD packet needs one credit */
rc = wait_event_interruptible ( info - > wait_send_queue ,
atomic_read ( & info - > send_credits ) > 0 | |
info - > transport_status ! = SMBD_CONNECTED ) ;
if ( rc )
return rc ;
if ( info - > transport_status ! = SMBD_CONNECTED ) {
log_outgoing ( ERR , " disconnected not sending \n " ) ;
return - ENOENT ;
}
atomic_dec ( & info - > send_credits ) ;
request = mempool_alloc ( info - > request_mempool , GFP_KERNEL ) ;
if ( ! request ) {
rc = - ENOMEM ;
goto err ;
}
request - > info = info ;
/* Fill in the packet header */
packet = smbd_request_payload ( request ) ;
packet - > credits_requested = cpu_to_le16 ( info - > send_credit_target ) ;
packet - > credits_granted =
cpu_to_le16 ( manage_credits_prior_sending ( info ) ) ;
info - > send_immediate = false ;
packet - > flags = 0 ;
if ( manage_keep_alive_before_sending ( info ) )
packet - > flags | = cpu_to_le16 ( SMB_DIRECT_RESPONSE_REQUESTED ) ;
packet - > reserved = 0 ;
if ( ! size )
packet - > data_offset = 0 ;
else
packet - > data_offset = cpu_to_le32 ( 24 ) ;
packet - > data_length = cpu_to_le32 ( size ) ;
packet - > remaining_data_length = cpu_to_le32 ( remaining_data_length ) ;
packet - > padding = 0 ;
log_outgoing ( INFO , " credits_requested=%d credits_granted=%d "
" data_offset=%d data_length=%d remaining_data_length=%d \n " ,
le16_to_cpu ( packet - > credits_requested ) ,
le16_to_cpu ( packet - > credits_granted ) ,
le32_to_cpu ( packet - > data_offset ) ,
le32_to_cpu ( packet - > data_length ) ,
le32_to_cpu ( packet - > remaining_data_length ) ) ;
/* Map the packet to DMA */
header_length = sizeof ( struct smbd_data_transfer ) ;
/* If this is a packet without payload, don't send padding */
if ( ! size )
header_length = offsetof ( struct smbd_data_transfer , padding ) ;
request - > num_sge = 1 ;
request - > sge [ 0 ] . addr = ib_dma_map_single ( info - > id - > device ,
( void * ) packet ,
header_length ,
DMA_BIDIRECTIONAL ) ;
if ( ib_dma_mapping_error ( info - > id - > device , request - > sge [ 0 ] . addr ) ) {
mempool_free ( request , info - > request_mempool ) ;
rc = - EIO ;
goto err ;
}
request - > sge [ 0 ] . length = header_length ;
request - > sge [ 0 ] . lkey = info - > pd - > local_dma_lkey ;
* request_out = request ;
return 0 ;
err :
atomic_inc ( & info - > send_credits ) ;
return rc ;
}
static void smbd_destroy_header ( struct smbd_connection * info ,
struct smbd_request * request )
{
ib_dma_unmap_single ( info - > id - > device ,
request - > sge [ 0 ] . addr ,
request - > sge [ 0 ] . length ,
DMA_TO_DEVICE ) ;
mempool_free ( request , info - > request_mempool ) ;
atomic_inc ( & info - > send_credits ) ;
}
/*
 * Post a prepared send request to the queue pair
 * request: the request to post, with request->num_sge segments filled in
 * has_payload: selects which pending counter accounts for this send
 * return value: 0 on success, or the error returned by ib_post_send()
 */
static int smbd_post_send(struct smbd_connection *info,
		struct smbd_request *request, bool has_payload)
{
	struct ib_send_wr send_wr;
	int idx;
	int ret;

	/* Sync every segment for the device before handing it over */
	for (idx = 0; idx < request->num_sge; idx++) {
		log_rdma_send(INFO,
			" rdma_request sge[%d] addr=%llu length=%u \n ",
			idx, request->sge[idx].addr,
			request->sge[idx].length);
		ib_dma_sync_single_for_device(info->id->device,
					      request->sge[idx].addr,
					      request->sge[idx].length,
					      DMA_TO_DEVICE);
	}

	request->cqe.done = send_done;

	send_wr.next = NULL;
	send_wr.wr_cqe = &request->cqe;
	send_wr.sg_list = request->sge;
	send_wr.num_sge = request->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	/* Account for the send before it is posted */
	request->has_payload = has_payload;
	if (has_payload)
		atomic_inc(&info->send_payload_pending);
	else
		atomic_inc(&info->send_pending);

	ret = ib_post_send(info->id->qp, &send_wr, NULL);
	if (!ret) {
		/* Reset timer for idle connection after packet is sent */
		mod_delayed_work(info->workqueue, &info->idle_timer_work,
				 info->keep_alive_interval * HZ);
		return ret;
	}

	/* Posting failed: undo the accounting and drop the connection */
	log_rdma_send(ERR, " ib_post_send failed rc=%d \n ", ret);
	if (has_payload) {
		if (atomic_dec_and_test(&info->send_payload_pending))
			wake_up(&info->wait_send_payload_pending);
	} else {
		if (atomic_dec_and_test(&info->send_pending))
			wake_up(&info->wait_send_pending);
	}
	smbd_disconnect_rdma_connection(info);

	return ret;
}
static int smbd_post_send_sgl ( struct smbd_connection * info ,
struct scatterlist * sgl , int data_length , int remaining_data_length )
{
int num_sgs ;
int i , rc ;
struct smbd_request * request ;
struct scatterlist * sg ;
rc = smbd_create_header (
info , data_length , remaining_data_length , & request ) ;
if ( rc )
return rc ;
num_sgs = sgl ? sg_nents ( sgl ) : 0 ;
for_each_sg ( sgl , sg , num_sgs , i ) {
request - > sge [ i + 1 ] . addr =
ib_dma_map_page ( info - > id - > device , sg_page ( sg ) ,
sg - > offset , sg - > length , DMA_BIDIRECTIONAL ) ;
if ( ib_dma_mapping_error (
info - > id - > device , request - > sge [ i + 1 ] . addr ) ) {
rc = - EIO ;
request - > sge [ i + 1 ] . addr = 0 ;
goto dma_mapping_failure ;
}
request - > sge [ i + 1 ] . length = sg - > length ;
request - > sge [ i + 1 ] . lkey = info - > pd - > local_dma_lkey ;
request - > num_sge + + ;
}
rc = smbd_post_send ( info , request , data_length ) ;
if ( ! rc )
return 0 ;
dma_mapping_failure :
for ( i = 1 ; i < request - > num_sge ; i + + )
if ( request - > sge [ i ] . addr )
ib_dma_unmap_single ( info - > id - > device ,
request - > sge [ i ] . addr ,
request - > sge [ i ] . length ,
DMA_TO_DEVICE ) ;
smbd_destroy_header ( info , request ) ;
return rc ;
}
2017-11-23 03:38:42 +03:00
/*
 * Send a range of a single page as one packet
 * page: the page to send
 * offset: offset in the page to send
 * size: length in the page to send
 * remaining_data_length: remaining data to send in this payload
 * return value: whatever smbd_post_send_sgl() returns
 */
static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
		unsigned long offset, size_t size, int remaining_data_length)
{
	struct scatterlist page_sg;

	/* Wrap the page range in a single-entry scatterlist */
	sg_init_table(&page_sg, 1);
	sg_set_page(&page_sg, page, size, offset);

	return smbd_post_send_sgl(info, &page_sg, size, remaining_data_length);
}
2017-11-05 04:17:24 +03:00
/*
* Send an empty message
* Empty message is used to extend credits to peer to for keep live
* while there is no upper layer payload to send at the time
*/
static int smbd_post_send_empty ( struct smbd_connection * info )
{
info - > count_send_empty + + ;
return smbd_post_send_sgl ( info , NULL , 0 , 0 ) ;
}
2017-11-23 03:38:42 +03:00
/*
 * Send a data buffer
 * iov: the iov array describing the data buffers
 * n_vec: number of iov array
 * remaining_data_length: remaining data to send following this packet
 * in segmented SMBD packet
 * return value: -ENOMEM if the buffers do not fit in one send request,
 * otherwise whatever smbd_post_send_sgl() returns
 */
static int smbd_post_send_data(
	struct smbd_connection *info, struct kvec *iov, int n_vec,
	int remaining_data_length)
{
	int i;
	u32 data_length = 0;
	struct scatterlist sgl[SMBDIRECT_MAX_SGE];

	/*
	 * The packet header occupies the first SGE of the send request and
	 * the queue pair is created with max_send_sge = SMBDIRECT_MAX_SGE,
	 * so at most SMBDIRECT_MAX_SGE - 1 data buffers fit in one request.
	 */
	if (n_vec > SMBDIRECT_MAX_SGE - 1) {
		cifs_dbg(VFS, " Can't fit data to SGL, n_vec=%d \n ", n_vec);
		return -ENOMEM;
	}

	sg_init_table(sgl, n_vec);
	for (i = 0; i < n_vec; i++) {
		data_length += iov[i].iov_len;
		sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
	}

	return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
}
2017-11-05 04:17:24 +03:00
/*
* Post a receive request to the transport
* The remote peer can only send data when a receive request is posted
* The interaction is controlled by send / receive credit system
*/
static int smbd_post_recv (
struct smbd_connection * info , struct smbd_response * response )
{
2018-07-18 19:25:25 +03:00
struct ib_recv_wr recv_wr ;
2017-11-05 04:17:24 +03:00
int rc = - EIO ;
response - > sge . addr = ib_dma_map_single (
info - > id - > device , response - > packet ,
info - > max_receive_size , DMA_FROM_DEVICE ) ;
if ( ib_dma_mapping_error ( info - > id - > device , response - > sge . addr ) )
return rc ;
response - > sge . length = info - > max_receive_size ;
response - > sge . lkey = info - > pd - > local_dma_lkey ;
response - > cqe . done = recv_done ;
recv_wr . wr_cqe = & response - > cqe ;
recv_wr . next = NULL ;
recv_wr . sg_list = & response - > sge ;
recv_wr . num_sge = 1 ;
2018-07-18 19:25:25 +03:00
rc = ib_post_recv ( info - > id - > qp , & recv_wr , NULL ) ;
2017-11-05 04:17:24 +03:00
if ( rc ) {
ib_dma_unmap_single ( info - > id - > device , response - > sge . addr ,
response - > sge . length , DMA_FROM_DEVICE ) ;
2018-03-31 01:16:36 +03:00
smbd_disconnect_rdma_connection ( info ) ;
2017-11-05 04:17:24 +03:00
log_rdma_recv ( ERR , " ib_post_recv failed rc=%d \n " , rc ) ;
}
return rc ;
}
/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
static int smbd_negotiate ( struct smbd_connection * info )
{
int rc ;
struct smbd_response * response = get_receive_buffer ( info ) ;
response - > type = SMBD_NEGOTIATE_RESP ;
rc = smbd_post_recv ( info , response ) ;
log_rdma_event ( INFO ,
" smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
" iov.lkey=%x \n " ,
rc , response - > sge . addr ,
response - > sge . length , response - > sge . lkey ) ;
if ( rc )
return rc ;
init_completion ( & info - > negotiate_completion ) ;
info - > negotiate_done = false ;
rc = smbd_post_send_negotiate_req ( info ) ;
if ( rc )
return rc ;
rc = wait_for_completion_interruptible_timeout (
& info - > negotiate_completion , SMBD_NEGOTIATE_TIMEOUT * HZ ) ;
log_rdma_event ( INFO , " wait_for_completion_timeout rc=%d \n " , rc ) ;
if ( info - > negotiate_done )
return 0 ;
if ( rc = = 0 )
rc = - ETIMEDOUT ;
else if ( rc = = - ERESTARTSYS )
rc = - EINTR ;
else
rc = - ENOTCONN ;
return rc ;
}
static void put_empty_packet (
struct smbd_connection * info , struct smbd_response * response )
{
spin_lock ( & info - > empty_packet_queue_lock ) ;
list_add_tail ( & response - > list , & info - > empty_packet_queue ) ;
info - > count_empty_packet_queue + + ;
spin_unlock ( & info - > empty_packet_queue_lock ) ;
queue_work ( info - > workqueue , & info - > post_send_credits_work ) ;
}
/*
 * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
 * This is a queue for reassembling upper layer payload and presenting it to
 * the upper layer. All the incoming payload goes to the reassembly queue,
 * regardless of whether reassembly is required. The upper layer code reads
 * from the queue for all incoming payloads.
 * Put a received packet to the reassembly queue
 * response: the packet received
 * data_length: the size of payload in this packet
 */
static void enqueue_reassembly (
struct smbd_connection * info ,
struct smbd_response * response ,
int data_length )
{
/* Link the packet and bump the entry count under the queue lock */
spin_lock ( & info - > reassembly_queue_lock ) ;
list_add_tail ( & response - > list , & info - > reassembly_queue ) ;
info - > reassembly_queue_length + + ;
/*
 * Make sure reassembly_data_length is updated after list and
 * reassembly_queue_length are updated. On the dequeue side
 * reassembly_data_length is checked without a lock to determine
 * if reassembly_queue_length and list are up to date
 */
virt_wmb ( ) ;
info - > reassembly_data_length + = data_length ;
spin_unlock ( & info - > reassembly_queue_lock ) ;
/* These two counters are updated outside the queue lock */
info - > count_reassembly_queue + + ;
info - > count_enqueue_reassembly_queue + + ;
}
/*
* Get the first entry at the front of reassembly queue
* Caller is responsible for locking
* return value : the first entry if any , NULL if queue is empty
*/
static struct smbd_response * _get_first_reassembly ( struct smbd_connection * info )
{
struct smbd_response * ret = NULL ;
if ( ! list_empty ( & info - > reassembly_queue ) ) {
ret = list_first_entry (
& info - > reassembly_queue ,
struct smbd_response , list ) ;
}
return ret ;
}
static struct smbd_response * get_empty_queue_buffer (
struct smbd_connection * info )
{
struct smbd_response * ret = NULL ;
unsigned long flags ;
spin_lock_irqsave ( & info - > empty_packet_queue_lock , flags ) ;
if ( ! list_empty ( & info - > empty_packet_queue ) ) {
ret = list_first_entry (
& info - > empty_packet_queue ,
struct smbd_response , list ) ;
list_del ( & ret - > list ) ;
info - > count_empty_packet_queue - - ;
}
spin_unlock_irqrestore ( & info - > empty_packet_queue_lock , flags ) ;
return ret ;
}
/*
* Get a receive buffer
* For each remote send , we need to post a receive . The receive buffers are
* pre - allocated in advance .
* return value : the receive buffer , NULL if none is available
*/
static struct smbd_response * get_receive_buffer ( struct smbd_connection * info )
{
struct smbd_response * ret = NULL ;
unsigned long flags ;
spin_lock_irqsave ( & info - > receive_queue_lock , flags ) ;
if ( ! list_empty ( & info - > receive_queue ) ) {
ret = list_first_entry (
& info - > receive_queue ,
struct smbd_response , list ) ;
list_del ( & ret - > list ) ;
info - > count_receive_queue - - ;
info - > count_get_receive_buffer + + ;
}
spin_unlock_irqrestore ( & info - > receive_queue_lock , flags ) ;
return ret ;
}
/*
* Return a receive buffer
* Upon returning of a receive buffer , we can post new receive and extend
* more receive credits to remote peer . This is done immediately after a
* receive buffer is returned .
*/
static void put_receive_buffer (
struct smbd_connection * info , struct smbd_response * response )
{
unsigned long flags ;
ib_dma_unmap_single ( info - > id - > device , response - > sge . addr ,
response - > sge . length , DMA_FROM_DEVICE ) ;
spin_lock_irqsave ( & info - > receive_queue_lock , flags ) ;
list_add_tail ( & response - > list , & info - > receive_queue ) ;
info - > count_receive_queue + + ;
info - > count_put_receive_buffer + + ;
spin_unlock_irqrestore ( & info - > receive_queue_lock , flags ) ;
queue_work ( info - > workqueue , & info - > post_send_credits_work ) ;
}
/* Preallocate all receive buffer on transport establishment */
static int allocate_receive_buffers ( struct smbd_connection * info , int num_buf )
{
int i ;
struct smbd_response * response ;
INIT_LIST_HEAD ( & info - > reassembly_queue ) ;
spin_lock_init ( & info - > reassembly_queue_lock ) ;
info - > reassembly_data_length = 0 ;
info - > reassembly_queue_length = 0 ;
INIT_LIST_HEAD ( & info - > receive_queue ) ;
spin_lock_init ( & info - > receive_queue_lock ) ;
info - > count_receive_queue = 0 ;
INIT_LIST_HEAD ( & info - > empty_packet_queue ) ;
spin_lock_init ( & info - > empty_packet_queue_lock ) ;
info - > count_empty_packet_queue = 0 ;
init_waitqueue_head ( & info - > wait_receive_queues ) ;
for ( i = 0 ; i < num_buf ; i + + ) {
response = mempool_alloc ( info - > response_mempool , GFP_KERNEL ) ;
if ( ! response )
goto allocate_failed ;
response - > info = info ;
list_add_tail ( & response - > list , & info - > receive_queue ) ;
info - > count_receive_queue + + ;
}
return 0 ;
allocate_failed :
while ( ! list_empty ( & info - > receive_queue ) ) {
response = list_first_entry (
& info - > receive_queue ,
struct smbd_response , list ) ;
list_del ( & response - > list ) ;
info - > count_receive_queue - - ;
mempool_free ( response , info - > response_mempool ) ;
}
return - ENOMEM ;
}
static void destroy_receive_buffers ( struct smbd_connection * info )
{
struct smbd_response * response ;
while ( ( response = get_receive_buffer ( info ) ) )
mempool_free ( response , info - > response_mempool ) ;
while ( ( response = get_empty_queue_buffer ( info ) ) )
mempool_free ( response , info - > response_mempool ) ;
}
/*
* Check and send an immediate or keep alive packet
* The condition to send those packets are defined in [ MS - SMBD ] 3.1 .1 .1
* Connection . KeepaliveRequested and Connection . SendImmediate
* The idea is to extend credits to server as soon as it becomes available
*/
static void send_immediate_work ( struct work_struct * work )
{
struct smbd_connection * info = container_of (
work , struct smbd_connection ,
send_immediate_work . work ) ;
if ( info - > keep_alive_requested = = KEEP_ALIVE_PENDING | |
info - > send_immediate ) {
log_keep_alive ( INFO , " send an empty message \n " ) ;
smbd_post_send_empty ( info ) ;
}
}
/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
static void idle_connection_timer ( struct work_struct * work )
{
struct smbd_connection * info = container_of (
work , struct smbd_connection ,
idle_timer_work . work ) ;
if ( info - > keep_alive_requested ! = KEEP_ALIVE_NONE ) {
log_keep_alive ( ERR ,
" error status info->keep_alive_requested=%d \n " ,
info - > keep_alive_requested ) ;
smbd_disconnect_rdma_connection ( info ) ;
return ;
}
log_keep_alive ( INFO , " about to send an empty idle message \n " ) ;
smbd_post_send_empty ( info ) ;
/* Setup the next idle timeout work */
queue_delayed_work ( info - > workqueue , & info - > idle_timer_work ,
info - > keep_alive_interval * HZ ) ;
}
2017-11-23 03:38:37 +03:00
/* Destroy this SMBD connection, called from upper layer */
void smbd_destroy ( struct smbd_connection * info )
{
log_rdma_event ( INFO , " destroying rdma session \n " ) ;
/* Kick off the disconnection process */
smbd_disconnect_rdma_connection ( info ) ;
log_rdma_event ( INFO , " wait for transport being destroyed \n " ) ;
wait_event ( info - > wait_destroy ,
info - > transport_status = = SMBD_DESTROYED ) ;
destroy_workqueue ( info - > workqueue ) ;
kfree ( info ) ;
}
2017-11-23 03:38:35 +03:00
/*
* Reconnect this SMBD connection , called from upper layer
* return value : 0 on success , or actual error code
*/
int smbd_reconnect ( struct TCP_Server_Info * server )
{
log_rdma_event ( INFO , " reconnecting rdma session \n " ) ;
if ( ! server - > smbd_conn ) {
2018-03-31 01:16:35 +03:00
log_rdma_event ( INFO , " rdma session already destroyed \n " ) ;
goto create_conn ;
2017-11-23 03:38:35 +03:00
}
/*
* This is possible if transport is disconnected and we haven ' t received
* notification from RDMA , but upper layer has detected timeout
*/
if ( server - > smbd_conn - > transport_status = = SMBD_CONNECTED ) {
log_rdma_event ( INFO , " disconnecting transport \n " ) ;
smbd_disconnect_rdma_connection ( server - > smbd_conn ) ;
}
/* wait until the transport is destroyed */
2018-03-31 01:16:35 +03:00
if ( ! wait_event_timeout ( server - > smbd_conn - > wait_destroy ,
server - > smbd_conn - > transport_status = = SMBD_DESTROYED , 5 * HZ ) )
return - EAGAIN ;
2017-11-23 03:38:35 +03:00
destroy_workqueue ( server - > smbd_conn - > workqueue ) ;
kfree ( server - > smbd_conn ) ;
2018-03-31 01:16:35 +03:00
create_conn :
2017-11-23 03:38:35 +03:00
log_rdma_event ( INFO , " creating rdma session \n " ) ;
server - > smbd_conn = smbd_get_connection (
server , ( struct sockaddr * ) & server - > dstaddr ) ;
2018-03-31 01:16:35 +03:00
log_rdma_event ( INFO , " created rdma session info=%p \n " ,
server - > smbd_conn ) ;
2017-11-23 03:38:35 +03:00
return server - > smbd_conn ? 0 : - ENOENT ;
}
2017-11-05 04:17:24 +03:00
/*
 * Release what allocate_caches_and_workqueue() set up: the receive buffers,
 * the workqueue, and the response and request mempools with their caches.
 */
static void destroy_caches_and_workqueue ( struct smbd_connection * info )
{
/* Buffers go back to response_mempool, so free them before the pool */
destroy_receive_buffers ( info ) ;
destroy_workqueue ( info - > workqueue ) ;
/* Each mempool is destroyed before the slab cache backing it */
mempool_destroy ( info - > response_mempool ) ;
kmem_cache_destroy ( info - > response_cache ) ;
mempool_destroy ( info - > request_mempool ) ;
kmem_cache_destroy ( info - > request_cache ) ;
}
# define MAX_NAME_LEN 80
static int allocate_caches_and_workqueue ( struct smbd_connection * info )
{
char name [ MAX_NAME_LEN ] ;
int rc ;
snprintf ( name , MAX_NAME_LEN , " smbd_request_%p " , info ) ;
info - > request_cache =
kmem_cache_create (
name ,
sizeof ( struct smbd_request ) +
sizeof ( struct smbd_data_transfer ) ,
0 , SLAB_HWCACHE_ALIGN , NULL ) ;
if ( ! info - > request_cache )
return - ENOMEM ;
info - > request_mempool =
mempool_create ( info - > send_credit_target , mempool_alloc_slab ,
mempool_free_slab , info - > request_cache ) ;
if ( ! info - > request_mempool )
goto out1 ;
snprintf ( name , MAX_NAME_LEN , " smbd_response_%p " , info ) ;
info - > response_cache =
kmem_cache_create (
name ,
sizeof ( struct smbd_response ) +
info - > max_receive_size ,
0 , SLAB_HWCACHE_ALIGN , NULL ) ;
if ( ! info - > response_cache )
goto out2 ;
info - > response_mempool =
mempool_create ( info - > receive_credit_max , mempool_alloc_slab ,
mempool_free_slab , info - > response_cache ) ;
if ( ! info - > response_mempool )
goto out3 ;
snprintf ( name , MAX_NAME_LEN , " smbd_%p " , info ) ;
info - > workqueue = create_workqueue ( name ) ;
if ( ! info - > workqueue )
goto out4 ;
rc = allocate_receive_buffers ( info , info - > receive_credit_max ) ;
if ( rc ) {
log_rdma_event ( ERR , " failed to allocate receive buffers \n " ) ;
goto out5 ;
}
return 0 ;
out5 :
destroy_workqueue ( info - > workqueue ) ;
out4 :
mempool_destroy ( info - > response_mempool ) ;
out3 :
kmem_cache_destroy ( info - > response_cache ) ;
out2 :
mempool_destroy ( info - > request_mempool ) ;
out1 :
kmem_cache_destroy ( info - > request_cache ) ;
return - ENOMEM ;
}
/*
 * Create a SMBD connection, called by upper layer
 * server: not referenced in this function body
 * dstaddr: address of the peer to connect to
 * port: port to connect to
 * return value: the new connection, or NULL on any failure. On the failure
 * paths the resources set up so far are released through the labels at the
 * end of the function.
 */
2017-12-18 16:30:06 +03:00
static struct smbd_connection * _smbd_get_connection (
2017-11-05 04:17:24 +03:00
struct TCP_Server_Info * server , struct sockaddr * dstaddr , int port )
{
int rc ;
struct smbd_connection * info ;
struct rdma_conn_param conn_param ;
struct ib_qp_init_attr qp_attr ;
struct sockaddr_in * addr_in = ( struct sockaddr_in * ) dstaddr ;
2017-11-23 03:38:44 +03:00
struct ib_port_immutable port_immutable ;
u32 ird_ord_hdr [ 2 ] ;
2017-11-05 04:17:24 +03:00
info = kzalloc ( sizeof ( struct smbd_connection ) , GFP_KERNEL ) ;
if ( ! info )
return NULL ;
info - > transport_status = SMBD_CONNECTING ;
rc = smbd_ia_open ( info , dstaddr , port ) ;
if ( rc ) {
log_rdma_event ( INFO , " smbd_ia_open rc=%d \n " , rc ) ;
goto create_id_failed ;
}
/* Refuse credit settings the device cannot back with CQEs or QP WRs */
if ( smbd_send_credit_target > info - > id - > device - > attrs . max_cqe | |
smbd_send_credit_target > info - > id - > device - > attrs . max_qp_wr ) {
log_rdma_event ( ERR ,
" consider lowering send_credit_target = %d. "
" Possible CQE overrun, device "
" reporting max_cpe %d max_qp_wr %d \n " ,
smbd_send_credit_target ,
info - > id - > device - > attrs . max_cqe ,
info - > id - > device - > attrs . max_qp_wr ) ;
goto config_failed ;
}
if ( smbd_receive_credit_max > info - > id - > device - > attrs . max_cqe | |
smbd_receive_credit_max > info - > id - > device - > attrs . max_qp_wr ) {
log_rdma_event ( ERR ,
" consider lowering receive_credit_max = %d. "
" Possible CQE overrun, device "
" reporting max_cpe %d max_qp_wr %d \n " ,
smbd_receive_credit_max ,
info - > id - > device - > attrs . max_cqe ,
info - > id - > device - > attrs . max_qp_wr ) ;
goto config_failed ;
}
/* Copy the smbd_* settings into this connection */
info - > receive_credit_max = smbd_receive_credit_max ;
info - > send_credit_target = smbd_send_credit_target ;
info - > max_send_size = smbd_max_send_size ;
info - > max_fragmented_recv_size = smbd_max_fragmented_recv_size ;
info - > max_receive_size = smbd_max_receive_size ;
info - > keep_alive_interval = smbd_keep_alive_interval ;
2018-06-18 18:05:26 +03:00
/* Too few SGEs on the device is only warned about, not treated as fatal */
if ( info - > id - > device - > attrs . max_send_sge < SMBDIRECT_MAX_SGE ) {
log_rdma_event ( ERR ,
" warning: device max_send_sge = %d too small \n " ,
info - > id - > device - > attrs . max_send_sge ) ;
log_rdma_event ( ERR , " Queue Pair creation may fail \n " ) ;
}
if ( info - > id - > device - > attrs . max_recv_sge < SMBDIRECT_MAX_SGE ) {
log_rdma_event ( ERR ,
" warning: device max_recv_sge = %d too small \n " ,
info - > id - > device - > attrs . max_recv_sge ) ;
2017-11-05 04:17:24 +03:00
log_rdma_event ( ERR , " Queue Pair creation may fail \n " ) ;
}
/*
 * Both CQ pointers are reset to NULL on failure so that the
 * alloc_cq_failed path only frees the ones that were allocated
 */
info - > send_cq = NULL ;
info - > recv_cq = NULL ;
info - > send_cq = ib_alloc_cq ( info - > id - > device , info ,
info - > send_credit_target , 0 , IB_POLL_SOFTIRQ ) ;
if ( IS_ERR ( info - > send_cq ) ) {
info - > send_cq = NULL ;
goto alloc_cq_failed ;
}
info - > recv_cq = ib_alloc_cq ( info - > id - > device , info ,
info - > receive_credit_max , 0 , IB_POLL_SOFTIRQ ) ;
if ( IS_ERR ( info - > recv_cq ) ) {
info - > recv_cq = NULL ;
goto alloc_cq_failed ;
}
/* Queue pair sized by the credit settings, using the two CQs above */
memset ( & qp_attr , 0 , sizeof ( qp_attr ) ) ;
qp_attr . event_handler = smbd_qp_async_error_upcall ;
qp_attr . qp_context = info ;
qp_attr . cap . max_send_wr = info - > send_credit_target ;
qp_attr . cap . max_recv_wr = info - > receive_credit_max ;
qp_attr . cap . max_send_sge = SMBDIRECT_MAX_SGE ;
qp_attr . cap . max_recv_sge = SMBDIRECT_MAX_SGE ;
qp_attr . cap . max_inline_data = 0 ;
qp_attr . sq_sig_type = IB_SIGNAL_REQ_WR ;
qp_attr . qp_type = IB_QPT_RC ;
qp_attr . send_cq = info - > send_cq ;
qp_attr . recv_cq = info - > recv_cq ;
qp_attr . port_num = ~ 0 ;
rc = rdma_create_qp ( info - > id , info - > pd , & qp_attr ) ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_create_qp failed %i \n " , rc ) ;
goto create_qp_failed ;
}
memset ( & conn_param , 0 , sizeof ( conn_param ) ) ;
conn_param . initiator_depth = 0 ;
2017-11-23 03:38:44 +03:00
/* responder_resources = min(device max_qp_rd_atom, SMBD_CM_RESPONDER_RESOURCES) */
conn_param . responder_resources =
info - > id - > device - > attrs . max_qp_rd_atom
< SMBD_CM_RESPONDER_RESOURCES ?
info - > id - > device - > attrs . max_qp_rd_atom :
SMBD_CM_RESPONDER_RESOURCES ;
info - > responder_resources = conn_param . responder_resources ;
log_rdma_mr ( INFO , " responder_resources=%d \n " ,
info - > responder_resources ) ;
/* Need to send IRD/ORD in private data for iWARP */
2018-12-10 22:09:48 +03:00
/* NOTE(review): the return value of get_port_immutable() is not checked */
info - > id - > device - > ops . get_port_immutable (
2017-11-23 03:38:44 +03:00
info - > id - > device , info - > id - > port_num , & port_immutable ) ;
if ( port_immutable . core_cap_flags & RDMA_CORE_PORT_IWARP ) {
ird_ord_hdr [ 0 ] = info - > responder_resources ;
ird_ord_hdr [ 1 ] = 1 ;
conn_param . private_data = ird_ord_hdr ;
conn_param . private_data_len = sizeof ( ird_ord_hdr ) ;
} else {
conn_param . private_data = NULL ;
conn_param . private_data_len = 0 ;
}
2017-11-05 04:17:24 +03:00
conn_param . retry_count = SMBD_CM_RETRY ;
conn_param . rnr_retry_count = SMBD_CM_RNR_RETRY ;
conn_param . flow_control = 0 ;
init_waitqueue_head ( & info - > wait_destroy ) ;
/* NOTE(review): the address is logged as IPv4 (%pI4) whatever dstaddr's family is */
log_rdma_event ( INFO , " connecting to IP %pI4 port %d \n " ,
& addr_in - > sin_addr , port ) ;
init_waitqueue_head ( & info - > conn_wait ) ;
rc = rdma_connect ( info - > id , & conn_param ) ;
if ( rc ) {
log_rdma_event ( ERR , " rdma_connect() failed with %i \n " , rc ) ;
goto rdma_connect_failed ;
}
/*
 * Wait for transport_status to leave SMBD_CONNECTING. The return value
 * of the wait is not checked; an interrupted wait is caught by the
 * status check that follows.
 */
wait_event_interruptible (
info - > conn_wait , info - > transport_status ! = SMBD_CONNECTING ) ;
if ( info - > transport_status ! = SMBD_CONNECTED ) {
log_rdma_event ( ERR , " rdma_connect failed port=%d \n " , port ) ;
goto rdma_connect_failed ;
}
log_rdma_event ( INFO , " rdma_connect connected \n " ) ;
rc = allocate_caches_and_workqueue ( info ) ;
if ( rc ) {
log_rdma_event ( ERR , " cache allocation failed \n " ) ;
goto allocate_cache_failed ;
}
/* Set up wait queues, work items and counters, and arm the idle timer */
init_waitqueue_head ( & info - > wait_send_queue ) ;
init_waitqueue_head ( & info - > wait_reassembly_queue ) ;
INIT_DELAYED_WORK ( & info - > idle_timer_work , idle_connection_timer ) ;
INIT_DELAYED_WORK ( & info - > send_immediate_work , send_immediate_work ) ;
queue_delayed_work ( info - > workqueue , & info - > idle_timer_work ,
info - > keep_alive_interval * HZ ) ;
2017-11-23 03:38:42 +03:00
init_waitqueue_head ( & info - > wait_smbd_send_pending ) ;
info - > smbd_send_pending = 0 ;
2017-11-23 03:38:40 +03:00
init_waitqueue_head ( & info - > wait_smbd_recv_pending ) ;
info - > smbd_recv_pending = 0 ;
2017-11-05 04:17:24 +03:00
init_waitqueue_head ( & info - > wait_send_pending ) ;
atomic_set ( & info - > send_pending , 0 ) ;
init_waitqueue_head ( & info - > wait_send_payload_pending ) ;
atomic_set ( & info - > send_payload_pending , 0 ) ;
INIT_WORK ( & info - > disconnect_work , smbd_disconnect_rdma_work ) ;
INIT_WORK ( & info - > destroy_work , smbd_destroy_rdma_work ) ;
INIT_WORK ( & info - > recv_done_work , smbd_recv_done_work ) ;
INIT_WORK ( & info - > post_send_credits_work , smbd_post_send_credits ) ;
info - > new_credits_offered = 0 ;
spin_lock_init ( & info - > lock_new_credits_offered ) ;
rc = smbd_negotiate ( info ) ;
if ( rc ) {
log_rdma_event ( ERR , " smbd_negotiate rc=%d \n " , rc ) ;
goto negotiation_failed ;
}
2017-11-23 03:38:44 +03:00
rc = allocate_mr_list ( info ) ;
if ( rc ) {
log_rdma_mr ( ERR , " memory registration allocation failed \n " ) ;
goto allocate_mr_failed ;
}
2017-11-05 04:17:24 +03:00
return info ;
2017-11-23 03:38:44 +03:00
allocate_mr_failed :
/* At this point, a full transport shutdown is needed; smbd_destroy() frees info */
smbd_destroy ( info ) ;
return NULL ;
2017-11-05 04:17:24 +03:00
negotiation_failed :
/* Stop the idle timer before the workqueue it runs on is destroyed */
cancel_delayed_work_sync ( & info - > idle_timer_work ) ;
destroy_caches_and_workqueue ( info ) ;
info - > transport_status = SMBD_NEGOTIATE_FAILED ;
init_waitqueue_head ( & info - > conn_wait ) ;
rdma_disconnect ( info - > id ) ;
/* Block until the transport reports SMBD_DISCONNECTED before freeing the QP */
wait_event ( info - > conn_wait ,
info - > transport_status = = SMBD_DISCONNECTED ) ;
allocate_cache_failed :
rdma_connect_failed :
rdma_destroy_qp ( info - > id ) ;
create_qp_failed :
alloc_cq_failed :
/* Only free the CQs that were successfully allocated */
if ( info - > send_cq )
ib_free_cq ( info - > send_cq ) ;
if ( info - > recv_cq )
ib_free_cq ( info - > recv_cq ) ;
config_failed :
ib_dealloc_pd ( info - > pd ) ;
rdma_destroy_id ( info - > id ) ;
create_id_failed :
kfree ( info ) ;
return NULL ;
}
2017-11-18 04:26:52 +03:00
/*
 * Create a SMBD connection to dstaddr
 * SMBD_PORT is tried first; if that fails SMB_PORT is tried once.
 * return value: the new connection, or NULL if no attempt succeeded
 */
struct smbd_connection *smbd_get_connection(
	struct TCP_Server_Info *server, struct sockaddr *dstaddr)
{
	struct smbd_connection *conn;
	int port = SMBD_PORT;

	for (;;) {
		conn = _smbd_get_connection(server, dstaddr, port);
		if (conn || port != SMBD_PORT)
			break;

		/* Try SMB_PORT if SMBD_PORT doesn't work */
		port = SMB_PORT;
	}

	return conn;
}
2017-11-23 03:38:40 +03:00
/*
 * Receive data from receive reassembly queue
 * All the incoming data packets are placed in reassembly queue
 * buf: the buffer to read data into
 * size: the length of data to read
 * return value: actual data read, or -ENODEV if the transport is not
 * connected or the wait for data was interrupted. The function waits until
 * at least size bytes are queued, so a successful read returns size bytes
 * (or 4 for the RFC1002 length special case below).
 * Note: this implementation copies the data from reassembly queue to receive
 * buffers used by upper layer. This is not the optimal code path. A better way
 * to do it is to not have upper layer allocate its receive buffers but rather
 * borrow the buffer from reassembly queue, and return it after data is
 * consumed. But this will require more changes to upper layer code, and also
 * need to consider packet boundaries while they are still being reassembled.
 */
2018-01-25 08:07:41 +03:00
static int smbd_recv_buf ( struct smbd_connection * info , char * buf ,
unsigned int size )
2017-11-23 03:38:40 +03:00
{
struct smbd_response * response ;
struct smbd_data_transfer * data_transfer ;
int to_copy , to_read , data_read , offset ;
u32 data_length , remaining_data_length , data_offset ;
int rc ;
again :
if ( info - > transport_status ! = SMBD_CONNECTED ) {
log_read ( ERR , " disconnected \n " ) ;
return - ENODEV ;
}
/*
 * No need to hold the reassembly queue lock all the time as we are
 * the only one reading from the front of the queue. The transport
 * may add more entries to the back of the queue at the same time
 */
log_read ( INFO , " size=%d info->reassembly_data_length=%d \n " , size ,
info - > reassembly_data_length ) ;
if ( info - > reassembly_data_length > = size ) {
int queue_length ;
int queue_removed = 0 ;
/*
 * Need to make sure reassembly_data_length is read before
 * reading reassembly_queue_length and calling
 * _get_first_reassembly. This call is lock free
 * as we never read at the end of the queue which is being
 * updated in SOFTIRQ as more data is received
 */
virt_rmb ( ) ;
queue_length = info - > reassembly_queue_length ;
data_read = 0 ;
to_read = size ;
/* Resume inside the first entry where the previous read stopped */
offset = info - > first_entry_offset ;
while ( data_read < size ) {
response = _get_first_reassembly ( info ) ;
data_transfer = smbd_response_payload ( response ) ;
data_length = le32_to_cpu ( data_transfer - > data_length ) ;
remaining_data_length =
le32_to_cpu (
data_transfer - > remaining_data_length ) ;
data_offset = le32_to_cpu ( data_transfer - > data_offset ) ;
/*
 * The upper layer expects RFC1002 length at the
 * beginning of the payload. Return it to indicate
 * the total length of the packet. This minimizes the
 * change to upper layer packet processing logic. This
 * will eventually be removed when an intermediate
 * transport layer is added.
 * Nothing is consumed from the queue on this path: the
 * jump below skips the length accounting after the loop.
 */
if ( response - > first_segment & & size = = 4 ) {
unsigned int rfc1002_len =
data_length + remaining_data_length ;
* ( ( __be32 * ) buf ) = cpu_to_be32 ( rfc1002_len ) ;
data_read = 4 ;
response - > first_segment = false ;
log_read ( INFO , " returning rfc1002 length %d \n " ,
rfc1002_len ) ;
goto read_rfc1002_done ;
}
/* Copy what is left in this entry, capped by what is still wanted */
to_copy = min_t ( int , data_length - offset , to_read ) ;
memcpy (
buf + data_read ,
( char * ) data_transfer + data_offset + offset ,
to_copy ) ;
/* move on to the next buffer? */
if ( to_copy = = data_length - offset ) {
queue_length - - ;
/*
 * No need to lock if we are not at the
 * end of the queue
 */
2018-02-04 04:45:07 +03:00
if ( queue_length )
list_del ( & response - > list ) ;
else {
2018-01-10 23:51:05 +03:00
spin_lock_irq (
& info - > reassembly_queue_lock ) ;
2018-02-04 04:45:07 +03:00
list_del ( & response - > list ) ;
2018-01-10 23:51:05 +03:00
spin_unlock_irq (
& info - > reassembly_queue_lock ) ;
2018-02-04 04:45:07 +03:00
}
queue_removed + + ;
2017-11-23 03:38:40 +03:00
info - > count_reassembly_queue - - ;
info - > count_dequeue_reassembly_queue + + ;
/* The entry is fully consumed, hand the buffer back */
put_receive_buffer ( info , response ) ;
offset = 0 ;
log_read ( INFO , " put_receive_buffer offset=0 \n " ) ;
} else
offset + = to_copy ;
to_read - = to_copy ;
data_read + = to_copy ;
log_read ( INFO , " _get_first_reassembly memcpy %d bytes "
" data_transfer_length-offset=%d after that "
" to_read=%d data_read=%d offset=%d \n " ,
to_copy , data_length - offset ,
to_read , data_read , offset ) ;
}
2018-01-10 23:51:05 +03:00
/* Publish the consumed byte and entry counts under the queue lock */
spin_lock_irq ( & info - > reassembly_queue_lock ) ;
2017-11-23 03:38:40 +03:00
info - > reassembly_data_length - = data_read ;
info - > reassembly_queue_length - = queue_removed ;
2018-01-10 23:51:05 +03:00
spin_unlock_irq ( & info - > reassembly_queue_lock ) ;
2017-11-23 03:38:40 +03:00
info - > first_entry_offset = offset ;
log_read ( INFO , " returning to thread data_read=%d "
" reassembly_data_length=%d first_entry_offset=%d \n " ,
data_read , info - > reassembly_data_length ,
info - > first_entry_offset ) ;
read_rfc1002_done :
return data_read ;
}
/* Not enough data queued yet: sleep until there is, or the transport drops */
log_read ( INFO , " wait_event on more data \n " ) ;
rc = wait_event_interruptible (
info - > wait_reassembly_queue ,
info - > reassembly_data_length > = size | |
info - > transport_status ! = SMBD_CONNECTED ) ;
/* Don't return any data if interrupted */
if ( rc )
return - ENODEV ;
goto again ;
}
/*
* Receive a page from receive reassembly queue
* page : the page to read data into
* to_read : the length of data to read
* return value : actual data read
*/
2018-01-25 08:07:41 +03:00
static int smbd_recv_page ( struct smbd_connection * info ,
2018-05-30 22:48:01 +03:00
struct page * page , unsigned int page_offset ,
unsigned int to_read )
2017-11-23 03:38:40 +03:00
{
int ret ;
char * to_address ;
2018-05-30 22:48:01 +03:00
void * page_address ;
2017-11-23 03:38:40 +03:00
/* make sure we have the page ready for read */
ret = wait_event_interruptible (
info - > wait_reassembly_queue ,
info - > reassembly_data_length > = to_read | |
info - > transport_status ! = SMBD_CONNECTED ) ;
if ( ret )
2018-05-30 22:48:01 +03:00
return ret ;
2017-11-23 03:38:40 +03:00
/* now we can read from reassembly queue and not sleep */
2018-05-30 22:48:01 +03:00
page_address = kmap_atomic ( page ) ;
to_address = ( char * ) page_address + page_offset ;
2017-11-23 03:38:40 +03:00
log_read ( INFO , " reading from page=%p address=%p to_read=%d \n " ,
page , to_address , to_read ) ;
ret = smbd_recv_buf ( info , to_address , to_read ) ;
2018-05-30 22:48:01 +03:00
kunmap_atomic ( page_address ) ;
2017-11-23 03:38:40 +03:00
return ret ;
}
/*
* Receive data from transport
* msg : a msghdr point to the buffer , can be ITER_KVEC or ITER_BVEC
* return : total bytes read , or 0. SMB Direct will not do partial read .
*/
int smbd_recv ( struct smbd_connection * info , struct msghdr * msg )
{
char * buf ;
struct page * page ;
2018-05-30 22:48:01 +03:00
unsigned int to_read , page_offset ;
2017-11-23 03:38:40 +03:00
int rc ;
info - > smbd_recv_pending + + ;
2018-10-22 15:07:28 +03:00
if ( iov_iter_rw ( & msg - > msg_iter ) = = WRITE ) {
/* It's a bug in upper layer to get there */
cifs_dbg ( VFS , " CIFS: invalid msg iter dir %u \n " ,
iov_iter_rw ( & msg - > msg_iter ) ) ;
rc = - EINVAL ;
goto out ;
}
switch ( iov_iter_type ( & msg - > msg_iter ) ) {
case ITER_KVEC :
2017-11-23 03:38:40 +03:00
buf = msg - > msg_iter . kvec - > iov_base ;
to_read = msg - > msg_iter . kvec - > iov_len ;
rc = smbd_recv_buf ( info , buf , to_read ) ;
break ;
2018-10-22 15:07:28 +03:00
case ITER_BVEC :
2017-11-23 03:38:40 +03:00
page = msg - > msg_iter . bvec - > bv_page ;
2018-05-30 22:48:01 +03:00
page_offset = msg - > msg_iter . bvec - > bv_offset ;
2017-11-23 03:38:40 +03:00
to_read = msg - > msg_iter . bvec - > bv_len ;
2018-05-30 22:48:01 +03:00
rc = smbd_recv_page ( info , page , page_offset , to_read ) ;
2017-11-23 03:38:40 +03:00
break ;
default :
/* It's a bug in upper layer to get there */
cifs_dbg ( VFS , " CIFS: invalid msg type %d \n " ,
2018-10-22 15:07:28 +03:00
iov_iter_type ( & msg - > msg_iter ) ) ;
2018-05-30 22:48:01 +03:00
rc = - EINVAL ;
2017-11-23 03:38:40 +03:00
}
2018-10-22 15:07:28 +03:00
out :
2017-11-23 03:38:40 +03:00
info - > smbd_recv_pending - - ;
wake_up ( & info - > wait_smbd_recv_pending ) ;
/* SMBDirect will read it all or nothing */
if ( rc > 0 )
msg - > msg_iter . count = 0 ;
return rc ;
}
2017-11-23 03:38:42 +03:00
/*
* Send data to transport
* Each rqst is transported as a SMBDirect payload
* rqst : the data to write
* return value : 0 if successfully write , otherwise error code
*/
2018-06-28 03:47:14 +03:00
int smbd_send ( struct TCP_Server_Info * server , struct smb_rqst * rqst )
2017-11-23 03:38:42 +03:00
{
2018-06-28 03:47:14 +03:00
struct smbd_connection * info = server - > smbd_conn ;
2017-11-23 03:38:42 +03:00
struct kvec vec ;
int nvecs ;
int size ;
2018-06-15 16:22:44 +03:00
unsigned int buflen , remaining_data_length ;
2017-11-23 03:38:42 +03:00
int start , i , j ;
int max_iov_size =
info - > max_send_size - sizeof ( struct smbd_data_transfer ) ;
2018-04-17 22:17:07 +03:00
struct kvec * iov ;
2017-11-23 03:38:42 +03:00
int rc ;
info - > smbd_send_pending + + ;
if ( info - > transport_status ! = SMBD_CONNECTED ) {
rc = - ENODEV ;
goto done ;
}
/*
2018-04-17 22:17:07 +03:00
* Skip the RFC1002 length defined in MS - SMB2 section 2.1
* It is used only for TCP transport in the iov [ 0 ]
2017-11-23 03:38:42 +03:00
* In future we may want to add a transport layer under protocol
* layer so this will only be issued to TCP transport
*/
2018-04-17 22:17:07 +03:00
if ( rqst - > rq_iov [ 0 ] . iov_len ! = 4 ) {
log_write ( ERR , " expected the pdu length in 1st iov, but got %zu \n " , rqst - > rq_iov [ 0 ] . iov_len ) ;
return - EINVAL ;
}
2017-11-23 03:38:42 +03:00
2018-05-30 22:48:00 +03:00
/*
* Add in the page array if there is one . The caller needs to set
* rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
* ends at page boundary
*/
2018-06-28 03:47:14 +03:00
buflen = smb_rqst_len ( server , rqst ) ;
2017-11-23 03:38:42 +03:00
if ( buflen + sizeof ( struct smbd_data_transfer ) >
info - > max_fragmented_send_size ) {
log_write ( ERR , " payload size %d > max size %d \n " ,
buflen , info - > max_fragmented_send_size ) ;
rc = - EINVAL ;
goto done ;
}
2018-06-15 16:22:44 +03:00
iov = & rqst - > rq_iov [ 1 ] ;
2018-04-17 22:17:10 +03:00
cifs_dbg ( FYI , " Sending smb (RDMA): smb_len=%u \n " , buflen ) ;
for ( i = 0 ; i < rqst - > rq_nvec - 1 ; i + + )
dump_smb ( iov [ i ] . iov_base , iov [ i ] . iov_len ) ;
2017-11-23 03:38:42 +03:00
remaining_data_length = buflen ;
log_write ( INFO , " rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
" rq_tailsz=%d buflen=%d \n " ,
rqst - > rq_nvec , rqst - > rq_npages , rqst - > rq_pagesz ,
rqst - > rq_tailsz , buflen ) ;
start = i = iov [ 0 ] . iov_len ? 0 : 1 ;
buflen = 0 ;
while ( true ) {
buflen + = iov [ i ] . iov_len ;
if ( buflen > max_iov_size ) {
if ( i > start ) {
remaining_data_length - =
( buflen - iov [ i ] . iov_len ) ;
log_write ( INFO , " sending iov[] from start=%d "
" i=%d nvecs=%d "
" remaining_data_length=%d \n " ,
start , i , i - start ,
remaining_data_length ) ;
rc = smbd_post_send_data (
info , & iov [ start ] , i - start ,
remaining_data_length ) ;
if ( rc )
goto done ;
} else {
/* iov[start] is too big, break it */
nvecs = ( buflen + max_iov_size - 1 ) / max_iov_size ;
log_write ( INFO , " iov[%d] iov_base=%p buflen=%d "
" break to %d vectors \n " ,
start , iov [ start ] . iov_base ,
buflen , nvecs ) ;
for ( j = 0 ; j < nvecs ; j + + ) {
vec . iov_base =
( char * ) iov [ start ] . iov_base +
j * max_iov_size ;
vec . iov_len = max_iov_size ;
if ( j = = nvecs - 1 )
vec . iov_len =
buflen -
max_iov_size * ( nvecs - 1 ) ;
remaining_data_length - = vec . iov_len ;
log_write ( INFO ,
" sending vec j=%d iov_base=%p "
" iov_len=%zu "
" remaining_data_length=%d \n " ,
j , vec . iov_base , vec . iov_len ,
remaining_data_length ) ;
rc = smbd_post_send_data (
info , & vec , 1 ,
remaining_data_length ) ;
if ( rc )
goto done ;
}
i + + ;
2018-04-17 22:17:07 +03:00
if ( i = = rqst - > rq_nvec - 1 )
2018-04-17 22:17:05 +03:00
break ;
2017-11-23 03:38:42 +03:00
}
start = i ;
buflen = 0 ;
} else {
i + + ;
2018-04-17 22:17:07 +03:00
if ( i = = rqst - > rq_nvec - 1 ) {
2017-11-23 03:38:42 +03:00
/* send out all remaining vecs */
remaining_data_length - = buflen ;
log_write ( INFO ,
" sending iov[] from start=%d i=%d "
" nvecs=%d remaining_data_length=%d \n " ,
start , i , i - start ,
remaining_data_length ) ;
rc = smbd_post_send_data ( info , & iov [ start ] ,
i - start , remaining_data_length ) ;
if ( rc )
goto done ;
break ;
}
}
log_write ( INFO , " looping i=%d buflen=%d \n " , i , buflen ) ;
}
/* now sending pages if there are any */
for ( i = 0 ; i < rqst - > rq_npages ; i + + ) {
2018-05-30 22:48:00 +03:00
unsigned int offset ;
rqst_page_get_length ( rqst , i , & buflen , & offset ) ;
2017-11-23 03:38:42 +03:00
nvecs = ( buflen + max_iov_size - 1 ) / max_iov_size ;
log_write ( INFO , " sending pages buflen=%d nvecs=%d \n " ,
buflen , nvecs ) ;
for ( j = 0 ; j < nvecs ; j + + ) {
size = max_iov_size ;
if ( j = = nvecs - 1 )
size = buflen - j * max_iov_size ;
remaining_data_length - = size ;
log_write ( INFO , " sending pages i=%d offset=%d size=%d "
" remaining_data_length=%d \n " ,
2018-05-30 22:48:00 +03:00
i , j * max_iov_size + offset , size ,
remaining_data_length ) ;
2017-11-23 03:38:42 +03:00
rc = smbd_post_send_page (
2018-05-30 22:48:00 +03:00
info , rqst - > rq_pages [ i ] ,
j * max_iov_size + offset ,
2017-11-23 03:38:42 +03:00
size , remaining_data_length ) ;
if ( rc )
goto done ;
}
}
done :
/*
* As an optimization , we don ' t wait for individual I / O to finish
* before sending the next one .
* Send them all and wait for pending send count to get to 0
* that means all the I / Os have been out and we are good to return
*/
wait_event ( info - > wait_send_payload_pending ,
atomic_read ( & info - > send_payload_pending ) = = 0 ) ;
info - > smbd_send_pending - - ;
wake_up ( & info - > wait_smbd_send_pending ) ;
return rc ;
}
2017-11-23 03:38:44 +03:00
static void register_mr_done ( struct ib_cq * cq , struct ib_wc * wc )
{
struct smbd_mr * mr ;
struct ib_cqe * cqe ;
if ( wc - > status ) {
log_rdma_mr ( ERR , " status=%d \n " , wc - > status ) ;
cqe = wc - > wr_cqe ;
mr = container_of ( cqe , struct smbd_mr , cqe ) ;
smbd_disconnect_rdma_connection ( mr - > conn ) ;
}
}
/*
* The work queue function that recovers MRs
* We need to call ib_dereg_mr ( ) and ib_alloc_mr ( ) before this MR can be used
* again . Both calls are slow , so finish them in a workqueue . This will not
* block I / O path .
* There is one workqueue that recovers MRs , there is no need to lock as the
* I / O requests calling smbd_register_mr will never update the links in the
* mr_list .
*/
static void smbd_mr_recovery_work ( struct work_struct * work )
{
struct smbd_connection * info =
container_of ( work , struct smbd_connection , mr_recovery_work ) ;
struct smbd_mr * smbdirect_mr ;
int rc ;
list_for_each_entry ( smbdirect_mr , & info - > mr_list , list ) {
2018-09-21 00:18:39 +03:00
if ( smbdirect_mr - > state = = MR_INVALIDATED )
ib_dma_unmap_sg (
info - > id - > device , smbdirect_mr - > sgl ,
smbdirect_mr - > sgl_count ,
smbdirect_mr - > dir ) ;
else if ( smbdirect_mr - > state = = MR_ERROR ) {
2017-11-23 03:38:44 +03:00
2018-05-30 22:48:02 +03:00
/* recover this MR entry */
rc = ib_dereg_mr ( smbdirect_mr - > mr ) ;
if ( rc ) {
log_rdma_mr ( ERR ,
" ib_dereg_mr failed rc=%x \n " ,
rc ) ;
smbd_disconnect_rdma_connection ( info ) ;
continue ;
}
smbdirect_mr - > mr = ib_alloc_mr (
info - > pd , info - > mr_type ,
info - > max_frmr_depth ) ;
if ( IS_ERR ( smbdirect_mr - > mr ) ) {
log_rdma_mr ( ERR ,
" ib_alloc_mr failed mr_type=%x "
" max_frmr_depth=%x \n " ,
info - > mr_type ,
info - > max_frmr_depth ) ;
smbd_disconnect_rdma_connection ( info ) ;
continue ;
}
2018-09-21 00:18:39 +03:00
} else
/* This MR is being used, don't recover it */
continue ;
2018-05-30 22:48:02 +03:00
2018-09-21 00:18:39 +03:00
smbdirect_mr - > state = MR_READY ;
2017-11-23 03:38:44 +03:00
2018-09-21 00:18:39 +03:00
/* smbdirect_mr->state is updated by this function
* and is read and updated by I / O issuing CPUs trying
* to get a MR , the call to atomic_inc_return
* implicates a memory barrier and guarantees this
* value is updated before waking up any calls to
* get_mr ( ) from the I / O issuing CPUs
*/
if ( atomic_inc_return ( & info - > mr_ready_count ) = = 1 )
wake_up_interruptible ( & info - > wait_mr ) ;
2017-11-23 03:38:44 +03:00
}
}
static void destroy_mr_list ( struct smbd_connection * info )
{
struct smbd_mr * mr , * tmp ;
cancel_work_sync ( & info - > mr_recovery_work ) ;
list_for_each_entry_safe ( mr , tmp , & info - > mr_list , list ) {
if ( mr - > state = = MR_INVALIDATED )
ib_dma_unmap_sg ( info - > id - > device , mr - > sgl ,
mr - > sgl_count , mr - > dir ) ;
ib_dereg_mr ( mr - > mr ) ;
kfree ( mr - > sgl ) ;
kfree ( mr ) ;
}
}
/*
* Allocate MRs used for RDMA read / write
* The number of MRs will not exceed hardware capability in responder_resources
* All MRs are kept in mr_list . The MR can be recovered after it ' s used
* Recovery is done in smbd_mr_recovery_work . The content of list entry changes
* as MRs are used and recovered for I / O , but the list links will not change
*/
static int allocate_mr_list ( struct smbd_connection * info )
{
int i ;
struct smbd_mr * smbdirect_mr , * tmp ;
INIT_LIST_HEAD ( & info - > mr_list ) ;
init_waitqueue_head ( & info - > wait_mr ) ;
spin_lock_init ( & info - > mr_list_lock ) ;
atomic_set ( & info - > mr_ready_count , 0 ) ;
atomic_set ( & info - > mr_used_count , 0 ) ;
init_waitqueue_head ( & info - > wait_for_mr_cleanup ) ;
/* Allocate more MRs (2x) than hardware responder_resources */
for ( i = 0 ; i < info - > responder_resources * 2 ; i + + ) {
smbdirect_mr = kzalloc ( sizeof ( * smbdirect_mr ) , GFP_KERNEL ) ;
if ( ! smbdirect_mr )
goto out ;
smbdirect_mr - > mr = ib_alloc_mr ( info - > pd , info - > mr_type ,
info - > max_frmr_depth ) ;
if ( IS_ERR ( smbdirect_mr - > mr ) ) {
log_rdma_mr ( ERR , " ib_alloc_mr failed mr_type=%x "
" max_frmr_depth=%x \n " ,
info - > mr_type , info - > max_frmr_depth ) ;
goto out ;
}
smbdirect_mr - > sgl = kcalloc (
info - > max_frmr_depth ,
sizeof ( struct scatterlist ) ,
GFP_KERNEL ) ;
if ( ! smbdirect_mr - > sgl ) {
log_rdma_mr ( ERR , " failed to allocate sgl \n " ) ;
ib_dereg_mr ( smbdirect_mr - > mr ) ;
goto out ;
}
smbdirect_mr - > state = MR_READY ;
smbdirect_mr - > conn = info ;
list_add_tail ( & smbdirect_mr - > list , & info - > mr_list ) ;
atomic_inc ( & info - > mr_ready_count ) ;
}
INIT_WORK ( & info - > mr_recovery_work , smbd_mr_recovery_work ) ;
return 0 ;
out :
kfree ( smbdirect_mr ) ;
list_for_each_entry_safe ( smbdirect_mr , tmp , & info - > mr_list , list ) {
ib_dereg_mr ( smbdirect_mr - > mr ) ;
kfree ( smbdirect_mr - > sgl ) ;
kfree ( smbdirect_mr ) ;
}
return - ENOMEM ;
}
/*
* Get a MR from mr_list . This function waits until there is at least one
* MR available in the list . It may access the list while the
* smbd_mr_recovery_work is recovering the MR list . This doesn ' t need a lock
* as they never modify the same places . However , there may be several CPUs
* issueing I / O trying to get MR at the same time , mr_list_lock is used to
* protect this situation .
*/
static struct smbd_mr * get_mr ( struct smbd_connection * info )
{
struct smbd_mr * ret ;
int rc ;
again :
rc = wait_event_interruptible ( info - > wait_mr ,
atomic_read ( & info - > mr_ready_count ) | |
info - > transport_status ! = SMBD_CONNECTED ) ;
if ( rc ) {
log_rdma_mr ( ERR , " wait_event_interruptible rc=%x \n " , rc ) ;
return NULL ;
}
if ( info - > transport_status ! = SMBD_CONNECTED ) {
log_rdma_mr ( ERR , " info->transport_status=%x \n " ,
info - > transport_status ) ;
return NULL ;
}
spin_lock ( & info - > mr_list_lock ) ;
list_for_each_entry ( ret , & info - > mr_list , list ) {
if ( ret - > state = = MR_READY ) {
ret - > state = MR_REGISTERED ;
spin_unlock ( & info - > mr_list_lock ) ;
atomic_dec ( & info - > mr_ready_count ) ;
atomic_inc ( & info - > mr_used_count ) ;
return ret ;
}
}
spin_unlock ( & info - > mr_list_lock ) ;
/*
* It is possible that we could fail to get MR because other processes may
* try to acquire a MR at the same time . If this is the case , retry it .
*/
goto again ;
}
/*
 * Register memory for RDMA read/write
 * info: the connection the memory is registered on
 * pages[]: the list of pages to register memory with
 * num_pages: the number of pages to register
 * offset: the byte offset of the data in the first page
 * tailsz: with several pages, the bytes to register in the last page
 *         (a full page if zero); with a single page, the bytes to register
 *         in that page
 * writing: true if this is a RDMA write (SMB read), false for RDMA read
 * need_invalidate: true if this MR needs to be locally invalidated after I/O
 * return value: the MR registered, NULL if failed.
 */
struct smbd_mr *smbd_register_mr(
	struct smbd_connection *info, struct page *pages[], int num_pages,
	int offset, int tailsz, bool writing, bool need_invalidate)
{
	struct smbd_mr *entry;
	struct ib_reg_wr *wr;
	enum dma_data_direction dir;
	int rc, i;

	if (num_pages > info->max_frmr_depth) {
		log_rdma_mr(ERR, " num_pages=%d max_frmr_depth=%d \n ",
			num_pages, info->max_frmr_depth);
		return NULL;
	}

	entry = get_mr(info);
	if (!entry) {
		log_rdma_mr(ERR, " get_mr returning NULL \n ");
		return NULL;
	}

	entry->need_invalidate = need_invalidate;
	entry->sgl_count = num_pages;
	sg_init_table(entry->sgl, num_pages);

	log_rdma_mr(INFO, " num_pages=0x%x offset=0x%x tailsz=0x%x \n ",
			num_pages, offset, tailsz);

	if (num_pages == 1) {
		/* A single page carries both the offset and the length */
		sg_set_page(&entry->sgl[0], pages[0], tailsz, offset);
	} else {
		/* We have at least two pages to register */
		sg_set_page(&entry->sgl[0], pages[0],
			    PAGE_SIZE - offset, offset);
		for (i = 1; i < num_pages - 1; i++)
			sg_set_page(&entry->sgl[i], pages[i], PAGE_SIZE, 0);
		/* i now indexes the last page */
		sg_set_page(&entry->sgl[i], pages[i],
			    tailsz ? tailsz : PAGE_SIZE, 0);
	}

	dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	entry->dir = dir;
	rc = ib_dma_map_sg(info->id->device, entry->sgl, num_pages, dir);
	if (!rc) {
		log_rdma_mr(ERR, " ib_dma_map_sg num_pages=%x dir=%x rc=%x \n ",
			num_pages, dir, rc);
		goto dma_map_error;
	}

	rc = ib_map_mr_sg(entry->mr, entry->sgl, num_pages,
		NULL, PAGE_SIZE);
	if (rc != num_pages) {
		log_rdma_mr(ERR,
			" ib_map_mr_sg failed rc = %d num_pages = %x \n ",
			rc, num_pages);
		goto map_mr_error;
	}

	ib_update_fast_reg_key(entry->mr, ib_inc_rkey(entry->mr->rkey));

	wr = &entry->wr;
	wr->wr.opcode = IB_WR_REG_MR;
	entry->cqe.done = register_mr_done;
	wr->wr.wr_cqe = &entry->cqe;
	wr->wr.num_sge = 0;
	wr->wr.send_flags = IB_SEND_SIGNALED;
	wr->mr = entry->mr;
	wr->key = entry->mr->rkey;
	if (writing)
		wr->access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
	else
		wr->access = IB_ACCESS_REMOTE_READ;

	/*
	 * There is no need for waiting for completion on ib_post_send
	 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
	 * on the next ib_post_send when we actually send I/O to remote peer
	 */
	rc = ib_post_send(info->id->qp, &wr->wr, NULL);
	if (rc) {
		log_rdma_mr(ERR, " ib_post_send failed rc=%x reg_wr->key=%x \n ",
			rc, wr->key);
		goto map_mr_error;
	}

	return entry;

	/* If all failed, attempt to recover this MR by setting it MR_ERROR */
map_mr_error:
	ib_dma_unmap_sg(info->id->device, entry->sgl,
		entry->sgl_count, entry->dir);

dma_map_error:
	entry->state = MR_ERROR;
	if (atomic_dec_and_test(&info->mr_used_count))
		wake_up(&info->wait_for_mr_cleanup);

	smbd_disconnect_rdma_connection(info);

	return NULL;
}
static void local_inv_done ( struct ib_cq * cq , struct ib_wc * wc )
{
struct smbd_mr * smbdirect_mr ;
struct ib_cqe * cqe ;
cqe = wc - > wr_cqe ;
smbdirect_mr = container_of ( cqe , struct smbd_mr , cqe ) ;
smbdirect_mr - > state = MR_INVALIDATED ;
if ( wc - > status ! = IB_WC_SUCCESS ) {
log_rdma_mr ( ERR , " invalidate failed status=%x \n " , wc - > status ) ;
smbdirect_mr - > state = MR_ERROR ;
}
complete ( & smbdirect_mr - > invalidate_done ) ;
}
/*
* Deregister a MR after I / O is done
* This function may wait if remote invalidation is not used
* and we have to locally invalidate the buffer to prevent data is being
* modified by remote peer after upper layer consumes it
*/
int smbd_deregister_mr ( struct smbd_mr * smbdirect_mr )
{
2018-07-18 19:25:25 +03:00
struct ib_send_wr * wr ;
2017-11-23 03:38:44 +03:00
struct smbd_connection * info = smbdirect_mr - > conn ;
int rc = 0 ;
if ( smbdirect_mr - > need_invalidate ) {
/* Need to finish local invalidation before returning */
wr = & smbdirect_mr - > inv_wr ;
wr - > opcode = IB_WR_LOCAL_INV ;
smbdirect_mr - > cqe . done = local_inv_done ;
wr - > wr_cqe = & smbdirect_mr - > cqe ;
wr - > num_sge = 0 ;
wr - > ex . invalidate_rkey = smbdirect_mr - > mr - > rkey ;
wr - > send_flags = IB_SEND_SIGNALED ;
init_completion ( & smbdirect_mr - > invalidate_done ) ;
2018-07-18 19:25:25 +03:00
rc = ib_post_send ( info - > id - > qp , wr , NULL ) ;
2017-11-23 03:38:44 +03:00
if ( rc ) {
log_rdma_mr ( ERR , " ib_post_send failed rc=%x \n " , rc ) ;
smbd_disconnect_rdma_connection ( info ) ;
goto done ;
}
wait_for_completion ( & smbdirect_mr - > invalidate_done ) ;
smbdirect_mr - > need_invalidate = false ;
} else
/*
* For remote invalidation , just set it to MR_INVALIDATED
* and defer to mr_recovery_work to recover the MR for next use
*/
smbdirect_mr - > state = MR_INVALIDATED ;
/*
* Schedule the work to do MR recovery for future I / Os
* MR recovery is slow and we don ' t want it to block the current I / O
*/
queue_work ( info - > workqueue , & info - > mr_recovery_work ) ;
done :
if ( atomic_dec_and_test ( & info - > mr_used_count ) )
wake_up ( & info - > wait_for_mr_cleanup ) ;
return rc ;
}