2017-01-09 16:55:13 +01:00
/*
* Shared Memory Communications over RDMA ( SMC - R ) and RoCE
*
* AF_SMC protocol family socket handler keeping the AF_INET sock address type
* applies to SOCK_STREAM sockets only
* offers an alternative communication option for TCP - protocol sockets
* applicable with RoCE - cards only
*
2017-01-09 16:55:16 +01:00
* Initial restrictions :
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non - blocking sockets only
* - support for urgent data postponed
*
2017-01-09 16:55:13 +01:00
* Copyright IBM Corp . 2016
*
* Author ( s ) : Ursula Braun < ubraun @ linux . vnet . ibm . com >
* based on prototype from Frank Blaschka
*/
# define KMSG_COMPONENT "smc"
# define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
# include <linux/module.h>
# include <linux/socket.h>
2017-01-09 16:55:16 +01:00
# include <linux/workqueue.h>
2017-01-09 16:55:22 +01:00
# include <linux/in.h>
2017-02-02 08:35:14 +01:00
# include <linux/sched/signal.h>
2017-01-09 16:55:13 +01:00
# include <net/sock.h>
2017-01-09 16:55:16 +01:00
# include <net/tcp.h>
2017-01-09 16:55:26 +01:00
# include <net/smc.h>
2017-01-09 16:55:13 +01:00
# include "smc.h"
2017-01-09 16:55:16 +01:00
# include "smc_clc.h"
2017-01-09 16:55:21 +01:00
# include "smc_llc.h"
2017-01-09 16:55:22 +01:00
# include "smc_cdc.h"
2017-01-09 16:55:17 +01:00
# include "smc_core.h"
2017-01-09 16:55:14 +01:00
# include "smc_ib.h"
2017-01-09 16:55:15 +01:00
# include "smc_pnet.h"
2017-01-09 16:55:23 +01:00
# include "smc_tx.h"
2017-01-09 16:55:24 +01:00
# include "smc_rx.h"
2017-01-09 16:55:25 +01:00
# include "smc_close.h"
2017-01-09 16:55:13 +01:00
2017-01-09 16:55:17 +01:00
/* serialize link group creation */
static DEFINE_MUTEX(smc_create_lgr_pending);

/* established link groups: list head together with its spinlock */
struct smc_lgr_list smc_lgr_list = {
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
};

/* forward declaration; the worker is referenced by smc_sock_alloc() */
static void smc_tcp_listen_work(struct work_struct *work);
2017-01-09 16:55:13 +01:00
static void smc_set_keepalive ( struct sock * sk , int val )
{
struct smc_sock * smc = smc_sk ( sk ) ;
smc - > clcsock - > sk - > sk_prot - > keepalive ( smc - > clcsock - > sk , val ) ;
}
2017-01-09 16:55:26 +01:00
static struct smc_hashinfo smc_v4_hashinfo = {
. lock = __RW_LOCK_UNLOCKED ( smc_v4_hashinfo . lock ) ,
} ;
int smc_hash_sk ( struct sock * sk )
{
struct smc_hashinfo * h = sk - > sk_prot - > h . smc_hash ;
struct hlist_head * head ;
head = & h - > ht ;
write_lock_bh ( & h - > lock ) ;
sk_add_node ( sk , head ) ;
sock_prot_inuse_add ( sock_net ( sk ) , sk - > sk_prot , 1 ) ;
write_unlock_bh ( & h - > lock ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( smc_hash_sk ) ;
void smc_unhash_sk ( struct sock * sk )
{
struct smc_hashinfo * h = sk - > sk_prot - > h . smc_hash ;
write_lock_bh ( & h - > lock ) ;
if ( sk_del_node_init ( sk ) )
sock_prot_inuse_add ( sock_net ( sk ) , sk - > sk_prot , - 1 ) ;
write_unlock_bh ( & h - > lock ) ;
}
EXPORT_SYMBOL_GPL ( smc_unhash_sk ) ;
/* Protocol descriptor handed to sk_alloc() in smc_sock_alloc().
 * - keepalive is routed to smc_set_keepalive()
 * - hash/unhash use smc_hash_sk()/smc_unhash_sk() with smc_v4_hashinfo
 * - obj_size makes every sock of this protocol a struct smc_sock
 *
 * NOTE(review): the blanks inside the name literal look like an artifact of
 * how this text was extracted -- confirm the intended value before relying
 * on it.
 */
struct proto smc_proto = {
2017-01-09 16:55:13 +01:00
. name = " SMC " ,
. owner = THIS_MODULE ,
. keepalive = smc_set_keepalive ,
2017-01-09 16:55:26 +01:00
. hash = smc_hash_sk ,
. unhash = smc_unhash_sk ,
2017-01-09 16:55:13 +01:00
. obj_size = sizeof ( struct smc_sock ) ,
2017-01-09 16:55:26 +01:00
. h . smc_hash = & smc_v4_hashinfo ,
2017-01-18 02:53:44 -08:00
. slab_flags = SLAB_TYPESAFE_BY_RCU ,
2017-01-09 16:55:13 +01:00
} ;
2017-01-09 16:55:26 +01:00
EXPORT_SYMBOL_GPL ( smc_proto ) ;
2017-01-09 16:55:13 +01:00
static int smc_release ( struct socket * sock )
{
struct sock * sk = sock - > sk ;
struct smc_sock * smc ;
2017-01-09 16:55:25 +01:00
int rc = 0 ;
2017-01-09 16:55:13 +01:00
if ( ! sk )
goto out ;
smc = smc_sk ( sk ) ;
2017-01-09 16:55:25 +01:00
if ( sk - > sk_state = = SMC_LISTEN )
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
*/
lock_sock_nested ( sk , SINGLE_DEPTH_NESTING ) ;
else
lock_sock ( sk ) ;
2017-01-09 16:55:13 +01:00
2018-01-26 09:28:48 +01:00
if ( ! smc - > use_fallback ) {
2017-01-09 16:55:25 +01:00
rc = smc_close_active ( smc ) ;
sock_set_flag ( sk , SOCK_DEAD ) ;
sk - > sk_shutdown | = SHUTDOWN_MASK ;
}
2017-01-09 16:55:13 +01:00
if ( smc - > clcsock ) {
sock_release ( smc - > clcsock ) ;
smc - > clcsock = NULL ;
}
2018-01-26 09:28:48 +01:00
if ( smc - > use_fallback ) {
sock_put ( sk ) ; /* passive closing */
sk - > sk_state = SMC_CLOSED ;
sk - > sk_state_change ( sk ) ;
}
2017-01-09 16:55:13 +01:00
/* detach socket */
sock_orphan ( sk ) ;
sock - > sk = NULL ;
2018-01-26 09:28:48 +01:00
if ( ! smc - > use_fallback & & sk - > sk_state = = SMC_CLOSED )
2017-01-09 16:55:25 +01:00
smc_conn_free ( & smc - > conn ) ;
2017-01-09 16:55:13 +01:00
release_sock ( sk ) ;
2018-01-26 09:28:48 +01:00
sk - > sk_prot - > unhash ( sk ) ;
sock_put ( sk ) ; /* final sock_put */
2017-01-09 16:55:13 +01:00
out :
2017-01-09 16:55:25 +01:00
return rc ;
2017-01-09 16:55:13 +01:00
}
static void smc_destruct ( struct sock * sk )
{
if ( sk - > sk_state ! = SMC_CLOSED )
return ;
if ( ! sock_flag ( sk , SOCK_DEAD ) )
return ;
sk_refcnt_debug_dec ( sk ) ;
}
static struct sock * smc_sock_alloc ( struct net * net , struct socket * sock )
{
struct smc_sock * smc ;
struct sock * sk ;
sk = sk_alloc ( net , PF_SMC , GFP_KERNEL , & smc_proto , 0 ) ;
if ( ! sk )
return NULL ;
sock_init_data ( sock , sk ) ; /* sets sk_refcnt to 1 */
sk - > sk_state = SMC_INIT ;
sk - > sk_destruct = smc_destruct ;
sk - > sk_protocol = SMCPROTO_SMC ;
smc = smc_sk ( sk ) ;
2017-01-09 16:55:16 +01:00
INIT_WORK ( & smc - > tcp_listen_work , smc_tcp_listen_work ) ;
INIT_LIST_HEAD ( & smc - > accept_q ) ;
spin_lock_init ( & smc - > accept_q_lock ) ;
2017-01-09 16:55:26 +01:00
sk - > sk_prot - > hash ( sk ) ;
2017-01-09 16:55:16 +01:00
sk_refcnt_debug_inc ( sk ) ;
2017-01-09 16:55:13 +01:00
return sk ;
}
static int smc_bind ( struct socket * sock , struct sockaddr * uaddr ,
int addr_len )
{
struct sockaddr_in * addr = ( struct sockaddr_in * ) uaddr ;
struct sock * sk = sock - > sk ;
struct smc_sock * smc ;
int rc ;
smc = smc_sk ( sk ) ;
/* replicate tests from inet_bind(), to be safe wrt. future changes */
rc = - EINVAL ;
if ( addr_len < sizeof ( struct sockaddr_in ) )
goto out ;
rc = - EAFNOSUPPORT ;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
if ( ( addr - > sin_family ! = AF_INET ) & &
( ( addr - > sin_family ! = AF_UNSPEC ) | |
( addr - > sin_addr . s_addr ! = htonl ( INADDR_ANY ) ) ) )
goto out ;
lock_sock ( sk ) ;
/* Check if socket is already active */
rc = - EINVAL ;
if ( sk - > sk_state ! = SMC_INIT )
goto out_rel ;
smc - > clcsock - > sk - > sk_reuse = sk - > sk_reuse ;
rc = kernel_bind ( smc - > clcsock , uaddr , addr_len ) ;
out_rel :
release_sock ( sk ) ;
out :
return rc ;
}
static void smc_copy_sock_settings ( struct sock * nsk , struct sock * osk ,
unsigned long mask )
{
/* options we don't get control via setsockopt for */
nsk - > sk_type = osk - > sk_type ;
nsk - > sk_sndbuf = osk - > sk_sndbuf ;
nsk - > sk_rcvbuf = osk - > sk_rcvbuf ;
nsk - > sk_sndtimeo = osk - > sk_sndtimeo ;
nsk - > sk_rcvtimeo = osk - > sk_rcvtimeo ;
nsk - > sk_mark = osk - > sk_mark ;
nsk - > sk_priority = osk - > sk_priority ;
nsk - > sk_rcvlowat = osk - > sk_rcvlowat ;
nsk - > sk_bound_dev_if = osk - > sk_bound_dev_if ;
nsk - > sk_err = osk - > sk_err ;
nsk - > sk_flags & = ~ mask ;
nsk - > sk_flags | = osk - > sk_flags & mask ;
}
# define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
( 1UL < < SOCK_KEEPOPEN ) | \
( 1UL < < SOCK_LINGER ) | \
( 1UL < < SOCK_BROADCAST ) | \
( 1UL < < SOCK_TIMESTAMP ) | \
( 1UL < < SOCK_DBG ) | \
( 1UL < < SOCK_RCVTSTAMP ) | \
( 1UL < < SOCK_RCVTSTAMPNS ) | \
( 1UL < < SOCK_LOCALROUTE ) | \
( 1UL < < SOCK_TIMESTAMPING_RX_SOFTWARE ) | \
( 1UL < < SOCK_RXQ_OVFL ) | \
( 1UL < < SOCK_WIFI_STATUS ) | \
( 1UL < < SOCK_NOFCS ) | \
( 1UL < < SOCK_FILTER_LOCKED ) )
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
* clc socket ( since smc is not called for these options from net / core )
*/
static void smc_copy_sock_settings_to_clc ( struct smc_sock * smc )
{
smc_copy_sock_settings ( smc - > clcsock - > sk , & smc - > sk , SK_FLAGS_SMC_TO_CLC ) ;
}
# define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
( 1UL < < SOCK_KEEPOPEN ) | \
( 1UL < < SOCK_LINGER ) | \
( 1UL < < SOCK_DBG ) )
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc ( struct smc_sock * smc )
{
smc_copy_sock_settings ( & smc - > sk , smc - > clcsock - > sk , SK_FLAGS_CLC_TO_SMC ) ;
}
2018-03-01 13:51:26 +01:00
static int smc_clnt_conf_first_link ( struct smc_sock * smc )
2017-01-09 16:55:21 +01:00
{
struct smc_link_group * lgr = smc - > conn . lgr ;
struct smc_link * link ;
int rest ;
int rc ;
link = & lgr - > lnk [ SMC_SINGLE_LINK ] ;
/* receive CONFIRM LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout (
& link - > llc_confirm ,
SMC_LLC_WAIT_FIRST_TIME ) ;
if ( rest < = 0 ) {
struct smc_clc_msg_decline dclc ;
rc = smc_clc_wait_msg ( smc , & dclc , sizeof ( dclc ) ,
SMC_CLC_DECLINE ) ;
return rc ;
}
2018-03-01 13:51:31 +01:00
if ( link - > llc_confirm_rc )
return SMC_CLC_DECL_RMBE_EC ;
2017-01-09 16:55:21 +01:00
rc = smc_ib_modify_qp_rts ( link ) ;
if ( rc )
return SMC_CLC_DECL_INTERR ;
smc_wr_remember_qp_attr ( link ) ;
2017-07-28 13:56:17 +02:00
rc = smc_wr_reg_send ( link ,
smc - > conn . rmb_desc - > mr_rx [ SMC_SINGLE_LINK ] ) ;
if ( rc )
return SMC_CLC_DECL_INTERR ;
2017-01-09 16:55:21 +01:00
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link ( link ,
link - > smcibdev - > mac [ link - > ibport - 1 ] ,
2018-03-01 13:51:26 +01:00
& link - > smcibdev - > gid [ link - > ibport - 1 ] ,
SMC_LLC_RESP ) ;
2017-01-09 16:55:21 +01:00
if ( rc < 0 )
return SMC_CLC_DECL_TCL ;
2018-03-01 13:51:32 +01:00
/* receive ADD LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout ( & link - > llc_add ,
SMC_LLC_WAIT_TIME ) ;
if ( rest < = 0 ) {
struct smc_clc_msg_decline dclc ;
rc = smc_clc_wait_msg ( smc , & dclc , sizeof ( dclc ) ,
SMC_CLC_DECLINE ) ;
return rc ;
}
/* send add link reject message, only one link supported for now */
rc = smc_llc_send_add_link ( link ,
link - > smcibdev - > mac [ link - > ibport - 1 ] ,
& link - > smcibdev - > gid [ link - > ibport - 1 ] ,
SMC_LLC_RESP ) ;
if ( rc < 0 )
return SMC_CLC_DECL_TCL ;
link - > state = SMC_LNK_ACTIVE ;
2018-03-01 13:51:31 +01:00
return 0 ;
2017-01-09 16:55:21 +01:00
}
2017-01-09 16:55:17 +01:00
static void smc_conn_save_peer_info ( struct smc_sock * smc ,
struct smc_clc_msg_accept_confirm * clc )
{
smc - > conn . peer_conn_idx = clc - > conn_idx ;
2017-01-09 16:55:22 +01:00
smc - > conn . local_tx_ctrl . token = ntohl ( clc - > rmbe_alert_token ) ;
2017-01-09 16:55:18 +01:00
smc - > conn . peer_rmbe_size = smc_uncompress_bufsize ( clc - > rmbe_size ) ;
atomic_set ( & smc - > conn . peer_rmbe_space , smc - > conn . peer_rmbe_size ) ;
2017-01-09 16:55:17 +01:00
}
static void smc_link_save_peer_info ( struct smc_link * link ,
struct smc_clc_msg_accept_confirm * clc )
{
link - > peer_qpn = ntoh24 ( clc - > qpn ) ;
memcpy ( link - > peer_gid , clc - > lcl . gid , SMC_GID_SIZE ) ;
memcpy ( link - > peer_mac , clc - > lcl . mac , sizeof ( link - > peer_mac ) ) ;
link - > peer_psn = ntoh24 ( clc - > psn ) ;
link - > peer_mtu = clc - > qp_mtu ;
}
2017-01-09 16:55:16 +01:00
/* setup for RDMA connection of client */
static int smc_connect_rdma ( struct smc_sock * smc )
{
struct smc_clc_msg_accept_confirm aclc ;
2017-01-09 16:55:17 +01:00
int local_contact = SMC_FIRST_CONTACT ;
2017-01-09 16:55:16 +01:00
struct smc_ib_device * smcibdev ;
2017-01-09 16:55:17 +01:00
struct smc_link * link ;
u8 srv_first_contact ;
2017-01-09 16:55:16 +01:00
int reason_code = 0 ;
int rc = 0 ;
u8 ibport ;
2018-01-26 09:28:48 +01:00
sock_hold ( & smc - > sk ) ; /* sock put in passive closing */
2017-10-25 11:01:46 +02:00
if ( ! tcp_sk ( smc - > clcsock - > sk ) - > syn_smc ) {
/* peer has not signalled SMC-capability */
smc - > use_fallback = true ;
goto out_connected ;
}
2017-01-09 16:55:16 +01:00
/* IPSec connections opt out of SMC-R optimizations */
if ( using_ipsec ( smc ) ) {
reason_code = SMC_CLC_DECL_IPSEC ;
goto decline_rdma ;
}
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
smc_pnet_find_roce_resource ( smc - > clcsock - > sk , & smcibdev , & ibport ) ;
if ( ! smcibdev ) {
reason_code = SMC_CLC_DECL_CNFERR ; /* configuration error */
goto decline_rdma ;
}
/* do inband token exchange */
reason_code = smc_clc_send_proposal ( smc , smcibdev , ibport ) ;
if ( reason_code < 0 ) {
rc = reason_code ;
goto out_err ;
}
if ( reason_code > 0 ) /* configuration error */
goto decline_rdma ;
/* receive SMC Accept CLC message */
reason_code = smc_clc_wait_msg ( smc , & aclc , sizeof ( aclc ) ,
SMC_CLC_ACCEPT ) ;
if ( reason_code < 0 ) {
rc = reason_code ;
goto out_err ;
}
if ( reason_code > 0 )
goto decline_rdma ;
2017-01-09 16:55:17 +01:00
srv_first_contact = aclc . hdr . flag ;
mutex_lock ( & smc_create_lgr_pending ) ;
2018-03-01 13:51:28 +01:00
local_contact = smc_conn_create ( smc , smcibdev , ibport , & aclc . lcl ,
srv_first_contact ) ;
2017-01-09 16:55:17 +01:00
if ( local_contact < 0 ) {
rc = local_contact ;
if ( rc = = - ENOMEM )
reason_code = SMC_CLC_DECL_MEM ; /* insufficient memory*/
else if ( rc = = - ENOLINK )
reason_code = SMC_CLC_DECL_SYNCERR ; /* synchr. error */
goto decline_rdma_unlock ;
}
link = & smc - > conn . lgr - > lnk [ SMC_SINGLE_LINK ] ;
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:17 +01:00
smc_conn_save_peer_info ( smc , & aclc ) ;
2017-01-09 16:55:18 +01:00
2017-07-28 13:56:20 +02:00
/* create send buffer and rmb */
rc = smc_buf_create ( smc ) ;
2017-01-09 16:55:18 +01:00
if ( rc ) {
reason_code = SMC_CLC_DECL_MEM ;
goto decline_rdma_unlock ;
}
2017-01-09 16:55:17 +01:00
if ( local_contact = = SMC_FIRST_CONTACT )
smc_link_save_peer_info ( link , & aclc ) ;
2017-01-09 16:55:20 +01:00
rc = smc_rmb_rtoken_handling ( & smc - > conn , & aclc ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
goto decline_rdma_unlock ;
}
2017-04-10 14:58:01 +02:00
smc_close_init ( smc ) ;
smc_rx_init ( smc ) ;
2017-01-09 16:55:20 +01:00
if ( local_contact = = SMC_FIRST_CONTACT ) {
rc = smc_ib_ready_link ( link ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
goto decline_rdma_unlock ;
}
2017-07-28 13:56:17 +02:00
} else {
struct smc_buf_desc * buf_desc = smc - > conn . rmb_desc ;
if ( ! buf_desc - > reused ) {
/* register memory region for new rmb */
rc = smc_wr_reg_send ( link ,
buf_desc - > mr_rx [ SMC_SINGLE_LINK ] ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
goto decline_rdma_unlock ;
}
}
2017-01-09 16:55:20 +01:00
}
2017-07-28 13:56:22 +02:00
smc_rmb_sync_sg_for_device ( & smc - > conn ) ;
2017-01-09 16:55:16 +01:00
rc = smc_clc_send_confirm ( smc ) ;
if ( rc )
2017-01-09 16:55:17 +01:00
goto out_err_unlock ;
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:21 +01:00
if ( local_contact = = SMC_FIRST_CONTACT ) {
/* QP confirmation over RoCE fabric */
2018-03-01 13:51:26 +01:00
reason_code = smc_clnt_conf_first_link ( smc ) ;
2017-01-09 16:55:21 +01:00
if ( reason_code < 0 ) {
rc = reason_code ;
goto out_err_unlock ;
}
if ( reason_code > 0 )
goto decline_rdma_unlock ;
}
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:17 +01:00
mutex_unlock ( & smc_create_lgr_pending ) ;
2017-01-09 16:55:23 +01:00
smc_tx_init ( smc ) ;
2017-01-09 16:55:16 +01:00
out_connected :
smc_copy_sock_settings_to_clc ( smc ) ;
2017-01-09 16:55:25 +01:00
if ( smc - > sk . sk_state = = SMC_INIT )
smc - > sk . sk_state = SMC_ACTIVE ;
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:17 +01:00
return rc ? rc : local_contact ;
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:17 +01:00
decline_rdma_unlock :
2018-01-25 11:15:34 +01:00
if ( local_contact = = SMC_FIRST_CONTACT )
smc_lgr_forget ( smc - > conn . lgr ) ;
2017-01-09 16:55:17 +01:00
mutex_unlock ( & smc_create_lgr_pending ) ;
smc_conn_free ( & smc - > conn ) ;
2017-01-09 16:55:16 +01:00
decline_rdma :
/* RDMA setup failed, switch back to TCP */
smc - > use_fallback = true ;
if ( reason_code & & ( reason_code ! = SMC_CLC_DECL_REPLY ) ) {
2017-09-21 09:16:32 +02:00
rc = smc_clc_send_decline ( smc , reason_code ) ;
2017-12-07 13:38:45 +01:00
if ( rc < 0 )
2017-01-09 16:55:16 +01:00
goto out_err ;
}
goto out_connected ;
2017-01-09 16:55:17 +01:00
out_err_unlock :
2018-01-25 11:15:34 +01:00
if ( local_contact = = SMC_FIRST_CONTACT )
smc_lgr_forget ( smc - > conn . lgr ) ;
2017-01-09 16:55:17 +01:00
mutex_unlock ( & smc_create_lgr_pending ) ;
smc_conn_free ( & smc - > conn ) ;
2017-01-09 16:55:16 +01:00
out_err :
2018-01-26 09:28:48 +01:00
if ( smc - > sk . sk_state = = SMC_INIT )
sock_put ( & smc - > sk ) ; /* passive closing */
2017-01-09 16:55:16 +01:00
return rc ;
}
2017-01-09 16:55:13 +01:00
static int smc_connect ( struct socket * sock , struct sockaddr * addr ,
int alen , int flags )
{
struct sock * sk = sock - > sk ;
struct smc_sock * smc ;
int rc = - EINVAL ;
smc = smc_sk ( sk ) ;
/* separate smc parameter checking to be safe */
if ( alen < sizeof ( addr - > sa_family ) )
goto out_err ;
if ( addr - > sa_family ! = AF_INET )
goto out_err ;
lock_sock ( sk ) ;
switch ( sk - > sk_state ) {
default :
goto out ;
case SMC_ACTIVE :
rc = - EISCONN ;
goto out ;
case SMC_INIT :
rc = 0 ;
break ;
}
smc_copy_sock_settings_to_clc ( smc ) ;
2017-10-25 11:01:46 +02:00
tcp_sk ( smc - > clcsock - > sk ) - > syn_smc = 1 ;
2017-01-09 16:55:13 +01:00
rc = kernel_connect ( smc - > clcsock , addr , alen , flags ) ;
if ( rc )
goto out ;
2017-01-09 16:55:16 +01:00
/* setup RDMA connection */
rc = smc_connect_rdma ( smc ) ;
if ( rc < 0 )
goto out ;
else
rc = 0 ; /* success cases including fallback */
2017-01-09 16:55:13 +01:00
out :
release_sock ( sk ) ;
out_err :
return rc ;
}
static int smc_clcsock_accept ( struct smc_sock * lsmc , struct smc_sock * * new_smc )
{
2018-01-24 10:28:12 +01:00
struct socket * new_clcsock = NULL ;
struct sock * lsk = & lsmc - > sk ;
2017-01-09 16:55:13 +01:00
struct sock * new_sk ;
int rc ;
2018-01-24 10:28:12 +01:00
release_sock ( lsk ) ;
new_sk = smc_sock_alloc ( sock_net ( lsk ) , NULL ) ;
2017-01-09 16:55:13 +01:00
if ( ! new_sk ) {
rc = - ENOMEM ;
2018-01-24 10:28:12 +01:00
lsk - > sk_err = ENOMEM ;
2017-01-09 16:55:13 +01:00
* new_smc = NULL ;
2018-01-24 10:28:12 +01:00
lock_sock ( lsk ) ;
2017-01-09 16:55:13 +01:00
goto out ;
}
* new_smc = smc_sk ( new_sk ) ;
rc = kernel_accept ( lsmc - > clcsock , & new_clcsock , 0 ) ;
2018-01-24 10:28:12 +01:00
lock_sock ( lsk ) ;
2018-01-24 10:28:13 +01:00
if ( rc < 0 )
2018-01-24 10:28:12 +01:00
lsk - > sk_err = - rc ;
2018-01-24 10:28:13 +01:00
if ( rc < 0 | | lsk - > sk_state = = SMC_CLOSED ) {
2017-01-09 16:55:16 +01:00
if ( new_clcsock )
sock_release ( new_clcsock ) ;
new_sk - > sk_state = SMC_CLOSED ;
sock_set_flag ( new_sk , SOCK_DEAD ) ;
2018-01-24 10:28:12 +01:00
new_sk - > sk_prot - > unhash ( new_sk ) ;
2018-01-26 09:28:48 +01:00
sock_put ( new_sk ) ; /* final */
2017-01-09 16:55:13 +01:00
* new_smc = NULL ;
goto out ;
}
( * new_smc ) - > clcsock = new_clcsock ;
out :
return rc ;
}
2017-01-09 16:55:16 +01:00
/* add a just created sock to the accept queue of the listen sock as
* candidate for a following socket accept call from user space
*/
static void smc_accept_enqueue ( struct sock * parent , struct sock * sk )
{
struct smc_sock * par = smc_sk ( parent ) ;
2018-01-26 09:28:48 +01:00
sock_hold ( sk ) ; /* sock_put in smc_accept_unlink () */
2017-01-09 16:55:16 +01:00
spin_lock ( & par - > accept_q_lock ) ;
list_add_tail ( & smc_sk ( sk ) - > accept_q , & par - > accept_q ) ;
spin_unlock ( & par - > accept_q_lock ) ;
sk_acceptq_added ( parent ) ;
}
/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink ( struct sock * sk )
{
struct smc_sock * par = smc_sk ( sk ) - > listen_smc ;
spin_lock ( & par - > accept_q_lock ) ;
list_del_init ( & smc_sk ( sk ) - > accept_q ) ;
spin_unlock ( & par - > accept_q_lock ) ;
sk_acceptq_removed ( & smc_sk ( sk ) - > listen_smc - > sk ) ;
2018-01-26 09:28:48 +01:00
sock_put ( sk ) ; /* sock_hold in smc_accept_enqueue */
2017-01-09 16:55:16 +01:00
}
/* remove a sock from the accept queue to bind it to a new socket created
* for a socket accept call from user space
*/
2017-01-09 16:55:25 +01:00
struct sock * smc_accept_dequeue ( struct sock * parent ,
struct socket * new_sock )
2017-01-09 16:55:16 +01:00
{
struct smc_sock * isk , * n ;
struct sock * new_sk ;
list_for_each_entry_safe ( isk , n , & smc_sk ( parent ) - > accept_q , accept_q ) {
new_sk = ( struct sock * ) isk ;
smc_accept_unlink ( new_sk ) ;
if ( new_sk - > sk_state = = SMC_CLOSED ) {
2018-01-26 09:28:49 +01:00
if ( isk - > clcsock ) {
sock_release ( isk - > clcsock ) ;
isk - > clcsock = NULL ;
}
2017-04-10 14:58:04 +02:00
new_sk - > sk_prot - > unhash ( new_sk ) ;
2018-01-26 09:28:48 +01:00
sock_put ( new_sk ) ; /* final */
2017-01-09 16:55:16 +01:00
continue ;
}
if ( new_sock )
sock_graft ( new_sk , new_sock ) ;
return new_sk ;
}
return NULL ;
}
/* clean up for a created but never accepted sock */
2017-01-09 16:55:25 +01:00
void smc_close_non_accepted ( struct sock * sk )
2017-01-09 16:55:16 +01:00
{
struct smc_sock * smc = smc_sk ( sk ) ;
2017-01-09 16:55:25 +01:00
lock_sock ( sk ) ;
if ( ! sk - > sk_lingertime )
/* wait for peer closing */
sk - > sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT ;
2018-01-26 09:28:48 +01:00
if ( ! smc - > use_fallback ) {
2017-01-09 16:55:25 +01:00
smc_close_active ( smc ) ;
2017-04-10 14:58:04 +02:00
sock_set_flag ( sk , SOCK_DEAD ) ;
sk - > sk_shutdown | = SHUTDOWN_MASK ;
}
2017-01-09 16:55:16 +01:00
if ( smc - > clcsock ) {
struct socket * tcp ;
tcp = smc - > clcsock ;
smc - > clcsock = NULL ;
sock_release ( tcp ) ;
}
2017-01-09 16:55:25 +01:00
if ( smc - > use_fallback ) {
2018-01-26 09:28:48 +01:00
sock_put ( sk ) ; /* passive closing */
sk - > sk_state = SMC_CLOSED ;
} else {
if ( sk - > sk_state = = SMC_CLOSED )
smc_conn_free ( & smc - > conn ) ;
2017-01-09 16:55:25 +01:00
}
release_sock ( sk ) ;
2018-01-26 09:28:48 +01:00
sk - > sk_prot - > unhash ( sk ) ;
sock_put ( sk ) ; /* final sock_put */
2017-01-09 16:55:16 +01:00
}
2017-01-09 16:55:21 +01:00
static int smc_serv_conf_first_link ( struct smc_sock * smc )
{
struct smc_link_group * lgr = smc - > conn . lgr ;
struct smc_link * link ;
int rest ;
int rc ;
link = & lgr - > lnk [ SMC_SINGLE_LINK ] ;
2017-07-28 13:56:17 +02:00
rc = smc_wr_reg_send ( link ,
smc - > conn . rmb_desc - > mr_rx [ SMC_SINGLE_LINK ] ) ;
if ( rc )
return SMC_CLC_DECL_INTERR ;
2017-01-09 16:55:21 +01:00
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link ( link ,
link - > smcibdev - > mac [ link - > ibport - 1 ] ,
& link - > smcibdev - > gid [ link - > ibport - 1 ] ,
SMC_LLC_REQ ) ;
if ( rc < 0 )
return SMC_CLC_DECL_TCL ;
/* receive CONFIRM LINK response from client over the RoCE fabric */
rest = wait_for_completion_interruptible_timeout (
& link - > llc_confirm_resp ,
SMC_LLC_WAIT_FIRST_TIME ) ;
if ( rest < = 0 ) {
struct smc_clc_msg_decline dclc ;
rc = smc_clc_wait_msg ( smc , & dclc , sizeof ( dclc ) ,
SMC_CLC_DECLINE ) ;
2018-03-01 13:51:31 +01:00
return rc ;
2017-01-09 16:55:21 +01:00
}
2018-03-01 13:51:31 +01:00
if ( link - > llc_confirm_resp_rc )
return SMC_CLC_DECL_RMBE_EC ;
2018-03-01 13:51:32 +01:00
/* send ADD LINK request to client over the RoCE fabric */
rc = smc_llc_send_add_link ( link ,
link - > smcibdev - > mac [ link - > ibport - 1 ] ,
& link - > smcibdev - > gid [ link - > ibport - 1 ] ,
SMC_LLC_REQ ) ;
if ( rc < 0 )
return SMC_CLC_DECL_TCL ;
/* receive ADD LINK response from client over the RoCE fabric */
rest = wait_for_completion_interruptible_timeout ( & link - > llc_add_resp ,
SMC_LLC_WAIT_TIME ) ;
if ( rest < = 0 ) {
struct smc_clc_msg_decline dclc ;
rc = smc_clc_wait_msg ( smc , & dclc , sizeof ( dclc ) ,
SMC_CLC_DECLINE ) ;
return rc ;
}
link - > state = SMC_LNK_ACTIVE ;
2018-03-01 13:51:31 +01:00
return 0 ;
2017-01-09 16:55:21 +01:00
}
2017-01-09 16:55:16 +01:00
/* setup for RDMA connection of server */
static void smc_listen_work ( struct work_struct * work )
{
struct smc_sock * new_smc = container_of ( work , struct smc_sock ,
smc_listen_work ) ;
2017-12-07 13:38:49 +01:00
struct smc_clc_msg_proposal_prefix * pclc_prfx ;
2017-01-09 16:55:16 +01:00
struct socket * newclcsock = new_smc - > clcsock ;
struct smc_sock * lsmc = new_smc - > listen_smc ;
struct smc_clc_msg_accept_confirm cclc ;
2017-01-09 16:55:17 +01:00
int local_contact = SMC_REUSE_CONTACT ;
2017-01-09 16:55:16 +01:00
struct sock * newsmcsk = & new_smc - > sk ;
2017-12-07 13:38:49 +01:00
struct smc_clc_msg_proposal * pclc ;
2017-01-09 16:55:16 +01:00
struct smc_ib_device * smcibdev ;
2017-12-07 13:38:49 +01:00
u8 buf [ SMC_CLC_MAX_LEN ] ;
2017-01-09 16:55:17 +01:00
struct smc_link * link ;
2017-01-09 16:55:16 +01:00
int reason_code = 0 ;
2018-02-12 20:00:20 +01:00
int rc = 0 ;
2017-01-09 16:55:16 +01:00
__be32 subnet ;
u8 prefix_len ;
u8 ibport ;
2017-10-25 11:01:46 +02:00
/* check if peer is smc capable */
if ( ! tcp_sk ( newclcsock - > sk ) - > syn_smc ) {
new_smc - > use_fallback = true ;
goto out_connected ;
}
2017-01-09 16:55:16 +01:00
/* do inband token exchange -
* wait for and receive SMC Proposal CLC message
*/
2017-12-07 13:38:49 +01:00
reason_code = smc_clc_wait_msg ( new_smc , & buf , sizeof ( buf ) ,
2017-01-09 16:55:16 +01:00
SMC_CLC_PROPOSAL ) ;
if ( reason_code < 0 )
goto out_err ;
if ( reason_code > 0 )
goto decline_rdma ;
/* IPSec connections opt out of SMC-R optimizations */
if ( using_ipsec ( new_smc ) ) {
reason_code = SMC_CLC_DECL_IPSEC ;
goto decline_rdma ;
}
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
smc_pnet_find_roce_resource ( newclcsock - > sk , & smcibdev , & ibport ) ;
if ( ! smcibdev ) {
reason_code = SMC_CLC_DECL_CNFERR ; /* configuration error */
goto decline_rdma ;
}
/* determine subnet and mask from internal TCP socket */
2018-03-01 13:51:27 +01:00
rc = smc_clc_netinfo_by_tcpsk ( newclcsock , & subnet , & prefix_len ) ;
2017-01-09 16:55:16 +01:00
if ( rc ) {
reason_code = SMC_CLC_DECL_CNFERR ; /* configuration error */
goto decline_rdma ;
}
2017-12-07 13:38:49 +01:00
pclc = ( struct smc_clc_msg_proposal * ) & buf ;
pclc_prfx = smc_clc_proposal_get_prefix ( pclc ) ;
if ( pclc_prfx - > outgoing_subnet ! = subnet | |
pclc_prfx - > prefix_len ! = prefix_len ) {
2017-01-09 16:55:16 +01:00
reason_code = SMC_CLC_DECL_CNFERR ; /* configuration error */
goto decline_rdma ;
}
2017-01-09 16:55:17 +01:00
/* allocate connection / link group */
mutex_lock ( & smc_create_lgr_pending ) ;
2018-03-01 13:51:28 +01:00
local_contact = smc_conn_create ( new_smc , smcibdev , ibport , & pclc - > lcl ,
0 ) ;
2017-01-09 16:55:17 +01:00
if ( local_contact < 0 ) {
rc = local_contact ;
if ( rc = = - ENOMEM )
reason_code = SMC_CLC_DECL_MEM ; /* insufficient memory*/
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:17 +01:00
}
link = & new_smc - > conn . lgr - > lnk [ SMC_SINGLE_LINK ] ;
2017-01-09 16:55:16 +01:00
2017-07-28 13:56:20 +02:00
/* create send buffer and rmb */
rc = smc_buf_create ( new_smc ) ;
2017-01-09 16:55:18 +01:00
if ( rc ) {
reason_code = SMC_CLC_DECL_MEM ;
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:18 +01:00
}
2017-01-09 16:55:16 +01:00
2017-04-10 14:58:01 +02:00
smc_close_init ( new_smc ) ;
smc_rx_init ( new_smc ) ;
2017-07-28 13:56:17 +02:00
if ( local_contact ! = SMC_FIRST_CONTACT ) {
struct smc_buf_desc * buf_desc = new_smc - > conn . rmb_desc ;
if ( ! buf_desc - > reused ) {
/* register memory region for new rmb */
rc = smc_wr_reg_send ( link ,
buf_desc - > mr_rx [ SMC_SINGLE_LINK ] ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-07-28 13:56:17 +02:00
}
}
}
2017-07-28 13:56:22 +02:00
smc_rmb_sync_sg_for_device ( & new_smc - > conn ) ;
2017-07-28 13:56:17 +02:00
2017-01-09 16:55:17 +01:00
rc = smc_clc_send_accept ( new_smc , local_contact ) ;
2017-01-09 16:55:16 +01:00
if ( rc )
2017-10-25 11:01:44 +02:00
goto out_err_unlock ;
2017-01-09 16:55:16 +01:00
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg ( new_smc , & cclc , sizeof ( cclc ) ,
SMC_CLC_CONFIRM ) ;
if ( reason_code < 0 )
2017-10-25 11:01:44 +02:00
goto out_err_unlock ;
2017-01-09 16:55:16 +01:00
if ( reason_code > 0 )
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:17 +01:00
smc_conn_save_peer_info ( new_smc , & cclc ) ;
if ( local_contact = = SMC_FIRST_CONTACT )
smc_link_save_peer_info ( link , & cclc ) ;
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:20 +01:00
rc = smc_rmb_rtoken_handling ( & new_smc - > conn , & cclc ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:20 +01:00
}
if ( local_contact = = SMC_FIRST_CONTACT ) {
rc = smc_ib_ready_link ( link ) ;
if ( rc ) {
reason_code = SMC_CLC_DECL_INTERR ;
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:20 +01:00
}
2017-01-09 16:55:21 +01:00
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link ( new_smc ) ;
2017-12-07 13:38:45 +01:00
if ( reason_code < 0 )
2017-01-09 16:55:21 +01:00
/* peer is not aware of a problem */
2017-10-25 11:01:44 +02:00
goto out_err_unlock ;
2017-01-09 16:55:21 +01:00
if ( reason_code > 0 )
2017-10-25 11:01:44 +02:00
goto decline_rdma_unlock ;
2017-01-09 16:55:20 +01:00
}
2017-01-09 16:55:16 +01:00
2017-01-09 16:55:23 +01:00
smc_tx_init ( new_smc ) ;
2017-10-25 11:01:44 +02:00
mutex_unlock ( & smc_create_lgr_pending ) ;
2017-01-09 16:55:23 +01:00
2017-01-09 16:55:16 +01:00
out_connected :
sk_refcnt_debug_inc ( newsmcsk ) ;
2017-01-09 16:55:25 +01:00
if ( newsmcsk - > sk_state = = SMC_INIT )
newsmcsk - > sk_state = SMC_ACTIVE ;
2017-01-09 16:55:16 +01:00
enqueue :
2017-01-09 16:55:25 +01:00
lock_sock_nested ( & lsmc - > sk , SINGLE_DEPTH_NESTING ) ;
2017-01-09 16:55:16 +01:00
if ( lsmc - > sk . sk_state = = SMC_LISTEN ) {
smc_accept_enqueue ( & lsmc - > sk , newsmcsk ) ;
} else { /* no longer listening */
smc_close_non_accepted ( newsmcsk ) ;
}
release_sock ( & lsmc - > sk ) ;
/* Wake up accept */
lsmc - > sk . sk_data_ready ( & lsmc - > sk ) ;
sock_put ( & lsmc - > sk ) ; /* sock_hold in smc_tcp_listen_work */
return ;
2017-10-25 11:01:44 +02:00
decline_rdma_unlock :
2018-01-25 11:15:34 +01:00
if ( local_contact = = SMC_FIRST_CONTACT )
smc_lgr_forget ( new_smc - > conn . lgr ) ;
2017-10-25 11:01:44 +02:00
mutex_unlock ( & smc_create_lgr_pending ) ;
2017-01-09 16:55:16 +01:00
decline_rdma :
/* RDMA setup failed, switch back to TCP */
2017-01-09 16:55:17 +01:00
smc_conn_free ( & new_smc - > conn ) ;
2017-01-09 16:55:16 +01:00
new_smc - > use_fallback = true ;
if ( reason_code & & ( reason_code ! = SMC_CLC_DECL_REPLY ) ) {
2017-12-07 13:38:45 +01:00
if ( smc_clc_send_decline ( new_smc , reason_code ) < 0 )
2017-01-09 16:55:16 +01:00
goto out_err ;
}
goto out_connected ;
2017-10-25 11:01:44 +02:00
out_err_unlock :
2018-01-25 11:15:34 +01:00
if ( local_contact = = SMC_FIRST_CONTACT )
smc_lgr_forget ( new_smc - > conn . lgr ) ;
2017-10-25 11:01:44 +02:00
mutex_unlock ( & smc_create_lgr_pending ) ;
2017-01-09 16:55:16 +01:00
out_err :
2018-01-26 09:28:48 +01:00
if ( newsmcsk - > sk_state = = SMC_INIT )
sock_put ( & new_smc - > sk ) ; /* passive closing */
2017-01-09 16:55:16 +01:00
newsmcsk - > sk_state = SMC_CLOSED ;
2017-01-09 16:55:25 +01:00
smc_conn_free ( & new_smc - > conn ) ;
2017-01-09 16:55:16 +01:00
goto enqueue ; /* queue new sock with sk_err set */
}
static void smc_tcp_listen_work ( struct work_struct * work )
{
struct smc_sock * lsmc = container_of ( work , struct smc_sock ,
tcp_listen_work ) ;
2018-01-24 10:28:12 +01:00
struct sock * lsk = & lsmc - > sk ;
2017-01-09 16:55:16 +01:00
struct smc_sock * new_smc ;
int rc = 0 ;
2018-01-24 10:28:12 +01:00
lock_sock ( lsk ) ;
while ( lsk - > sk_state = = SMC_LISTEN ) {
2017-01-09 16:55:16 +01:00
rc = smc_clcsock_accept ( lsmc , & new_smc ) ;
if ( rc )
goto out ;
if ( ! new_smc )
continue ;
new_smc - > listen_smc = lsmc ;
new_smc - > use_fallback = false ; /* assume rdma capability first*/
2018-01-24 10:28:12 +01:00
sock_hold ( lsk ) ; /* sock_put in smc_listen_work */
2017-01-09 16:55:16 +01:00
INIT_WORK ( & new_smc - > smc_listen_work , smc_listen_work ) ;
smc_copy_sock_settings_to_smc ( new_smc ) ;
2018-01-26 09:28:48 +01:00
sock_hold ( & new_smc - > sk ) ; /* sock_put in passive closing */
if ( ! schedule_work ( & new_smc - > smc_listen_work ) )
sock_put ( & new_smc - > sk ) ;
2017-01-09 16:55:16 +01:00
}
out :
2018-01-26 09:28:49 +01:00
if ( lsmc - > clcsock ) {
sock_release ( lsmc - > clcsock ) ;
lsmc - > clcsock = NULL ;
}
2018-01-24 10:28:12 +01:00
release_sock ( lsk ) ;
2018-01-26 09:28:49 +01:00
/* no more listening, wake up smc_close_wait_listen_clcsock and
* accept
*/
lsk - > sk_state_change ( lsk ) ;
2018-01-26 09:28:48 +01:00
sock_put ( & lsmc - > sk ) ; /* sock_hold in smc_listen */
2017-01-09 16:55:16 +01:00
}
2017-01-09 16:55:13 +01:00
static int smc_listen ( struct socket * sock , int backlog )
{
struct sock * sk = sock - > sk ;
struct smc_sock * smc ;
int rc ;
smc = smc_sk ( sk ) ;
lock_sock ( sk ) ;
rc = - EINVAL ;
if ( ( sk - > sk_state ! = SMC_INIT ) & & ( sk - > sk_state ! = SMC_LISTEN ) )
goto out ;
rc = 0 ;
if ( sk - > sk_state = = SMC_LISTEN ) {
sk - > sk_max_ack_backlog = backlog ;
goto out ;
}
/* some socket options are handled in core, so we could not apply
* them to the clc socket - - copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc ( smc ) ;
2017-10-25 11:01:46 +02:00
tcp_sk ( smc - > clcsock - > sk ) - > syn_smc = 1 ;
2017-01-09 16:55:13 +01:00
rc = kernel_listen ( smc - > clcsock , backlog ) ;
if ( rc )
goto out ;
sk - > sk_max_ack_backlog = backlog ;
sk - > sk_ack_backlog = 0 ;
sk - > sk_state = SMC_LISTEN ;
2017-01-09 16:55:16 +01:00
INIT_WORK ( & smc - > tcp_listen_work , smc_tcp_listen_work ) ;
2018-01-26 09:28:48 +01:00
sock_hold ( sk ) ; /* sock_hold in tcp_listen_worker */
if ( ! schedule_work ( & smc - > tcp_listen_work ) )
sock_put ( sk ) ;
2017-01-09 16:55:13 +01:00
out :
release_sock ( sk ) ;
return rc ;
}
/* smc_accept() - take one connection off the accept queue
 * @sock:     listening SMC socket
 * @new_sock: socket structure handed to smc_accept_dequeue()
 * @flags:    file flags; O_NONBLOCK selects a zero wait time
 * @kern:     not used by this function
 *
 * Sleeps interruptibly until smc_accept_dequeue() delivers a socket, the
 * receive timeout expires (-EAGAIN) or a signal is pending. Returns
 * -EINVAL if the socket is not in state SMC_LISTEN. On a successful
 * dequeue the result of sock_error() on the new socket is returned.
 */
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *child;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *listener;
	long remaining;
	int rc = 0;

	listener = smc_sk(sk);
	sock_hold(sk); /* released at the end of this function */
	lock_sock(sk);

	if (listener->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* wait for an incoming connection */
	remaining = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	for (;;) {
		child = smc_accept_dequeue(sk, new_sock);
		if (child)
			break;

		set_current_state(TASK_INTERRUPTIBLE);
		if (!remaining) {
			rc = -EAGAIN;
			break;
		}
		/* sleep without the socket lock */
		release_sock(sk);
		remaining = schedule_timeout(remaining);
		/* woken by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(remaining);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	/* rc == 0 implies the loop ended with a dequeued socket */
	if (!rc)
		rc = sock_error(child);

out:
	release_sock(sk);
	sock_put(sk); /* pairs with the sock_hold above */
	return rc;
}
static int smc_getname ( struct socket * sock , struct sockaddr * addr ,
2018-02-12 20:00:20 +01:00
int peer )
2017-01-09 16:55:13 +01:00
{
struct smc_sock * smc ;
2017-01-09 16:55:25 +01:00
if ( peer & & ( sock - > sk - > sk_state ! = SMC_ACTIVE ) & &
( sock - > sk - > sk_state ! = SMC_APPCLOSEWAIT1 ) )
2017-01-09 16:55:13 +01:00
return - ENOTCONN ;
smc = smc_sk ( sock - > sk ) ;
2018-02-12 20:00:20 +01:00
return smc - > clcsock - > ops - > getname ( smc - > clcsock , addr , peer ) ;
2017-01-09 16:55:13 +01:00
}
/* smc_sendmsg() - send data on an SMC socket
 * @sock: SMC socket
 * @msg:  message to send
 * @len:  number of bytes to send
 *
 * Sending is allowed in states SMC_ACTIVE, SMC_APPCLOSEWAIT1 and SMC_INIT;
 * any other state yields -EPIPE. Fallback sockets send through the CLC
 * socket, all others through smc_tx_sendmsg().
 */
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc = smc_sk(sk);
	int rc;

	lock_sock(sk);
	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
	case SMC_INIT:
		if (smc->use_fallback)
			rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg,
							len);
		else
			rc = smc_tx_sendmsg(smc, msg, len);
		break;
	default:
		rc = -EPIPE;
		break;
	}
	release_sock(sk);

	return rc;
}
/* smc_recvmsg() - receive data from an SMC socket
 * @sock:  SMC socket
 * @msg:   message buffer description
 * @len:   maximum number of bytes to receive
 * @flags: receive flags, passed through unchanged
 *
 * Returns -ENOTCONN in states SMC_INIT, SMC_LISTEN and SMC_CLOSED, and 0 in
 * state SMC_PEERFINCLOSEWAIT. Otherwise the result of the CLC socket's
 * recvmsg (fallback) or of smc_rx_recvmsg() is returned.
 */
static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc = smc_sk(sk);
	int rc;

	lock_sock(sk);
	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_LISTEN:
	case SMC_CLOSED:
		rc = -ENOTCONN;
		break;
	case SMC_PEERFINCLOSEWAIT:
		rc = 0;
		break;
	default:
		if (smc->use_fallback)
			rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg,
							len, flags);
		else
			rc = smc_rx_recvmsg(smc, msg, len, flags);
		break;
	}
	release_sock(sk);

	return rc;
}
2017-07-03 00:01:49 -04:00
static __poll_t smc_accept_poll ( struct sock * parent )
2017-01-09 16:55:16 +01:00
{
2018-01-26 09:28:47 +01:00
struct smc_sock * isk = smc_sk ( parent ) ;
2018-02-01 10:02:53 -05:00
__poll_t mask = 0 ;
2017-01-09 16:55:16 +01:00
2018-01-26 09:28:47 +01:00
spin_lock ( & isk - > accept_q_lock ) ;
if ( ! list_empty ( & isk - > accept_q ) )
2018-02-11 14:34:03 -08:00
mask = EPOLLIN | EPOLLRDNORM ;
2018-01-26 09:28:47 +01:00
spin_unlock ( & isk - > accept_q_lock ) ;
2017-01-09 16:55:16 +01:00
2018-01-26 09:28:47 +01:00
return mask ;
2017-01-09 16:55:16 +01:00
}
2017-07-03 00:01:49 -04:00
/* smc_poll() - poll handler of the SMC socket family
 * @file: file the poll was issued on
 * @sock: SMC socket
 * @wait: poll table
 *
 * Returns EPOLLNVAL when the socket has no sock attached. For sockets in
 * state SMC_INIT or in fallback mode the poll is delegated to the internal
 * CLC socket; a writable CLC socket in state SMC_INIT additionally triggers
 * smc_connect_rdma(). All other sockets derive the mask from their own
 * state, error and shutdown fields and from the connection counters.
 *
 * A sock reference is held for the whole call. The socket lock is held
 * while state is evaluated, but is dropped around the delegated poll and
 * around sock_poll_wait().
 */
static __poll_t smc_poll ( struct file * file , struct socket * sock ,
2017-01-09 16:55:13 +01:00
poll_table * wait )
{
struct sock * sk = sock - > sk ;
2017-07-03 22:25:56 -04:00
__poll_t mask = 0 ;
2017-01-09 16:55:13 +01:00
struct smc_sock * smc ;
2017-01-09 16:55:16 +01:00
int rc ;
2017-01-09 16:55:13 +01:00
2018-01-26 09:28:47 +01:00
if ( ! sk )
2018-02-11 14:34:03 -08:00
return EPOLLNVAL ;
2018-01-26 09:28:47 +01:00
2017-01-09 16:55:13 +01:00
smc = smc_sk ( sock - > sk ) ;
2018-01-26 09:28:47 +01:00
/* reference and lock are both released at the end of the function */
sock_hold ( sk ) ;
lock_sock ( sk ) ;
2017-01-09 16:55:16 +01:00
if ( ( sk - > sk_state = = SMC_INIT ) | | smc - > use_fallback ) {
/* delegate to CLC child sock */
2018-01-26 09:28:47 +01:00
/* the CLC socket is polled without holding the SMC socket lock */
release_sock ( sk ) ;
2017-01-09 16:55:13 +01:00
mask = smc - > clcsock - > ops - > poll ( file , smc - > clcsock , wait ) ;
/* if non-blocking connect finished ... */
lock_sock ( sk ) ;
2018-02-11 14:34:03 -08:00
if ( ( sk - > sk_state = = SMC_INIT ) & & ( mask & EPOLLOUT ) ) {
2017-01-09 16:55:16 +01:00
/* take over the error state of the CLC socket */
sk - > sk_err = smc - > clcsock - > sk - > sk_err ;
if ( sk - > sk_err ) {
2018-02-11 14:34:03 -08:00
mask | = EPOLLERR ;
2017-01-09 16:55:16 +01:00
} else {
rc = smc_connect_rdma ( smc ) ;
if ( rc < 0 )
2018-02-11 14:34:03 -08:00
mask | = EPOLLERR ;
2018-01-26 09:28:47 +01:00
/* success cases including fallback */
2018-02-11 14:34:03 -08:00
/* note: the writable bits are set even when rc < 0 */
mask | = EPOLLOUT | EPOLLWRNORM ;
2017-01-09 16:55:16 +01:00
}
2017-01-09 16:55:13 +01:00
}
} else {
2018-01-26 09:28:47 +01:00
/* register on the wait queue (lock dropped) unless already closed */
if ( sk - > sk_state ! = SMC_CLOSED ) {
release_sock ( sk ) ;
sock_poll_wait ( file , sk_sleep ( sk ) , wait ) ;
lock_sock ( sk ) ;
}
2017-01-09 16:55:16 +01:00
if ( sk - > sk_err )
2018-02-11 14:34:03 -08:00
mask | = EPOLLERR ;
2017-01-09 16:55:25 +01:00
if ( ( sk - > sk_shutdown = = SHUTDOWN_MASK ) | |
( sk - > sk_state = = SMC_CLOSED ) )
2018-02-11 14:34:03 -08:00
mask | = EPOLLHUP ;
2018-01-26 09:28:47 +01:00
if ( sk - > sk_state = = SMC_LISTEN ) {
/* woken up by sk_data_ready in smc_listen_work() */
/* plain assignment: EPOLLERR/EPOLLHUP bits set above are discarded */
mask = smc_accept_poll ( sk ) ;
} else {
/* writable while send buffer space is left or sending is shut down */
if ( atomic_read ( & smc - > conn . sndbuf_space ) | |
sk - > sk_shutdown & SEND_SHUTDOWN ) {
2018-02-11 14:34:03 -08:00
mask | = EPOLLOUT | EPOLLWRNORM ;
2018-01-26 09:28:47 +01:00
} else {
/* not writable: flag the no-space condition on sock and socket */
sk_set_bit ( SOCKWQ_ASYNC_NOSPACE , sk ) ;
set_bit ( SOCK_NOSPACE , & sk - > sk_socket - > flags ) ;
}
/* readable while bytes_to_rcv is non-zero */
if ( atomic_read ( & smc - > conn . bytes_to_rcv ) )
2018-02-11 14:34:03 -08:00
mask | = EPOLLIN | EPOLLRDNORM ;
2018-01-26 09:28:47 +01:00
if ( sk - > sk_shutdown & RCV_SHUTDOWN )
2018-02-11 14:34:03 -08:00
mask | = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
2018-01-26 09:28:47 +01:00
if ( sk - > sk_state = = SMC_APPCLOSEWAIT1 )
2018-02-11 14:34:03 -08:00
mask | = EPOLLIN ;
2018-01-26 09:28:47 +01:00
}
2017-01-09 16:55:25 +01:00
2017-01-09 16:55:13 +01:00
}
2018-01-26 09:28:47 +01:00
release_sock ( sk ) ;
sock_put ( sk ) ;
2017-01-09 16:55:13 +01:00
return mask ;
}
static int smc_shutdown ( struct socket * sock , int how )
{
struct sock * sk = sock - > sk ;
struct smc_sock * smc ;
int rc = - EINVAL ;
2017-01-09 16:55:25 +01:00
int rc1 = 0 ;
2017-01-09 16:55:13 +01:00
smc = smc_sk ( sk ) ;
if ( ( how < SHUT_RD ) | | ( how > SHUT_RDWR ) )
2017-01-09 16:55:25 +01:00
return rc ;
2017-01-09 16:55:13 +01:00
lock_sock ( sk ) ;
rc = - ENOTCONN ;
2017-01-09 16:55:25 +01:00
if ( ( sk - > sk_state ! = SMC_LISTEN ) & &
( sk - > sk_state ! = SMC_ACTIVE ) & &
( sk - > sk_state ! = SMC_PEERCLOSEWAIT1 ) & &
( sk - > sk_state ! = SMC_PEERCLOSEWAIT2 ) & &
( sk - > sk_state ! = SMC_APPCLOSEWAIT1 ) & &
( sk - > sk_state ! = SMC_APPCLOSEWAIT2 ) & &
( sk - > sk_state ! = SMC_APPFINCLOSEWAIT ) )
2017-01-09 16:55:13 +01:00
goto out ;
if ( smc - > use_fallback ) {
rc = kernel_sock_shutdown ( smc - > clcsock , how ) ;
sk - > sk_shutdown = smc - > clcsock - > sk - > sk_shutdown ;
if ( sk - > sk_shutdown = = SHUTDOWN_MASK )
sk - > sk_state = SMC_CLOSED ;
2017-01-09 16:55:25 +01:00
goto out ;
2017-01-09 16:55:13 +01:00
}
2017-01-09 16:55:25 +01:00
switch ( how ) {
case SHUT_RDWR : /* shutdown in both directions */
rc = smc_close_active ( smc ) ;
break ;
case SHUT_WR :
rc = smc_close_shutdown_write ( smc ) ;
break ;
case SHUT_RD :
if ( sk - > sk_state = = SMC_LISTEN )
rc = smc_close_active ( smc ) ;
else
rc = 0 ;
/* nothing more to do because peer is not involved */
break ;
}
rc1 = kernel_sock_shutdown ( smc - > clcsock , how ) ;
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk - > sk_shutdown | = how + 1 ;
2017-01-09 16:55:13 +01:00
out :
release_sock ( sk ) ;
2017-01-09 16:55:25 +01:00
return rc ? rc : rc1 ;
2017-01-09 16:55:13 +01:00
}
/* smc_setsockopt() - set a socket option
 *
 * Generic socket options that reach this handler always apply to the
 * internal CLC socket, so the call is forwarded unchanged to that socket's
 * setsockopt operation and its result is returned.
 */
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct socket *clc = smc_sk(sock->sk)->clcsock;

	return clc->ops->setsockopt(clc, level, optname, optval, optlen);
}
/* smc_getsockopt() - read a socket option
 *
 * Socket options live on the internal CLC socket; the request is forwarded
 * unchanged to that socket's getsockopt operation.
 */
static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct socket *clc = smc_sk(sock->sk)->clcsock;

	return clc->ops->getsockopt(clc, level, optname, optval, optlen);
}
static int smc_ioctl ( struct socket * sock , unsigned int cmd ,
unsigned long arg )
{
struct smc_sock * smc ;
smc = smc_sk ( sock - > sk ) ;
if ( smc - > use_fallback )
return smc - > clcsock - > ops - > ioctl ( smc - > clcsock , cmd , arg ) ;
else
return sock_no_ioctl ( sock , cmd , arg ) ;
}
/* smc_sendpage() - send a page on an SMC socket
 *
 * Returns -EPIPE unless the socket is in state SMC_ACTIVE. Fallback sockets
 * hand the page to the CLC socket via kernel_sendpage(); all others return
 * the result of sock_no_sendpage().
 */
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc = smc_sk(sk);
	int rc;

	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		rc = -EPIPE;
	else if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);
	release_sock(sk);

	return rc;
}
/* smc_splice_read() - splice data from an SMC socket into a pipe
 *
 * Returns -ENOTCONN unless the socket is in state SMC_ACTIVE or SMC_CLOSED.
 * Fallback sockets forward the request to the CLC socket's splice_read
 * operation; all other sockets get -EOPNOTSUPP.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc = smc_sk(sk);
	int rc;

	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		rc = -ENOTCONN;
	else if (smc->use_fallback)
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	else
		rc = -EOPNOTSUPP;
	release_sock(sk);

	return rc;
}
/* socket operations of the PF_SMC family -- must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
static int smc_create ( struct net * net , struct socket * sock , int protocol ,
int kern )
{
struct smc_sock * smc ;
struct sock * sk ;
int rc ;
rc = - ESOCKTNOSUPPORT ;
if ( sock - > type ! = SOCK_STREAM )
goto out ;
rc = - EPROTONOSUPPORT ;
if ( ( protocol ! = IPPROTO_IP ) & & ( protocol ! = IPPROTO_TCP ) )
goto out ;
rc = - ENOBUFS ;
sock - > ops = & smc_sock_ops ;
sk = smc_sock_alloc ( net , sock ) ;
if ( ! sk )
goto out ;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk ( sk ) ;
2017-01-09 16:55:16 +01:00
smc - > use_fallback = false ; /* assume rdma capability first */
2017-01-09 16:55:13 +01:00
rc = sock_create_kern ( net , PF_INET , SOCK_STREAM ,
IPPROTO_TCP , & smc - > clcsock ) ;
if ( rc )
sk_common_release ( sk ) ;
2017-01-09 16:55:18 +01:00
smc - > sk . sk_sndbuf = max ( smc - > clcsock - > sk - > sk_sndbuf , SMC_BUF_MIN_SIZE ) ;
smc - > sk . sk_rcvbuf = max ( smc - > clcsock - > sk - > sk_rcvbuf , SMC_BUF_MIN_SIZE ) ;
2017-01-09 16:55:13 +01:00
out :
return rc ;
}
/* registration record of the PF_SMC protocol family, used with
 * sock_register() in smc_init()
 */
static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};
/* smc_init() - module initialization
 *
 * Initializes pnet, LLC and CDC handling, registers the SMC proto and the
 * PF_SMC socket family, registers the IB client and finally enables the
 * tcp_have_smc static branch. On failure the steps that have a visible
 * counterpart here (sock, proto, pnet) are undone in reverse order and the
 * error code of the failing step is returned.
 */
static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	/* everything is in place: enable the tcp_have_smc static branch */
	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}
/* smc_exit() - module cleanup
 *
 * Moves all remaining link groups from the global list to a private list
 * while holding the list lock, frees them outside the lock, and then undoes
 * the registrations made by smc_init().
 */
static void __exit smc_exit(void)
{
	struct smc_link_group *cur, *next;
	LIST_HEAD(doomed);

	/* detach the link groups under the lock ... */
	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &doomed);
	spin_unlock_bh(&smc_lgr_list.lock);

	/* ... and free them without holding it */
	list_for_each_entry_safe(cur, next, &doomed, list) {
		list_del_init(&cur->list);
		smc_lgr_free(cur);
	}

	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}
/* module entry points and metadata; the strings are exact identifiers
 * (the license ident in particular) and must not carry padding blanks
 */
module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);