2009-02-24 18:30:37 +03:00
/*
2018-07-24 06:51:21 +03:00
* Copyright ( c ) 2009 , 2018 Oracle and / or its affiliates . All rights reserved .
2009-02-24 18:30:37 +03:00
*
* This software is available to you under a choice of one of two
* licenses . You may choose to be licensed under the terms of the GNU
* General Public License ( GPL ) Version 2 , available from the file
* COPYING in the main directory of this source tree , or the
* OpenIB . org BSD license below :
*
* Redistribution and use in source and binary forms , with or
* without modification , are permitted provided that the following
* conditions are met :
*
* - Redistributions of source code must retain the above
* copyright notice , this list of conditions and the following
* disclaimer .
*
* - Redistributions in binary form must reproduce the above
* copyright notice , this list of conditions and the following
* disclaimer in the documentation and / or other materials
* provided with the distribution .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND ,
* EXPRESS OR IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT . IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY , WHETHER IN AN
* ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM , OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE .
*
*/
2011-05-27 17:12:25 +04:00
# include <linux/module.h>
2009-02-24 18:30:37 +03:00
# include <rdma/rdma_cm.h>
2016-06-13 19:44:26 +03:00
# include "rds_single_path.h"
2009-02-24 18:30:37 +03:00
# include "rdma_transport.h"
2015-08-23 01:45:35 +03:00
# include "ib.h"
2009-02-24 18:30:37 +03:00
2018-07-24 06:51:22 +03:00
/*
 * Module-wide listening cm_ids for RDS over RDMA: one for IPv4 and,
 * only when IPv6 support is enabled in the build, one for IPv6.
 */
static struct rdma_cm_id *rds_rdma_listen_id;
#if IS_ENABLED(CONFIG_IPV6)
static struct rdma_cm_id *rds6_rdma_listen_id;
#endif
2009-02-24 18:30:37 +03:00
2019-08-24 04:04:16 +03:00
/*
 * Map a ToS value to an IB service level by keeping its low four bits;
 * per IB specification 7.7.3 the service level is a 4-bit field.
 */
#define TOS_TO_SL(tos)	(0x0F & (tos))
2018-07-24 06:51:21 +03:00
/*
 * Common RDMA CM event handler behind the IPv4 and IPv6 entry points.
 *
 * @cm_id: cm_id the event was raised on. Its context is the associated
 *         rds_connection, which can be NULL in the listening path.
 * @event: the RDMA CM event to process.
 * @isv6:  forwarded unchanged to the transport's connect callbacks.
 *
 * When a connection is attached, the whole handler runs under its
 * c_cm_lock so that shutdown cannot tear the connection down meanwhile.
 *
 * Returns 0 in the common case, otherwise whatever the transport callback
 * or rdma_resolve_route() returned. Returns 1 to refuse an incoming
 * connect request, either because the connection is being torn down or
 * because no transport matches the device.
 */
static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
					 struct rdma_cm_event *event,
					 bool isv6)
{
	/* this can be null in the listening path */
	struct rds_connection *conn = cm_id->context;
	/* Stays NULL unless the device is an IB CA; every use is guarded. */
	struct rds_transport *trans = NULL;
	int ret = 0;
	int *err;
	u8 len;

	rdsdebug(" conn %p id %p handling event %u (%s) \n ", conn, cm_id,
		 event->event, rdma_event_msg(event->event));

	if (cm_id->device->node_type == RDMA_NODE_IB_CA)
		trans = &rds_ib_transport;

	/* Prevent shutdown from tearing down the connection
	 * while we're executing.
	 */
	if (conn) {
		mutex_lock(&conn->c_cm_lock);

		/* If the connection is being shut down, bail out
		 * right away. We return 0 so cm_id doesn't get
		 * destroyed prematurely.
		 */
		if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
			/* Reject incoming connections while we're tearing
			 * down an existing one.
			 */
			if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
				ret = 1;
			goto out;
		}
	}

	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/* Without a matching transport nobody can accept the
		 * request, so refuse it the same way as above.
		 */
		if (trans)
			ret = trans->cm_handle_connect(cm_id, event, isv6);
		else
			ret = 1;
		break;

	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* conn may be NULL (see above); there is no ToS to apply
		 * in that case, so skip instead of dereferencing it.
		 */
		if (conn) {
			rdma_set_service_type(cm_id, conn->c_tos);
			rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
			/* XXX do we need to clean up if this fails? */
			ret = rdma_resolve_route(cm_id,
						 RDS_RDMA_RESOLVE_TIMEOUT_MS);
		}
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* Connection could have been dropped so make sure the
		 * cm_id is valid before proceeding
		 */
		if (conn) {
			struct rds_ib_connection *ibic;

			ibic = conn->c_transport_data;
			if (trans && ibic && ibic->i_cm_id == cm_id) {
				cm_id->route.path_rec[0].sl =
					TOS_TO_SL(conn->c_tos);
				ret = trans->cm_initiate_connect(cm_id, isv6);
			} else {
				rds_conn_drop(conn);
			}
		}
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		if (conn && trans)
			trans->cm_connect_complete(conn, event);
		break;

	case RDMA_CM_EVENT_REJECTED:
		if (!conn)
			break;

		/* Older RDS peers reject with rdma_reject(cm_id, NULL, 0),
		 * so the consumer payload reads as 0 rather than
		 * RDS_RDMA_REJ_INCOMPAT. Hence "<=" instead of "==", and
		 * the length check before trusting *err. The value is
		 * still interpreted in host byte order and int size.
		 */
		err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
		if (!err ||
		    (len >= sizeof(*err) && *err <= RDS_RDMA_REJ_INCOMPAT)) {
			pr_warn(" RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection \n ",
				&conn->c_laddr, &conn->c_faddr);

			if (!conn->c_tos)
				conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;

			rds_conn_drop(conn);
		}
		rdsdebug(" Connection rejected: %s \n ",
			 rdma_reject_msg(cm_id, event->status));
		break;

	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_ADDR_CHANGE:
		if (conn)
			rds_conn_drop(conn);
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		if (!conn)
			break;
		rdsdebug(" DISCONNECT event - dropping connection "
			 " %pI6c->%pI6c \n ", &conn->c_laddr,
			 &conn->c_faddr);
		rds_conn_drop(conn);
		break;

	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		if (conn) {
			pr_info(" RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI6c->%pI6c \n ",
				&conn->c_laddr, &conn->c_faddr);
			rds_conn_drop(conn);
		}
		break;

	default:
		/* things like device disconnect? */
		printk(KERN_ERR " RDS: unknown event %u (%s)! \n ",
		       event->event, rdma_event_msg(event->event));
		break;
	}

out:
	if (conn)
		mutex_unlock(&conn->c_cm_lock);

	rdsdebug(" id %p event %u (%s) handling ret %d \n ", cm_id, event->event,
		 rdma_event_msg(event->event), ret);

	return ret;
}
2018-07-24 06:51:21 +03:00
int rds_rdma_cm_event_handler ( struct rdma_cm_id * cm_id ,
struct rdma_cm_event * event )
{
return rds_rdma_cm_event_handler_cmn ( cm_id , event , false ) ;
}
2018-07-31 08:48:42 +03:00
#if IS_ENABLED(CONFIG_IPV6)
/*
 * RDMA CM event callback that forwards to the common handler with the
 * IPv6 flag set. Only built when IPv6 support is enabled. Returns the
 * common handler's result unchanged.
 */
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
			       struct rdma_cm_event *event)
{
	const bool isv6 = true;

	return rds_rdma_cm_event_handler_cmn(cm_id, event, isv6);
}
#endif
2018-07-24 06:51:22 +03:00
2018-07-24 06:51:21 +03:00
/*
 * Create a cm_id, bind it to @sa and start listening on it.
 *
 * @handler:   event handler installed on the new cm_id.
 * @sa:        address to bind the listener to.
 * @ret_cm_id: receives the listening cm_id on success; left untouched
 *             on failure.
 *
 * Returns 0 on success. On failure the error from rdma_create_id(),
 * rdma_bind_addr() or rdma_listen() is logged and returned, and any
 * cm_id that was created is destroyed again.
 */
static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
				       struct sockaddr *sa,
				       struct rdma_cm_id **ret_cm_id)
{
	struct rdma_cm_id *listener;
	int rc;

	listener = rdma_create_id(&init_net, handler, NULL,
				  RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(listener)) {
		rc = PTR_ERR(listener);
		printk(KERN_ERR " RDS/RDMA: failed to setup listener, "
		       " rdma_create_id() returned %d \n ", rc);
		return rc;
	}

	/*
	 * XXX I bet this binds the cm_id to a device.  If we want to support
	 * fail-over we'll have to take this into consideration.
	 */
	rc = rdma_bind_addr(listener, sa);
	if (rc) {
		printk(KERN_ERR " RDS/RDMA: failed to setup listener, "
		       " rdma_bind_addr() returned %d \n ", rc);
		goto destroy;
	}

	rc = rdma_listen(listener, 128);
	if (rc) {
		printk(KERN_ERR " RDS/RDMA: failed to setup listener, "
		       " rdma_listen() returned %d \n ", rc);
		goto destroy;
	}

	rdsdebug(" cm %p listening on port %u \n ", listener, RDS_PORT);

	/* Success: hand the listener over to the caller. */
	*ret_cm_id = listener;
	return 0;

destroy:
	rdma_destroy_id(listener);
	return rc;
}
2018-07-24 06:51:21 +03:00
/* Initialize the RDS RDMA listeners. We create two listeners for
* compatibility reason . The one on RDS_PORT is used for IPv4
* requests only . The one on RDS_CM_PORT is used for IPv6 requests
* only . So only IPv6 enabled RDS module will communicate using this
* port .
*/
static int rds_rdma_listen_init ( void )
{
int ret ;
2018-07-31 08:48:42 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2018-07-24 06:51:22 +03:00
struct sockaddr_in6 sin6 ;
2018-07-31 08:48:42 +03:00
# endif
2018-07-24 06:51:21 +03:00
struct sockaddr_in sin ;
sin . sin_family = PF_INET ;
sin . sin_addr . s_addr = htonl ( INADDR_ANY ) ;
sin . sin_port = htons ( RDS_PORT ) ;
ret = rds_rdma_listen_init_common ( rds_rdma_cm_event_handler ,
( struct sockaddr * ) & sin ,
& rds_rdma_listen_id ) ;
2018-07-24 06:51:22 +03:00
if ( ret ! = 0 )
return ret ;
2018-07-31 08:48:42 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2018-07-24 06:51:22 +03:00
sin6 . sin6_family = PF_INET6 ;
sin6 . sin6_addr = in6addr_any ;
sin6 . sin6_port = htons ( RDS_CM_PORT ) ;
sin6 . sin6_scope_id = 0 ;
sin6 . sin6_flowinfo = 0 ;
ret = rds_rdma_listen_init_common ( rds6_rdma_cm_event_handler ,
( struct sockaddr * ) & sin6 ,
& rds6_rdma_listen_id ) ;
/* Keep going even when IPv6 is not enabled in the system. */
if ( ret ! = 0 )
rdsdebug ( " Cannot set up IPv6 RDMA listener \n " ) ;
2018-07-31 08:48:42 +03:00
# endif
2018-07-24 06:51:22 +03:00
return 0 ;
2018-07-24 06:51:21 +03:00
}
2009-02-24 18:30:37 +03:00
static void rds_rdma_listen_stop ( void )
{
2009-04-09 18:09:37 +04:00
if ( rds_rdma_listen_id ) {
rdsdebug ( " cm %p \n " , rds_rdma_listen_id ) ;
rdma_destroy_id ( rds_rdma_listen_id ) ;
rds_rdma_listen_id = NULL ;
2009-02-24 18:30:37 +03:00
}
2018-07-31 08:48:42 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2018-07-24 06:51:22 +03:00
if ( rds6_rdma_listen_id ) {
rdsdebug ( " cm %p \n " , rds6_rdma_listen_id ) ;
rdma_destroy_id ( rds6_rdma_listen_id ) ;
rds6_rdma_listen_id = NULL ;
}
2018-07-31 08:48:42 +03:00
# endif
2009-02-24 18:30:37 +03:00
}
2022-09-09 12:18:40 +03:00
/*
 * Module entry point: initialize the IB transport, then set up the RDMA
 * listeners. If the listeners cannot be set up, the IB transport is
 * shut down again. Returns 0 on success or the first error seen.
 */
static int __init rds_rdma_init(void)
{
	int rc = rds_ib_init();

	if (rc)
		return rc;

	rc = rds_rdma_listen_init();
	if (rc)
		rds_ib_exit();

	return rc;
}
module_init(rds_rdma_init);
2009-02-24 18:30:37 +03:00
2022-09-09 12:18:40 +03:00
/*
 * Module exit point: tear down the listeners, then the IB transport.
 */
static void __exit rds_rdma_exit(void)
{
	/* Listeners go first so that no new connections are attempted
	 * while the transport is being shut down.
	 */
	rds_rdma_listen_stop();
	rds_ib_exit();
}
module_exit(rds_rdma_exit);
/* Module metadata. */
MODULE_AUTHOR(" Oracle Corporation <rds-devel@oss.oracle.com> ");
MODULE_DESCRIPTION(" RDS: IB transport ");
MODULE_LICENSE(" Dual BSD/GPL ");