2009-02-24 15:30:23 +00:00
/*
* Copyright ( c ) 2006 Oracle . All rights reserved .
*
* This software is available to you under a choice of one of two
* licenses . You may choose to be licensed under the terms of the GNU
* General Public License ( GPL ) Version 2 , available from the file
* COPYING in the main directory of this source tree , or the
* OpenIB . org BSD license below :
*
* Redistribution and use in source and binary forms , with or
* without modification , are permitted provided that the following
* conditions are met :
*
* - Redistributions of source code must retain the above
* copyright notice , this list of conditions and the following
* disclaimer .
*
* - Redistributions in binary form must reproduce the above
* copyright notice , this list of conditions and the following
* disclaimer in the documentation and / or other materials
* provided with the distribution .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND ,
* EXPRESS OR IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT . IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY , WHETHER IN AN
* ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM , OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE .
*
*/
# include <linux/kernel.h>
# include <linux/random.h>
2011-07-15 11:47:34 -04:00
# include <linux/export.h>
2009-02-24 15:30:23 +00:00
# include "rds.h"
/*
 * All of connection management is simplified by serializing it through
 * work queues that execute in a connection managing thread.
 *
 * TCP wants to send acks through sendpage() in response to data_ready(),
 * but it needs a process context to do so.
 *
 * The receive paths need to allocate but can't drop packets (!) so we have
 * a thread around to block allocating if the receive fast path sees an
 * allocation failure.
 */
/* Grand Unified Theory of connection life cycle:
 * At any point in time, the connection can be in one of these states:
 * DOWN, CONNECTING, UP, DISCONNECTING, ERROR
 *
 * The following transitions are possible:
 *  ANY           -> ERROR
 *  UP            -> DISCONNECTING
 *  ERROR         -> DISCONNECTING
 *  DISCONNECTING -> DOWN
 *  DOWN          -> CONNECTING
 *  CONNECTING    -> UP
 *
 * Transition to state DISCONNECTING/DOWN:
 *  - Inside the shutdown worker; synchronizes with xmit path
 *    through RDS_IN_XMIT, and with connection management callbacks
 *    via c_cm_lock.
 *
 *    For receive callbacks, we rely on the underlying transport
 *    (TCP, IB/RDMA) to provide the necessary synchronisation.
 */
/*
 * Workqueue on which the connect, send and receive delayed works of each
 * connection path are queued.  It is created by rds_threads_init() and
 * destroyed by rds_threads_exit().
 */
struct workqueue_struct *rds_wq;
EXPORT_SYMBOL_GPL(rds_wq);
2009-02-24 15:30:23 +00:00
2016-06-13 09:44:26 -07:00
void rds_connect_path_complete ( struct rds_conn_path * cp , int curr )
2009-02-24 15:30:23 +00:00
{
2016-06-13 09:44:26 -07:00
if ( ! rds_conn_path_transition ( cp , curr , RDS_CONN_UP ) ) {
2009-02-24 15:30:23 +00:00
printk ( KERN_WARNING " %s: Cannot transition to state UP, "
" current state is %d \n " ,
__func__ ,
2016-06-13 09:44:26 -07:00
atomic_read ( & cp - > cp_state ) ) ;
rds_conn_path_drop ( cp ) ;
2009-02-24 15:30:23 +00:00
return ;
}
rdsdebug ( " conn %p for %pI4 to %pI4 complete \n " ,
2016-06-13 09:44:26 -07:00
cp - > cp_conn , & cp - > cp_conn - > c_laddr , & cp - > cp_conn - > c_faddr ) ;
2009-02-24 15:30:23 +00:00
2016-06-13 09:44:26 -07:00
cp - > cp_reconnect_jiffies = 0 ;
set_bit ( 0 , & cp - > cp_conn - > c_map_queued ) ;
queue_delayed_work ( rds_wq , & cp - > cp_send_w , 0 ) ;
queue_delayed_work ( rds_wq , & cp - > cp_recv_w , 0 ) ;
2009-02-24 15:30:23 +00:00
}
2016-06-04 14:00:00 -07:00
EXPORT_SYMBOL_GPL ( rds_connect_path_complete ) ;
void rds_connect_complete ( struct rds_connection * conn )
{
2016-06-13 09:44:26 -07:00
rds_connect_path_complete ( & conn - > c_path [ 0 ] , RDS_CONN_CONNECTING ) ;
2016-06-04 14:00:00 -07:00
}
2009-08-21 12:28:32 +00:00
EXPORT_SYMBOL_GPL ( rds_connect_complete ) ;
2009-02-24 15:30:23 +00:00
/*
 * This random exponential backoff is relied on to eventually resolve racing
 * connects.
 *
 * If connect attempts race then both parties drop both connections and come
 * here to wait for a random amount of time before trying again.  Eventually
 * the backoff range will be so much greater than the time it takes to
 * establish a connection that one of the pair will establish the connection
 * before the other's random delay fires.
 *
 * Connection attempts that arrive while a connection is already established
 * are also considered to be racing connects.  This lets a connection from
 * a rebooted machine replace an existing stale connection before the transport
 * notices that the connection has failed.
 *
 * We should *always* start with a random backoff; otherwise a broken connection
 * will always take several iterations to be re-established.
 */
2016-06-13 09:44:26 -07:00
void rds_queue_reconnect ( struct rds_conn_path * cp )
2009-02-24 15:30:23 +00:00
{
unsigned long rand ;
2016-06-13 09:44:26 -07:00
struct rds_connection * conn = cp - > cp_conn ;
2009-02-24 15:30:23 +00:00
rdsdebug ( " conn %p for %pI4 to %pI4 reconnect jiffies %lu \n " ,
conn , & conn - > c_laddr , & conn - > c_faddr ,
2016-06-13 09:44:26 -07:00
cp - > cp_reconnect_jiffies ) ;
2009-02-24 15:30:23 +00:00
2016-06-30 16:11:17 -07:00
/* let peer with smaller addr initiate reconnect, to avoid duels */
if ( conn - > c_trans - > t_type = = RDS_TRANS_TCP & &
2017-06-15 11:28:54 -07:00
! IS_CANONICAL ( conn - > c_laddr , conn - > c_faddr ) )
2016-06-30 16:11:17 -07:00
return ;
2016-06-13 09:44:26 -07:00
set_bit ( RDS_RECONNECT_PENDING , & cp - > cp_flags ) ;
if ( cp - > cp_reconnect_jiffies = = 0 ) {
cp - > cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies ;
queue_delayed_work ( rds_wq , & cp - > cp_conn_w , 0 ) ;
2009-02-24 15:30:23 +00:00
return ;
}
get_random_bytes ( & rand , sizeof ( rand ) ) ;
rdsdebug ( " %lu delay %lu ceil conn %p for %pI4 -> %pI4 \n " ,
2016-06-13 09:44:26 -07:00
rand % cp - > cp_reconnect_jiffies , cp - > cp_reconnect_jiffies ,
2009-02-24 15:30:23 +00:00
conn , & conn - > c_laddr , & conn - > c_faddr ) ;
2016-06-13 09:44:26 -07:00
queue_delayed_work ( rds_wq , & cp - > cp_conn_w ,
rand % cp - > cp_reconnect_jiffies ) ;
2009-02-24 15:30:23 +00:00
2016-06-13 09:44:26 -07:00
cp - > cp_reconnect_jiffies = min ( cp - > cp_reconnect_jiffies * 2 ,
2009-02-24 15:30:23 +00:00
rds_sysctl_reconnect_max_jiffies ) ;
}
void rds_connect_worker ( struct work_struct * work )
{
2016-06-13 09:44:26 -07:00
struct rds_conn_path * cp = container_of ( work ,
struct rds_conn_path ,
cp_conn_w . work ) ;
struct rds_connection * conn = cp - > cp_conn ;
2009-02-24 15:30:23 +00:00
int ret ;
2017-06-15 11:28:54 -07:00
if ( cp - > cp_index > 0 & &
! IS_CANONICAL ( cp - > cp_conn - > c_laddr , cp - > cp_conn - > c_faddr ) )
2016-07-14 03:51:03 -07:00
return ;
2016-06-13 09:44:26 -07:00
clear_bit ( RDS_RECONNECT_PENDING , & cp - > cp_flags ) ;
2016-06-30 16:11:16 -07:00
ret = rds_conn_path_transition ( cp , RDS_CONN_DOWN , RDS_CONN_CONNECTING ) ;
if ( ret ) {
ret = conn - > c_trans - > conn_path_connect ( cp ) ;
2009-02-24 15:30:23 +00:00
rdsdebug ( " conn %p for %pI4 to %pI4 dispatched, ret %d \n " ,
conn , & conn - > c_laddr , & conn - > c_faddr , ret ) ;
if ( ret ) {
2016-06-13 09:44:26 -07:00
if ( rds_conn_path_transition ( cp ,
RDS_CONN_CONNECTING ,
RDS_CONN_DOWN ) )
rds_queue_reconnect ( cp ) ;
2009-02-24 15:30:23 +00:00
else
2016-10-15 11:53:22 -07:00
rds_conn_path_error ( cp , " connect failed \n " ) ;
2009-02-24 15:30:23 +00:00
}
}
}
void rds_send_worker ( struct work_struct * work )
{
2016-06-13 09:44:26 -07:00
struct rds_conn_path * cp = container_of ( work ,
struct rds_conn_path ,
cp_send_w . work ) ;
2009-02-24 15:30:23 +00:00
int ret ;
2016-06-13 09:44:26 -07:00
if ( rds_conn_path_state ( cp ) = = RDS_CONN_UP ) {
clear_bit ( RDS_LL_SEND_FULL , & cp - > cp_flags ) ;
2016-06-13 09:44:34 -07:00
ret = rds_send_xmit ( cp ) ;
2015-09-11 15:44:29 -07:00
cond_resched ( ) ;
2016-06-13 09:44:26 -07:00
rdsdebug ( " conn %p ret %d \n " , cp - > cp_conn , ret ) ;
2009-02-24 15:30:23 +00:00
switch ( ret ) {
case - EAGAIN :
rds_stats_inc ( s_send_immediate_retry ) ;
2016-06-13 09:44:26 -07:00
queue_delayed_work ( rds_wq , & cp - > cp_send_w , 0 ) ;
2009-02-24 15:30:23 +00:00
break ;
case - ENOMEM :
rds_stats_inc ( s_send_delayed_retry ) ;
2016-06-13 09:44:26 -07:00
queue_delayed_work ( rds_wq , & cp - > cp_send_w , 2 ) ;
2009-02-24 15:30:23 +00:00
default :
break ;
}
}
}
void rds_recv_worker ( struct work_struct * work )
{
2016-06-13 09:44:26 -07:00
struct rds_conn_path * cp = container_of ( work ,
struct rds_conn_path ,
cp_recv_w . work ) ;
2009-02-24 15:30:23 +00:00
int ret ;
2016-06-13 09:44:26 -07:00
if ( rds_conn_path_state ( cp ) = = RDS_CONN_UP ) {
2016-06-30 16:11:15 -07:00
ret = cp - > cp_conn - > c_trans - > recv_path ( cp ) ;
2016-06-13 09:44:26 -07:00
rdsdebug ( " conn %p ret %d \n " , cp - > cp_conn , ret ) ;
2009-02-24 15:30:23 +00:00
switch ( ret ) {
case - EAGAIN :
rds_stats_inc ( s_recv_immediate_retry ) ;
2016-06-13 09:44:26 -07:00
queue_delayed_work ( rds_wq , & cp - > cp_recv_w , 0 ) ;
2009-02-24 15:30:23 +00:00
break ;
case - ENOMEM :
rds_stats_inc ( s_recv_delayed_retry ) ;
2016-06-13 09:44:26 -07:00
queue_delayed_work ( rds_wq , & cp - > cp_recv_w , 2 ) ;
2009-02-24 15:30:23 +00:00
default :
break ;
}
}
}
2010-06-11 13:49:13 -07:00
void rds_shutdown_worker ( struct work_struct * work )
{
2016-06-13 09:44:26 -07:00
struct rds_conn_path * cp = container_of ( work ,
struct rds_conn_path ,
cp_down_w ) ;
2010-06-11 13:49:13 -07:00
2016-06-13 09:44:41 -07:00
rds_conn_shutdown ( cp ) ;
2010-06-11 13:49:13 -07:00
}
2009-02-24 15:30:23 +00:00
/* Destroy the rds_wq workqueue that rds_threads_init() created. */
void rds_threads_exit(void)
{
	destroy_workqueue(rds_wq);
}
2010-07-09 12:26:20 -07:00
int rds_threads_init ( void )
2009-02-24 15:30:23 +00:00
{
2010-07-06 15:08:48 -07:00
rds_wq = create_singlethread_workqueue ( " krdsd " ) ;
2010-01-12 11:56:44 -08:00
if ( ! rds_wq )
2009-02-24 15:30:23 +00:00
return - ENOMEM ;
return 0 ;
}