2006-01-02 21:04:38 +03:00
/*
* net / tipc / link . c : TIPC link code
2007-02-09 17:25:21 +03:00
*
2014-01-08 02:02:41 +04:00
* Copyright ( c ) 1996 - 2007 , 2012 - 2014 , Ericsson AB
2013-06-17 18:54:42 +04:00
* Copyright ( c ) 2004 - 2007 , 2010 - 2013 , Wind River Systems
2006-01-02 21:04:38 +03:00
* All rights reserved .
*
2006-01-11 15:30:43 +03:00
* Redistribution and use in source and binary forms , with or without
2006-01-02 21:04:38 +03:00
* modification , are permitted provided that the following conditions are met :
*
2006-01-11 15:30:43 +03:00
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
2006-01-02 21:04:38 +03:00
*
2006-01-11 15:30:43 +03:00
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
2006-01-02 21:04:38 +03:00
* POSSIBILITY OF SUCH DAMAGE .
*/
# include "core.h"
# include "link.h"
2014-11-20 12:29:12 +03:00
# include "bcast.h"
2014-05-14 13:39:15 +04:00
# include "socket.h"
2006-01-02 21:04:38 +03:00
# include "name_distr.h"
# include "discover.h"
# include "config.h"
2014-11-20 12:29:07 +03:00
# include "netlink.h"
2006-01-02 21:04:38 +03:00
2013-06-17 18:54:48 +04:00
# include <linux/pkt_sched.h>
2012-06-29 08:16:37 +04:00
/*
* Error message prefixes.
* Each is a file-local (static) pointer to a constant string.
*/
static const char * link_co_err = " Link changeover error, " ;
static const char * link_rst_msg = " Resetting link " ;
static const char * link_unk_evt = " Unknown link event " ;
2006-01-02 21:04:38 +03:00
2014-11-20 12:29:12 +03:00
/*
 * Netlink attribute policy for link attributes, indexed by TIPC_NLA_LINK_*:
 * NAME is a string limited by TIPC_MAX_LINK_NAME, MTU/RX/TX are u32 values,
 * BROADCAST/UP/ACTIVE are flags, and PROP/STATS are nested attribute sets.
 */
static const struct nla_policy tipc_nl_link_policy [ TIPC_NLA_LINK_MAX + 1 ] = {
[ TIPC_NLA_LINK_UNSPEC ] = { . type = NLA_UNSPEC } ,
[ TIPC_NLA_LINK_NAME ] = {
. type = NLA_STRING ,
. len = TIPC_MAX_LINK_NAME
} ,
[ TIPC_NLA_LINK_MTU ] = { . type = NLA_U32 } ,
[ TIPC_NLA_LINK_BROADCAST ] = { . type = NLA_FLAG } ,
[ TIPC_NLA_LINK_UP ] = { . type = NLA_FLAG } ,
[ TIPC_NLA_LINK_ACTIVE ] = { . type = NLA_FLAG } ,
[ TIPC_NLA_LINK_PROP ] = { . type = NLA_NESTED } ,
[ TIPC_NLA_LINK_STATS ] = { . type = NLA_NESTED } ,
[ TIPC_NLA_LINK_RX ] = { . type = NLA_U32 } ,
[ TIPC_NLA_LINK_TX ] = { . type = NLA_U32 }
} ;
2014-11-20 12:29:07 +03:00
/*
 * Properties valid for media, bearer and link.
 * Netlink attribute policy indexed by TIPC_NLA_PROP_*: priority (PRIO),
 * tolerance (TOL) and window (WIN) are each carried as a u32.
 */
static const struct nla_policy tipc_nl_prop_policy [ TIPC_NLA_PROP_MAX + 1 ] = {
[ TIPC_NLA_PROP_UNSPEC ] = { . type = NLA_UNSPEC } ,
[ TIPC_NLA_PROP_PRIO ] = { . type = NLA_U32 } ,
[ TIPC_NLA_PROP_TOL ] = { . type = NLA_U32 } ,
[ TIPC_NLA_PROP_WIN ] = { . type = NLA_U32 }
} ;
2008-06-05 04:29:39 +04:00
/*
* Out - of - range value for link session numbers
*/
# define INVALID_SESSION 0x10000
2007-02-09 17:25:21 +03:00
/*
* Link state events :
2006-01-02 21:04:38 +03:00
*/
# define STARTING_EVT 856384768 /* link processing trigger */
# define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
# define TIMEOUT_EVT 560817u /* link timer expired */
2007-02-09 17:25:21 +03:00
/*
* The following two ' message types ' are really just implementation
* data conveniently stored in the message header .
2006-01-02 21:04:38 +03:00
* They must not be considered part of the protocol
*/
# define OPEN_MSG 0
# define CLOSED_MSG 1
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* State value stored in ' exp_msg_count '
*/
# define START_CHANGEOVER 100000u
2015-01-09 10:27:04 +03:00
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * referenced before their definitions.
 */
static void link_handle_out_of_seq_msg ( struct net * net ,
struct tipc_link * l_ptr ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ) ;
2015-01-09 10:27:04 +03:00
static void tipc_link_proto_rcv ( struct net * net , struct tipc_link * l_ptr ,
struct sk_buff * buf ) ;
static int tipc_link_tunnel_rcv ( struct net * net , struct tipc_node * n_ptr ,
2014-01-08 02:02:41 +04:00
struct sk_buff * * buf ) ;
2015-01-09 10:27:00 +03:00
static void link_set_supervision_props ( struct tipc_link * l_ptr , u32 tol ) ;
2011-12-30 05:58:42 +04:00
static void link_state_event ( struct tipc_link * l_ptr , u32 event ) ;
static void link_reset_statistics ( struct tipc_link * l_ptr ) ;
static void link_print ( struct tipc_link * l_ptr , const char * str ) ;
2014-02-18 12:06:46 +04:00
static void tipc_link_sync_xmit ( struct tipc_link * l ) ;
static void tipc_link_sync_rcv ( struct tipc_node * n , struct sk_buff * buf ) ;
2015-01-09 10:27:05 +03:00
static int tipc_link_input ( struct net * net , struct tipc_link * l ,
struct sk_buff * buf ) ;
2015-01-09 10:27:04 +03:00
static int tipc_link_prepare_input ( struct net * net , struct tipc_link * l ,
struct sk_buff * * buf ) ;
2010-10-13 17:20:35 +04:00
2006-01-02 21:04:38 +03:00
/*
2006-03-21 09:37:04 +03:00
* Simple link routines
2006-01-02 21:04:38 +03:00
*/
2006-03-21 09:37:04 +03:00
/*
 * align - round @i up to the next multiple of four
 *
 * Arithmetic is unsigned, so values within three of the type's maximum
 * wrap around and yield 0.
 */
static unsigned int align(unsigned int i)
{
	const unsigned int mask = 3u;

	return (i + mask) & ~mask;
}
2011-12-30 05:58:42 +04:00
static void link_init_max_pkt ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:06 +03:00
struct tipc_node * node = l_ptr - > owner ;
struct tipc_net * tn = net_generic ( node - > net , tipc_net_id ) ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
struct tipc_bearer * b_ptr ;
2006-01-02 21:04:38 +03:00
u32 max_pkt ;
2007-02-09 17:25:21 +03:00
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
rcu_read_lock ( ) ;
2015-01-09 10:27:06 +03:00
b_ptr = rcu_dereference_rtnl ( tn - > bearer_list [ l_ptr - > bearer_id ] ) ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
if ( ! b_ptr ) {
rcu_read_unlock ( ) ;
return ;
}
max_pkt = ( b_ptr - > mtu & ~ 3 ) ;
rcu_read_unlock ( ) ;
2006-01-02 21:04:38 +03:00
if ( max_pkt > MAX_MSG_SIZE )
max_pkt = MAX_MSG_SIZE ;
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt_target = max_pkt ;
2006-01-02 21:04:38 +03:00
if ( l_ptr - > max_pkt_target < MAX_PKT_DEFAULT )
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
2007-02-09 17:25:21 +03:00
else
2006-01-02 21:04:38 +03:00
l_ptr - > max_pkt = MAX_PKT_DEFAULT ;
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt_probes = 0 ;
2006-01-02 21:04:38 +03:00
}
/*
2006-03-21 09:37:04 +03:00
* Simple non - static link routines ( i . e . referenced outside this file )
2006-01-02 21:04:38 +03:00
*/
2011-12-30 05:58:42 +04:00
/*
 * tipc_link_is_up - report whether a link is in one of its working states
 * @l_ptr: link to test; may be NULL
 *
 * Returns 1 if @l_ptr is non-NULL and either link_working_working() or
 * link_working_unknown() holds for it, otherwise 0.
 */
int tipc_link_is_up(struct tipc_link *l_ptr)
{
	return l_ptr &&
	       (link_working_working(l_ptr) || link_working_unknown(l_ptr));
}
2011-12-30 05:58:42 +04:00
int tipc_link_is_active ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2010-09-23 00:43:57 +04:00
return ( l_ptr - > owner - > active_links [ 0 ] = = l_ptr ) | |
( l_ptr - > owner - > active_links [ 1 ] = = l_ptr ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_timeout - handle expiration of link timer
* @ l_ptr : pointer to link
*/
2015-01-09 10:27:00 +03:00
static void link_timeout ( unsigned long data )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:00 +03:00
struct tipc_link * l_ptr = ( struct tipc_link * ) data ;
2014-11-26 06:41:52 +03:00
struct sk_buff * skb ;
2006-01-18 02:38:21 +03:00
tipc_node_lock ( l_ptr - > owner ) ;
2006-01-02 21:04:38 +03:00
/* update counters used in statistical profiling of send traffic */
2014-11-26 06:41:52 +03:00
l_ptr - > stats . accu_queue_sz + = skb_queue_len ( & l_ptr - > outqueue ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > stats . queue_sz_counts + + ;
2014-11-26 06:41:52 +03:00
skb = skb_peek ( & l_ptr - > outqueue ) ;
if ( skb ) {
struct tipc_msg * msg = buf_msg ( skb ) ;
2006-01-02 21:04:38 +03:00
u32 length = msg_size ( msg ) ;
2009-11-30 03:55:45 +03:00
if ( ( msg_user ( msg ) = = MSG_FRAGMENTER ) & &
( msg_type ( msg ) = = FIRST_FRAGMENT ) ) {
2006-01-02 21:04:38 +03:00
length = msg_size ( msg_get_wrapped ( msg ) ) ;
}
if ( length ) {
l_ptr - > stats . msg_lengths_total + = length ;
l_ptr - > stats . msg_length_counts + + ;
if ( length < = 64 )
l_ptr - > stats . msg_length_profile [ 0 ] + + ;
else if ( length < = 256 )
l_ptr - > stats . msg_length_profile [ 1 ] + + ;
else if ( length < = 1024 )
l_ptr - > stats . msg_length_profile [ 2 ] + + ;
else if ( length < = 4096 )
l_ptr - > stats . msg_length_profile [ 3 ] + + ;
else if ( length < = 16384 )
l_ptr - > stats . msg_length_profile [ 4 ] + + ;
else if ( length < = 32768 )
l_ptr - > stats . msg_length_profile [ 5 ] + + ;
else
l_ptr - > stats . msg_length_profile [ 6 ] + + ;
}
}
/* do all other link processing performed on a periodic basis */
link_state_event ( l_ptr , TIMEOUT_EVT ) ;
if ( l_ptr - > next_out )
2014-11-26 06:41:48 +03:00
tipc_link_push_packets ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( l_ptr - > owner ) ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:00 +03:00
static void link_set_timer ( struct tipc_link * link , unsigned long time )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:00 +03:00
mod_timer ( & link - > timer , jiffies + time ) ;
2006-01-02 21:04:38 +03:00
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_create - create a new link
2011-02-28 19:32:27 +03:00
* @ n_ptr : pointer to associated node
2006-01-02 21:04:38 +03:00
* @ b_ptr : pointer to associated bearer
* @ media_addr : media address to use when sending messages over link
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns pointer to link .
*/
2011-12-30 05:58:42 +04:00
struct tipc_link * tipc_link_create ( struct tipc_node * n_ptr ,
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop though the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependency go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
struct tipc_bearer * b_ptr ,
const struct tipc_media_addr * media_addr )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:10 +03:00
struct tipc_net * tn = net_generic ( n_ptr - > net , tipc_net_id ) ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg ;
char * if_name ;
2011-02-28 19:32:27 +03:00
char addr_string [ 16 ] ;
u32 peer = n_ptr - > addr ;
2014-11-14 20:33:19 +03:00
if ( n_ptr - > link_cnt > = MAX_BEARERS ) {
2011-02-28 19:32:27 +03:00
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2014-11-14 20:33:19 +03:00
pr_err ( " Attempt to establish %uth link to %s. Max %u allowed. \n " ,
n_ptr - > link_cnt , addr_string , MAX_BEARERS ) ;
2011-02-28 19:32:27 +03:00
return NULL ;
}
if ( n_ptr - > links [ b_ptr - > identity ] ) {
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2012-06-29 08:16:37 +04:00
pr_err ( " Attempt to establish second link on <%s> to %s \n " ,
b_ptr - > name , addr_string ) ;
2011-02-28 19:32:27 +03:00
return NULL ;
}
2006-01-02 21:04:38 +03:00
2006-07-22 01:51:30 +04:00
l_ptr = kzalloc ( sizeof ( * l_ptr ) , GFP_ATOMIC ) ;
2006-01-02 21:04:38 +03:00
if ( ! l_ptr ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " Link creation failed, no memory \n " ) ;
2006-01-02 21:04:38 +03:00
return NULL ;
}
l_ptr - > addr = peer ;
2011-01-07 21:00:11 +03:00
if_name = strchr ( b_ptr - > name , ' : ' ) + 1 ;
2011-04-07 17:28:47 +04:00
sprintf ( l_ptr - > name , " %u.%u.%u:%s-%u.%u.%u:unknown " ,
2015-01-09 10:27:10 +03:00
tipc_zone ( tn - > own_addr ) , tipc_cluster ( tn - > own_addr ) ,
tipc_node ( tn - > own_addr ) ,
2006-01-02 21:04:38 +03:00
if_name ,
tipc_zone ( peer ) , tipc_cluster ( peer ) , tipc_node ( peer ) ) ;
2011-04-07 17:28:47 +04:00
/* note: peer i/f name is updated by reset/activate message */
2006-01-02 21:04:38 +03:00
memcpy ( & l_ptr - > media_addr , media_addr , sizeof ( * media_addr ) ) ;
2011-02-28 19:32:27 +03:00
l_ptr - > owner = n_ptr ;
2006-01-02 21:04:38 +03:00
l_ptr - > checkpoint = 1 ;
2011-04-07 17:43:27 +04:00
l_ptr - > peer_session = INVALID_SESSION ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
l_ptr - > bearer_id = b_ptr - > identity ;
2011-10-18 19:34:29 +04:00
link_set_supervision_props ( l_ptr , b_ptr - > tolerance ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
l_ptr - > pmsg = ( struct tipc_msg * ) & l_ptr - > proto_msg ;
msg = l_ptr - > pmsg ;
2015-01-09 10:27:10 +03:00
tipc_msg_init ( n_ptr - > net , msg , LINK_PROTOCOL , RESET_MSG , INT_H_SIZE ,
l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( msg , sizeof ( l_ptr - > proto_msg ) ) ;
2015-01-09 10:27:12 +03:00
msg_set_session ( msg , ( tn - > random & 0xffff ) ) ;
2006-01-02 21:04:38 +03:00
msg_set_bearer_id ( msg , b_ptr - > identity ) ;
strcpy ( ( char * ) msg_data ( msg ) , if_name ) ;
l_ptr - > priority = b_ptr - > priority ;
2011-10-18 19:34:29 +04:00
tipc_link_set_queue_limits ( l_ptr , b_ptr - > window ) ;
2006-01-02 21:04:38 +03:00
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
l_ptr - > net_plane = b_ptr - > net_plane ;
2006-01-02 21:04:38 +03:00
link_init_max_pkt ( l_ptr ) ;
l_ptr - > next_out_no = 1 ;
2014-11-26 06:41:52 +03:00
__skb_queue_head_init ( & l_ptr - > outqueue ) ;
2014-11-26 06:41:53 +03:00
__skb_queue_head_init ( & l_ptr - > deferred_queue ) ;
2014-12-10 11:46:54 +03:00
skb_queue_head_init ( & l_ptr - > waiting_sks ) ;
2006-01-02 21:04:38 +03:00
link_reset_statistics ( l_ptr ) ;
2011-02-28 19:32:27 +03:00
tipc_node_attach_link ( n_ptr , l_ptr ) ;
2006-01-02 21:04:38 +03:00
2015-01-09 10:27:00 +03:00
setup_timer ( & l_ptr - > timer , link_timeout , ( unsigned long ) l_ptr ) ;
2014-01-08 02:02:44 +04:00
link_state_event ( l_ptr , STARTING_EVT ) ;
2006-01-02 21:04:38 +03:00
return l_ptr ;
}
2015-01-09 10:27:05 +03:00
void tipc_link_delete_list ( struct net * net , unsigned int bearer_id ,
bool shutting_down )
2014-02-14 02:29:07 +04:00
{
2015-01-09 10:27:05 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-02-14 02:29:07 +04:00
struct tipc_link * l_ptr ;
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop though the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependency go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
struct tipc_node * n_ptr ;
2014-02-14 02:29:07 +04:00
2014-03-27 08:54:37 +04:00
rcu_read_lock ( ) ;
2015-01-09 10:27:05 +03:00
list_for_each_entry_rcu ( n_ptr , & tn - > node_list , list ) {
2014-05-05 04:56:09 +04:00
tipc_node_lock ( n_ptr ) ;
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem c) by using the global node list
here as well to find the node, before accessing its links. When
we loop through the node list we use our own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependencies go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
l_ptr = n_ptr - > links [ bearer_id ] ;
if ( l_ptr ) {
tipc_link_reset ( l_ptr ) ;
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:16 +04:00
if ( shutting_down | | ! tipc_node_is_up ( n_ptr ) ) {
tipc_node_detach_link ( l_ptr - > owner , l_ptr ) ;
tipc_link_reset_fragments ( l_ptr ) ;
2014-05-05 04:56:09 +04:00
tipc_node_unlock ( n_ptr ) ;
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:16 +04:00
/* Nobody else can access this link now: */
del_timer_sync ( & l_ptr - > timer ) ;
kfree ( l_ptr ) ;
} else {
/* Detach/delete when failover is finished: */
l_ptr - > flags | = LINK_STOPPED ;
2014-05-05 04:56:09 +04:00
tipc_node_unlock ( n_ptr ) ;
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:16 +04:00
del_timer_sync ( & l_ptr - > timer ) ;
}
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem c) by using the global node list
here as well to find the node, before accessing its links. When
we loop through the node list we use our own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependencies go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
continue ;
}
2014-05-05 04:56:09 +04:00
tipc_node_unlock ( n_ptr ) ;
2014-02-14 02:29:07 +04:00
}
2014-03-27 08:54:37 +04:00
rcu_read_unlock ( ) ;
2014-02-14 02:29:07 +04:00
}
2006-01-02 21:04:38 +03:00
/**
2014-08-23 02:09:07 +04:00
* link_schedule_user - schedule user for wakeup after congestion
* @ link : congested link
* @ oport : sending port
* @ chain_sz : size of buffer chain that was attempted sent
* @ imp : importance of message attempted sent
* Create pseudo msg to send back to user when congestion abates
2006-01-02 21:04:38 +03:00
*/
2014-08-23 02:09:07 +04:00
static bool link_schedule_user ( struct tipc_link * link , u32 oport ,
uint chain_sz , uint imp )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:10 +03:00
struct net * net = link - > owner - > net ;
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-08-23 02:09:07 +04:00
struct sk_buff * buf ;
2015-01-09 10:27:10 +03:00
buf = tipc_msg_create ( net , SOCK_WAKEUP , 0 , INT_H_SIZE , 0 , tn - > own_addr ,
tn - > own_addr , oport , 0 , 0 ) ;
2014-08-23 02:09:07 +04:00
if ( ! buf )
return false ;
TIPC_SKB_CB ( buf ) - > chain_sz = chain_sz ;
TIPC_SKB_CB ( buf ) - > chain_imp = imp ;
2014-12-10 11:46:54 +03:00
skb_queue_tail ( & link - > waiting_sks , buf ) ;
2014-08-23 02:09:07 +04:00
link - > stats . link_congs + + ;
return true ;
2006-01-02 21:04:38 +03:00
}
2014-08-23 02:09:07 +04:00
/**
* link_prepare_wakeup - prepare users for wakeup after congestion
* @ link : congested link
* Move a number of waiting users , as permitted by available space in
* the send queue , from link wait queue to node wait queue for wakeup
*/
static void link_prepare_wakeup ( struct tipc_link * link )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:52 +03:00
uint pend_qsz = skb_queue_len ( & link - > outqueue ) ;
2014-11-26 06:41:51 +03:00
struct sk_buff * skb , * tmp ;
2014-08-23 02:09:07 +04:00
2014-11-26 06:41:51 +03:00
skb_queue_walk_safe ( & link - > waiting_sks , skb , tmp ) {
if ( pend_qsz > = link - > queue_limit [ TIPC_SKB_CB ( skb ) - > chain_imp ] )
2006-01-02 21:04:38 +03:00
break ;
2014-11-26 06:41:51 +03:00
pend_qsz + = TIPC_SKB_CB ( skb ) - > chain_sz ;
2014-12-10 11:46:54 +03:00
skb_unlink ( skb , & link - > waiting_sks ) ;
skb_queue_tail ( & link - > owner - > waiting_sks , skb ) ;
2006-01-02 21:04:38 +03:00
}
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_reset_fragments - purge link ' s inbound message fragments queue
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
*/
2011-12-30 05:58:42 +04:00
void tipc_link_reset_fragments ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2014-05-14 13:39:12 +04:00
kfree_skb ( l_ptr - > reasm_buf ) ;
l_ptr - > reasm_buf = NULL ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2014-01-08 02:02:44 +04:00
* tipc_link_purge_queues - purge all pkt queues associated with link
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
*/
2014-01-08 02:02:44 +04:00
void tipc_link_purge_queues ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:53 +03:00
__skb_queue_purge ( & l_ptr - > deferred_queue ) ;
2014-11-26 06:41:52 +03:00
__skb_queue_purge ( & l_ptr - > outqueue ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset_fragments ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
}
2011-12-30 05:58:42 +04:00
/**
 * tipc_link_reset - bring a link endpoint back to the RESET_UNKNOWN state
 * @l_ptr: pointer to link
 *
 * Always: advances the session number stored in the link's protocol message,
 * invalidates the peer session, re-initialises max packet size negotiation
 * and sets the state to RESET_UNKNOWN.
 *
 * If the link was already in RESET_UNKNOWN or RESET_RESET on entry, the
 * function returns at that point. Otherwise it additionally calls
 * tipc_node_link_down() and tipc_bearer_remove_dest(), possibly arms
 * changeover, purges the out and deferred queues, moves queued wakeup
 * messages to the owner node and re-initialises the link's counters and
 * statistics.
 */
void tipc_link_reset ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
/* Snapshots taken on entry, before any of the fields below are modified */
u32 prev_state = l_ptr - > state ;
u32 checkpoint = l_ptr - > next_in_no ;
2006-06-26 10:52:50 +04:00
/* Sampled before tipc_node_link_down() is called further down */
int was_active_link = tipc_link_is_active ( l_ptr ) ;
2014-08-23 02:09:07 +04:00
struct tipc_node * owner = l_ptr - > owner ;
2007-02-09 17:25:21 +03:00
2008-06-05 04:29:39 +04:00
/* Advance the session number in the protocol message, wrapping at 16 bits */
msg_set_session ( l_ptr - > pmsg , ( ( msg_session ( l_ptr - > pmsg ) + 1 ) & 0xffff ) ) ;
2006-01-02 21:04:38 +03:00
2008-06-05 04:29:39 +04:00
/* Link is down, accept any session */
l_ptr - > peer_session = INVALID_SESSION ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
/* Prepare for max packet size negotiation */
2006-01-02 21:04:38 +03:00
link_init_max_pkt ( l_ptr ) ;
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
/* Was already in one of the reset states on entry: stop here */
if ( ( prev_state = = RESET_UNKNOWN ) | | ( prev_state = = RESET_RESET ) )
return ;
2006-01-18 02:38:21 +03:00
tipc_node_link_down ( l_ptr - > owner , l_ptr ) ;
2015-01-09 10:27:06 +03:00
tipc_bearer_remove_dest ( owner - > net , l_ptr - > bearer_id , l_ptr - > addr ) ;
2010-10-12 18:25:58 +04:00
2014-01-08 02:02:42 +04:00
/*
 * This link was active on entry and tipc_node_active_links() is still
 * non-zero for the owner: remember the receive sequence number seen on
 * entry and set exp_msg_count, which link_state_event() treats as
 * "changeover is going on" while it is non-zero.
 */
if ( was_active_link & & tipc_node_active_links ( l_ptr - > owner ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > reset_checkpoint = checkpoint ;
l_ptr - > exp_msg_count = START_CHANGEOVER ;
}
/* Clean up all queues: */
2014-11-26 06:41:52 +03:00
__skb_queue_purge ( & l_ptr - > outqueue ) ;
2014-11-26 06:41:53 +03:00
__skb_queue_purge ( & l_ptr - > deferred_queue ) ;
2014-08-23 02:09:07 +04:00
/*
 * Wakeup messages queued on this link (see link_schedule_user()) are
 * moved to the owner node's queue, and the node is flagged with
 * TIPC_WAKEUP_USERS.
 */
if ( ! skb_queue_empty ( & l_ptr - > waiting_sks ) ) {
skb_queue_splice_init ( & l_ptr - > waiting_sks , & owner - > waiting_sks ) ;
owner - > action_flags | = TIPC_WAKEUP_USERS ;
}
2006-01-02 21:04:38 +03:00
/* Re-initialise per-link sequence, FSM and staleness counters */
l_ptr - > next_out = NULL ;
l_ptr - > unacked_window = 0 ;
l_ptr - > checkpoint = 1 ;
l_ptr - > next_out_no = 1 ;
l_ptr - > fsm_msg_cnt = 0 ;
l_ptr - > stale_count = 0 ;
link_reset_statistics ( l_ptr ) ;
}
2015-01-09 10:27:05 +03:00
void tipc_link_reset_list ( struct net * net , unsigned int bearer_id )
2014-02-14 02:29:06 +04:00
{
2015-01-09 10:27:05 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-02-14 02:29:06 +04:00
struct tipc_link * l_ptr ;
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop though the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependency go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
struct tipc_node * n_ptr ;
2014-02-14 02:29:06 +04:00
2014-03-27 08:54:37 +04:00
rcu_read_lock ( ) ;
2015-01-09 10:27:05 +03:00
list_for_each_entry_rcu ( n_ptr , & tn - > node_list , list ) {
2014-05-05 04:56:09 +04:00
tipc_node_lock ( n_ptr ) ;
tipc: remove 'links' list from tipc_bearer struct
In our ongoing effort to simplify the TIPC locking structure,
we see a need to remove the linked list for tipc_links
in the bearer. This can be explained as follows.
Currently, we have three different ways to access a link,
via three different lists/tables:
1: Via a node hash table:
Used by the time-critical outgoing/incoming data paths.
(e.g. link_send_sections_fast() and tipc_recv_msg() ):
grab net_lock(read)
find node from node hash table
grab node_lock
select link
grab bearer_lock
send_msg()
release bearer_lock
release node lock
release net_lock
2: Via a global linked list for nodes:
Used by configuration commands (link_cmd_set_value())
grab net_lock(read)
find node and link from global node list (using link name)
grab node_lock
update link
release node lock
release net_lock
(Same locking order as above. No problem.)
3: Via the bearer's linked link list:
Used by notifications from interface (e.g. tipc_disable_bearer() )
grab net_lock(write)
grab bearer_lock
get link ptr from bearer's link list
get node from link
grab node_lock
delete link
release node lock
release bearer_lock
release net_lock
(Different order from above, but works because we grab the
outer net_lock in write mode first, excluding all other access.)
The first major goal in our simplification effort is to get rid
of the "big" net_lock, replacing it with rcu-locks when accessing
the node list and node hash array. This will come in a later patch
series.
But to get there we first need to rewrite access methods ##2 and 3,
since removal of net_lock would introduce three major problems:
a) In access method #2, we access the link before taking the
protecting node_lock. This will not work once net_lock is gone,
so we will have to change the access order. We will deal with
this in a later commit in this series, "tipc: add node lock
protection to link found by link_find_link()".
b) When the outer protection from net_lock is gone, taking
bearer_lock and node_lock in opposite order of method 1) and 2)
will become an obvious deadlock hazard. This is fixed in the
commit ("tipc: remove bearer_lock from tipc_bearer struct")
later in this series.
c) Similar to what is described in problem a), access method #3
starts with using a link pointer that is unprotected by node_lock,
in order to via that pointer find the correct node struct and
lock it. Before we remove net_lock, this access order must be
altered. This is what we do with this commit.
We can avoid introducing problem problem c) by even here using the
global node list to find the node, before accessing its links. When
we loop though the node list we use the own bearer identity as search
criteria, thus easily finding the links that are associated to the
resetting/disabling bearer. It should be noted that although this
method is somewhat slower than the current list traversal, it is in
no way time critical. This is only about resetting or deleting links,
something that must be considered relatively infrequent events.
As a bonus, we can get rid of the mutual pointers between links and
bearers. After this commit, pointer dependency go in one direction
only: from the link to the bearer.
This commit pre-empts introduction of problem c) as described above.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:09 +04:00
l_ptr = n_ptr - > links [ bearer_id ] ;
if ( l_ptr )
tipc_link_reset ( l_ptr ) ;
2014-05-05 04:56:09 +04:00
tipc_node_unlock ( n_ptr ) ;
2014-02-14 02:29:06 +04:00
}
2014-03-27 08:54:37 +04:00
rcu_read_unlock ( ) ;
2014-02-14 02:29:06 +04:00
}
2006-01-02 21:04:38 +03:00
2015-01-09 10:27:06 +03:00
static void link_activate ( struct tipc_link * link )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:06 +03:00
struct tipc_node * node = link - > owner ;
link - > next_in_no = 1 ;
link - > stats . recv_info = 1 ;
tipc_node_link_up ( node , link ) ;
tipc_bearer_add_dest ( node - > net , link - > bearer_id , link - > addr ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_state_event - link finite state machine
* @ l_ptr : pointer to link
* @ event : state machine event to process
*/
2012-04-15 09:58:06 +04:00
static void link_state_event ( struct tipc_link * l_ptr , unsigned int event )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * other ;
2015-01-09 10:27:00 +03:00
unsigned long cont_intv = l_ptr - > cont_intv ;
2006-01-02 21:04:38 +03:00
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:16 +04:00
if ( l_ptr - > flags & LINK_STOPPED )
return ;
2014-02-14 02:29:08 +04:00
if ( ! ( l_ptr - > flags & LINK_STARTED ) & & ( event ! = STARTING_EVT ) )
2006-01-02 21:04:38 +03:00
return ; /* Not yet. */
2013-12-11 08:45:44 +04:00
/* Check whether changeover is going on */
if ( l_ptr - > exp_msg_count ) {
2010-12-31 21:59:35 +03:00
if ( event = = TIMEOUT_EVT )
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
2013-12-11 08:45:44 +04:00
return ;
2006-01-02 21:04:38 +03:00
}
switch ( l_ptr - > state ) {
case WORKING_WORKING :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
break ;
case TIMEOUT_EVT :
if ( l_ptr - > next_in_no ! = l_ptr - > checkpoint ) {
l_ptr - > checkpoint = l_ptr - > next_in_no ;
2006-01-18 02:38:21 +03:00
if ( tipc_bclink_acks_missing ( l_ptr - > owner ) ) {
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
} else if ( l_ptr - > max_pkt < l_ptr - > max_pkt_target ) {
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG ,
1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
}
link_set_timer ( l_ptr , cont_intv ) ;
break ;
}
l_ptr - > state = WORKING_UNKNOWN ;
l_ptr - > fsm_msg_cnt = 0 ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv / 4 ) ;
break ;
case RESET_MSG :
2015-01-22 19:10:31 +03:00
pr_debug ( " %s<%s>, requested by peer \n " ,
link_rst_msg , l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , ACTIVATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2015-01-22 19:10:31 +03:00
pr_debug ( " %s%u in WW state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case WORKING_UNKNOWN :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
2015-01-22 19:10:31 +03:00
pr_debug ( " %s<%s>, requested by peer while probing \n " ,
link_rst_msg , l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , ACTIVATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case TIMEOUT_EVT :
if ( l_ptr - > next_in_no ! = l_ptr - > checkpoint ) {
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
l_ptr - > checkpoint = l_ptr - > next_in_no ;
2006-01-18 02:38:21 +03:00
if ( tipc_bclink_acks_missing ( l_ptr - > owner ) ) {
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
}
link_set_timer ( l_ptr , cont_intv ) ;
} else if ( l_ptr - > fsm_msg_cnt < l_ptr - > abort_limit ) {
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG ,
1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv / 4 ) ;
} else { /* Link has failed */
2015-01-22 19:10:31 +03:00
pr_debug ( " %s<%s>, peer not responding \n " ,
link_rst_msg , l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
l_ptr - > fsm_msg_cnt = 0 ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , RESET_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
}
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in WU state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case RESET_UNKNOWN :
switch ( event ) {
case TRAFFIC_MSG_EVT :
break ;
case ACTIVATE_MSG :
other = l_ptr - > owner - > active_links [ 0 ] ;
2010-12-31 21:59:27 +03:00
if ( other & & link_working_unknown ( other ) )
2006-01-02 21:04:38 +03:00
break ;
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_activate ( l_ptr ) ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
if ( l_ptr - > owner - > working_links = = 1 )
2014-02-18 12:06:46 +04:00
tipc_link_sync_xmit ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , ACTIVATE_MSG ,
1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case STARTING_EVT :
2014-02-14 02:29:08 +04:00
l_ptr - > flags | = LINK_STARTED ;
2006-01-02 21:04:38 +03:00
/* fall through */
case TIMEOUT_EVT :
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , RESET_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in RU state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case RESET_RESET :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
other = l_ptr - > owner - > active_links [ 0 ] ;
2010-12-31 21:59:27 +03:00
if ( other & & link_working_unknown ( other ) )
2006-01-02 21:04:38 +03:00
break ;
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_activate ( l_ptr ) ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
if ( l_ptr - > owner - > working_links = = 1 )
2014-02-18 12:06:46 +04:00
tipc_link_sync_xmit ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
break ;
case TIMEOUT_EVT :
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , ACTIVATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in RR state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " Unknown link state %u/%u \n " , l_ptr - > state , event ) ;
2006-01-02 21:04:38 +03:00
}
}
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
/* tipc_link_cong(): decide the return value, and what happens to the
 * buffer chain, when a send attempt hits link congestion.
 * @link: link whose send queue limit was reached
 * @list: buffer chain being sent; only its first buffer is inspected
 *
 * - Importance above TIPC_CRITICAL_IMPORTANCE (presumably TIPC internal
 *   messages): warn, reset the link, purge the chain and return
 *   -EHOSTUNREACH.
 * - Non-zero error code or non-zero reroute count in the first message:
 *   purge the chain and return -EHOSTUNREACH.
 * - Otherwise return -ELINKCONG, without purging the chain here, if a
 *   wakeup is already pending for the first buffer or if
 *   link_schedule_user() returns non-zero. If link_schedule_user()
 *   returns zero, the chain is purged and -EHOSTUNREACH is returned.
 *
 * NOTE(review): the result of skb_peek() is used unchecked, so callers
 * are presumably expected to pass a non-empty list - confirm.
 */
2014-11-26 06:41:55 +03:00
static int tipc_link_cong ( struct tipc_link * link , struct sk_buff_head * list )
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
{
2014-11-26 06:41:55 +03:00
/* Every decision below is based on the first buffer of the chain */
struct sk_buff * skb = skb_peek ( list ) ;
struct tipc_msg * msg = buf_msg ( skb ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
uint imp = tipc_msg_tot_importance ( msg ) ;
u32 oport = msg_tot_origport ( msg ) ;
2014-08-23 02:09:07 +04:00
/* Importance beyond the highest user level: reset the link and drop */
if ( unlikely ( imp > TIPC_CRITICAL_IMPORTANCE ) ) {
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
/* NOTE(review): unlike the other pr_*() calls visible in this file,
 * this format string has no trailing newline - confirm that is intended.
 */
pr_warn ( " %s<%s>, send queue full " , link_rst_msg , link - > name ) ;
tipc_link_reset ( link ) ;
2014-08-23 02:09:07 +04:00
goto drop ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
}
2014-08-23 02:09:07 +04:00
/* Messages carrying an error code or a reroute count are dropped */
if ( unlikely ( msg_errcode ( msg ) ) )
goto drop ;
if ( unlikely ( msg_reroute_cnt ( msg ) ) )
goto drop ;
2014-11-26 06:41:55 +03:00
/* A wakeup is already pending for this buffer: report congestion
 * without purging the chain.
 */
if ( TIPC_SKB_CB ( skb ) - > wakeup_pending )
2014-08-23 02:09:07 +04:00
return - ELINKCONG ;
2014-11-26 06:41:55 +03:00
/* Non-zero from link_schedule_user(): report congestion and leave the
 * chain alone; zero falls through to the drop path below.
 */
if ( link_schedule_user ( link , oport , skb_queue_len ( list ) , imp ) )
2014-08-23 02:09:07 +04:00
return - ELINKCONG ;
drop :
2014-11-26 06:41:55 +03:00
/* Discard the whole chain; the caller gets -EHOSTUNREACH */
__skb_queue_purge ( list ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
return - EHOSTUNREACH ;
}
/**
2014-07-17 04:41:03 +04:00
* __tipc_link_xmit ( ) : same as tipc_link_xmit , but destlink is known & locked
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
* @ link : link to use
2014-11-26 06:41:55 +03:00
* @ list : chain of buffers containing message
*
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
* Consumes the buffer chain , except when returning - ELINKCONG
* Returns 0 if success , otherwise errno : - ELINKCONG , - EMSGSIZE ( plain socket
* user data messages ) or - EHOSTUNREACH ( all other messages / senders )
* Only the socket functions tipc_send_stream ( ) and tipc_send_packet ( ) need
* to act on the return value , since they may need to do more send attempts .
*/
2015-01-09 10:27:06 +03:00
int __tipc_link_xmit ( struct net * net , struct tipc_link * link ,
struct sk_buff_head * list )
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
{
2014-11-26 06:41:55 +03:00
struct tipc_msg * msg = buf_msg ( skb_peek ( list ) ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
uint psz = msg_size ( msg ) ;
uint sndlim = link - > queue_limit [ 0 ] ;
uint imp = tipc_msg_tot_importance ( msg ) ;
uint mtu = link - > max_pkt ;
uint ack = mod ( link - > next_in_no - 1 ) ;
uint seqno = link - > next_out_no ;
uint bc_last_in = link - > owner - > bclink . last_in ;
struct tipc_media_addr * addr = & link - > media_addr ;
2014-11-26 06:41:52 +03:00
struct sk_buff_head * outqueue = & link - > outqueue ;
2014-11-26 06:41:55 +03:00
struct sk_buff * skb , * tmp ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
/* Match queue limits against msg importance: */
2014-11-26 06:41:52 +03:00
if ( unlikely ( skb_queue_len ( outqueue ) > = link - > queue_limit [ imp ] ) )
2014-11-26 06:41:55 +03:00
return tipc_link_cong ( link , list ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
/* Has valid packet limit been used ? */
if ( unlikely ( psz > mtu ) ) {
2014-11-26 06:41:55 +03:00
__skb_queue_purge ( list ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
return - EMSGSIZE ;
}
/* Prepare each packet for sending, and add to outqueue: */
2014-11-26 06:41:55 +03:00
skb_queue_walk_safe ( list , skb , tmp ) {
__skb_unlink ( skb , list ) ;
2014-11-26 06:41:52 +03:00
msg = buf_msg ( skb ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
msg_set_word ( msg , 2 , ( ( ack < < 16 ) | mod ( seqno ) ) ) ;
msg_set_bcast_ack ( msg , bc_last_in ) ;
2014-11-26 06:41:52 +03:00
if ( skb_queue_len ( outqueue ) < sndlim ) {
__skb_queue_tail ( outqueue , skb ) ;
2015-01-09 10:27:06 +03:00
tipc_bearer_send ( net , link - > bearer_id ,
skb , addr ) ;
2014-11-26 06:41:52 +03:00
link - > next_out = NULL ;
link - > unacked_window = 0 ;
} else if ( tipc_msg_bundle ( outqueue , skb , mtu ) ) {
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
link - > stats . sent_bundled + + ;
continue ;
2015-01-09 10:27:10 +03:00
} else if ( tipc_msg_make_bundle ( net , outqueue , skb , mtu ,
2014-11-26 06:41:52 +03:00
link - > addr ) ) {
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
link - > stats . sent_bundled + + ;
link - > stats . sent_bundles + + ;
if ( ! link - > next_out )
2014-11-26 06:41:52 +03:00
link - > next_out = skb_peek_tail ( outqueue ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
} else {
2014-11-26 06:41:52 +03:00
__skb_queue_tail ( outqueue , skb ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
if ( ! link - > next_out )
2014-11-26 06:41:52 +03:00
link - > next_out = skb ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
}
seqno + + ;
}
link - > next_out_no = seqno ;
return 0 ;
}
2014-11-26 06:41:55 +03:00
/*
 * skb2list - turn a single buffer into a one-element buffer list
 * @skb: buffer to place on the list
 * @list: list head to (re)initialise; any previous content is not released
 *
 * Initialises @list as an empty queue and then appends @skb to its tail.
 */
static void skb2list(struct sk_buff *skb, struct sk_buff_head *list)
{
	__skb_queue_head_init(list);
	__skb_queue_tail(list, skb);
}
/*
 * __tipc_link_xmit_skb - send one buffer over an already selected link
 *
 * Wraps skb in a one-element list that lives on this function's stack and
 * passes it to __tipc_link_xmit(), using the net namespace found through
 * link->owner->net. The return value of __tipc_link_xmit() is returned
 * unchanged.
 *
 * NOTE(review): tipc_link_xmit() takes the node lock before calling
 * __tipc_link_xmit(); presumably callers of this helper must hold it
 * too - confirm against the call sites.
 */
static int __tipc_link_xmit_skb ( struct tipc_link * link , struct sk_buff * skb )
{
struct sk_buff_head head ;
/* Build the temporary single-buffer chain expected by __tipc_link_xmit() */
skb2list ( skb , & head ) ;
2015-01-09 10:27:06 +03:00
return __tipc_link_xmit ( link - > owner - > net , link , & head ) ;
2014-11-26 06:41:55 +03:00
}
2015-01-09 10:27:05 +03:00
/*
 * tipc_link_xmit_skb - single-buffer front end to tipc_link_xmit()
 *
 * Places skb on a one-element list held on this function's stack and
 * forwards that list, together with net, dnode and selector, to
 * tipc_link_xmit(). Returns whatever tipc_link_xmit() returns.
 */
int tipc_link_xmit_skb ( struct net * net , struct sk_buff * skb , u32 dnode ,
u32 selector )
2014-11-26 06:41:55 +03:00
{
struct sk_buff_head head ;
/* Build the temporary single-buffer chain expected by tipc_link_xmit() */
skb2list ( skb , & head ) ;
2015-01-09 10:27:05 +03:00
return tipc_link_xmit ( net , & head , dnode , selector ) ;
2014-11-26 06:41:55 +03:00
}
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
/**
2014-07-17 04:41:03 +04:00
* tipc_link_xmit ( ) is the general link level function for message sending
2015-01-09 10:27:05 +03:00
* @ net : the applicable net namespace
2014-11-26 06:41:55 +03:00
* @ list : chain of buffers containing message
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
* @ dnode : address of destination node
* @ selector : a number used for deterministic link selection
* Consumes the buffer chain , except when returning - ELINKCONG
* Returns 0 if success , otherwise errno : - ELINKCONG , - EHOSTUNREACH , - EMSGSIZE
*/
2015-01-09 10:27:05 +03:00
/* Look up the node for dnode, pick one of its active links and hand the
 * buffer chain to __tipc_link_xmit() under the node lock; with no link,
 * deliver to the local socket layer if dnode is the own node, otherwise
 * purge the chain and return -EHOSTUNREACH.
 */
int tipc_link_xmit ( struct net * net , struct sk_buff_head * list , u32 dnode ,
u32 selector )
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
{
struct tipc_link * link = NULL ;
struct tipc_node * node ;
/* Result used when no link is found and dnode is not the own node */
int rc = - EHOSTUNREACH ;
2015-01-09 10:27:05 +03:00
node = tipc_node_find ( net , dnode ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
if ( node ) {
tipc_node_lock ( node ) ;
/* The low bit of selector picks one of the two active_links slots */
link = node - > active_links [ selector & 1 ] ;
/* Transmit while the node lock is still held */
if ( link )
2015-01-09 10:27:06 +03:00
rc = __tipc_link_xmit ( net , link , list ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
tipc_node_unlock ( node ) ;
}
/* A link was used: report the result of __tipc_link_xmit() as is */
if ( link )
return rc ;
2015-01-09 10:27:10 +03:00
if ( likely ( in_own_node ( net , dnode ) ) ) {
2014-11-26 06:41:55 +03:00
/* As a node local message chain never contains more than one
* buffer , we just need to dequeue one SKB buffer from the
* head list .
*/
2015-01-09 10:27:05 +03:00
return tipc_sk_rcv ( net , __skb_dequeue ( list ) ) ;
2014-11-26 06:41:55 +03:00
}
/* No link and not a local destination: release the whole chain; rc
 * still holds its initial -EHOSTUNREACH because no transmit was tried.
 */
__skb_queue_purge ( list ) ;
tipc: introduce send functions for chained buffers in link
The current link implementation provides several different transmit
functions, depending on the characteristics of the message to be
sent: if it is an iovec or an sk_buff, if it needs fragmentation or
not, if the caller holds the node_lock or not. The permutation of
these options gives us an unwanted amount of unnecessarily complex
code.
As a first step towards simplifying the send path for all messages,
we introduce two new send functions at link level, tipc_link_xmit2()
and __tipc_link_xmit2(). The former looks up a link to the message
destination, and if one is found, it grabs the node lock and calls
the second function, which works exclusively inside the node lock
protection. If no link is found, and the destination is on the same
node, it delivers the message directly to the local destination
socket.
The new functions take a buffer chain where all packet headers are
already prepared, and the correct MTU has been used. These two
functions will later replace all other link-level transmit functions.
The functions are not backwards compatible, so we have added them
as new functions with temporary names. They are tested, but have no
users yet. Those will be added later in this series.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-26 05:41:32 +04:00
return rc ;
}
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
/*
2014-02-18 12:06:46 +04:00
* tipc_link_sync_xmit - synchronize broadcast link endpoints .
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
*
* Give a newly added peer node the sequence number where it should
* start receiving and acking broadcast packets .
*
* Called with node locked
*/
2014-07-17 04:40:59 +04:00
static void tipc_link_sync_xmit ( struct tipc_link * link )
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
{
2014-11-26 06:41:55 +03:00
struct sk_buff * skb ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
struct tipc_msg * msg ;
2014-11-26 06:41:55 +03:00
skb = tipc_buf_acquire ( INT_H_SIZE ) ;
if ( ! skb )
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
return ;
2014-11-26 06:41:55 +03:00
msg = buf_msg ( skb ) ;
2015-01-09 10:27:10 +03:00
tipc_msg_init ( link - > owner - > net , msg , BCAST_PROTOCOL , STATE_MSG ,
INT_H_SIZE , link - > addr ) ;
2014-07-17 04:40:59 +04:00
msg_set_last_bcast ( msg , link - > owner - > bclink . acked ) ;
2014-11-26 06:41:55 +03:00
__tipc_link_xmit_skb ( link , skb ) ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
}
/*
2014-02-18 12:06:46 +04:00
* tipc_link_sync_rcv - synchronize broadcast link endpoints .
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
* Receive the sequence number where we should start receiving and
* acking broadcast packets from a newly added peer node , and open
* up for reception of such packets .
*
* Called with node locked
*/
2014-02-18 12:06:46 +04:00
static void tipc_link_sync_rcv ( struct tipc_node * n , struct sk_buff * buf )
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
n - > bclink . last_sent = n - > bclink . last_in = msg_last_bcast ( msg ) ;
n - > bclink . recv_permitted = true ;
kfree_skb ( buf ) ;
}
2014-11-26 06:41:52 +03:00
struct sk_buff * tipc_skb_queue_next ( const struct sk_buff_head * list ,
const struct sk_buff * skb )
{
if ( skb_queue_is_last ( list , skb ) )
return NULL ;
return skb - > next ;
}
2007-02-09 17:25:21 +03:00
/*
2014-11-26 06:41:48 +03:00
* tipc_link_push_packets - push unsent packets to bearer
*
* Push out the unsent messages of a link where congestion
* has abated . Node is locked .
*
* Called with node locked
2006-01-02 21:04:38 +03:00
*/
2014-11-26 06:41:48 +03:00
void tipc_link_push_packets ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:52 +03:00
struct sk_buff_head * outqueue = & l_ptr - > outqueue ;
struct sk_buff * skb = l_ptr - > next_out ;
2014-11-26 06:41:48 +03:00
struct tipc_msg * msg ;
u32 next , first ;
2006-01-02 21:04:38 +03:00
2014-11-26 06:41:52 +03:00
skb_queue_walk_from ( outqueue , skb ) {
2014-11-26 06:41:48 +03:00
msg = buf_msg ( skb ) ;
next = msg_seqno ( msg ) ;
2014-11-26 06:41:52 +03:00
first = buf_seqno ( skb_peek ( outqueue ) ) ;
2006-01-02 21:04:38 +03:00
if ( mod ( next - first ) < l_ptr - > queue_limit [ 0 ] ) {
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2012-11-15 07:34:45 +04:00
if ( msg_user ( msg ) = = MSG_BUNDLER )
2014-11-26 06:41:49 +03:00
TIPC_SKB_CB ( skb ) - > bundling = false ;
2015-01-09 10:27:06 +03:00
tipc_bearer_send ( l_ptr - > owner - > net ,
l_ptr - > bearer_id , skb ,
2014-11-26 06:41:48 +03:00
& l_ptr - > media_addr ) ;
2014-11-26 06:41:52 +03:00
l_ptr - > next_out = tipc_skb_queue_next ( outqueue , skb ) ;
2014-11-26 06:41:48 +03:00
} else {
break ;
2006-01-02 21:04:38 +03:00
}
}
}
2014-05-05 04:56:17 +04:00
void tipc_link_reset_all ( struct tipc_node * node )
2006-06-26 10:40:01 +04:00
{
char addr_string [ 16 ] ;
u32 i ;
2014-05-05 04:56:17 +04:00
tipc_node_lock ( node ) ;
2006-06-26 10:40:01 +04:00
2012-06-29 08:16:37 +04:00
pr_warn ( " Resetting all links to %s \n " ,
2014-05-05 04:56:17 +04:00
tipc_addr_string_fill ( addr_string , node - > addr ) ) ;
2006-06-26 10:40:01 +04:00
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
2014-05-05 04:56:17 +04:00
if ( node - > links [ i ] ) {
link_print ( node - > links [ i ] , " Resetting link \n " ) ;
tipc_link_reset ( node - > links [ i ] ) ;
2006-06-26 10:40:01 +04:00
}
}
2014-05-05 04:56:17 +04:00
tipc_node_unlock ( node ) ;
2006-06-26 10:40:01 +04:00
}
2011-12-30 05:58:42 +04:00
static void link_retransmit_failure ( struct tipc_link * l_ptr ,
2013-06-17 18:54:47 +04:00
struct sk_buff * buf )
2006-06-26 10:40:01 +04:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
2015-01-09 10:27:07 +03:00
struct net * net = l_ptr - > owner - > net ;
2006-06-26 10:40:01 +04:00
2012-06-29 08:16:37 +04:00
pr_warn ( " Retransmission failure on link <%s> \n " , l_ptr - > name ) ;
2006-06-26 10:40:01 +04:00
if ( l_ptr - > addr ) {
/* Handle failure on standard link */
2010-12-31 21:59:27 +03:00
link_print ( l_ptr , " Resetting link \n " ) ;
2006-06-26 10:40:01 +04:00
tipc_link_reset ( l_ptr ) ;
} else {
/* Handle failure on broadcast link */
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2006-06-26 10:40:01 +04:00
char addr_string [ 16 ] ;
2012-06-29 08:16:37 +04:00
pr_info ( " Msg seq number: %u, " , msg_seqno ( msg ) ) ;
pr_cont ( " Outstanding acks: %lu \n " ,
( unsigned long ) TIPC_SKB_CB ( buf ) - > handle ) ;
2006-10-04 03:25:34 +04:00
2015-01-09 10:27:07 +03:00
n_ptr = tipc_bclink_retransmit_to ( net ) ;
2006-06-26 10:40:01 +04:00
tipc_node_lock ( n_ptr ) ;
2010-05-11 18:30:12 +04:00
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2012-06-29 08:16:37 +04:00
pr_info ( " Broadcast link info for %s \n " , addr_string ) ;
2012-11-16 09:51:30 +04:00
pr_info ( " Reception permitted: %d, Acked: %u \n " ,
n_ptr - > bclink . recv_permitted ,
2012-06-29 08:16:37 +04:00
n_ptr - > bclink . acked ) ;
pr_info ( " Last in: %u, Oos state: %u, Last sent: %u \n " ,
n_ptr - > bclink . last_in ,
n_ptr - > bclink . oos_state ,
n_ptr - > bclink . last_sent ) ;
2006-06-26 10:40:01 +04:00
tipc_node_unlock ( n_ptr ) ;
2015-01-09 10:27:07 +03:00
tipc_bclink_set_flags ( net , TIPC_BCLINK_RESET ) ;
2006-06-26 10:40:01 +04:00
l_ptr - > stale_count = 0 ;
}
}
2014-11-26 06:41:52 +03:00
void tipc_link_retransmit ( struct tipc_link * l_ptr , struct sk_buff * skb ,
2006-01-18 02:38:21 +03:00
u32 retransmits )
2006-01-02 21:04:38 +03:00
{
struct tipc_msg * msg ;
2014-11-26 06:41:52 +03:00
if ( ! skb )
2006-06-26 10:40:01 +04:00
return ;
2014-11-26 06:41:52 +03:00
msg = buf_msg ( skb ) ;
2007-02-09 17:25:21 +03:00
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
/* Detect repeated retransmit failures */
if ( l_ptr - > last_retransmitted = = msg_seqno ( msg ) ) {
if ( + + l_ptr - > stale_count > 100 ) {
2014-11-26 06:41:52 +03:00
link_retransmit_failure ( l_ptr , skb ) ;
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
return ;
2006-06-26 10:40:01 +04:00
}
} else {
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
l_ptr - > last_retransmitted = msg_seqno ( msg ) ;
l_ptr - > stale_count = 1 ;
2006-01-02 21:04:38 +03:00
}
2006-06-26 10:40:01 +04:00
2014-11-26 06:41:52 +03:00
skb_queue_walk_from ( & l_ptr - > outqueue , skb ) {
if ( ! retransmits | | skb = = l_ptr - > next_out )
break ;
msg = buf_msg ( skb ) ;
2006-01-02 21:04:38 +03:00
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2015-01-09 10:27:06 +03:00
tipc_bearer_send ( l_ptr - > owner - > net , l_ptr - > bearer_id , skb ,
& l_ptr - > media_addr ) ;
2012-11-15 07:34:45 +04:00
retransmits - - ;
l_ptr - > stats . retransmitted + + ;
2006-01-02 21:04:38 +03:00
}
}
2014-11-26 06:41:54 +03:00
static void link_retrieve_defq ( struct tipc_link * link ,
struct sk_buff_head * list )
2006-01-02 21:04:38 +03:00
{
u32 seq_no ;
2014-11-26 06:41:54 +03:00
if ( skb_queue_empty ( & link - > deferred_queue ) )
return ;
seq_no = buf_seqno ( skb_peek ( & link - > deferred_queue ) ) ;
if ( seq_no = = mod ( link - > next_in_no ) )
skb_queue_splice_tail_init ( & link - > deferred_queue , list ) ;
2006-01-02 21:04:38 +03:00
}
2008-04-16 06:04:54 +04:00
/**
* link_recv_buf_validate - validate basic format of received message
*
* This routine ensures a TIPC message has an acceptable header , and at least
* as much data as the header indicates it should . The routine also ensures
* that the entire message header is stored in the main fragment of the message
* buffer , to simplify future access to message header fields .
*
* Note : Having extra info present in the message header or data areas is OK .
* TIPC will ignore the excess , under the assumption that it is optional info
* introduced by a later release of the protocol .
*/
static int link_recv_buf_validate ( struct sk_buff * buf )
{
static u32 min_data_hdr_size [ 8 ] = {
2011-05-31 23:03:18 +04:00
SHORT_H_SIZE , MCAST_H_SIZE , NAMED_H_SIZE , BASIC_H_SIZE ,
2008-04-16 06:04:54 +04:00
MAX_H_SIZE , MAX_H_SIZE , MAX_H_SIZE , MAX_H_SIZE
} ;
struct tipc_msg * msg ;
u32 tipc_hdr [ 2 ] ;
u32 size ;
u32 hdr_size ;
u32 min_hdr_size ;
2014-02-11 14:38:26 +04:00
/* If this packet comes from the defer queue, the skb has already
* been validated
*/
if ( unlikely ( TIPC_SKB_CB ( buf ) - > deferred ) )
return 1 ;
2008-04-16 06:04:54 +04:00
if ( unlikely ( buf - > len < MIN_H_SIZE ) )
return 0 ;
msg = skb_header_pointer ( buf , 0 , sizeof ( tipc_hdr ) , tipc_hdr ) ;
if ( msg = = NULL )
return 0 ;
if ( unlikely ( msg_version ( msg ) ! = TIPC_VERSION ) )
return 0 ;
size = msg_size ( msg ) ;
hdr_size = msg_hdr_sz ( msg ) ;
min_hdr_size = msg_isdata ( msg ) ?
min_data_hdr_size [ msg_type ( msg ) ] : INT_H_SIZE ;
if ( unlikely ( ( hdr_size < min_hdr_size ) | |
( size < hdr_size ) | |
( buf - > len < size ) | |
( size - hdr_size > TIPC_MAX_USER_MSG_SIZE ) ) )
return 0 ;
return pskb_may_pull ( buf , hdr_size ) ;
}
2010-08-17 15:00:07 +04:00
/**
2014-01-08 02:02:41 +04:00
* tipc_rcv - process TIPC packets / messages arriving from off - node
2015-01-09 10:27:05 +03:00
* @ net : the applicable net namespace
2014-11-26 06:41:54 +03:00
* @ skb : TIPC packet
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
* @ b_ptr : pointer to bearer message arrived on
2010-08-17 15:00:07 +04:00
*
* Invoked with no locks held . Bearer pointer must point to a valid bearer
* structure ( i . e . cannot be NULL ) , but bearer can be inactive .
*/
2015-01-09 10:27:04 +03:00
void tipc_rcv ( struct net * net , struct sk_buff * skb , struct tipc_bearer * b_ptr )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:10 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-11-26 06:41:54 +03:00
struct sk_buff_head head ;
struct tipc_node * n_ptr ;
struct tipc_link * l_ptr ;
struct sk_buff * skb1 , * tmp ;
struct tipc_msg * msg ;
u32 seq_no ;
u32 ackd ;
u32 released ;
2006-01-02 21:04:38 +03:00
2014-11-26 06:41:55 +03:00
skb2list ( skb , & head ) ;
2008-04-16 06:04:54 +04:00
2014-11-26 06:41:54 +03:00
while ( ( skb = __skb_dequeue ( & head ) ) ) {
2008-04-16 06:04:54 +04:00
/* Ensure message is well-formed */
2014-11-26 06:41:54 +03:00
if ( unlikely ( ! link_recv_buf_validate ( skb ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2006-01-02 21:04:38 +03:00
2008-04-16 06:03:23 +04:00
/* Ensure message data is a single contiguous unit */
2014-11-26 06:41:54 +03:00
if ( unlikely ( skb_linearize ( skb ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2008-04-16 06:03:23 +04:00
2008-04-16 06:04:54 +04:00
/* Handle arrival of a non-unicast link message */
2014-11-26 06:41:54 +03:00
msg = buf_msg ( skb ) ;
2008-04-16 06:04:54 +04:00
2006-01-02 21:04:38 +03:00
if ( unlikely ( msg_non_seq ( msg ) ) ) {
2008-06-05 04:32:35 +04:00
if ( msg_user ( msg ) = = LINK_CONFIG )
2015-01-09 10:27:04 +03:00
tipc_disc_rcv ( net , skb , b_ptr ) ;
2008-06-05 04:32:35 +04:00
else
2015-01-09 10:27:04 +03:00
tipc_bclink_rcv ( net , skb ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2007-02-09 17:25:21 +03:00
2011-04-05 23:15:04 +04:00
/* Discard unicast link messages destined for another node */
2006-06-26 10:39:31 +04:00
if ( unlikely ( ! msg_short ( msg ) & &
2015-01-09 10:27:10 +03:00
( msg_destnode ( msg ) ! = tn - > own_addr ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2007-02-09 17:25:21 +03:00
2010-08-17 15:00:16 +04:00
/* Locate neighboring node that sent message */
2015-01-09 10:27:05 +03:00
n_ptr = tipc_node_find ( net , msg_prevnode ( msg ) ) ;
2006-01-02 21:04:38 +03:00
if ( unlikely ( ! n_ptr ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2006-01-18 02:38:21 +03:00
tipc_node_lock ( n_ptr ) ;
2008-04-16 06:04:54 +04:00
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Locate unicast link endpoint that should handle message */
l_ptr = n_ptr - > links [ b_ptr - > identity ] ;
2013-10-30 07:26:57 +04:00
if ( unlikely ( ! l_ptr ) )
goto unlock_discard ;
2010-08-17 15:00:16 +04:00
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Verify that communication with node is currently allowed */
2014-05-08 04:54:39 +04:00
if ( ( n_ptr - > action_flags & TIPC_WAIT_PEER_LINKS_DOWN ) & &
2014-05-05 04:56:11 +04:00
msg_user ( msg ) = = LINK_PROTOCOL & &
( msg_type ( msg ) = = RESET_MSG | |
msg_type ( msg ) = = ACTIVATE_MSG ) & &
! msg_redundant_link ( msg ) )
2014-05-08 04:54:39 +04:00
n_ptr - > action_flags & = ~ TIPC_WAIT_PEER_LINKS_DOWN ;
2014-05-05 04:56:11 +04:00
if ( tipc_node_blocked ( n_ptr ) )
2013-10-30 07:26:57 +04:00
goto unlock_discard ;
2008-04-16 06:04:54 +04:00
/* Validate message sequence number info */
seq_no = msg_seqno ( msg ) ;
ackd = msg_ack ( msg ) ;
/* Release acked messages */
2012-11-16 09:51:30 +04:00
if ( n_ptr - > bclink . recv_permitted )
2011-10-24 23:26:24 +04:00
tipc_bclink_acknowledge ( n_ptr , msg_bcast_ack ( msg ) ) ;
2006-01-02 21:04:38 +03:00
2014-11-26 06:41:52 +03:00
released = 0 ;
skb_queue_walk_safe ( & l_ptr - > outqueue , skb1 , tmp ) {
if ( skb1 = = l_ptr - > next_out | |
more ( buf_seqno ( skb1 ) , ackd ) )
break ;
__skb_unlink ( skb1 , & l_ptr - > outqueue ) ;
kfree_skb ( skb1 ) ;
released = 1 ;
2006-01-02 21:04:38 +03:00
}
2008-04-16 06:04:54 +04:00
/* Try sending any messages link endpoint has pending */
2006-01-02 21:04:38 +03:00
if ( unlikely ( l_ptr - > next_out ) )
2014-11-26 06:41:48 +03:00
tipc_link_push_packets ( l_ptr ) ;
2014-02-14 02:29:15 +04:00
2014-08-23 02:09:07 +04:00
if ( released & & ! skb_queue_empty ( & l_ptr - > waiting_sks ) ) {
link_prepare_wakeup ( l_ptr ) ;
l_ptr - > owner - > action_flags | = TIPC_WAKEUP_USERS ;
}
2014-02-14 02:29:15 +04:00
/* Process the incoming packet */
2013-10-30 07:26:57 +04:00
if ( unlikely ( ! link_working_working ( l_ptr ) ) ) {
if ( msg_user ( msg ) = = LINK_PROTOCOL ) {
2015-01-09 10:27:04 +03:00
tipc_link_proto_rcv ( net , l_ptr , skb ) ;
2014-11-26 06:41:54 +03:00
link_retrieve_defq ( l_ptr , & head ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2013-10-30 07:26:57 +04:00
/* Traffic message. Conditionally activate link */
link_state_event ( l_ptr , TRAFFIC_MSG_EVT ) ;
if ( link_working_working ( l_ptr ) ) {
/* Re-insert buffer in front of queue */
2014-11-26 06:41:54 +03:00
__skb_queue_head ( & head , skb ) ;
2013-10-30 07:26:57 +04:00
tipc_node_unlock ( n_ptr ) ;
continue ;
}
goto unlock_discard ;
}
/* Link is now in state WORKING_WORKING */
if ( unlikely ( seq_no ! = mod ( l_ptr - > next_in_no ) ) ) {
2015-01-09 10:27:04 +03:00
link_handle_out_of_seq_msg ( net , l_ptr , skb ) ;
2014-11-26 06:41:54 +03:00
link_retrieve_defq ( l_ptr , & head ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2013-10-30 07:26:57 +04:00
l_ptr - > next_in_no + + ;
2014-11-26 06:41:53 +03:00
if ( unlikely ( ! skb_queue_empty ( & l_ptr - > deferred_queue ) ) )
2014-11-26 06:41:54 +03:00
link_retrieve_defq ( l_ptr , & head ) ;
2014-02-14 02:29:15 +04:00
2014-07-01 12:22:41 +04:00
if ( unlikely ( + + l_ptr - > unacked_window > = TIPC_MIN_LINK_WIN ) ) {
l_ptr - > stats . sent_acks + + ;
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
}
2015-01-09 10:27:04 +03:00
if ( tipc_link_prepare_input ( net , l_ptr , & skb ) ) {
2013-10-30 07:26:57 +04:00
tipc_node_unlock ( n_ptr ) ;
continue ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2014-11-26 06:41:54 +03:00
2015-01-09 10:27:05 +03:00
if ( tipc_link_input ( net , l_ptr , skb ) ! = 0 )
2014-07-01 12:22:40 +04:00
goto discard ;
2013-10-30 07:26:57 +04:00
continue ;
unlock_discard :
tipc_node_unlock ( n_ptr ) ;
discard :
2014-11-26 06:41:54 +03:00
kfree_skb ( skb ) ;
2006-01-02 21:04:38 +03:00
}
}
2014-07-01 12:22:40 +04:00
/**
* tipc_link_prepare_input - process TIPC link messages
*
* returns nonzero if the message was consumed
*
* Node lock must be held
*/
2015-01-09 10:27:04 +03:00
static int tipc_link_prepare_input ( struct net * net , struct tipc_link * l ,
struct sk_buff * * buf )
2014-07-01 12:22:40 +04:00
{
struct tipc_node * n ;
struct tipc_msg * msg ;
int res = - EINVAL ;
n = l - > owner ;
msg = buf_msg ( * buf ) ;
switch ( msg_user ( msg ) ) {
case CHANGEOVER_PROTOCOL :
2015-01-09 10:27:04 +03:00
if ( tipc_link_tunnel_rcv ( net , n , buf ) )
2014-07-01 12:22:40 +04:00
res = 0 ;
break ;
case MSG_FRAGMENTER :
l - > stats . recv_fragments + + ;
if ( tipc_buf_append ( & l - > reasm_buf , buf ) ) {
l - > stats . recv_fragmented + + ;
res = 0 ;
} else if ( ! l - > reasm_buf ) {
tipc_link_reset ( l ) ;
}
break ;
case MSG_BUNDLER :
l - > stats . recv_bundles + + ;
l - > stats . recv_bundled + = msg_msgcnt ( msg ) ;
res = 0 ;
break ;
case NAME_DISTRIBUTOR :
n - > bclink . recv_permitted = true ;
res = 0 ;
break ;
case BCAST_PROTOCOL :
tipc_link_sync_rcv ( n , * buf ) ;
break ;
default :
res = 0 ;
}
return res ;
}
/**
* tipc_link_input - Deliver message too higher layers
*/
2015-01-09 10:27:05 +03:00
static int tipc_link_input ( struct net * net , struct tipc_link * l ,
struct sk_buff * buf )
2014-07-01 12:22:40 +04:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
int res = 0 ;
switch ( msg_user ( msg ) ) {
case TIPC_LOW_IMPORTANCE :
case TIPC_MEDIUM_IMPORTANCE :
case TIPC_HIGH_IMPORTANCE :
case TIPC_CRITICAL_IMPORTANCE :
case CONN_MANAGER :
2015-01-09 10:27:05 +03:00
tipc_sk_rcv ( net , buf ) ;
2014-07-01 12:22:40 +04:00
break ;
case NAME_DISTRIBUTOR :
2015-01-09 10:27:05 +03:00
tipc_named_rcv ( net , buf ) ;
2014-07-01 12:22:40 +04:00
break ;
case MSG_BUNDLER :
2015-01-09 10:27:05 +03:00
tipc_link_bundle_rcv ( net , buf ) ;
2014-07-01 12:22:40 +04:00
break ;
default :
res = - EINVAL ;
}
return res ;
}
2012-07-10 14:55:09 +04:00
/**
2011-10-25 18:44:35 +04:00
* tipc_link_defer_pkt - Add out - of - sequence message to deferred reception queue
*
* Returns increase in queue length ( i . e . 0 or 1 )
2006-01-02 21:04:38 +03:00
*/
2014-11-26 06:41:53 +03:00
u32 tipc_link_defer_pkt ( struct sk_buff_head * list , struct sk_buff * skb )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:53 +03:00
struct sk_buff * skb1 ;
u32 seq_no = buf_seqno ( skb ) ;
2006-01-02 21:04:38 +03:00
/* Empty queue ? */
2014-11-26 06:41:53 +03:00
if ( skb_queue_empty ( list ) ) {
__skb_queue_tail ( list , skb ) ;
2006-01-02 21:04:38 +03:00
return 1 ;
}
/* Last ? */
2014-11-26 06:41:53 +03:00
if ( less ( buf_seqno ( skb_peek_tail ( list ) ) , seq_no ) ) {
__skb_queue_tail ( list , skb ) ;
2006-01-02 21:04:38 +03:00
return 1 ;
}
2011-10-25 18:44:35 +04:00
/* Locate insertion point in queue, then insert; discard if duplicate */
2014-11-26 06:41:53 +03:00
skb_queue_walk ( list , skb1 ) {
u32 curr_seqno = buf_seqno ( skb1 ) ;
2006-01-02 21:04:38 +03:00
2011-10-25 18:44:35 +04:00
if ( seq_no = = curr_seqno ) {
2014-11-26 06:41:53 +03:00
kfree_skb ( skb ) ;
2011-10-25 18:44:35 +04:00
return 0 ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 18:44:35 +04:00
if ( less ( seq_no , curr_seqno ) )
2006-01-02 21:04:38 +03:00
break ;
2011-10-25 18:44:35 +04:00
}
2006-01-02 21:04:38 +03:00
2014-11-26 06:41:53 +03:00
__skb_queue_before ( list , skb1 , skb ) ;
2011-10-25 18:44:35 +04:00
return 1 ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 18:44:35 +04:00
/*
2006-01-02 21:04:38 +03:00
* link_handle_out_of_seq_msg - handle arrival of out - of - sequence packet
*/
2015-01-09 10:27:04 +03:00
static void link_handle_out_of_seq_msg ( struct net * net ,
struct tipc_link * l_ptr ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf )
{
2011-10-25 00:03:12 +04:00
u32 seq_no = buf_seqno ( buf ) ;
2006-01-02 21:04:38 +03:00
if ( likely ( msg_user ( buf_msg ( buf ) ) = = LINK_PROTOCOL ) ) {
2015-01-09 10:27:04 +03:00
tipc_link_proto_rcv ( net , l_ptr , buf ) ;
2006-01-02 21:04:38 +03:00
return ;
}
/* Record OOS packet arrival (force mismatch on next timeout) */
l_ptr - > checkpoint - - ;
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* Discard packet if a duplicate ; otherwise add it to deferred queue
* and notify peer of gap as per protocol specification
*/
if ( less ( seq_no , mod ( l_ptr - > next_in_no ) ) ) {
l_ptr - > stats . duplicates + + ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
return ;
}
2014-11-26 06:41:53 +03:00
if ( tipc_link_defer_pkt ( & l_ptr - > deferred_queue , buf ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > stats . deferred_recv + + ;
2014-02-11 14:38:26 +04:00
TIPC_SKB_CB ( buf ) - > deferred = true ;
2014-11-26 06:41:53 +03:00
if ( ( skb_queue_len ( & l_ptr - > deferred_queue ) % 16 ) = = 1 )
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2014-11-26 06:41:53 +03:00
} else {
2006-01-02 21:04:38 +03:00
l_ptr - > stats . duplicates + + ;
2014-11-26 06:41:53 +03:00
}
2006-01-02 21:04:38 +03:00
}
/*
* Send protocol message to the other endpoint .
*/
2014-02-18 12:06:46 +04:00
void tipc_link_proto_xmit ( struct tipc_link * l_ptr , u32 msg_typ , int probe_msg ,
u32 gap , u32 tolerance , u32 priority , u32 ack_mtu )
2006-01-02 21:04:38 +03:00
{
2006-03-21 09:36:47 +03:00
struct sk_buff * buf = NULL ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg = l_ptr - > pmsg ;
2007-02-09 17:25:21 +03:00
u32 msg_size = sizeof ( l_ptr - > proto_msg ) ;
2011-02-28 23:30:20 +03:00
int r_flag ;
2006-01-02 21:04:38 +03:00
2013-12-11 08:45:44 +04:00
/* Don't send protocol message during link changeover */
if ( l_ptr - > exp_msg_count )
2006-01-02 21:04:38 +03:00
return ;
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Abort non-RESET send if communication with node is prohibited */
2014-05-05 04:56:11 +04:00
if ( ( tipc_node_blocked ( l_ptr - > owner ) ) & & ( msg_typ ! = RESET_MSG ) )
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
return ;
2011-10-25 19:20:26 +04:00
/* Create protocol message with "out-of-sequence" sequence number */
2006-01-02 21:04:38 +03:00
msg_set_type ( msg , msg_typ ) ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
msg_set_net_plane ( msg , l_ptr - > net_plane ) ;
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2015-01-09 10:27:07 +03:00
msg_set_last_bcast ( msg , tipc_bclink_get_last_sent ( l_ptr - > owner - > net ) ) ;
2006-01-02 21:04:38 +03:00
if ( msg_typ = = STATE_MSG ) {
u32 next_sent = mod ( l_ptr - > next_out_no ) ;
2006-01-18 02:38:21 +03:00
if ( ! tipc_link_is_up ( l_ptr ) )
2006-01-02 21:04:38 +03:00
return ;
if ( l_ptr - > next_out )
2011-10-25 00:03:12 +04:00
next_sent = buf_seqno ( l_ptr - > next_out ) ;
2006-01-02 21:04:38 +03:00
msg_set_next_sent ( msg , next_sent ) ;
2014-11-26 06:41:53 +03:00
if ( ! skb_queue_empty ( & l_ptr - > deferred_queue ) ) {
u32 rec = buf_seqno ( skb_peek ( & l_ptr - > deferred_queue ) ) ;
2006-01-02 21:04:38 +03:00
gap = mod ( rec - mod ( l_ptr - > next_in_no ) ) ;
}
msg_set_seq_gap ( msg , gap ) ;
if ( gap )
l_ptr - > stats . sent_nacks + + ;
msg_set_link_tolerance ( msg , tolerance ) ;
msg_set_linkprio ( msg , priority ) ;
msg_set_max_pkt ( msg , ack_mtu ) ;
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
msg_set_probe ( msg , probe_msg ! = 0 ) ;
2007-02-09 17:25:21 +03:00
if ( probe_msg ) {
2006-01-02 21:04:38 +03:00
u32 mtu = l_ptr - > max_pkt ;
2007-02-09 17:25:21 +03:00
if ( ( mtu < l_ptr - > max_pkt_target ) & &
2006-01-02 21:04:38 +03:00
link_working_working ( l_ptr ) & &
l_ptr - > fsm_msg_cnt ) {
msg_size = ( mtu + ( l_ptr - > max_pkt_target - mtu ) / 2 + 2 ) & ~ 3 ;
2007-02-09 17:25:21 +03:00
if ( l_ptr - > max_pkt_probes = = 10 ) {
l_ptr - > max_pkt_target = ( msg_size - 4 ) ;
l_ptr - > max_pkt_probes = 0 ;
2006-01-02 21:04:38 +03:00
msg_size = ( mtu + ( l_ptr - > max_pkt_target - mtu ) / 2 + 2 ) & ~ 3 ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > max_pkt_probes + + ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_probes + + ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_states + + ;
} else { /* RESET_MSG or ACTIVATE_MSG */
msg_set_ack ( msg , mod ( l_ptr - > reset_checkpoint - 1 ) ) ;
msg_set_seq_gap ( msg , 0 ) ;
msg_set_next_sent ( msg , 1 ) ;
2011-01-18 23:15:34 +03:00
msg_set_probe ( msg , 0 ) ;
2006-01-02 21:04:38 +03:00
msg_set_link_tolerance ( msg , l_ptr - > tolerance ) ;
msg_set_linkprio ( msg , l_ptr - > priority ) ;
msg_set_max_pkt ( msg , l_ptr - > max_pkt_target ) ;
}
2011-02-28 23:30:20 +03:00
r_flag = ( l_ptr - > owner - > working_links > tipc_link_is_up ( l_ptr ) ) ;
msg_set_redundant_link ( msg , r_flag ) ;
2006-01-02 21:04:38 +03:00
msg_set_linkprio ( msg , l_ptr - > priority ) ;
2011-10-25 19:20:26 +04:00
msg_set_size ( msg , msg_size ) ;
2006-01-02 21:04:38 +03:00
msg_set_seqno ( msg , mod ( l_ptr - > next_out_no + ( 0xffff / 2 ) ) ) ;
2010-10-13 17:20:35 +04:00
buf = tipc_buf_acquire ( msg_size ) ;
2006-01-02 21:04:38 +03:00
if ( ! buf )
return ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , msg , sizeof ( l_ptr - > proto_msg ) ) ;
2013-06-17 18:54:48 +04:00
buf - > priority = TC_PRIO_CONTROL ;
2006-01-02 21:04:38 +03:00
2015-01-09 10:27:06 +03:00
tipc_bearer_send ( l_ptr - > owner - > net , l_ptr - > bearer_id , buf ,
& l_ptr - > media_addr ) ;
2011-10-25 19:20:26 +04:00
l_ptr - > unacked_window = 0 ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
/*
* Receive protocol message :
2007-02-09 17:25:21 +03:00
* Note that network plane id propagates through the network , and may
* change at any time . The node with lowest address rules
2006-01-02 21:04:38 +03:00
*/
2015-01-09 10:27:04 +03:00
static void tipc_link_proto_rcv ( struct net * net , struct tipc_link * l_ptr ,
struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:10 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2006-01-02 21:04:38 +03:00
u32 rec_gap = 0 ;
u32 max_pkt_info ;
2007-02-09 17:25:21 +03:00
u32 max_pkt_ack ;
2006-01-02 21:04:38 +03:00
u32 msg_tol ;
struct tipc_msg * msg = buf_msg ( buf ) ;
2013-12-11 08:45:44 +04:00
/* Discard protocol message during link changeover */
if ( l_ptr - > exp_msg_count )
2006-01-02 21:04:38 +03:00
goto exit ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
if ( l_ptr - > net_plane ! = msg_net_plane ( msg ) )
2015-01-09 10:27:10 +03:00
if ( tn - > own_addr > msg_prevnode ( msg ) )
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
l_ptr - > net_plane = msg_net_plane ( msg ) ;
2006-01-02 21:04:38 +03:00
switch ( msg_type ( msg ) ) {
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
case RESET_MSG :
2008-06-05 04:29:39 +04:00
if ( ! link_working_unknown ( l_ptr ) & &
( l_ptr - > peer_session ! = INVALID_SESSION ) ) {
2011-04-07 17:54:43 +04:00
if ( less_eq ( msg_session ( msg ) , l_ptr - > peer_session ) )
break ; /* duplicate or old reset: ignore */
2006-01-02 21:04:38 +03:00
}
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
if ( ! msg_redundant_link ( msg ) & & ( link_working_working ( l_ptr ) | |
link_working_unknown ( l_ptr ) ) ) {
/*
* peer has lost contact - - don ' t allow peer ' s links
* to reactivate before we recognize loss & clean up
*/
2014-05-08 04:54:40 +04:00
l_ptr - > owner - > action_flags | = TIPC_WAIT_OWN_LINKS_DOWN ;
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
}
2011-10-26 18:55:16 +04:00
link_state_event ( l_ptr , RESET_MSG ) ;
2006-01-02 21:04:38 +03:00
/* fall thru' */
case ACTIVATE_MSG :
/* Update link settings according other endpoint's values */
strcpy ( ( strrchr ( l_ptr - > name , ' : ' ) + 1 ) , ( char * ) msg_data ( msg ) ) ;
2010-12-31 21:59:33 +03:00
msg_tol = msg_link_tolerance ( msg ) ;
if ( msg_tol > l_ptr - > tolerance )
2006-01-02 21:04:38 +03:00
link_set_supervision_props ( l_ptr , msg_tol ) ;
if ( msg_linkprio ( msg ) > l_ptr - > priority )
l_ptr - > priority = msg_linkprio ( msg ) ;
max_pkt_info = msg_max_pkt ( msg ) ;
2007-02-09 17:25:21 +03:00
if ( max_pkt_info ) {
2006-01-02 21:04:38 +03:00
if ( max_pkt_info < l_ptr - > max_pkt_target )
l_ptr - > max_pkt_target = max_pkt_info ;
if ( l_ptr - > max_pkt > l_ptr - > max_pkt_target )
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
} else {
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 20:19:05 +04:00
/* Synchronize broadcast link info, if not done previously */
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
if ( ! tipc_node_is_up ( l_ptr - > owner ) ) {
l_ptr - > owner - > bclink . last_sent =
l_ptr - > owner - > bclink . last_in =
msg_last_bcast ( msg ) ;
l_ptr - > owner - > bclink . oos_state = 0 ;
}
2011-10-25 20:19:05 +04:00
2006-01-02 21:04:38 +03:00
l_ptr - > peer_session = msg_session ( msg ) ;
l_ptr - > peer_bearer_id = msg_bearer_id ( msg ) ;
2011-10-26 18:55:16 +04:00
if ( msg_type ( msg ) = = ACTIVATE_MSG )
link_state_event ( l_ptr , ACTIVATE_MSG ) ;
2006-01-02 21:04:38 +03:00
break ;
case STATE_MSG :
2010-12-31 21:59:33 +03:00
msg_tol = msg_link_tolerance ( msg ) ;
if ( msg_tol )
2006-01-02 21:04:38 +03:00
link_set_supervision_props ( l_ptr , msg_tol ) ;
2007-02-09 17:25:21 +03:00
if ( msg_linkprio ( msg ) & &
2006-01-02 21:04:38 +03:00
( msg_linkprio ( msg ) ! = l_ptr - > priority ) ) {
2015-01-22 19:10:31 +03:00
pr_debug ( " %s<%s>, priority change %u->%u \n " ,
link_rst_msg , l_ptr - > name ,
l_ptr - > priority , msg_linkprio ( msg ) ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > priority = msg_linkprio ( msg ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ; /* Enforce change to take effect */
2006-01-02 21:04:38 +03:00
break ;
}
tipc: don't record link RESET or ACTIVATE messages as traffic
In the current code, all incoming LINK_PROTOCOL messages, irrespective
of type, nudge the "last message received" checkpoint, informing the
link state machine that a message was received from the peer since last
supervision timeout event. This inhibits the link from starting probing
the peer unnecessarily.
However, not only STATE messages are recorded as legitimate incoming
traffic this way, but even RESET and ACTIVATE messages, which in
reality are there to inform the link that the peer endpoint has been
reset. At the same time, some RESET messages may be dropped instead
of causing a link reset. This happens when the link endpoint thinks
it is fully up and working, and the session number of the RESET is
lower than or equal to the current link session. In such cases the
RESET is perceived as a delayed remnant from an earlier session, or
the current one, and dropped.
Now, if a TIPC module is removed and then immediately reinserted, e.g.
when using a script, RESET messages may arrive at the peer link endpoint
before this one has had time to discover the failure. The RESET may be
dropped because of the session number, but only after it has been
recorded as a legitimate traffic event. Hence, the receiving link will
not start probing, and not discover that the peer endpoint is down, at
the same time ignoring the periodic RESET messages coming from that
endpoint. We have ended up in a stale state where a failed link cannot
be re-established.
In this commit, we remedy this by nudging the checkpoint only for
received STATE messages, not for RESET or ACTIVATE messages.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 13:39:10 +04:00
/* Record reception; force mismatch at next timeout: */
l_ptr - > checkpoint - - ;
2006-01-02 21:04:38 +03:00
link_state_event ( l_ptr , TRAFFIC_MSG_EVT ) ;
l_ptr - > stats . recv_states + + ;
if ( link_reset_unknown ( l_ptr ) )
break ;
if ( less_eq ( mod ( l_ptr - > next_in_no ) , msg_next_sent ( msg ) ) ) {
2007-02-09 17:25:21 +03:00
rec_gap = mod ( msg_next_sent ( msg ) -
2006-01-02 21:04:38 +03:00
mod ( l_ptr - > next_in_no ) ) ;
}
max_pkt_ack = msg_max_pkt ( msg ) ;
2007-02-09 17:25:21 +03:00
if ( max_pkt_ack > l_ptr - > max_pkt ) {
l_ptr - > max_pkt = max_pkt_ack ;
l_ptr - > max_pkt_probes = 0 ;
}
2006-01-02 21:04:38 +03:00
max_pkt_ack = 0 ;
2007-02-09 17:25:21 +03:00
if ( msg_probe ( msg ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > stats . recv_probes + + ;
2010-12-31 21:59:35 +03:00
if ( msg_size ( msg ) > sizeof ( l_ptr - > proto_msg ) )
2007-02-09 17:25:21 +03:00
max_pkt_ack = msg_size ( msg ) ;
}
2006-01-02 21:04:38 +03:00
/* Protocol message before retransmits, reduce loss risk */
2012-11-16 09:51:30 +04:00
if ( l_ptr - > owner - > bclink . recv_permitted )
2015-01-09 10:27:04 +03:00
tipc_bclink_update_link_state ( net , l_ptr - > owner ,
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
msg_last_bcast ( msg ) ) ;
2006-01-02 21:04:38 +03:00
if ( rec_gap | | ( msg_probe ( msg ) ) ) {
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 0 , rec_gap , 0 ,
0 , max_pkt_ack ) ;
2006-01-02 21:04:38 +03:00
}
if ( msg_seq_gap ( msg ) ) {
l_ptr - > stats . recv_nacks + + ;
2014-11-26 06:41:52 +03:00
tipc_link_retransmit ( l_ptr , skb_peek ( & l_ptr - > outqueue ) ,
2006-01-18 02:38:21 +03:00
msg_seq_gap ( msg ) ) ;
2006-01-02 21:04:38 +03:00
}
break ;
}
exit :
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
2014-01-08 02:02:41 +04:00
/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to
* a different bearer . Owner node is locked .
2006-01-02 21:04:38 +03:00
*/
2014-01-08 02:02:41 +04:00
static void tipc_link_tunnel_xmit ( struct tipc_link * l_ptr ,
struct tipc_msg * tunnel_hdr ,
struct tipc_msg * msg ,
u32 selector )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * tunnel ;
2014-11-26 06:41:55 +03:00
struct sk_buff * skb ;
2006-01-02 21:04:38 +03:00
u32 length = msg_size ( msg ) ;
tunnel = l_ptr - > owner - > active_links [ selector & 1 ] ;
2006-06-26 10:52:50 +04:00
if ( ! tipc_link_is_up ( tunnel ) ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %stunnel link no longer available \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
2006-06-26 10:52:50 +04:00
}
2006-01-02 21:04:38 +03:00
msg_set_size ( tunnel_hdr , length + INT_H_SIZE ) ;
2014-11-26 06:41:55 +03:00
skb = tipc_buf_acquire ( length + INT_H_SIZE ) ;
if ( ! skb ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send tunnel msg \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
2006-06-26 10:52:50 +04:00
}
2014-11-26 06:41:55 +03:00
skb_copy_to_linear_data ( skb , tunnel_hdr , INT_H_SIZE ) ;
skb_copy_to_linear_data_offset ( skb , INT_H_SIZE , msg , length ) ;
__tipc_link_xmit_skb ( tunnel , skb ) ;
2006-01-02 21:04:38 +03:00
}
2014-01-08 02:02:41 +04:00
/* tipc_link_failover_send_queue(): A link has gone down, but a second
* link is still active . We can do failover . Tunnel the failing link ' s
* whole send queue via the remaining link . This way , we don ' t lose
* any packets , and sequence order is preserved for subsequent traffic
* sent over the remaining link . Owner node is locked .
2006-01-02 21:04:38 +03:00
*/
2014-01-08 02:02:41 +04:00
void tipc_link_failover_send_queue ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:52 +03:00
u32 msgcount = skb_queue_len ( & l_ptr - > outqueue ) ;
2011-12-30 05:58:42 +04:00
struct tipc_link * tunnel = l_ptr - > owner - > active_links [ 0 ] ;
2006-01-02 21:04:38 +03:00
struct tipc_msg tunnel_hdr ;
2014-11-26 06:41:52 +03:00
struct sk_buff * skb ;
2006-06-26 10:52:50 +04:00
int split_bundles ;
2006-01-02 21:04:38 +03:00
if ( ! tunnel )
return ;
2015-01-09 10:27:10 +03:00
tipc_msg_init ( l_ptr - > owner - > net , & tunnel_hdr , CHANGEOVER_PROTOCOL ,
ORIGINAL_MSG , INT_H_SIZE , l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
msg_set_bearer_id ( & tunnel_hdr , l_ptr - > peer_bearer_id ) ;
msg_set_msgcnt ( & tunnel_hdr , msgcount ) ;
2006-06-26 10:51:37 +04:00
2014-11-26 06:41:52 +03:00
if ( skb_queue_empty ( & l_ptr - > outqueue ) ) {
skb = tipc_buf_acquire ( INT_H_SIZE ) ;
if ( skb ) {
skb_copy_to_linear_data ( skb , & tunnel_hdr , INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( & tunnel_hdr , INT_H_SIZE ) ;
2014-11-26 06:41:55 +03:00
__tipc_link_xmit_skb ( tunnel , skb ) ;
2006-01-02 21:04:38 +03:00
} else {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send changeover msg \n " ,
link_co_err ) ;
2006-01-02 21:04:38 +03:00
}
return ;
}
2006-06-26 10:51:37 +04:00
2007-02-09 17:25:21 +03:00
split_bundles = ( l_ptr - > owner - > active_links [ 0 ] ! =
2006-06-26 10:52:50 +04:00
l_ptr - > owner - > active_links [ 1 ] ) ;
2014-11-26 06:41:52 +03:00
skb_queue_walk ( & l_ptr - > outqueue , skb ) {
struct tipc_msg * msg = buf_msg ( skb ) ;
2006-01-02 21:04:38 +03:00
if ( ( msg_user ( msg ) = = MSG_BUNDLER ) & & split_bundles ) {
struct tipc_msg * m = msg_get_wrapped ( msg ) ;
2010-12-31 21:59:32 +03:00
unchar * pos = ( unchar * ) m ;
2006-01-02 21:04:38 +03:00
2007-08-03 06:28:06 +04:00
msgcount = msg_msgcnt ( msg ) ;
2006-01-02 21:04:38 +03:00
while ( msgcount - - ) {
2010-12-31 21:59:32 +03:00
msg_set_seqno ( m , msg_seqno ( msg ) ) ;
2014-01-08 02:02:41 +04:00
tipc_link_tunnel_xmit ( l_ptr , & tunnel_hdr , m ,
msg_link_selector ( m ) ) ;
2006-01-02 21:04:38 +03:00
pos + = align ( msg_size ( m ) ) ;
m = ( struct tipc_msg * ) pos ;
}
} else {
2014-01-08 02:02:41 +04:00
tipc_link_tunnel_xmit ( l_ptr , & tunnel_hdr , msg ,
msg_link_selector ( msg ) ) ;
2006-01-02 21:04:38 +03:00
}
}
}
2014-02-18 12:06:46 +04:00
/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a
2014-01-08 02:02:41 +04:00
* duplicate of the first link ' s send queue via the new link . This way , we
* are guaranteed that currently queued packets from a socket are delivered
* before future traffic from the same socket , even if this is using the
* new link . The last arriving copy of each duplicate packet is dropped at
* the receiving end by the regular protocol check , so packet cardinality
* and sequence order is preserved per sender / receiver socket pair .
* Owner node is locked .
*/
2014-02-18 12:06:46 +04:00
void tipc_link_dup_queue_xmit ( struct tipc_link * l_ptr ,
2014-01-08 02:02:41 +04:00
struct tipc_link * tunnel )
2006-01-02 21:04:38 +03:00
{
2014-11-26 06:41:52 +03:00
struct sk_buff * skb ;
2006-01-02 21:04:38 +03:00
struct tipc_msg tunnel_hdr ;
2015-01-09 10:27:10 +03:00
tipc_msg_init ( l_ptr - > owner - > net , & tunnel_hdr , CHANGEOVER_PROTOCOL ,
DUPLICATE_MSG , INT_H_SIZE , l_ptr - > addr ) ;
2014-11-26 06:41:52 +03:00
msg_set_msgcnt ( & tunnel_hdr , skb_queue_len ( & l_ptr - > outqueue ) ) ;
2006-01-02 21:04:38 +03:00
msg_set_bearer_id ( & tunnel_hdr , l_ptr - > peer_bearer_id ) ;
2014-11-26 06:41:52 +03:00
skb_queue_walk ( & l_ptr - > outqueue , skb ) {
struct sk_buff * outskb ;
struct tipc_msg * msg = buf_msg ( skb ) ;
2006-01-02 21:04:38 +03:00
u32 length = msg_size ( msg ) ;
if ( msg_user ( msg ) = = MSG_BUNDLER )
msg_set_type ( msg , CLOSED_MSG ) ;
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ; /* Update */
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( & tunnel_hdr , length + INT_H_SIZE ) ;
2014-11-26 06:41:52 +03:00
outskb = tipc_buf_acquire ( length + INT_H_SIZE ) ;
if ( outskb = = NULL ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send duplicate msg \n " ,
link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
}
2014-11-26 06:41:52 +03:00
skb_copy_to_linear_data ( outskb , & tunnel_hdr , INT_H_SIZE ) ;
skb_copy_to_linear_data_offset ( outskb , INT_H_SIZE , skb - > data ,
2007-03-31 18:55:19 +04:00
length ) ;
2014-11-26 06:41:55 +03:00
__tipc_link_xmit_skb ( tunnel , outskb ) ;
2006-01-18 02:38:21 +03:00
if ( ! tipc_link_is_up ( l_ptr ) )
2006-01-02 21:04:38 +03:00
return ;
}
}
/**
 * buf_extract - copy an embedded TIPC message out of an enclosing message
 * @skb: buffer holding the enclosing message
 * @from_pos: byte offset within @skb's data where the embedded message starts
 *
 * The length of the copy is taken from the embedded message's own size field.
 *
 * Returns a newly acquired buffer holding the embedded message, or NULL if
 * no buffer could be acquired. The enclosing buffer is left unchanged.
 */
static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
{
	struct tipc_msg *inner = (struct tipc_msg *)(skb->data + from_pos);
	u32 inner_len = msg_size(inner);
	struct sk_buff *copy = tipc_buf_acquire(inner_len);

	if (!copy)
		return NULL;

	skb_copy_to_linear_data(copy, inner, inner_len);
	return copy;
}
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:10 +04:00
/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet.
* @net: handed on unchanged to link_handle_out_of_seq_msg()
* @l_ptr: link that is given the extracted inner packet
* @t_buf: tunnel packet; the inner packet is extracted at offset INT_H_SIZE
*
* Returns without doing anything when tipc_link_is_up(l_ptr) is false.
* This function never calls kfree_skb() on t_buf.
* Owner node is locked .
*/
2015-01-09 10:27:04 +03:00
static void tipc_link_dup_rcv ( struct net * net , struct tipc_link * l_ptr ,
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:10 +04:00
struct sk_buff * t_buf )
{
struct sk_buff * buf ;
/* Inner packets are only accepted while the link is up */
if ( ! tipc_link_is_up ( l_ptr ) )
return ;
/* Pull the inner packet out from behind the tunnel header */
buf = buf_extract ( t_buf , INT_H_SIZE ) ;
if ( buf = = NULL ) {
pr_warn ( " %sfailed to extract inner dup pkt \n " , link_co_err ) ;
return ;
}
/* Add buffer to deferred queue, if applicable: */
2015-01-09 10:27:04 +03:00
link_handle_out_of_seq_msg ( net , l_ptr , buf ) ;
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:10 +04:00
}
tipc: change reception of tunnelled failover packets
When a link is reset, and there is a redundant link available, all
sender sockets will steer their subsequent traffic through the
remaining link. In order to guarantee preserved packet order and
cardinality during the transition, we tunnel the failing link's send
queue through the remaining link before we allow any sockets to use it.
In this commit, we change the algorithm for receiving failover
("ORIGINAL_MSG") packets in tipc_link_tunnel_rcv(), at the same time
delegating it to a new subfunction, tipc_link_failover_rcv(). Instead
of directly returning an extracted inner packet to the packet reception
loop in tipc_rcv(), we first check if it is a message fragment, in which
case we append it to the reset link's fragment chain. If the fragment
chain is complete, we return the whole chain instead of the individual
buffer, eliminating any need for the tipc_rcv() loop to do reassembly of
tunneled packets.
This change makes it possible to further simplify tipc_link_tunnel_rcv(),
as well as the calling tipc_rcv() loop. We will do that in later
commits. It also makes it possible to identify a single spot in the code
where we can tell that a failover procedure is finished, something that
is useful when we are deleting links after a failover. This will also
be done in a later commit.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:11 +04:00
/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet
* @l_ptr: link the tunnelled packet belongs to; may be freed by this
*         function (see the exit path), so it must not be used afterwards
*         unless the caller knows it was not freed
* @t_buf: tunnel packet; read only through buf_msg() and buf_extract() here
*
* Returns the inner packet to deliver, or NULL when there is none, when
* extraction failed, or when the packet was dropped. For MSG_FRAGMENTER
* packets the returned value is whatever tipc_buf_append() left in buf.
* Owner node is locked .
*/
static struct sk_buff * tipc_link_failover_rcv ( struct tipc_link * l_ptr ,
struct sk_buff * t_buf )
{
struct tipc_msg * t_msg = buf_msg ( t_buf ) ;
struct sk_buff * buf = NULL ;
struct tipc_msg * msg ;
/* A failover packet for a link that still counts as up: reset it first */
if ( tipc_link_is_up ( l_ptr ) )
tipc_link_reset ( l_ptr ) ;
/* First failover packet? */
if ( l_ptr - > exp_msg_count = = START_CHANGEOVER )
l_ptr - > exp_msg_count = msg_msgcnt ( t_msg ) ;
/* Should there be an inner packet? */
if ( l_ptr - > exp_msg_count ) {
/* Counted as consumed even if the extraction below fails */
l_ptr - > exp_msg_count - - ;
buf = buf_extract ( t_buf , INT_H_SIZE ) ;
if ( buf = = NULL ) {
pr_warn ( " %sno inner failover pkt \n " , link_co_err ) ;
goto exit ;
}
msg = buf_msg ( buf ) ;
/* Drop inner packets whose seqno compares as less() than reset_checkpoint */
if ( less ( msg_seqno ( msg ) , l_ptr - > reset_checkpoint ) ) {
kfree_skb ( buf ) ;
buf = NULL ;
goto exit ;
}
/* Fragments go to tipc_buf_append(); buf is passed by address and may be changed by it */
if ( msg_user ( msg ) = = MSG_FRAGMENTER ) {
l_ptr - > stats . recv_fragments + + ;
2014-05-14 13:39:12 +04:00
tipc_buf_append ( & l_ptr - > reasm_buf , & buf ) ;
tipc: change reception of tunnelled failover packets
When a link is reset, and there is a redundant link available, all
sender sockets will steer their subsequent traffic through the
remaining link. In order to guarantee preserved packet order and
cardinality during the transition, we tunnel the failing link's send
queue through the remaining link before we allow any sockets to use it.
In this commit, we change the algorithm for receiving failover
("ORIGINAL_MSG") packets in tipc_link_tunnel_rcv(), at the same time
delegating it to a new subfuncton, tipc_link_failover_rcv(). Instead
of directly returning an extracted inner packet to the packet reception
loop in tipc_rcv(), we first check if it is a message fragment, in which
case we append it to the reset link's fragment chain. If the fragment
chain is complete, we return the whole chain instead of the individual
buffer, eliminating any need for the tipc_rcv() loop to do reassembly of
tunneled packets.
This change makes it possible to further simplify tipc_link_tunnel_rcv(),
as well as the calling tipc_rcv() loop. We will do that in later
commits. It also makes it possible to identify a single spot in the code
where we can tell that a failover procedure is finished, something that
is useful when we are deleting links after a failover. This will also
be done in a later commit.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:11 +04:00
}
}
exit :
tipc: delay delete of link when failover is needed
When a bearer is disabled, all its attached links are deleted.
Ideally, we should do link failover to redundant links on other bearers,
if there are any, in such cases. This would be consistent with current
behavior when a link is reset, but not deleted. However, due to the
complexity involved, and the (wrongly) perceived low demand for this
feature, it was never implemented until now.
We mark the doomed link for deletion with a new flag, but wait until the
failover process is finished before we actually delete it. With the
improved link tunnelling/failover code introduced earlier in this commit
series, it is now easy to identify a spot in the code where the failover
is finished and it is safe to delete the marked link. Moreover, the test
for the flag and the deletion can be done synchronously, and outside the
most time critical data path.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:16 +04:00
/* No more packets expected and the link is flagged LINK_STOPPED:
* detach it from its owner node and free it. l_ptr is dangling after
* this point; only buf is used below.
*/
if ( ( l_ptr - > exp_msg_count = = 0 ) & & ( l_ptr - > flags & LINK_STOPPED ) ) {
tipc_node_detach_link ( l_ptr - > owner , l_ptr ) ;
kfree ( l_ptr ) ;
}
tipc: change reception of tunnelled failover packets
When a link is reset, and there is a redundant link available, all
sender sockets will steer their subsequent traffic through the
remaining link. In order to guarantee preserved packet order and
cardinality during the transition, we tunnel the failing link's send
queue through the remaining link before we allow any sockets to use it.
In this commit, we change the algorithm for receiving failover
("ORIGINAL_MSG") packets in tipc_link_tunnel_rcv(), at the same time
delegating it to a new subfuncton, tipc_link_failover_rcv(). Instead
of directly returning an extracted inner packet to the packet reception
loop in tipc_rcv(), we first check if it is a message fragment, in which
case we append it to the reset link's fragment chain. If the fragment
chain is complete, we return the whole chain instead of the individual
buffer, eliminating any need for the tipc_rcv() loop to do reassembly of
tunneled packets.
This change makes it possible to further simplify tipc_link_tunnel_rcv(),
as well as the calling tipc_rcv() loop. We will do that in later
commits. It also makes it possible to identify a single spot in the code
where we can tell that a failover procedure is finished, something that
is useful when we are deleting links after a failover. This will also
be done in a later commit.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:11 +04:00
return buf ;
}
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:10 +04:00
/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent
2014-01-08 02:02:41 +04:00
* via other link as result of a failover ( ORIGINAL_MSG ) or
* a new active link ( DUPLICATE_MSG ) . Failover packets are
* returned to the active link for delivery upwards .
* Owner node is locked .
* @net: passed on to tipc_link_dup_rcv()
* @n_ptr: node whose links[] array is indexed with the bearer id read
*         from the tunnel packet
* @buf: in: the tunnel packet; out: the packet returned by
*       tipc_link_failover_rcv(), or NULL in every other case
*
* The tunnel packet is freed on every path before returning.
* Returns non-zero when *buf is non-NULL on return, 0 otherwise.
2006-01-02 21:04:38 +03:00
*/
2015-01-09 10:27:04 +03:00
static int tipc_link_tunnel_rcv ( struct net * net , struct tipc_node * n_ptr ,
2014-01-08 02:02:41 +04:00
struct sk_buff * * buf )
2006-01-02 21:04:38 +03:00
{
2014-02-14 02:29:14 +04:00
struct sk_buff * t_buf = * buf ;
struct tipc_link * l_ptr ;
struct tipc_msg * t_msg = buf_msg ( t_buf ) ;
u32 bearer_id = msg_bearer_id ( t_msg ) ;
2006-01-02 21:04:38 +03:00
2014-02-14 02:29:13 +04:00
/* Default result: nothing is handed back to the caller */
* buf = NULL ;
2013-05-06 12:28:41 +04:00
/* The bearer id comes from the packet; reject it before indexing links[] */
if ( bearer_id > = MAX_BEARERS )
goto exit ;
tipc: change reception of tunnelled duplicate packets
When a second link to a destination comes up, some sender sockets will
steer their subsequent traffic through the new link. In order to
guarantee preserved packet order and cardinality for those sockets, we
tunnel a duplicate of the old link's send queue through the new link
before we open it for regular traffic. The last arriving packet copy,
on whichever link, will be dropped at the receiving end based on the
original sequence number, to ensure that only one copy is delivered to
the end receiver.
In this commit, we change the algorithm for receiving DUPLICATE_MSG
packets, at the same time delegating it to a new subfunction,
tipc_link_dup_rcv(). Instead of returning an extracted inner packet to
the packet reception loop in tipc_rcv(), we just add it to the receiving
(new) link's deferred packet queue. The packet will then be processed by
that link when it receives its first non-tunneled packet, i.e., at
latest when the changeover procedure is finished.
Because tipc_link_tunnel_rcv()/tipc_link_dup_rcv() now is consuming all
packets of type DUPLICATE_MSG, the calling tipc_rcv() function can omit
testing for this. This in turn means that the current conditional jump
to the label 'protocol_check' becomes redundant, and we can remove that
label.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-14 02:29:10 +04:00
2014-02-14 02:29:14 +04:00
l_ptr = n_ptr - > links [ bearer_id ] ;
if ( ! l_ptr )
2006-01-02 21:04:38 +03:00
goto exit ;
2014-02-14 02:29:14 +04:00
/* Dispatch on tunnel message type; only the failover path can set *buf */
if ( msg_type ( t_msg ) = = DUPLICATE_MSG )
2015-01-09 10:27:04 +03:00
tipc_link_dup_rcv ( net , l_ptr , t_buf ) ;
2014-02-14 02:29:14 +04:00
else if ( msg_type ( t_msg ) = = ORIGINAL_MSG )
* buf = tipc_link_failover_rcv ( l_ptr , t_buf ) ;
2014-02-14 02:29:13 +04:00
else
pr_warn ( " %sunknown tunnel pkt received \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
exit :
2014-02-14 02:29:14 +04:00
/* The tunnel packet itself is always consumed here */
kfree_skb ( t_buf ) ;
2014-02-14 02:29:13 +04:00
return * buf ! = NULL ;
2006-01-02 21:04:38 +03:00
}
/*
* Bundler functionality :
*/
/* tipc_link_bundle_rcv(): unpack a bundle and dispatch each inner message
* @net: passed on to the receive function chosen for each inner message
* @buf: bundle packet; its header supplies the inner message count and it
*       is freed before returning
*
* Inner messages are extracted one at a time starting at offset INT_H_SIZE.
* The loop stops early at the first extraction failure.
*/
2015-01-09 10:27:05 +03:00
void tipc_link_bundle_rcv ( struct net * net , struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
u32 msgcount = msg_msgcnt ( buf_msg ( buf ) ) ;
u32 pos = INT_H_SIZE ;
struct sk_buff * obuf ;
2014-06-26 05:41:40 +04:00
struct tipc_msg * omsg ;
2006-01-02 21:04:38 +03:00
while ( msgcount - - ) {
obuf = buf_extract ( buf , pos ) ;
if ( obuf = = NULL ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " Link unable to unbundle message(s) \n " ) ;
2006-06-26 10:52:17 +04:00
break ;
2007-04-21 04:09:22 +04:00
}
2014-06-26 05:41:40 +04:00
omsg = buf_msg ( obuf ) ;
/* Advance to the next inner message: aligned size of the one just extracted */
pos + = align ( msg_size ( omsg ) ) ;
2014-10-17 23:25:28 +04:00
/* obuf is handed to exactly one receiver, or freed if its user is not handled */
if ( msg_isdata ( omsg ) ) {
if ( unlikely ( msg_type ( omsg ) = = TIPC_MCAST_MSG ) )
2015-01-09 10:27:05 +03:00
tipc_sk_mcast_rcv ( net , obuf ) ;
2014-10-17 23:25:28 +04:00
else
2015-01-09 10:27:05 +03:00
tipc_sk_rcv ( net , obuf ) ;
2014-10-17 23:25:28 +04:00
} else if ( msg_user ( omsg ) = = CONN_MANAGER ) {
2015-01-09 10:27:05 +03:00
tipc_sk_rcv ( net , obuf ) ;
2014-06-26 05:41:40 +04:00
} else if ( msg_user ( omsg ) = = NAME_DISTRIBUTOR ) {
2015-01-09 10:27:05 +03:00
tipc_named_rcv ( net , obuf ) ;
2014-06-26 05:41:40 +04:00
} else {
pr_warn ( " Illegal bundled msg: %u \n " , msg_user ( omsg ) ) ;
kfree_skb ( obuf ) ;
}
2006-01-02 21:04:38 +03:00
}
2011-11-04 21:24:29 +04:00
/* The bundle wrapper is released whether or not every message was unpacked */
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:00 +03:00
/* link_set_supervision_props(): derive a link's supervision settings
 * from a tolerance value.
 * @l_ptr: link whose tolerance, cont_intv and abort_limit are written
 * @tol: new tolerance; presumably in ms, since a quarter of it is fed
 *       to msecs_to_jiffies() -- TODO confirm
 *
 * A tolerance outside [TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL] leaves the
 * link untouched.
 */
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol)
{
	unsigned long probe_ms;

	if (tol < TIPC_MIN_LINK_TOL || tol > TIPC_MAX_LINK_TOL)
		return;

	/* A quarter of the tolerance, capped at 500 */
	probe_ms = tol / 4;
	if (probe_ms > 500)
		probe_ms = 500;

	l_ptr->tolerance = tol;
	l_ptr->cont_intv = msecs_to_jiffies(probe_ms);
	/* Uses the interval as stored in jiffies, converted back to ms */
	l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4);
}
2011-12-30 05:58:42 +04:00
/* tipc_link_set_queue_limits(): fill in the queue_limit[] table of a link
 * @l_ptr: link whose queue_limit[] entries are written
 * @window: base value; the four limits for data messages from this node
 *          are derived from it
 *
 * Every other entry is set to a fixed value.
 */
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
{
	const u32 third = window / 3;

	/* Data messages from this node, inclusive FIRST_FRAGM */
	l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window;
	l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = third * 4;
	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = third * 5;
	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = third * 6;

	/* Transiting data messages, inclusive FIRST_FRAGM */
	l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300;
	l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600;
	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;

	/* Fixed limits for specific message users */
	l_ptr->queue_limit[CONN_MANAGER] = 1200;
	l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
	l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;

	/* FRAGMENT and LAST_FRAGMENT packets */
	l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
}
2014-02-14 02:29:18 +04:00
/* tipc_link_find_owner - locate owner node of link by link's name
2015-01-09 10:27:05 +03:00
* @ net : the applicable net namespace
2014-02-14 02:29:18 +04:00
* @ name : pointer to link name string
* @ bearer_id : pointer to index in ' node - > links ' array where the link was found .
2007-02-09 17:25:21 +03:00
*
2014-02-14 02:29:18 +04:00
* Returns pointer to node owning the link , or 0 if no matching link is found .
2006-01-02 21:04:38 +03:00
*/
2015-01-09 10:27:05 +03:00
static struct tipc_node * tipc_link_find_owner ( struct net * net ,
const char * link_name ,
2014-02-14 02:29:18 +04:00
unsigned int * bearer_id )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:05 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2013-10-18 09:23:21 +04:00
struct tipc_node * n_ptr ;
2014-12-25 14:05:50 +03:00
struct tipc_node * found_node = NULL ;
2013-10-18 09:23:21 +04:00
int i ;
2006-01-02 21:04:38 +03:00
2014-02-14 02:29:18 +04:00
* bearer_id = 0 ;
2014-03-27 08:54:37 +04:00
rcu_read_lock ( ) ;
2015-01-09 10:27:05 +03:00
list_for_each_entry_rcu ( n_ptr , & tn - > node_list , list ) {
2014-02-15 01:40:44 +04:00
tipc_node_lock ( n_ptr ) ;
2013-10-18 09:23:21 +04:00
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
l_ptr = n_ptr - > links [ i ] ;
2014-02-14 02:29:18 +04:00
if ( l_ptr & & ! strcmp ( l_ptr - > name , link_name ) ) {
* bearer_id = i ;
found_node = n_ptr ;
break ;
}
2013-10-18 09:23:21 +04:00
}
2014-02-15 01:40:44 +04:00
tipc_node_unlock ( n_ptr ) ;
2014-02-14 02:29:18 +04:00
if ( found_node )
break ;
2013-10-18 09:23:21 +04:00
}
2014-03-27 08:54:37 +04:00
rcu_read_unlock ( ) ;
2014-02-14 02:29:18 +04:00
return found_node ;
2006-01-02 21:04:38 +03:00
}
2011-10-18 19:34:29 +04:00
/**
 * link_value_is_valid -- validate proposed link tolerance/priority/window
 *
 * @cmd: value type (TIPC_CMD_SET_LINK_*)
 * @new_value: the new value
 *
 * Returns 1 if value is within range, 0 if not.
 * Any @cmd other than the three TIPC_CMD_SET_LINK_* values checked below
 * yields 0.
 */
static int link_value_is_valid(u16 cmd, u32 new_value)
{
	if (cmd == TIPC_CMD_SET_LINK_TOL)
		return new_value >= TIPC_MIN_LINK_TOL &&
		       new_value <= TIPC_MAX_LINK_TOL;

	/* Priority only has an upper bound */
	if (cmd == TIPC_CMD_SET_LINK_PRI)
		return new_value <= TIPC_MAX_LINK_PRI;

	if (cmd == TIPC_CMD_SET_LINK_WINDOW)
		return new_value >= TIPC_MIN_LINK_WIN &&
		       new_value <= TIPC_MAX_LINK_WIN;

	return 0;
}
/**
* link_cmd_set_value - change priority / tolerance / window for link / bearer / media
2015-01-09 10:27:05 +03:00
* @ net : the applicable net namespace
2012-07-10 14:55:09 +04:00
* @ name : ptr to link , bearer , or media name
* @ new_value : new value of link , bearer , or media setting
* @ cmd : which link , bearer , or media attribute to set ( TIPC_CMD_SET_LINK_ * )
2011-10-18 19:34:29 +04:00
*
tipc: purge tipc_net_lock lock
Now tipc routing hierarchy comprises the structures 'node', 'link'and
'bearer'. The whole hierarchy is protected by a big read/write lock,
tipc_net_lock, to ensure that nothing is added or removed while code
is accessing any of these structures. Obviously the locking policy
makes node, link and bearer components closely bound together so that
their relationship becomes unnecessarily complex. In the worst case,
such locking policy not only has a negative influence on performance,
but also it's prone to lead to deadlock occasionally.
In order o decouple the complex relationship between bearer and node
as well as link, the locking policy is adjusted as follows:
- Bearer level
RTNL lock is used on update side, and RCU is used on read side.
Meanwhile, all bearer instances including broadcast bearer are
saved into bearer_list array.
- Node and link level
All node instances are saved into two tipc_node_list and node_htable
lists. The two lists are protected by node_list_lock on write side,
and they are guarded with RCU lock on read side. All members in node
structure including link instances are protected by node spin lock.
- The relationship between bearer and node
When link accesses bearer, it first needs to find the bearer with
its bearer identity from the bearer_list array. When bearer accesses
node, it can iterate the node_htable hash list with the node
address to find the corresponding node.
In the new locking policy, every component has its private locking
solution and the relationship between bearer and node is very simple,
that is, they can find each other with node address or bearer identity
from node_htable hash list or bearer_list array.
Until now above all changes have been done, so tipc_net_lock can be
removed safely.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:48 +04:00
* Caller must hold RTNL lock to ensure link / bearer / media is not deleted .
2011-10-18 19:34:29 +04:00
*
* Returns 0 if value updated and negative value on error .
*/
2015-01-09 10:27:05 +03:00
static int link_cmd_set_value ( struct net * net , const char * name , u32 new_value ,
u16 cmd )
2011-10-18 19:34:29 +04:00
{
struct tipc_node * node ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2011-10-18 19:34:29 +04:00
struct tipc_bearer * b_ptr ;
2011-12-30 05:19:42 +04:00
struct tipc_media * m_ptr ;
2014-02-14 02:29:18 +04:00
int bearer_id ;
2013-10-18 09:23:20 +04:00
int res = 0 ;
2011-10-18 19:34:29 +04:00
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , name , & bearer_id ) ;
2014-02-14 02:29:18 +04:00
if ( node ) {
2011-10-18 19:34:29 +04:00
tipc_node_lock ( node ) ;
2014-02-14 02:29:18 +04:00
l_ptr = node - > links [ bearer_id ] ;
if ( l_ptr ) {
switch ( cmd ) {
case TIPC_CMD_SET_LINK_TOL :
link_set_supervision_props ( l_ptr , new_value ) ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 0 , 0 ,
new_value , 0 , 0 ) ;
2014-02-14 02:29:18 +04:00
break ;
case TIPC_CMD_SET_LINK_PRI :
l_ptr - > priority = new_value ;
2014-02-18 12:06:46 +04:00
tipc_link_proto_xmit ( l_ptr , STATE_MSG , 0 , 0 ,
0 , new_value , 0 ) ;
2014-02-14 02:29:18 +04:00
break ;
case TIPC_CMD_SET_LINK_WINDOW :
tipc_link_set_queue_limits ( l_ptr , new_value ) ;
break ;
default :
res = - EINVAL ;
break ;
}
2011-10-18 19:34:29 +04:00
}
tipc_node_unlock ( node ) ;
2013-10-18 09:23:20 +04:00
return res ;
2011-10-18 19:34:29 +04:00
}
2015-01-09 10:27:06 +03:00
b_ptr = tipc_bearer_find ( net , name ) ;
2011-10-18 19:34:29 +04:00
if ( b_ptr ) {
switch ( cmd ) {
case TIPC_CMD_SET_LINK_TOL :
b_ptr - > tolerance = new_value ;
2013-10-18 09:23:20 +04:00
break ;
2011-10-18 19:34:29 +04:00
case TIPC_CMD_SET_LINK_PRI :
b_ptr - > priority = new_value ;
2013-10-18 09:23:20 +04:00
break ;
2011-10-18 19:34:29 +04:00
case TIPC_CMD_SET_LINK_WINDOW :
b_ptr - > window = new_value ;
2013-10-18 09:23:20 +04:00
break ;
default :
res = - EINVAL ;
break ;
2011-10-18 19:34:29 +04:00
}
2013-10-18 09:23:20 +04:00
return res ;
2011-10-18 19:34:29 +04:00
}
m_ptr = tipc_media_find ( name ) ;
if ( ! m_ptr )
return - ENODEV ;
switch ( cmd ) {
case TIPC_CMD_SET_LINK_TOL :
m_ptr - > tolerance = new_value ;
2013-10-18 09:23:20 +04:00
break ;
2011-10-18 19:34:29 +04:00
case TIPC_CMD_SET_LINK_PRI :
m_ptr - > priority = new_value ;
2013-10-18 09:23:20 +04:00
break ;
2011-10-18 19:34:29 +04:00
case TIPC_CMD_SET_LINK_WINDOW :
m_ptr - > window = new_value ;
2013-10-18 09:23:20 +04:00
break ;
default :
res = - EINVAL ;
break ;
2011-10-18 19:34:29 +04:00
}
2013-10-18 09:23:20 +04:00
return res ;
2011-10-18 19:34:29 +04:00
}
2015-01-09 10:27:05 +03:00
/* tipc_link_cmd_config(): handle a request to change a link setting
* @net: the applicable net namespace
* @req_tlv_area: request TLV; must pass TLV_CHECK() as TIPC_TLV_LINK_CONFIG
* @req_tlv_space: size argument given to TLV_CHECK() with req_tlv_area
* @cmd: attribute to change; checked together with the value by
*       link_value_is_valid()
*
* Returns the reply built by tipc_cfg_reply_none() on success, or by
* tipc_cfg_reply_error_string() on any failure.
*/
struct sk_buff * tipc_link_cmd_config ( struct net * net , const void * req_tlv_area ,
int req_tlv_space , u16 cmd )
2006-01-02 21:04:38 +03:00
{
struct tipc_link_config * args ;
2007-02-09 17:25:21 +03:00
u32 new_value ;
int res ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_CONFIG ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
args = ( struct tipc_link_config * ) TLV_DATA ( req_tlv_area ) ;
/* The value is carried in network byte order */
new_value = ntohl ( args - > value ) ;
2011-10-18 19:34:29 +04:00
if ( ! link_value_is_valid ( cmd , new_value ) )
return tipc_cfg_reply_error_string (
" cannot change, value invalid " ) ;
2006-01-18 02:38:21 +03:00
/* NOTE(review): args->name is taken straight from the request; confirm
* that TLV_CHECK() guarantees it is NUL-terminated before strcmp() reads it.
*/
/* On the broadcast link only a window change can succeed */
if ( ! strcmp ( args - > name , tipc_bclink_name ) ) {
2006-01-02 21:04:38 +03:00
if ( ( cmd = = TIPC_CMD_SET_LINK_WINDOW ) & &
2015-01-09 10:27:07 +03:00
( tipc_bclink_set_queue_limits ( net , new_value ) = = 0 ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_none ( ) ;
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_NOT_SUPPORTED
2006-01-18 02:38:21 +03:00
" (cannot change setting on broadcast link) " ) ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:05 +03:00
/* Any other name is resolved by link_cmd_set_value() */
res = link_cmd_set_value ( net , args - > name , new_value , cmd ) ;
2006-01-02 21:04:38 +03:00
if ( res )
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( " cannot change link setting " ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_reset_statistics - reset link statistics
* @ l_ptr : pointer to link
*/
2011-12-30 05:58:42 +04:00
static void link_reset_statistics ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
memset ( & l_ptr - > stats , 0 , sizeof ( l_ptr - > stats ) ) ;
l_ptr - > stats . sent_info = l_ptr - > next_out_no ;
l_ptr - > stats . recv_info = l_ptr - > next_in_no ;
}
2015-01-09 10:27:05 +03:00
/* tipc_link_cmd_reset_stats(): handle a request to reset link statistics
* @net: the applicable net namespace
* @req_tlv_area: request TLV; must pass TLV_CHECK() as TIPC_TLV_LINK_NAME
* @req_tlv_space: size argument given to TLV_CHECK() with req_tlv_area
*
* The broadcast link name is handled by tipc_bclink_reset_stats(); any
* other name is looked up with tipc_link_find_owner() and reset under the
* owning node lock.
*
* Returns the reply built by tipc_cfg_reply_none() on success, or by
* tipc_cfg_reply_error_string() on failure.
*/
struct sk_buff * tipc_link_cmd_reset_stats ( struct net * net ,
const void * req_tlv_area ,
int req_tlv_space )
2006-01-02 21:04:38 +03:00
{
char * link_name ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2014-02-14 02:29:18 +04:00
unsigned int bearer_id ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_NAME ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
/* NOTE(review): the name is used as a C string from here on; confirm
* that TLV_CHECK() guarantees NUL termination of the TLV payload.
*/
link_name = ( char * ) TLV_DATA ( req_tlv_area ) ;
2006-01-18 02:38:21 +03:00
if ( ! strcmp ( link_name , tipc_bclink_name ) ) {
2015-01-09 10:27:07 +03:00
if ( tipc_bclink_reset_stats ( net ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( " link not found " ) ;
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , link_name , & bearer_id ) ;
tipc: purge tipc_net_lock lock
Now tipc routing hierarchy comprises the structures 'node', 'link'and
'bearer'. The whole hierarchy is protected by a big read/write lock,
tipc_net_lock, to ensure that nothing is added or removed while code
is accessing any of these structures. Obviously the locking policy
makes node, link and bearer components closely bound together so that
their relationship becomes unnecessarily complex. In the worst case,
such locking policy not only has a negative influence on performance,
but also it's prone to lead to deadlock occasionally.
In order o decouple the complex relationship between bearer and node
as well as link, the locking policy is adjusted as follows:
- Bearer level
RTNL lock is used on update side, and RCU is used on read side.
Meanwhile, all bearer instances including broadcast bearer are
saved into bearer_list array.
- Node and link level
All node instances are saved into two tipc_node_list and node_htable
lists. The two lists are protected by node_list_lock on write side,
and they are guarded with RCU lock on read side. All members in node
structure including link instances are protected by node spin lock.
- The relationship between bearer and node
When link accesses bearer, it first needs to find the bearer with
its bearer identity from the bearer_list array. When bearer accesses
node, it can iterate the node_htable hash list with the node
address to find the corresponding node.
In the new locking policy, every component has its private locking
solution and the relationship between bearer and node is very simple,
that is, they can find each other with node address or bearer identity
from node_htable hash list or bearer_list array.
Until now above all changes have been done, so tipc_net_lock can be
removed safely.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:48 +04:00
if ( ! node )
2014-02-14 02:29:18 +04:00
return tipc_cfg_reply_error_string ( " link not found " ) ;
tipc: purge tipc_net_lock lock
Now tipc routing hierarchy comprises the structures 'node', 'link'and
'bearer'. The whole hierarchy is protected by a big read/write lock,
tipc_net_lock, to ensure that nothing is added or removed while code
is accessing any of these structures. Obviously the locking policy
makes node, link and bearer components closely bound together so that
their relationship becomes unnecessarily complex. In the worst case,
such locking policy not only has a negative influence on performance,
but also it's prone to lead to deadlock occasionally.
In order o decouple the complex relationship between bearer and node
as well as link, the locking policy is adjusted as follows:
- Bearer level
RTNL lock is used on update side, and RCU is used on read side.
Meanwhile, all bearer instances including broadcast bearer are
saved into bearer_list array.
- Node and link level
All node instances are saved into two tipc_node_list and node_htable
lists. The two lists are protected by node_list_lock on write side,
and they are guarded with RCU lock on read side. All members in node
structure including link instances are protected by node spin lock.
- The relationship between bearer and node
When link accesses bearer, it first needs to find the bearer with
its bearer identity from the bearer_list array. When bearer accesses
node, it can iterate the node_htable hash list with the node
address to find the corresponding node.
In the new locking policy, every component has its private locking
solution and the relationship between bearer and node is very simple,
that is, they can find each other with node address or bearer identity
from node_htable hash list or bearer_list array.
Until now above all changes have been done, so tipc_net_lock can be
removed safely.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:48 +04:00
2014-02-15 01:40:44 +04:00
/* The lookup dropped the node lock, so the slot is read again under it */
tipc_node_lock ( node ) ;
2014-02-14 02:29:18 +04:00
l_ptr = node - > links [ bearer_id ] ;
2006-01-02 21:04:38 +03:00
if ( ! l_ptr ) {
2014-02-14 02:29:18 +04:00
tipc_node_unlock ( node ) ;
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( " link not found " ) ;
2006-01-02 21:04:38 +03:00
}
link_reset_statistics ( l_ptr ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
/**
 * percent - convert count to a percentage of total (rounding up or down)
 * @count: numerator
 * @total: denominator
 *
 * Half of @total is added before the division so the result is rounded
 * to the nearest integer rather than truncated.
 *
 * Returns the percentage, or 0 when @total is 0 (instead of dividing by
 * zero). Note that count * 100 is evaluated in u32 and therefore wraps
 * for very large counts.
 */
static u32 percent(u32 count, u32 total)
{
	if (!total)
		return 0;

	return (count * 100 + (total / 2)) / total;
}
/**
 * tipc_link_stats - print link statistics
 * @net: the applicable net namespace
 * @name: link name; the broadcast link name is handed to tipc_bclink_stats()
 * @buf: print buffer area
 * @buf_size: size of print buffer area
 *
 * The owner node is locked while the link is looked up and while its
 * counters are formatted into @buf.
 *
 * Returns length of print buffer data string (or 0 if error, i.e. when no
 * owner node or no link is found for @name)
 */
2015-01-09 10:27:05 +03:00
static int tipc_link_stats ( struct net * net , const char * name , char * buf ,
const u32 buf_size )
2006-01-02 21:04:38 +03:00
{
2012-06-29 08:50:23 +04:00
struct tipc_link * l ;
struct tipc_stats * s ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2006-01-02 21:04:38 +03:00
char * status ;
u32 profile_total = 0 ;
2014-02-14 02:29:18 +04:00
unsigned int bearer_id ;
2012-06-29 08:50:23 +04:00
int ret ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
/* The broadcast link is reported by its own routine. */
if ( ! strcmp ( name , tipc_bclink_name ) )
2015-01-09 10:27:07 +03:00
return tipc_bclink_stats ( net , buf , buf_size ) ;
2006-01-02 21:04:38 +03:00
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , name , & bearer_id ) ;
tipc: purge tipc_net_lock lock
Now tipc routing hierarchy comprises the structures 'node', 'link'and
'bearer'. The whole hierarchy is protected by a big read/write lock,
tipc_net_lock, to ensure that nothing is added or removed while code
is accessing any of these structures. Obviously the locking policy
makes node, link and bearer components closely bound together so that
their relationship becomes unnecessarily complex. In the worst case,
such locking policy not only has a negative influence on performance,
but also it's prone to lead to deadlock occasionally.
In order o decouple the complex relationship between bearer and node
as well as link, the locking policy is adjusted as follows:
- Bearer level
RTNL lock is used on update side, and RCU is used on read side.
Meanwhile, all bearer instances including broadcast bearer are
saved into bearer_list array.
- Node and link level
All node instances are saved into two tipc_node_list and node_htable
lists. The two lists are protected by node_list_lock on write side,
and they are guarded with RCU lock on read side. All members in node
structure including link instances are protected by node spin lock.
- The relationship between bearer and node
When link accesses bearer, it first needs to find the bearer with
its bearer identity from the bearer_list array. When bearer accesses
node, it can iterate the node_htable hash list with the node
address to find the corresponding node.
In the new locking policy, every component has its private locking
solution and the relationship between bearer and node is very simple,
that is, they can find each other with node address or bearer identity
from node_htable hash list or bearer_list array.
Until now above all changes have been done, so tipc_net_lock can be
removed safely.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:48 +04:00
if ( ! node )
2006-01-02 21:04:38 +03:00
return 0 ;
tipc: purge tipc_net_lock lock
Now tipc routing hierarchy comprises the structures 'node', 'link'and
'bearer'. The whole hierarchy is protected by a big read/write lock,
tipc_net_lock, to ensure that nothing is added or removed while code
is accessing any of these structures. Obviously the locking policy
makes node, link and bearer components closely bound together so that
their relationship becomes unnecessarily complex. In the worst case,
such locking policy not only has a negative influence on performance,
but also it's prone to lead to deadlock occasionally.
In order o decouple the complex relationship between bearer and node
as well as link, the locking policy is adjusted as follows:
- Bearer level
RTNL lock is used on update side, and RCU is used on read side.
Meanwhile, all bearer instances including broadcast bearer are
saved into bearer_list array.
- Node and link level
All node instances are saved into two tipc_node_list and node_htable
lists. The two lists are protected by node_list_lock on write side,
and they are guarded with RCU lock on read side. All members in node
structure including link instances are protected by node spin lock.
- The relationship between bearer and node
When link accesses bearer, it first needs to find the bearer with
its bearer identity from the bearer_list array. When bearer accesses
node, it can iterate the node_htable hash list with the node
address to find the corresponding node.
In the new locking policy, every component has its private locking
solution and the relationship between bearer and node is very simple,
that is, they can find each other with node address or bearer identity
from node_htable hash list or bearer_list array.
Until now above all changes have been done, so tipc_net_lock can be
removed safely.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:48 +04:00
2006-01-18 02:38:21 +03:00
/* Node lock is held from here until just before the final return. */
tipc_node_lock ( node ) ;
2014-02-14 02:29:18 +04:00
l = node - > links [ bearer_id ] ;
if ( ! l ) {
tipc_node_unlock ( node ) ;
return 0 ;
}
2012-06-29 08:50:23 +04:00
s = & l - > stats ;
2006-01-02 21:04:38 +03:00
2012-06-29 08:50:23 +04:00
if ( tipc_link_is_active ( l ) )
2006-01-02 21:04:38 +03:00
status = " ACTIVE " ;
2012-06-29 08:50:23 +04:00
else if ( tipc_link_is_up ( l ) )
2006-01-02 21:04:38 +03:00
status = " STANDBY " ;
else
status = " DEFUNCT " ;
2012-06-29 08:50:23 +04:00
/* Each tipc_snprintf() call appends at offset ret; its return value is
 * added to ret as the number of characters produced.
 */
ret = tipc_snprintf ( buf , buf_size , " Link <%s> \n "
" %s MTU:%u Priority:%u Tolerance:%u ms "
" Window:%u packets \n " ,
l - > name , status , l - > max_pkt , l - > priority ,
l - > tolerance , l - > queue_limit [ 0 ] ) ;
/* RX/TX packet counts are printed as the link sequence counter minus the
 * value kept in the stats block - presumably a snapshot taken at the last
 * statistics reset; TODO confirm against link_reset_statistics().
 */
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" RX packets:%u fragments:%u/%u bundles:%u/%u \n " ,
l - > next_in_no - s - > recv_info , s - > recv_fragments ,
s - > recv_fragmented , s - > recv_bundles ,
s - > recv_bundled ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX packets:%u fragments:%u/%u bundles:%u/%u \n " ,
l - > next_out_no - s - > sent_info , s - > sent_fragments ,
s - > sent_fragmented , s - > sent_bundles ,
s - > sent_bundled ) ;
profile_total = s - > msg_length_counts ;
2006-01-02 21:04:38 +03:00
/* profile_total is used as a divisor below, so never let it be zero. */
if ( ! profile_total )
profile_total = 1 ;
2012-06-29 08:50:23 +04:00
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX profile sample:%u packets average:%u octets \n "
" 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
" -16384:%u%% -32768:%u%% -66000:%u%% \n " ,
s - > msg_length_counts ,
s - > msg_lengths_total / profile_total ,
percent ( s - > msg_length_profile [ 0 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 1 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 2 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 3 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 4 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 5 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 6 ] , profile_total ) ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" RX states:%u probes:%u naks:%u defs:%u "
" dups:%u \n " , s - > recv_states , s - > recv_probes ,
s - > recv_nacks , s - > deferred_recv , s - > duplicates ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX states:%u probes:%u naks:%u acks:%u "
" dups:%u \n " , s - > sent_states , s - > sent_probes ,
s - > sent_nacks , s - > sent_acks , s - > retransmitted ) ;
/* The average queue size is reported as 0 when queue_sz_counts is 0. */
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
2012-11-15 07:34:45 +04:00
" Congestion link:%u Send queue "
" max:%u avg:%u \n " , s - > link_congs ,
2012-06-29 08:50:23 +04:00
s - > max_queue_sz , s - > queue_sz_counts ?
( s - > accu_queue_sz / s - > queue_sz_counts ) : 0 ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
2012-06-29 08:50:23 +04:00
return ret ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:05 +03:00
/**
 * tipc_link_cmd_show_stats - build a reply holding the statistics of a link
 * @net: the applicable net namespace
 * @req_tlv_area: request TLV area; must hold a TIPC_TLV_LINK_NAME TLV
 * @req_tlv_space: size of the request TLV area
 *
 * Returns the reply buffer carrying the statistics text as a
 * TIPC_TLV_ULTRA_STRING, an error-string reply when the request TLV is
 * rejected or tipc_link_stats() produces no text, or NULL when the reply
 * buffer cannot be allocated.
 */
struct sk_buff * tipc_link_cmd_show_stats ( struct net * net ,
const void * req_tlv_area ,
int req_tlv_space )
2006-01-02 21:04:38 +03:00
{
struct sk_buff * buf ;
struct tlv_desc * rep_tlv ;
int str_len ;
2012-06-29 08:50:23 +04:00
int pb_len ;
char * pb ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_NAME ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
2012-06-29 08:50:23 +04:00
/* Reply is allocated for a TLV of ULTRA_STRING_MAX_LEN payload bytes. */
buf = tipc_cfg_reply_alloc ( TLV_SPACE ( ULTRA_STRING_MAX_LEN ) ) ;
2006-01-02 21:04:38 +03:00
if ( ! buf )
return NULL ;
rep_tlv = ( struct tlv_desc * ) buf - > data ;
2012-06-29 08:50:23 +04:00
/* The statistics text is written straight into the reply TLV payload. */
pb = TLV_DATA ( rep_tlv ) ;
pb_len = ULTRA_STRING_MAX_LEN ;
2015-01-09 10:27:05 +03:00
str_len = tipc_link_stats ( net , ( char * ) TLV_DATA ( req_tlv_area ) ,
2012-06-29 08:50:23 +04:00
pb , pb_len ) ;
2006-01-02 21:04:38 +03:00
/* A zero length is the error indication of tipc_link_stats(); drop the
 * prepared reply and answer with an error string instead.
 */
if ( ! str_len ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( " link not found " ) ;
2006-01-02 21:04:38 +03:00
}
2012-06-29 08:50:23 +04:00
str_len + = 1 ; /* for "\0" */
2006-01-02 21:04:38 +03:00
/* Extend the buffer by the used TLV space, then fill in the TLV header
 * (NULL data: the payload is already in place).
 */
skb_put ( buf , TLV_SPACE ( str_len ) ) ;
TLV_SET ( rep_tlv , TIPC_TLV_ULTRA_STRING , NULL , str_len ) ;
return buf ;
}
2011-12-30 05:58:42 +04:00
/**
 * link_print - log a one-line description of a link and its state
 * @l_ptr: link to describe
 * @str: caller-supplied text printed at the start of the line
 */
static void link_print ( struct tipc_link * l_ptr , const char * str )
2006-01-02 21:04:38 +03:00
{
2015-01-09 10:27:06 +03:00
struct tipc_net * tn = net_generic ( l_ptr - > owner - > net , tipc_net_id ) ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
struct tipc_bearer * b_ptr ;
/* The bearer is fetched from bearer_list by the link's bearer_id and its
 * name is only used inside this RCU read-side section.
 */
rcu_read_lock ( ) ;
2015-01-09 10:27:06 +03:00
b_ptr = rcu_dereference_rtnl ( tn - > bearer_list [ l_ptr - > bearer_id ] ) ;
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 06:55:46 +04:00
if ( b_ptr )
pr_info ( " %s Link %x<%s>: " , str , l_ptr - > addr , b_ptr - > name ) ;
rcu_read_unlock ( ) ;
2010-12-31 21:59:27 +03:00
2006-01-02 21:04:38 +03:00
/* Finish the line with a state suffix chosen by the link_*() state
 * predicates. NOTE(review): when no bearer was found no pr_info() was
 * issued, yet pr_cont() still runs - confirm this is acceptable.
 */
if ( link_working_unknown ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :WU \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_reset_reset ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :RR \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_reset_unknown ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :RU \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_working_working ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :WW \n " ) ;
else
pr_cont ( " \n " ) ;
2006-01-02 21:04:38 +03:00
}
2014-11-20 12:29:07 +03:00
/* Parse and validate nested (link) properties valid for media, bearer and link
*/
int tipc_nl_parse_link_prop ( struct nlattr * prop , struct nlattr * props [ ] )
{
int err ;
err = nla_parse_nested ( props , TIPC_NLA_PROP_MAX , prop ,
tipc_nl_prop_policy ) ;
if ( err )
return err ;
if ( props [ TIPC_NLA_PROP_PRIO ] ) {
u32 prio ;
prio = nla_get_u32 ( props [ TIPC_NLA_PROP_PRIO ] ) ;
if ( prio > TIPC_MAX_LINK_PRI )
return - EINVAL ;
}
if ( props [ TIPC_NLA_PROP_TOL ] ) {
u32 tol ;
tol = nla_get_u32 ( props [ TIPC_NLA_PROP_TOL ] ) ;
if ( ( tol < TIPC_MIN_LINK_TOL ) | | ( tol > TIPC_MAX_LINK_TOL ) )
return - EINVAL ;
}
if ( props [ TIPC_NLA_PROP_WIN ] ) {
u32 win ;
win = nla_get_u32 ( props [ TIPC_NLA_PROP_WIN ] ) ;
if ( ( win < TIPC_MIN_LINK_WIN ) | | ( win > TIPC_MAX_LINK_WIN ) )
return - EINVAL ;
}
return 0 ;
}
2014-11-20 12:29:12 +03:00
2014-11-20 12:29:13 +03:00
int tipc_nl_link_set ( struct sk_buff * skb , struct genl_info * info )
{
int err ;
int res = 0 ;
int bearer_id ;
char * name ;
struct tipc_link * link ;
struct tipc_node * node ;
struct nlattr * attrs [ TIPC_NLA_LINK_MAX + 1 ] ;
2015-01-09 10:27:05 +03:00
struct net * net = genl_info_net ( info ) ;
2014-11-20 12:29:13 +03:00
if ( ! info - > attrs [ TIPC_NLA_LINK ] )
return - EINVAL ;
err = nla_parse_nested ( attrs , TIPC_NLA_LINK_MAX ,
info - > attrs [ TIPC_NLA_LINK ] ,
tipc_nl_link_policy ) ;
if ( err )
return err ;
if ( ! attrs [ TIPC_NLA_LINK_NAME ] )
return - EINVAL ;
name = nla_data ( attrs [ TIPC_NLA_LINK_NAME ] ) ;
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , name , & bearer_id ) ;
2014-11-20 12:29:13 +03:00
if ( ! node )
return - EINVAL ;
tipc_node_lock ( node ) ;
link = node - > links [ bearer_id ] ;
if ( ! link ) {
res = - EINVAL ;
goto out ;
}
if ( attrs [ TIPC_NLA_LINK_PROP ] ) {
struct nlattr * props [ TIPC_NLA_PROP_MAX + 1 ] ;
err = tipc_nl_parse_link_prop ( attrs [ TIPC_NLA_LINK_PROP ] ,
props ) ;
if ( err ) {
res = err ;
goto out ;
}
if ( props [ TIPC_NLA_PROP_TOL ] ) {
u32 tol ;
tol = nla_get_u32 ( props [ TIPC_NLA_PROP_TOL ] ) ;
link_set_supervision_props ( link , tol ) ;
tipc_link_proto_xmit ( link , STATE_MSG , 0 , 0 , tol , 0 , 0 ) ;
}
if ( props [ TIPC_NLA_PROP_PRIO ] ) {
u32 prio ;
prio = nla_get_u32 ( props [ TIPC_NLA_PROP_PRIO ] ) ;
link - > priority = prio ;
tipc_link_proto_xmit ( link , STATE_MSG , 0 , 0 , 0 , prio , 0 ) ;
}
if ( props [ TIPC_NLA_PROP_WIN ] ) {
u32 win ;
win = nla_get_u32 ( props [ TIPC_NLA_PROP_WIN ] ) ;
tipc_link_set_queue_limits ( link , win ) ;
}
}
out :
tipc_node_unlock ( node ) ;
return res ;
}
2014-11-24 13:10:29 +03:00
static int __tipc_nl_add_stats ( struct sk_buff * skb , struct tipc_stats * s )
2014-11-20 12:29:12 +03:00
{
int i ;
struct nlattr * stats ;
struct nla_map {
u32 key ;
u32 val ;
} ;
struct nla_map map [ ] = {
{ TIPC_NLA_STATS_RX_INFO , s - > recv_info } ,
{ TIPC_NLA_STATS_RX_FRAGMENTS , s - > recv_fragments } ,
{ TIPC_NLA_STATS_RX_FRAGMENTED , s - > recv_fragmented } ,
{ TIPC_NLA_STATS_RX_BUNDLES , s - > recv_bundles } ,
{ TIPC_NLA_STATS_RX_BUNDLED , s - > recv_bundled } ,
{ TIPC_NLA_STATS_TX_INFO , s - > sent_info } ,
{ TIPC_NLA_STATS_TX_FRAGMENTS , s - > sent_fragments } ,
{ TIPC_NLA_STATS_TX_FRAGMENTED , s - > sent_fragmented } ,
{ TIPC_NLA_STATS_TX_BUNDLES , s - > sent_bundles } ,
{ TIPC_NLA_STATS_TX_BUNDLED , s - > sent_bundled } ,
{ TIPC_NLA_STATS_MSG_PROF_TOT , ( s - > msg_length_counts ) ?
s - > msg_length_counts : 1 } ,
{ TIPC_NLA_STATS_MSG_LEN_CNT , s - > msg_length_counts } ,
{ TIPC_NLA_STATS_MSG_LEN_TOT , s - > msg_lengths_total } ,
{ TIPC_NLA_STATS_MSG_LEN_P0 , s - > msg_length_profile [ 0 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P1 , s - > msg_length_profile [ 1 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P2 , s - > msg_length_profile [ 2 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P3 , s - > msg_length_profile [ 3 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P4 , s - > msg_length_profile [ 4 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P5 , s - > msg_length_profile [ 5 ] } ,
{ TIPC_NLA_STATS_MSG_LEN_P6 , s - > msg_length_profile [ 6 ] } ,
{ TIPC_NLA_STATS_RX_STATES , s - > recv_states } ,
{ TIPC_NLA_STATS_RX_PROBES , s - > recv_probes } ,
{ TIPC_NLA_STATS_RX_NACKS , s - > recv_nacks } ,
{ TIPC_NLA_STATS_RX_DEFERRED , s - > deferred_recv } ,
{ TIPC_NLA_STATS_TX_STATES , s - > sent_states } ,
{ TIPC_NLA_STATS_TX_PROBES , s - > sent_probes } ,
{ TIPC_NLA_STATS_TX_NACKS , s - > sent_nacks } ,
{ TIPC_NLA_STATS_TX_ACKS , s - > sent_acks } ,
{ TIPC_NLA_STATS_RETRANSMITTED , s - > retransmitted } ,
{ TIPC_NLA_STATS_DUPLICATES , s - > duplicates } ,
{ TIPC_NLA_STATS_LINK_CONGS , s - > link_congs } ,
{ TIPC_NLA_STATS_MAX_QUEUE , s - > max_queue_sz } ,
{ TIPC_NLA_STATS_AVG_QUEUE , s - > queue_sz_counts ?
( s - > accu_queue_sz / s - > queue_sz_counts ) : 0 }
} ;
stats = nla_nest_start ( skb , TIPC_NLA_LINK_STATS ) ;
if ( ! stats )
return - EMSGSIZE ;
for ( i = 0 ; i < ARRAY_SIZE ( map ) ; i + + )
if ( nla_put_u32 ( skb , map [ i ] . key , map [ i ] . val ) )
goto msg_full ;
nla_nest_end ( skb , stats ) ;
return 0 ;
msg_full :
nla_nest_cancel ( skb , stats ) ;
return - EMSGSIZE ;
}
/* Caller should hold appropriate locks to protect the link */
2015-01-09 10:27:10 +03:00
static int __tipc_nl_add_link ( struct net * net , struct tipc_nl_msg * msg ,
struct tipc_link * link )
2014-11-20 12:29:12 +03:00
{
int err ;
void * hdr ;
struct nlattr * attrs ;
struct nlattr * prop ;
2015-01-09 10:27:10 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-11-20 12:29:12 +03:00
hdr = genlmsg_put ( msg - > skb , msg - > portid , msg - > seq , & tipc_genl_v2_family ,
NLM_F_MULTI , TIPC_NL_LINK_GET ) ;
if ( ! hdr )
return - EMSGSIZE ;
attrs = nla_nest_start ( msg - > skb , TIPC_NLA_LINK ) ;
if ( ! attrs )
goto msg_full ;
if ( nla_put_string ( msg - > skb , TIPC_NLA_LINK_NAME , link - > name ) )
goto attr_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_LINK_DEST ,
2015-01-09 10:27:10 +03:00
tipc_cluster_mask ( tn - > own_addr ) ) )
2014-11-20 12:29:12 +03:00
goto attr_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_LINK_MTU , link - > max_pkt ) )
goto attr_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_LINK_RX , link - > next_in_no ) )
goto attr_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_LINK_TX , link - > next_out_no ) )
goto attr_msg_full ;
if ( tipc_link_is_up ( link ) )
if ( nla_put_flag ( msg - > skb , TIPC_NLA_LINK_UP ) )
goto attr_msg_full ;
if ( tipc_link_is_active ( link ) )
if ( nla_put_flag ( msg - > skb , TIPC_NLA_LINK_ACTIVE ) )
goto attr_msg_full ;
prop = nla_nest_start ( msg - > skb , TIPC_NLA_LINK_PROP ) ;
if ( ! prop )
goto attr_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_PROP_PRIO , link - > priority ) )
goto prop_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_PROP_TOL , link - > tolerance ) )
goto prop_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_PROP_WIN ,
link - > queue_limit [ TIPC_LOW_IMPORTANCE ] ) )
goto prop_msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_PROP_PRIO , link - > priority ) )
goto prop_msg_full ;
nla_nest_end ( msg - > skb , prop ) ;
err = __tipc_nl_add_stats ( msg - > skb , & link - > stats ) ;
if ( err )
goto attr_msg_full ;
nla_nest_end ( msg - > skb , attrs ) ;
genlmsg_end ( msg - > skb , hdr ) ;
return 0 ;
prop_msg_full :
nla_nest_cancel ( msg - > skb , prop ) ;
attr_msg_full :
nla_nest_cancel ( msg - > skb , attrs ) ;
msg_full :
genlmsg_cancel ( msg - > skb , hdr ) ;
return - EMSGSIZE ;
}
/* Caller should hold node lock */
2015-01-09 10:27:10 +03:00
static int __tipc_nl_add_node_links ( struct net * net , struct tipc_nl_msg * msg ,
struct tipc_node * node , u32 * prev_link )
2014-11-20 12:29:12 +03:00
{
u32 i ;
int err ;
for ( i = * prev_link ; i < MAX_BEARERS ; i + + ) {
* prev_link = i ;
if ( ! node - > links [ i ] )
continue ;
2015-01-09 10:27:10 +03:00
err = __tipc_nl_add_link ( net , msg , node - > links [ i ] ) ;
2014-11-20 12:29:12 +03:00
if ( err )
return err ;
}
* prev_link = 0 ;
return 0 ;
}
int tipc_nl_link_dump ( struct sk_buff * skb , struct netlink_callback * cb )
{
2015-01-09 10:27:05 +03:00
struct net * net = sock_net ( skb - > sk ) ;
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-11-20 12:29:12 +03:00
struct tipc_node * node ;
struct tipc_nl_msg msg ;
u32 prev_node = cb - > args [ 0 ] ;
u32 prev_link = cb - > args [ 1 ] ;
int done = cb - > args [ 2 ] ;
int err ;
if ( done )
return 0 ;
msg . skb = skb ;
msg . portid = NETLINK_CB ( cb - > skb ) . portid ;
msg . seq = cb - > nlh - > nlmsg_seq ;
rcu_read_lock ( ) ;
if ( prev_node ) {
2015-01-09 10:27:05 +03:00
node = tipc_node_find ( net , prev_node ) ;
2014-11-20 12:29:12 +03:00
if ( ! node ) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
* handler . Resulting in the last NLMSG_DONE message
* having the NLM_F_DUMP_INTR flag set .
*/
cb - > prev_seq = 1 ;
goto out ;
}
2015-01-09 10:27:05 +03:00
list_for_each_entry_continue_rcu ( node , & tn - > node_list ,
list ) {
2014-11-20 12:29:12 +03:00
tipc_node_lock ( node ) ;
2015-01-09 10:27:10 +03:00
err = __tipc_nl_add_node_links ( net , & msg , node ,
& prev_link ) ;
2014-11-20 12:29:12 +03:00
tipc_node_unlock ( node ) ;
if ( err )
goto out ;
prev_node = node - > addr ;
}
} else {
2015-01-09 10:27:07 +03:00
err = tipc_nl_add_bc_link ( net , & msg ) ;
2014-11-20 12:29:12 +03:00
if ( err )
goto out ;
2015-01-09 10:27:05 +03:00
list_for_each_entry_rcu ( node , & tn - > node_list , list ) {
2014-11-20 12:29:12 +03:00
tipc_node_lock ( node ) ;
2015-01-09 10:27:10 +03:00
err = __tipc_nl_add_node_links ( net , & msg , node ,
& prev_link ) ;
2014-11-20 12:29:12 +03:00
tipc_node_unlock ( node ) ;
if ( err )
goto out ;
prev_node = node - > addr ;
}
}
done = 1 ;
out :
rcu_read_unlock ( ) ;
cb - > args [ 0 ] = prev_node ;
cb - > args [ 1 ] = prev_link ;
cb - > args [ 2 ] = done ;
return skb - > len ;
}
int tipc_nl_link_get ( struct sk_buff * skb , struct genl_info * info )
{
2015-01-09 10:27:05 +03:00
struct net * net = genl_info_net ( info ) ;
2014-11-20 12:29:12 +03:00
struct sk_buff * ans_skb ;
struct tipc_nl_msg msg ;
struct tipc_link * link ;
struct tipc_node * node ;
char * name ;
int bearer_id ;
int err ;
if ( ! info - > attrs [ TIPC_NLA_LINK_NAME ] )
return - EINVAL ;
name = nla_data ( info - > attrs [ TIPC_NLA_LINK_NAME ] ) ;
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , name , & bearer_id ) ;
2014-11-20 12:29:12 +03:00
if ( ! node )
return - EINVAL ;
ans_skb = nlmsg_new ( NLMSG_GOODSIZE , GFP_KERNEL ) ;
if ( ! ans_skb )
return - ENOMEM ;
msg . skb = ans_skb ;
msg . portid = info - > snd_portid ;
msg . seq = info - > snd_seq ;
tipc_node_lock ( node ) ;
link = node - > links [ bearer_id ] ;
if ( ! link ) {
err = - EINVAL ;
goto err_out ;
}
2015-01-09 10:27:10 +03:00
err = __tipc_nl_add_link ( net , & msg , link ) ;
2014-11-20 12:29:12 +03:00
if ( err )
goto err_out ;
tipc_node_unlock ( node ) ;
return genlmsg_reply ( ans_skb , info ) ;
err_out :
tipc_node_unlock ( node ) ;
nlmsg_free ( ans_skb ) ;
return err ;
}
2014-11-20 12:29:14 +03:00
int tipc_nl_link_reset_stats ( struct sk_buff * skb , struct genl_info * info )
{
int err ;
char * link_name ;
unsigned int bearer_id ;
struct tipc_link * link ;
struct tipc_node * node ;
struct nlattr * attrs [ TIPC_NLA_LINK_MAX + 1 ] ;
2015-01-09 10:27:05 +03:00
struct net * net = genl_info_net ( info ) ;
2014-11-20 12:29:14 +03:00
if ( ! info - > attrs [ TIPC_NLA_LINK ] )
return - EINVAL ;
err = nla_parse_nested ( attrs , TIPC_NLA_LINK_MAX ,
info - > attrs [ TIPC_NLA_LINK ] ,
tipc_nl_link_policy ) ;
if ( err )
return err ;
if ( ! attrs [ TIPC_NLA_LINK_NAME ] )
return - EINVAL ;
link_name = nla_data ( attrs [ TIPC_NLA_LINK_NAME ] ) ;
if ( strcmp ( link_name , tipc_bclink_name ) = = 0 ) {
2015-01-09 10:27:07 +03:00
err = tipc_bclink_reset_stats ( net ) ;
2014-11-20 12:29:14 +03:00
if ( err )
return err ;
return 0 ;
}
2015-01-09 10:27:05 +03:00
node = tipc_link_find_owner ( net , link_name , & bearer_id ) ;
2014-11-20 12:29:14 +03:00
if ( ! node )
return - EINVAL ;
tipc_node_lock ( node ) ;
link = node - > links [ bearer_id ] ;
if ( ! link ) {
tipc_node_unlock ( node ) ;
return - EINVAL ;
}
link_reset_statistics ( link ) ;
tipc_node_unlock ( node ) ;
return 0 ;
}