2006-01-02 21:04:38 +03:00
/*
* net / tipc / node . c : TIPC node management routines
2007-02-09 17:25:21 +03:00
*
2015-05-14 17:46:18 +03:00
* Copyright ( c ) 2000 - 2006 , 2012 - 2015 , Ericsson AB
2014-03-27 08:54:36 +04:00
* Copyright ( c ) 2005 - 2006 , 2010 - 2014 , Wind River Systems
2006-01-02 21:04:38 +03:00
* All rights reserved .
*
2006-01-11 15:30:43 +03:00
* Redistribution and use in source and binary forms , with or without
2006-01-02 21:04:38 +03:00
* modification , are permitted provided that the following conditions are met :
*
2006-01-11 15:30:43 +03:00
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
2006-01-02 21:04:38 +03:00
*
2006-01-11 15:30:43 +03:00
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
2006-01-02 21:04:38 +03:00
* POSSIBILITY OF SUCH DAMAGE .
*/
# include "core.h"
2015-02-09 11:50:18 +03:00
# include "link.h"
2006-01-02 21:04:38 +03:00
# include "node.h"
# include "name_distr.h"
2014-08-23 02:09:07 +04:00
# include "socket.h"
2015-05-14 17:46:13 +03:00
# include "bcast.h"
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
# include "discover.h"
2015-10-15 21:52:46 +03:00
2015-07-31 01:24:19 +03:00
/* Node FSM states and events:
*/
/* Node FSM states.
 * Each constant is a distinct hex tag; the digits appear to be mnemonic
 * (e.g. 0xdd for "down/down", 0xaa for "up/up") rather than ordered values.
 */
enum {
	SELF_DOWN_PEER_DOWN	= 0xdd,
	SELF_UP_PEER_UP		= 0xaa,
	SELF_DOWN_PEER_LEAVING	= 0xd1,	/* state assigned by tipc_node_create() */
	SELF_UP_PEER_COMING	= 0xac,
	SELF_COMING_PEER_UP	= 0xca,
	SELF_LEAVING_PEER_DOWN	= 0x1d,
	NODE_FAILINGOVER	= 0xf0,
	NODE_SYNCHING		= 0xcc
};
/* Node FSM events, passed as the 'evt' argument of tipc_node_fsm_evt().
 * As with the states, the hex values look like mnemonic tags; only their
 * distinctness is relied upon as far as this excerpt shows.
 */
enum {
	SELF_ESTABL_CONTACT_EVT	= 0xece,
	SELF_LOST_CONTACT_EVT	= 0x1ce,
	PEER_ESTABL_CONTACT_EVT	= 0x9ece,
	PEER_LOST_CONTACT_EVT	= 0x91ce,
	NODE_FAILOVER_BEGIN_EVT	= 0xfbe,
	NODE_FAILOVER_END_EVT	= 0xfee,
	NODE_SYNCH_BEGIN_EVT	= 0xcbe,
	NODE_SYNCH_END_EVT	= 0xcee
};
2015-07-31 01:24:23 +03:00
/*
 * Forward declarations of static helpers that are referenced before their
 * definitions. tipc_node_delete() and tipc_node_timeout() are defined further
 * down in this excerpt; the remaining ones are presumably defined later in
 * the file (not visible here).
 */
static void __tipc_node_link_down ( struct tipc_node * n , int * bearer_id ,
struct sk_buff_head * xmitq ,
struct tipc_media_addr * * maddr ) ;
static void tipc_node_link_down ( struct tipc_node * n , int bearer_id ,
bool delete ) ;
static void node_lost_contact ( struct tipc_node * n , struct sk_buff_head * inputq ) ;
2008-09-03 10:38:32 +04:00
static void node_established_contact ( struct tipc_node * n_ptr ) ;
2015-03-26 13:10:24 +03:00
static void tipc_node_delete ( struct tipc_node * node ) ;
2015-07-16 23:54:29 +03:00
static void tipc_node_timeout ( unsigned long data ) ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
/* Forward declaration; 'evt' takes one of the node FSM event constants
 * (*_EVT) declared above. The definition is not visible in this excerpt.
 */
static void tipc_node_fsm_evt ( struct tipc_node * n , int evt ) ;
2006-01-02 21:04:38 +03:00
tipc: use message to abort connections when losing contact to node
In the current implementation, each 'struct tipc_node' instance keeps
a linked list of those ports/sockets that are connected to the node
represented by that struct. The purpose of this is to let the node
object know which sockets to alert when it loses contact with its peer
node, i.e., which sockets need to have their connections aborted.
This entails an unwanted direct reference from the node structure
back to the port/socket structure, and a need to grab port_lock
when we have to make an upcall to the port. We want to get rid of
this unnecessary BH entry point into the socket, and also eliminate
its use of port_lock.
In this commit, we instead let the node struct keep list of "connected
socket" structs, which each represents a connected socket, but is
allocated independently by the node at the moment of connection. If
the node loses contact with its peer node, the list is traversed, and
a "connection abort" message is created for each entry in the list. The
message is sent to its respective connected socket using the ordinary
data path, and the receiving socket aborts its connections upon reception
of the message.
This enables us to get rid of the direct reference from 'struct node' to
'struct port', and another unwanted BH access point to the latter.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-23 02:09:08 +04:00
/**
 * struct tipc_sock_conn - record of one socket connection towards a peer node
 * @port: port value supplied to tipc_node_add_conn(); used as the match key
 *        by tipc_node_remove_conn()
 * @peer_port: peer port value supplied to tipc_node_add_conn()
 * @peer_node: address of the peer node (the 'dnode' argument)
 * @list: linkage into the owning node's conn_sks list
 */
struct tipc_sock_conn {
	u32			port;
	u32			peer_port;
	u32			peer_node;
	struct list_head	list;
};
2014-11-20 12:29:17 +03:00
/* Netlink attribute policy for the TIPC_NLA_NODE_* attribute set:
 * the node address is a 32-bit value and the "up" indication is a flag.
 */
static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
	[TIPC_NLA_NODE_UNSPEC]	= { .type = NLA_UNSPEC },
	[TIPC_NLA_NODE_ADDR]	= { .type = NLA_U32 },
	[TIPC_NLA_NODE_UP]	= { .type = NLA_FLAG }
};
2011-11-04 19:54:43 +04:00
/*
* A trivial power - of - two bitmask technique is used for speed , since this
* operation is done for every incoming TIPC packet . The number of hash table
* entries has been chosen so that no hash chain exceeds 8 nodes and will
* usually be much smaller ( typically only a single node ) .
*/
2012-04-23 08:49:13 +04:00
/* Map a node address to its bucket index in the node hash table by masking
 * with (NODE_HTABLE_SIZE - 1); this only yields an even spread if
 * NODE_HTABLE_SIZE is a power of two, as the comment above states.
 */
static unsigned int tipc_hashfn(u32 addr)
{
	unsigned int bucket = addr & (NODE_HTABLE_SIZE - 1);

	return bucket;
}
2015-03-26 13:10:24 +03:00
/* kref release callback handed to kref_put() by tipc_node_put(): recovers the
 * tipc_node that embeds @kref and passes it to tipc_node_delete().
 */
static void tipc_node_kref_release(struct kref *kref)
{
	tipc_node_delete(container_of(kref, struct tipc_node, kref));
}
void tipc_node_put ( struct tipc_node * node )
{
kref_put ( & node - > kref , tipc_node_kref_release ) ;
}
static void tipc_node_get ( struct tipc_node * node )
{
kref_get ( & node - > kref ) ;
}
2011-10-27 23:03:24 +04:00
/*
2011-02-26 02:42:52 +03:00
* tipc_node_find - locate specified node object , if it exists
*/
2015-01-09 10:27:05 +03:00
struct tipc_node * tipc_node_find ( struct net * net , u32 addr )
2011-02-26 02:42:52 +03:00
{
2015-01-09 10:27:05 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2011-02-26 02:42:52 +03:00
struct tipc_node * node ;
2015-01-09 10:27:10 +03:00
if ( unlikely ( ! in_own_cluster_exact ( net , addr ) ) )
2011-02-26 02:42:52 +03:00
return NULL ;
2014-03-27 08:54:37 +04:00
rcu_read_lock ( ) ;
2015-01-09 10:27:05 +03:00
hlist_for_each_entry_rcu ( node , & tn - > node_htable [ tipc_hashfn ( addr ) ] ,
hash ) {
2014-03-27 08:54:36 +04:00
if ( node - > addr = = addr ) {
2015-03-26 13:10:24 +03:00
tipc_node_get ( node ) ;
2014-03-27 08:54:37 +04:00
rcu_read_unlock ( ) ;
2011-02-26 02:42:52 +03:00
return node ;
2014-03-27 08:54:36 +04:00
}
2011-02-26 02:42:52 +03:00
}
2014-03-27 08:54:37 +04:00
rcu_read_unlock ( ) ;
2011-02-26 02:42:52 +03:00
return NULL ;
}
2015-07-31 01:24:22 +03:00
/**
 * tipc_node_create - look up a node by address, creating it if absent
 * @net: network namespace the node belongs to
 * @addr: address of the node
 * @capabilities: capability bits stored in a newly created node
 *
 * Runs under tn->node_list_lock. If tipc_node_find() already knows @addr the
 * existing node is returned (with the reference taken by the lookup, and
 * @capabilities ignored). Otherwise a zeroed node is allocated with
 * GFP_ATOMIC and fully initialised, and only then linked into the RCU
 * protected hash table and the address-sorted node list, so that concurrent
 * RCU readers can never observe a partially initialised node.
 *
 * A new node starts with two references: the one from kref_init() and one
 * extra from tipc_node_get().
 *
 * Return: the node, or NULL if allocation failed.
 */
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_node *n_ptr, *temp_node;

	spin_lock_bh(&tn->node_list_lock);

	n_ptr = tipc_node_find(net, addr);
	if (n_ptr)
		goto exit;

	n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
	if (!n_ptr) {
		/* NOTE(review): literal kept exactly as found in the source;
		 * the padding around the text and "\n" looks like an
		 * extraction artifact - confirm against the real file.
		 */
		pr_warn(" Node creation failed, no memory \n ");
		goto exit;
	}

	/* Complete all initialisation before the node becomes visible */
	n_ptr->addr = addr;
	n_ptr->net = net;
	n_ptr->capabilities = capabilities;
	kref_init(&n_ptr->kref);
	spin_lock_init(&n_ptr->lock);
	INIT_HLIST_NODE(&n_ptr->hash);
	INIT_LIST_HEAD(&n_ptr->list);
	INIT_LIST_HEAD(&n_ptr->publ_list);
	INIT_LIST_HEAD(&n_ptr->conn_sks);
	skb_queue_head_init(&n_ptr->bclink.namedq);
	__skb_queue_head_init(&n_ptr->bclink.deferdq);
	n_ptr->state = SELF_DOWN_PEER_LEAVING;
	n_ptr->signature = INVALID_NODE_SIG;
	n_ptr->active_links[0] = INVALID_BEARER_ID;
	n_ptr->active_links[1] = INVALID_BEARER_ID;
	tipc_node_get(n_ptr);
	setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
	n_ptr->keepalive_intv = U32_MAX;

	/* Publish: hash table first, then the node list sorted by address.
	 * If no larger address exists the loop cursor ends up at the list
	 * head, so the node is appended at the tail.
	 */
	hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
		if (n_ptr->addr < temp_node->addr)
			break;
	}
	list_add_tail_rcu(&n_ptr->list, &temp_node->list);
exit:
	spin_unlock_bh(&tn->node_list_lock);
	return n_ptr;
}
2015-07-16 23:54:29 +03:00
static void tipc_node_calculate_timer ( struct tipc_node * n , struct tipc_link * l )
{
unsigned long tol = l - > tolerance ;
unsigned long intv = ( ( tol / 4 ) > 500 ) ? 500 : tol / 4 ;
unsigned long keepalive_intv = msecs_to_jiffies ( intv ) ;
/* Link with lowest tolerance determines timer interval */
if ( keepalive_intv < n - > keepalive_intv )
n - > keepalive_intv = keepalive_intv ;
/* Ensure link's abort limit corresponds to current interval */
l - > abort_limit = l - > tolerance / jiffies_to_msecs ( n - > keepalive_intv ) ;
}
2015-03-26 13:10:24 +03:00
static void tipc_node_delete ( struct tipc_node * node )
2006-01-02 21:04:38 +03:00
{
2015-03-26 13:10:24 +03:00
list_del_rcu ( & node - > list ) ;
hlist_del_rcu ( & node - > hash ) ;
kfree_rcu ( node , rcu ) ;
2006-01-02 21:04:38 +03:00
}
2015-01-09 10:27:05 +03:00
void tipc_node_stop ( struct net * net )
2014-03-27 08:54:36 +04:00
{
2015-01-09 10:27:05 +03:00
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-03-27 08:54:36 +04:00
struct tipc_node * node , * t_node ;
2015-01-09 10:27:05 +03:00
spin_lock_bh ( & tn - > node_list_lock ) ;
2015-07-16 23:54:29 +03:00
list_for_each_entry_safe ( node , t_node , & tn - > node_list , list ) {
if ( del_timer ( & node - > timer ) )
tipc_node_put ( node ) ;
2015-03-26 13:10:24 +03:00
tipc_node_put ( node ) ;
2015-07-16 23:54:29 +03:00
}
2015-01-09 10:27:05 +03:00
spin_unlock_bh ( & tn - > node_list_lock ) ;
2014-03-27 08:54:36 +04:00
}
2015-01-09 10:27:05 +03:00
/**
 * tipc_node_add_conn - register a socket connection with its peer node
 * @net: network namespace
 * @dnode: address of the peer node
 * @port: port value recorded for the connection
 * @peer_port: peer port value recorded for the connection
 *
 * Nothing is recorded when @dnode is the own node. Otherwise a
 * tipc_sock_conn entry is allocated with GFP_ATOMIC and appended to the peer
 * node's conn_sks list under the node lock. The reference obtained from
 * tipc_node_find() is dropped before returning.
 *
 * Return: 0 on success (or for the own node), -EHOSTUNREACH if the peer node
 * is unknown or the entry could not be allocated.
 */
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
{
	struct tipc_sock_conn *entry;
	struct tipc_node *peer;
	int rc;

	if (in_own_node(net, dnode))
		return 0;

	peer = tipc_node_find(net, dnode);
	if (!peer) {
		/* NOTE(review): literal kept exactly as found in the source;
		 * the padding around the text and "\n" looks like an
		 * extraction artifact - confirm against the real file.
		 */
		pr_warn(" Connecting sock to node 0x%x failed \n ", dnode);
		return -EHOSTUNREACH;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry) {
		entry->peer_node = dnode;
		entry->port = port;
		entry->peer_port = peer_port;

		tipc_node_lock(peer);
		list_add_tail(&entry->list, &peer->conn_sks);
		tipc_node_unlock(peer);
		rc = 0;
	} else {
		rc = -EHOSTUNREACH;
	}

	tipc_node_put(peer);
	return rc;
}
2015-01-09 10:27:05 +03:00
/**
 * tipc_node_remove_conn - drop the connection records of a port
 * @net: network namespace
 * @dnode: address of the peer node
 * @port: port whose entries are removed
 *
 * Does nothing for the own node or for an unknown peer node. Otherwise every
 * entry in the peer node's conn_sks list whose port equals @port is unlinked
 * and freed under the node lock, and the reference obtained from
 * tipc_node_find() is dropped.
 */
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
{
	struct tipc_sock_conn *entry, *next;
	struct tipc_node *peer;

	if (in_own_node(net, dnode))
		return;

	peer = tipc_node_find(net, dnode);
	if (!peer)
		return;

	tipc_node_lock(peer);
	list_for_each_entry_safe(entry, next, &peer->conn_sks, list) {
		if (entry->port == port) {
			list_del(&entry->list);
			kfree(entry);
		}
	}
	tipc_node_unlock(peer);

	tipc_node_put(peer);
}
2015-07-16 23:54:29 +03:00
/* tipc_node_timeout - handle expiration of node timer
*/
static void tipc_node_timeout ( unsigned long data )
{
struct tipc_node * n = ( struct tipc_node * ) data ;
2015-07-31 01:24:23 +03:00
struct tipc_link_entry * le ;
2015-07-16 23:54:29 +03:00
struct sk_buff_head xmitq ;
int bearer_id ;
int rc = 0 ;
__skb_queue_head_init ( & xmitq ) ;
for ( bearer_id = 0 ; bearer_id < MAX_BEARERS ; bearer_id + + ) {
tipc_node_lock ( n ) ;
2015-07-31 01:24:23 +03:00
le = & n - > links [ bearer_id ] ;
if ( le - > link ) {
2015-07-16 23:54:29 +03:00
/* Link tolerance may change asynchronously: */
2015-07-31 01:24:23 +03:00
tipc_node_calculate_timer ( n , le - > link ) ;
rc = tipc_link_timeout ( le - > link , & xmitq ) ;
2015-07-16 23:54:29 +03:00
}
tipc_node_unlock ( n ) ;
2015-07-31 01:24:23 +03:00
tipc_bearer_xmit ( n - > net , bearer_id , & xmitq , & le - > maddr ) ;
if ( rc & TIPC_LINK_DOWN_EVT )
tipc_node_link_down ( n , bearer_id , false ) ;
2015-07-16 23:54:29 +03:00
}
if ( ! mod_timer ( & n - > timer , jiffies + n - > keepalive_intv ) )
tipc_node_get ( n ) ;
tipc_node_put ( n ) ;
}
2006-01-02 21:04:38 +03:00
/**
2015-07-31 01:24:23 +03:00
* __tipc_node_link_up - handle addition of link
* Node lock must be held by caller
2006-01-02 21:04:38 +03:00
* Link becomes active ( alone or shared ) or standby , depending on its priority .
*/
2015-07-31 01:24:23 +03:00
static void __tipc_node_link_up ( struct tipc_node * n , int bearer_id ,
struct sk_buff_head * xmitq )
2006-01-02 21:04:38 +03:00
{
2015-07-16 23:54:22 +03:00
int * slot0 = & n - > active_links [ 0 ] ;
int * slot1 = & n - > active_links [ 1 ] ;
2015-07-31 01:24:19 +03:00
struct tipc_link * ol = node_active_link ( n , 0 ) ;
struct tipc_link * nl = n - > links [ bearer_id ] . link ;
2015-07-16 23:54:19 +03:00
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-15 21:52:44 +03:00
if ( ! nl )
return ;
tipc_link_fsm_evt ( nl , LINK_ESTABLISH_EVT ) ;
if ( ! tipc_link_is_up ( nl ) )
2015-07-31 01:24:23 +03:00
return ;
2015-07-16 23:54:19 +03:00
n - > working_links + + ;
n - > action_flags | = TIPC_NOTIFY_LINK_UP ;
2015-07-31 01:24:19 +03:00
n - > link_id = nl - > peer_bearer_id < < 16 | bearer_id ;
/* Leave room for tunnel header when returning 'mtu' to users: */
n - > links [ bearer_id ] . mtu = nl - > mtu - INT_H_SIZE ;
2014-10-20 10:44:25 +04:00
2015-07-31 01:24:15 +03:00
tipc_bearer_add_dest ( n - > net , bearer_id , n - > addr ) ;
2015-01-22 19:10:31 +03:00
pr_debug ( " Established link <%s> on network plane %c \n " ,
2015-07-31 01:24:19 +03:00
nl - > name , nl - > net_plane ) ;
2007-02-09 17:25:21 +03:00
2015-07-31 01:24:19 +03:00
/* First link? => give it both slots */
if ( ! ol ) {
2015-07-16 23:54:22 +03:00
* slot0 = bearer_id ;
* slot1 = bearer_id ;
2015-07-31 01:24:20 +03:00
tipc_link_build_bcast_sync_msg ( nl , xmitq ) ;
2015-07-16 23:54:19 +03:00
node_established_contact ( n ) ;
return ;
2006-01-02 21:04:38 +03:00
}
2015-07-16 23:54:22 +03:00
2015-07-31 01:24:19 +03:00
/* Second link => redistribute slots */
if ( nl - > priority > ol - > priority ) {
pr_debug ( " Old link <%s> becomes standby \n " , ol - > name ) ;
2015-07-16 23:54:22 +03:00
* slot0 = bearer_id ;
2015-07-31 01:24:19 +03:00
* slot1 = bearer_id ;
} else if ( nl - > priority = = ol - > priority ) {
* slot0 = bearer_id ;
} else {
pr_debug ( " New link <%s> is standby \n " , nl - > name ) ;
2006-01-02 21:04:38 +03:00
}
2015-07-31 01:24:19 +03:00
/* Prepare synchronization with first link */
tipc_link_tnl_prepare ( ol , nl , SYNCH_MSG , xmitq ) ;
2006-01-02 21:04:38 +03:00
}
/**
 * tipc_node_link_up - handle addition of link
 * @n: node the link belongs to
 * @bearer_id: index of the link's entry in n->links[]
 * @xmitq: queue receiving any messages built while bringing the link up
 *
 * Locked wrapper: takes the node lock around __tipc_node_link_up().
 * Link becomes active (alone or shared) or standby, depending on its priority.
 */
static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
			      struct sk_buff_head *xmitq)
{
	tipc_node_lock(n);

	__tipc_node_link_up(n, bearer_id, xmitq);

	tipc_node_unlock(n);
}
/**
* __tipc_node_link_down - handle loss of link
*/
static void __tipc_node_link_down ( struct tipc_node * n , int * bearer_id ,
struct sk_buff_head * xmitq ,
struct tipc_media_addr * * maddr )
{
struct tipc_link_entry * le = & n - > links [ * bearer_id ] ;
2015-07-16 23:54:22 +03:00
int * slot0 = & n - > active_links [ 0 ] ;
int * slot1 = & n - > active_links [ 1 ] ;
int i , highest = 0 ;
2015-07-31 01:24:19 +03:00
struct tipc_link * l , * _l , * tnl ;
2006-01-02 21:04:38 +03:00
2015-07-31 01:24:23 +03:00
l = n - > links [ * bearer_id ] . link ;
2015-07-31 01:24:21 +03:00
if ( ! l | | tipc_link_is_reset ( l ) )
2015-07-31 01:24:17 +03:00
return ;
2015-07-16 23:54:19 +03:00
n - > working_links - - ;
n - > action_flags | = TIPC_NOTIFY_LINK_DOWN ;
2015-07-31 01:24:23 +03:00
n - > link_id = l - > peer_bearer_id < < 16 | * bearer_id ;
2006-06-26 10:52:50 +04:00
2015-07-31 01:24:23 +03:00
tipc_bearer_remove_dest ( n - > net , * bearer_id , n - > addr ) ;
2015-07-31 01:24:17 +03:00
2015-01-22 19:10:31 +03:00
pr_debug ( " Lost link <%s> on network plane %c \n " ,
2015-07-16 23:54:19 +03:00
l - > name , l - > net_plane ) ;
2014-06-26 05:41:33 +04:00
2015-07-16 23:54:22 +03:00
/* Select new active link if any available */
* slot0 = INVALID_BEARER_ID ;
* slot1 = INVALID_BEARER_ID ;
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
_l = n - > links [ i ] . link ;
if ( ! _l | | ! tipc_link_is_up ( _l ) )
continue ;
2015-07-31 01:24:17 +03:00
if ( _l = = l )
continue ;
2015-07-16 23:54:22 +03:00
if ( _l - > priority < highest )
continue ;
if ( _l - > priority > highest ) {
highest = _l - > priority ;
* slot0 = i ;
* slot1 = i ;
continue ;
}
* slot1 = i ;
}
2015-07-31 01:24:17 +03:00
2015-07-31 01:24:19 +03:00
if ( ! tipc_node_is_up ( n ) ) {
2015-10-15 21:52:46 +03:00
if ( tipc_link_peer_is_down ( l ) )
tipc_node_fsm_evt ( n , PEER_LOST_CONTACT_EVT ) ;
tipc_node_fsm_evt ( n , SELF_LOST_CONTACT_EVT ) ;
tipc_link_fsm_evt ( l , LINK_RESET_EVT ) ;
2015-07-31 01:24:19 +03:00
tipc_link_reset ( l ) ;
2015-10-15 21:52:45 +03:00
tipc_link_build_reset_msg ( l , xmitq ) ;
* maddr = & n - > links [ * bearer_id ] . maddr ;
2015-07-31 01:24:23 +03:00
node_lost_contact ( n , & le - > inputq ) ;
2015-07-31 01:24:19 +03:00
return ;
}
2015-07-31 01:24:17 +03:00
2015-07-31 01:24:19 +03:00
/* There is still a working link => initiate failover */
tnl = node_active_link ( n , 0 ) ;
2015-08-20 09:12:55 +03:00
tipc_link_fsm_evt ( tnl , LINK_SYNCH_END_EVT ) ;
tipc_node_fsm_evt ( n , NODE_SYNCH_END_EVT ) ;
2015-07-31 01:24:19 +03:00
n - > sync_point = tnl - > rcv_nxt + ( U16_MAX / 2 - 1 ) ;
2015-07-31 01:24:23 +03:00
tipc_link_tnl_prepare ( l , tnl , FAILOVER_MSG , xmitq ) ;
2015-07-31 01:24:17 +03:00
tipc_link_reset ( l ) ;
2015-10-15 21:52:46 +03:00
tipc_link_fsm_evt ( l , LINK_RESET_EVT ) ;
2015-07-31 01:24:21 +03:00
tipc_link_fsm_evt ( l , LINK_FAILOVER_BEGIN_EVT ) ;
2015-07-31 01:24:23 +03:00
tipc_node_fsm_evt ( n , NODE_FAILOVER_BEGIN_EVT ) ;
* maddr = & n - > links [ tnl - > bearer_id ] . maddr ;
* bearer_id = tnl - > bearer_id ;
}
/**
 * tipc_node_link_down - take down, and optionally delete, the link on a bearer
 * @n: node the link belongs to
 * @bearer_id: index of the link's entry in n->links[]
 * @delete: if true, free the link and clear its entry once it is taken down
 *
 * Link establishment is split over two lock contexts: detecting that the
 * link may be established, and adding it to the node's send slots in
 * tipc_node_link_up(). A link that is still in the establishing state has
 * therefore never been added to the slots, so it must not go through
 * __tipc_node_link_down(), which would decrement counters for a link that
 * was never counted. Such a link is only sent back to the reset state,
 * which defuses the tipc_node_link_up() call that is still pending.
 *
 * Messages built while taking the link down are transmitted, and the
 * link entry's input queue is delivered, after the node lock is released.
 */
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
	struct tipc_link_entry *le = &n->links[bearer_id];
	struct tipc_link *l = le->link;
	/* Stays NULL unless __tipc_node_link_down() queues messages; it
	 * returns early for a reset link and is not called at all for an
	 * establishing one, so the pointer must not be left uninitialized.
	 */
	struct tipc_media_addr *maddr = NULL;
	struct sk_buff_head xmitq;

	if (!l)
		return;

	__skb_queue_head_init(&xmitq);

	tipc_node_lock(n);
	if (!tipc_link_is_establishing(l)) {
		/* May redirect bearer_id/maddr to the failover link */
		__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
		if (delete) {
			kfree(l);
			le->link = NULL;
			n->link_cnt--;
		}
	} else {
		/* Defuse pending tipc_node_link_up()
		 * NOTE(review): with 'delete' set, an establishing link is
		 * only reset here, not freed - confirm this is intended.
		 */
		tipc_link_fsm_evt(l, LINK_RESET_EVT);
	}
	tipc_node_unlock(n);

	/* maddr is only valid when something was queued for transmission */
	if (!skb_queue_empty(&xmitq))
		tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
	tipc_sk_rcv(n->net, &le->inputq);
}
2015-07-16 23:54:19 +03:00
bool tipc_node_is_up ( struct tipc_node * n )
2006-01-02 21:04:38 +03:00
{
2015-07-16 23:54:22 +03:00
return n - > active_links [ 0 ] ! = INVALID_BEARER_ID ;
2006-01-02 21:04:38 +03:00
}
2015-07-31 01:24:22 +03:00
/**
 * tipc_node_check_dest - validate a discovery request from a peer node
 * @net: network namespace
 * @onode: address of the requesting node
 * @b: bearer the request arrived on; b->identity selects the link entry
 * @capabilities: peer capabilities, passed on to tipc_node_create()
 * @signature: signature carried by the request
 * @maddr: media address the request came from
 * @respond: out: set to true if the request should be answered
 * @dupl_addr: out: set to true if the request looks like a duplicate
 *             node address and should be ignored
 *
 * Compares the request's signature and media address with what is stored
 * for the node and with the state of the link on this bearer, and decides
 * whether to respond, accept a new signature and/or address, and create a
 * link endpoint. Unless everything matched and the link is up, an existing
 * link that is not already reset is taken down after the lock is released.
 */
void tipc_node_check_dest(struct net *net, u32 onode,
			  struct tipc_bearer *b,
			  u16 capabilities, u32 signature,
			  struct tipc_media_addr *maddr,
			  bool *respond, bool *dupl_addr)
{
	struct tipc_node *n;
	struct tipc_link *l;
	struct tipc_link_entry *le;
	bool addr_match = false;
	bool sign_match = false;
	bool link_up = false;
	bool accept_addr = false;
	bool reset = true;

	*dupl_addr = false;
	*respond = false;

	n = tipc_node_create(net, onode, capabilities);
	if (!n)
		return;

	tipc_node_lock(n);

	le = &n->links[b->identity];

	/* Prepare to validate requesting node's signature and media address */
	l = le->link;
	link_up = l && tipc_link_is_up(l);
	addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
	sign_match = (signature == n->signature);

	/* These three flags give us eight permutations: */

	if (sign_match && addr_match && link_up) {
		/* All is fine. Do nothing. */
		reset = false;
	} else if (sign_match && addr_match && !link_up) {
		/* Respond. The link will come up in due time */
		*respond = true;
	} else if (sign_match && !addr_match && link_up) {
		/* Peer has changed i/f address without rebooting.
		 * If so, the link will reset soon, and the next
		 * discovery will be accepted. So we can ignore it.
		 * It may also be a cloned or malicious peer having
		 * chosen the same node address and signature as an
		 * existing one.
		 * Ignore requests until the link goes down, if ever.
		 */
		*dupl_addr = true;
	} else if (sign_match && !addr_match && !link_up) {
		/* Peer link has changed i/f address without rebooting.
		 * It may also be a cloned or malicious peer; we can't
		 * distinguish between the two.
		 * The signature is correct, so we must accept.
		 */
		accept_addr = true;
		*respond = true;
	} else if (!sign_match && addr_match && link_up) {
		/* Peer node rebooted. Two possibilities:
		 *  - Delayed re-discovery; this link endpoint has already
		 *    reset and re-established contact with the peer, before
		 *    receiving a discovery message from that node.
		 *    (The peer happened to receive one from this node first).
		 *  - The peer came back so fast that our side has not
		 *    discovered it yet. Probing from this side will soon
		 *    reset the link, since there can be no working link
		 *    endpoint at the peer end, and the link will re-establish.
		 *  Accept the signature, since it comes from a known peer.
		 */
		n->signature = signature;
	} else if (!sign_match && addr_match && !link_up) {
		/*  The peer node has rebooted.
		 *  Accept signature, since it is a known peer.
		 */
		n->signature = signature;
		*respond = true;
	} else if (!sign_match && !addr_match && link_up) {
		/* Peer rebooted with new address, or a new/duplicate peer.
		 * Ignore until the link goes down, if ever.
		 */
		*dupl_addr = true;
	} else if (!sign_match && !addr_match && !link_up) {
		/* Peer rebooted with new address, or it is a new peer.
		 * Accept signature and address.
		 */
		n->signature = signature;
		accept_addr = true;
		*respond = true;
	}

	if (!accept_addr)
		goto exit;

	/* Now create new link if not already existing */
	if (!l) {
		if (n->link_cnt == 2) {
			pr_warn("Cannot establish 3rd link to %x\n", n->addr);
			goto exit;
		}
		if (!tipc_link_create(n, b, mod(tipc_net(net)->random),
				      tipc_own_addr(net), onode, &le->maddr,
				      &le->inputq, &n->bclink.namedq, &l)) {
			*respond = false;
			goto exit;
		}
		tipc_link_reset(l);
		tipc_link_fsm_evt(l, LINK_RESET_EVT);
		/* A link created while the node is failing over must not
		 * be allowed to establish until the failover is finished,
		 * so it is put directly into the failover state.
		 */
		if (n->state == NODE_FAILINGOVER)
			tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
		le->link = l;
		n->link_cnt++;
		tipc_node_calculate_timer(n, l);
		/* First link: start the node timer; a node reference is
		 * taken when mod_timer() returns zero.
		 */
		if (n->link_cnt == 1 &&
		    !mod_timer(&n->timer, jiffies + n->keepalive_intv))
			tipc_node_get(n);
	}
	memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
	tipc_node_unlock(n);
	/* 'l' is still NULL when no link existed and none was created
	 * (third-link refusal or failed tipc_link_create()), so it must be
	 * checked before its state is inspected.
	 */
	if (reset && l && !tipc_link_is_reset(l))
		tipc_node_link_down(n, b->identity, false);
	tipc_node_put(n);
}
2015-07-31 01:24:16 +03:00
void tipc_node_delete_links ( struct net * net , int bearer_id )
{
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
struct tipc_node * n ;
rcu_read_lock ( ) ;
list_for_each_entry_rcu ( n , & tn - > node_list , list ) {
2015-07-31 01:24:23 +03:00
tipc_node_link_down ( n , bearer_id , true ) ;
2015-07-31 01:24:16 +03:00
}
rcu_read_unlock ( ) ;
}
static void tipc_node_reset_links ( struct tipc_node * n )
{
char addr_string [ 16 ] ;
2015-07-31 01:24:23 +03:00
int i ;
2015-07-31 01:24:16 +03:00
pr_warn ( " Resetting all links to %s \n " ,
tipc_addr_string_fill ( addr_string , n - > addr ) ) ;
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
2015-07-31 01:24:23 +03:00
tipc_node_link_down ( n , i , false ) ;
2015-07-31 01:24:16 +03:00
}
}
2015-07-16 23:54:30 +03:00
/* tipc_node_fsm_evt - node finite state machine
* Determines when contact is allowed with peer node
*/
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
static void tipc_node_fsm_evt ( struct tipc_node * n , int evt )
2015-07-16 23:54:30 +03:00
{
int state = n - > state ;
switch ( state ) {
case SELF_DOWN_PEER_DOWN :
switch ( evt ) {
case SELF_ESTABL_CONTACT_EVT :
state = SELF_UP_PEER_COMING ;
break ;
case PEER_ESTABL_CONTACT_EVT :
state = SELF_COMING_PEER_UP ;
break ;
case SELF_LOST_CONTACT_EVT :
case PEER_LOST_CONTACT_EVT :
break ;
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_SYNCH_BEGIN_EVT :
case NODE_FAILOVER_BEGIN_EVT :
case NODE_FAILOVER_END_EVT :
2015-07-16 23:54:30 +03:00
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
case SELF_UP_PEER_UP :
switch ( evt ) {
case SELF_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_LEAVING ;
break ;
case PEER_LOST_CONTACT_EVT :
state = SELF_LEAVING_PEER_DOWN ;
break ;
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_BEGIN_EVT :
state = NODE_SYNCHING ;
break ;
case NODE_FAILOVER_BEGIN_EVT :
state = NODE_FAILINGOVER ;
break ;
2015-07-16 23:54:30 +03:00
case SELF_ESTABL_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_FAILOVER_END_EVT :
2015-07-16 23:54:30 +03:00
break ;
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
case SELF_DOWN_PEER_LEAVING :
switch ( evt ) {
case PEER_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_DOWN ;
break ;
case SELF_ESTABL_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
case SELF_LOST_CONTACT_EVT :
break ;
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_SYNCH_BEGIN_EVT :
case NODE_FAILOVER_BEGIN_EVT :
case NODE_FAILOVER_END_EVT :
2015-07-16 23:54:30 +03:00
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
case SELF_UP_PEER_COMING :
switch ( evt ) {
case PEER_ESTABL_CONTACT_EVT :
state = SELF_UP_PEER_UP ;
break ;
case SELF_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_LEAVING ;
break ;
case SELF_ESTABL_CONTACT_EVT :
case PEER_LOST_CONTACT_EVT :
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_FAILOVER_BEGIN_EVT :
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-15 21:52:44 +03:00
break ;
case NODE_SYNCH_BEGIN_EVT :
2015-07-31 01:24:18 +03:00
case NODE_FAILOVER_END_EVT :
2015-07-16 23:54:30 +03:00
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
case SELF_COMING_PEER_UP :
switch ( evt ) {
case SELF_ESTABL_CONTACT_EVT :
state = SELF_UP_PEER_UP ;
break ;
case PEER_LOST_CONTACT_EVT :
state = SELF_LEAVING_PEER_DOWN ;
break ;
case SELF_LOST_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
break ;
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_SYNCH_BEGIN_EVT :
case NODE_FAILOVER_BEGIN_EVT :
case NODE_FAILOVER_END_EVT :
2015-07-16 23:54:30 +03:00
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
case SELF_LEAVING_PEER_DOWN :
switch ( evt ) {
case SELF_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_DOWN ;
break ;
case SELF_ESTABL_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
case PEER_LOST_CONTACT_EVT :
break ;
2015-07-31 01:24:18 +03:00
case NODE_SYNCH_END_EVT :
case NODE_SYNCH_BEGIN_EVT :
case NODE_FAILOVER_BEGIN_EVT :
case NODE_FAILOVER_END_EVT :
default :
goto illegal_evt ;
}
break ;
case NODE_FAILINGOVER :
switch ( evt ) {
case SELF_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_LEAVING ;
break ;
case PEER_LOST_CONTACT_EVT :
state = SELF_LEAVING_PEER_DOWN ;
break ;
case NODE_FAILOVER_END_EVT :
state = SELF_UP_PEER_UP ;
break ;
case NODE_FAILOVER_BEGIN_EVT :
case SELF_ESTABL_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
break ;
case NODE_SYNCH_BEGIN_EVT :
case NODE_SYNCH_END_EVT :
2015-07-16 23:54:30 +03:00
default :
2015-07-31 01:24:18 +03:00
goto illegal_evt ;
}
break ;
case NODE_SYNCHING :
switch ( evt ) {
case SELF_LOST_CONTACT_EVT :
state = SELF_DOWN_PEER_LEAVING ;
break ;
case PEER_LOST_CONTACT_EVT :
state = SELF_LEAVING_PEER_DOWN ;
break ;
case NODE_SYNCH_END_EVT :
state = SELF_UP_PEER_UP ;
break ;
case NODE_FAILOVER_BEGIN_EVT :
state = NODE_FAILINGOVER ;
break ;
case NODE_SYNCH_BEGIN_EVT :
case SELF_ESTABL_CONTACT_EVT :
case PEER_ESTABL_CONTACT_EVT :
break ;
case NODE_FAILOVER_END_EVT :
default :
goto illegal_evt ;
2015-07-16 23:54:30 +03:00
}
break ;
default :
pr_err ( " Unknown node fsm state %x \n " , state ) ;
break ;
}
n - > state = state ;
2015-07-31 01:24:18 +03:00
return ;
illegal_evt :
pr_err ( " Illegal node fsm evt %x in state %x \n " , evt , state ) ;
2015-07-16 23:54:30 +03:00
}
2015-07-31 01:24:19 +03:00
bool tipc_node_filter_pkt ( struct tipc_node * n , struct tipc_msg * hdr )
2015-07-16 23:54:30 +03:00
{
int state = n - > state ;
if ( likely ( state = = SELF_UP_PEER_UP ) )
return true ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
2015-07-16 23:54:30 +03:00
if ( state = = SELF_LEAVING_PEER_DOWN )
return false ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
if ( state = = SELF_DOWN_PEER_LEAVING ) {
2015-07-31 01:24:19 +03:00
if ( msg_peer_node_is_up ( hdr ) )
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
return false ;
}
2015-07-31 01:24:19 +03:00
return true ;
2015-07-16 23:54:30 +03:00
}
2008-09-03 10:38:32 +04:00
static void node_established_contact ( struct tipc_node * n_ptr )
2006-01-02 21:04:38 +03:00
{
2015-07-16 23:54:30 +03:00
tipc_node_fsm_evt ( n_ptr , SELF_ESTABL_CONTACT_EVT ) ;
2014-05-08 04:54:39 +04:00
n_ptr - > action_flags | = TIPC_NOTIFY_NODE_UP ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowleding of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
n_ptr - > bclink . oos_state = 0 ;
2015-01-09 10:27:07 +03:00
n_ptr - > bclink . acked = tipc_bclink_get_last_sent ( n_ptr - > net ) ;
tipc_bclink_add_node ( n_ptr - > net , n_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
}
2015-07-31 01:24:23 +03:00
static void node_lost_contact ( struct tipc_node * n_ptr ,
struct sk_buff_head * inputq )
2006-01-02 21:04:38 +03:00
{
char addr_string [ 16 ] ;
2015-02-05 16:36:42 +03:00
struct tipc_sock_conn * conn , * safe ;
2015-07-31 01:24:23 +03:00
struct tipc_link * l ;
2015-02-05 16:36:42 +03:00
struct list_head * conns = & n_ptr - > conn_sks ;
struct sk_buff * skb ;
struct tipc_net * tn = net_generic ( n_ptr - > net , tipc_net_id ) ;
uint i ;
2006-01-02 21:04:38 +03:00
2015-01-22 19:10:31 +03:00
pr_debug ( " Lost contact with %s \n " ,
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ) ;
2011-04-07 19:58:08 +04:00
/* Flush broadcast link info associated with lost node */
2012-11-16 09:51:30 +04:00
if ( n_ptr - > bclink . recv_permitted ) {
2015-03-13 23:08:10 +03:00
__skb_queue_purge ( & n_ptr - > bclink . deferdq ) ;
2011-04-07 19:58:08 +04:00
2014-05-14 13:39:12 +04:00
if ( n_ptr - > bclink . reasm_buf ) {
kfree_skb ( n_ptr - > bclink . reasm_buf ) ;
n_ptr - > bclink . reasm_buf = NULL ;
2011-04-07 19:58:08 +04:00
}
2015-01-09 10:27:07 +03:00
tipc_bclink_remove_node ( n_ptr - > net , n_ptr - > addr ) ;
2011-10-24 23:26:24 +04:00
tipc_bclink_acknowledge ( n_ptr , INVALID_LINK_SEQ ) ;
2006-01-02 21:04:38 +03:00
2012-11-16 09:51:30 +04:00
n_ptr - > bclink . recv_permitted = false ;
2011-04-07 19:58:08 +04:00
}
2006-01-02 21:04:38 +03:00
tipc: eliminate delayed link deletion at link failover
When a bearer is disabled manually, all its links have to be reset
and deleted. However, if there is a remaining, parallel link ready
to take over a deleted link's traffic, we currently delay the delete
of the removed link until the failover procedure is finished. This
is because the remaining link needs to access state from the reset
link, such as the last received packet number, and any partially
reassembled buffer, in order to perform a successful failover.
In this commit, we do instead move the state data over to the new
link, so that it can fulfill the procedure autonomously, without
accessing any data on the old link. This means that we can now
proceed and delete all pertaining links immediately when a bearer
is disabled. This saves us from some unnecessary complexity in such
situations.
We also choose to change the confusing definitions CHANGEOVER_PROTOCOL,
ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL,
FAILOVER_MSG and SYNCH_MSG respectively.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 16:33:01 +03:00
/* Abort any ongoing link failover */
2006-01-02 21:04:38 +03:00
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
2015-07-31 01:24:23 +03:00
l = n_ptr - > links [ i ] . link ;
if ( l )
tipc_link_fsm_evt ( l , LINK_FAILOVER_END_EVT ) ;
2006-01-02 21:04:38 +03:00
}
2015-07-31 01:24:23 +03:00
2015-02-05 16:36:42 +03:00
/* Notify publications from this node */
n_ptr - > action_flags | = TIPC_NOTIFY_NODE_DOWN ;
/* Notify sockets connected to node */
list_for_each_entry_safe ( conn , safe , conns , list ) {
skb = tipc_msg_create ( TIPC_CRITICAL_IMPORTANCE , TIPC_CONN_MSG ,
SHORT_H_SIZE , 0 , tn - > own_addr ,
conn - > peer_node , conn - > port ,
conn - > peer_port , TIPC_ERR_NO_NODE ) ;
2015-07-31 01:24:24 +03:00
if ( likely ( skb ) )
2015-07-31 01:24:23 +03:00
skb_queue_tail ( inputq , skb ) ;
2015-02-05 16:36:42 +03:00
list_del ( & conn - > list ) ;
kfree ( conn ) ;
}
2006-01-02 21:04:38 +03:00
}
2014-04-24 18:26:47 +04:00
/**
* tipc_node_get_linkname - get the name of a link
*
* @ bearer_id : id of the bearer
* @ node : peer node address
* @ linkname : link name output buffer
*
* Returns 0 on success
*/
2015-01-09 10:27:05 +03:00
int tipc_node_get_linkname ( struct net * net , u32 bearer_id , u32 addr ,
char * linkname , size_t len )
2014-04-24 18:26:47 +04:00
{
struct tipc_link * link ;
2015-03-26 13:10:24 +03:00
int err = - EINVAL ;
2015-01-09 10:27:05 +03:00
struct tipc_node * node = tipc_node_find ( net , addr ) ;
2014-04-24 18:26:47 +04:00
2015-03-26 13:10:24 +03:00
if ( ! node )
return err ;
if ( bearer_id > = MAX_BEARERS )
goto exit ;
2014-04-24 18:26:47 +04:00
tipc_node_lock ( node ) ;
2015-07-16 23:54:19 +03:00
link = node - > links [ bearer_id ] . link ;
2014-04-24 18:26:47 +04:00
if ( link ) {
strncpy ( linkname , link - > name , len ) ;
2015-03-26 13:10:24 +03:00
err = 0 ;
2014-04-24 18:26:47 +04:00
}
2015-03-26 13:10:24 +03:00
exit :
2014-04-24 18:26:47 +04:00
tipc_node_unlock ( node ) ;
2015-03-26 13:10:24 +03:00
tipc_node_put ( node ) ;
return err ;
2014-04-24 18:26:47 +04:00
}
2014-05-05 04:56:12 +04:00
void tipc_node_unlock ( struct tipc_node * node )
{
2015-01-09 10:27:05 +03:00
struct net * net = node - > net ;
2014-05-05 04:56:14 +04:00
u32 addr = 0 ;
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 16:36:41 +03:00
u32 flags = node - > action_flags ;
2014-10-20 10:44:25 +04:00
u32 link_id = 0 ;
2015-02-05 16:36:42 +03:00
struct list_head * publ_list ;
2014-05-05 04:56:12 +04:00
2015-07-31 01:24:24 +03:00
if ( likely ( ! flags ) ) {
2014-05-05 04:56:12 +04:00
spin_unlock_bh ( & node - > lock ) ;
return ;
}
2014-10-20 10:44:25 +04:00
addr = node - > addr ;
link_id = node - > link_id ;
2015-02-05 16:36:42 +03:00
publ_list = & node - > publ_list ;
2014-10-20 10:44:25 +04:00
2015-07-31 01:24:24 +03:00
node - > action_flags & = ~ ( TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
2015-02-05 16:36:44 +03:00
TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
2015-07-31 01:24:24 +03:00
TIPC_BCAST_RESET ) ;
2014-10-20 10:44:25 +04:00
2014-05-05 04:56:12 +04:00
spin_unlock_bh ( & node - > lock ) ;
2015-02-05 16:36:42 +03:00
if ( flags & TIPC_NOTIFY_NODE_DOWN )
tipc_publ_notify ( net , publ_list , addr ) ;
2014-08-23 02:09:07 +04:00
tipc: fix bug in multicast congestion handling
One aim of commit 50100a5e39461b2a61d6040e73c384766c29975d ("tipc:
use pseudo message to wake up sockets after link congestion") was
to handle link congestion abatement in a uniform way for both unicast
and multicast transmit. However, the latter doesn't work correctly,
and has been broken since the referenced commit was applied.
If a user now sends a burst of multicast messages that is big
enough to cause broadcast link congestion, it will be put to sleep,
and not be waked up when the congestion abates as it should be.
This has two reasons. First, the flag that is used, TIPC_WAKEUP_USERS,
is set correctly, but in the wrong field. Instead of setting it in the
'action_flags' field of the arrival node struct, it is by mistake set
in the dummy node struct that is owned by the broadcast link, where it
will never tested for. Second, we cannot use the same flag for waking
up unicast and multicast users, since the function tipc_node_unlock()
needs to pick the wakeup pseudo messages to deliver from different
queues. It must hence be able to distinguish between the two cases.
This commit solves this problem by adding a new flag
TIPC_WAKEUP_BCAST_USERS, and a new function tipc_bclink_wakeup_user().
The latter is to be called by tipc_node_unlock() when the named flag,
now set in the correct field, is encountered.
v2: using explicit 'unsigned int' declaration instead of 'uint', as
per comment from David Miller.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-07 22:12:34 +04:00
if ( flags & TIPC_WAKEUP_BCAST_USERS )
2015-01-09 10:27:05 +03:00
tipc_bclink_wakeup_users ( net ) ;
tipc: fix bug in multicast congestion handling
One aim of commit 50100a5e39461b2a61d6040e73c384766c29975d ("tipc:
use pseudo message to wake up sockets after link congestion") was
to handle link congestion abatement in a uniform way for both unicast
and multicast transmit. However, the latter doesn't work correctly,
and has been broken since the referenced commit was applied.
If a user now sends a burst of multicast messages that is big
enough to cause broadcast link congestion, it will be put to sleep,
and not be waked up when the congestion abates as it should be.
This has two reasons. First, the flag that is used, TIPC_WAKEUP_USERS,
is set correctly, but in the wrong field. Instead of setting it in the
'action_flags' field of the arrival node struct, it is by mistake set
in the dummy node struct that is owned by the broadcast link, where it
will never tested for. Second, we cannot use the same flag for waking
up unicast and multicast users, since the function tipc_node_unlock()
needs to pick the wakeup pseudo messages to deliver from different
queues. It must hence be able to distinguish between the two cases.
This commit solves this problem by adding a new flag
TIPC_WAKEUP_BCAST_USERS, and a new function tipc_bclink_wakeup_user().
The latter is to be called by tipc_node_unlock() when the named flag,
now set in the correct field, is encountered.
v2: using explicit 'unsigned int' declaration instead of 'uint', as
per comment from David Miller.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-07 22:12:34 +04:00
2014-10-20 10:44:25 +04:00
if ( flags & TIPC_NOTIFY_NODE_UP )
2015-01-09 10:27:05 +03:00
tipc_named_node_up ( net , addr ) ;
2014-10-20 10:44:25 +04:00
if ( flags & TIPC_NOTIFY_LINK_UP )
2015-01-09 10:27:05 +03:00
tipc_nametbl_publish ( net , TIPC_LINK_STATE , addr , addr ,
2014-10-20 10:44:25 +04:00
TIPC_NODE_SCOPE , link_id , addr ) ;
if ( flags & TIPC_NOTIFY_LINK_DOWN )
2015-01-09 10:27:05 +03:00
tipc_nametbl_withdraw ( net , TIPC_LINK_STATE , addr ,
2014-10-20 10:44:25 +04:00
link_id , addr ) ;
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 16:36:41 +03:00
2015-02-05 16:36:44 +03:00
if ( flags & TIPC_BCAST_MSG_EVT )
tipc_bclink_input ( net ) ;
tipc: fix potential deadlock when all links are reset
[ 60.988363] ======================================================
[ 60.988754] [ INFO: possible circular locking dependency detected ]
[ 60.989152] 3.19.0+ #194 Not tainted
[ 60.989377] -------------------------------------------------------
[ 60.989781] swapper/3/0 is trying to acquire lock:
[ 60.990079] (&(&n_ptr->lock)->rlock){+.-...}, at: [<ffffffffa0006dca>] tipc_link_retransmit+0x1aa/0x240 [tipc]
[ 60.990743]
[ 60.990743] but task is already holding lock:
[ 60.991106] (&(&bclink->lock)->rlock){+.-...}, at: [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.991738]
[ 60.991738] which lock already depends on the new lock.
[ 60.991738]
[ 60.992174]
[ 60.992174] the existing dependency chain (in reverse order) is:
[ 60.992174]
-> #1 (&(&bclink->lock)->rlock){+.-...}:
[ 60.992174] [<ffffffff810a9c0c>] lock_acquire+0x9c/0x140
[ 60.992174] [<ffffffff8179c41f>] _raw_spin_lock_bh+0x3f/0x50
[ 60.992174] [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.992174] [<ffffffffa0000f57>] tipc_bclink_add_node+0x97/0xf0 [tipc]
[ 60.992174] [<ffffffffa0011815>] tipc_node_link_up+0xf5/0x110 [tipc]
[ 60.992174] [<ffffffffa0007783>] link_state_event+0x2b3/0x4f0 [tipc]
[ 60.992174] [<ffffffffa00193c0>] tipc_link_proto_rcv+0x24c/0x418 [tipc]
[ 60.992174] [<ffffffffa0008857>] tipc_rcv+0x827/0xac0 [tipc]
[ 60.992174] [<ffffffffa0002ca3>] tipc_l2_rcv_msg+0x73/0xd0 [tipc]
[ 60.992174] [<ffffffff81646e66>] __netif_receive_skb_core+0x746/0x980
[ 60.992174] [<ffffffff816470c1>] __netif_receive_skb+0x21/0x70
[ 60.992174] [<ffffffff81647295>] netif_receive_skb_internal+0x35/0x130
[ 60.992174] [<ffffffff81648218>] napi_gro_receive+0x158/0x1d0
[ 60.992174] [<ffffffff81559e05>] e1000_clean_rx_irq+0x155/0x490
[ 60.992174] [<ffffffff8155c1b7>] e1000_clean+0x267/0x990
[ 60.992174] [<ffffffff81647b60>] net_rx_action+0x150/0x360
[ 60.992174] [<ffffffff8105ec43>] __do_softirq+0x123/0x360
[ 60.992174] [<ffffffff8105f12e>] irq_exit+0x8e/0xb0
[ 60.992174] [<ffffffff8179f9f5>] do_IRQ+0x65/0x110
[ 60.992174] [<ffffffff8179da6f>] ret_from_intr+0x0/0x13
[ 60.992174] [<ffffffff8100de9f>] arch_cpu_idle+0xf/0x20
[ 60.992174] [<ffffffff8109dfa6>] cpu_startup_entry+0x2f6/0x3f0
[ 60.992174] [<ffffffff81033cda>] start_secondary+0x13a/0x150
[ 60.992174]
-> #0 (&(&n_ptr->lock)->rlock){+.-...}:
[ 60.992174] [<ffffffff810a8f7d>] __lock_acquire+0x163d/0x1ca0
[ 60.992174] [<ffffffff810a9c0c>] lock_acquire+0x9c/0x140
[ 60.992174] [<ffffffff8179c41f>] _raw_spin_lock_bh+0x3f/0x50
[ 60.992174] [<ffffffffa0006dca>] tipc_link_retransmit+0x1aa/0x240 [tipc]
[ 60.992174] [<ffffffffa0001e11>] tipc_bclink_rcv+0x611/0x640 [tipc]
[ 60.992174] [<ffffffffa0008646>] tipc_rcv+0x616/0xac0 [tipc]
[ 60.992174] [<ffffffffa0002ca3>] tipc_l2_rcv_msg+0x73/0xd0 [tipc]
[ 60.992174] [<ffffffff81646e66>] __netif_receive_skb_core+0x746/0x980
[ 60.992174] [<ffffffff816470c1>] __netif_receive_skb+0x21/0x70
[ 60.992174] [<ffffffff81647295>] netif_receive_skb_internal+0x35/0x130
[ 60.992174] [<ffffffff81648218>] napi_gro_receive+0x158/0x1d0
[ 60.992174] [<ffffffff81559e05>] e1000_clean_rx_irq+0x155/0x490
[ 60.992174] [<ffffffff8155c1b7>] e1000_clean+0x267/0x990
[ 60.992174] [<ffffffff81647b60>] net_rx_action+0x150/0x360
[ 60.992174] [<ffffffff8105ec43>] __do_softirq+0x123/0x360
[ 60.992174] [<ffffffff8105f12e>] irq_exit+0x8e/0xb0
[ 60.992174] [<ffffffff8179f9f5>] do_IRQ+0x65/0x110
[ 60.992174] [<ffffffff8179da6f>] ret_from_intr+0x0/0x13
[ 60.992174] [<ffffffff8100de9f>] arch_cpu_idle+0xf/0x20
[ 60.992174] [<ffffffff8109dfa6>] cpu_startup_entry+0x2f6/0x3f0
[ 60.992174] [<ffffffff81033cda>] start_secondary+0x13a/0x150
[ 60.992174]
[ 60.992174] other info that might help us debug this:
[ 60.992174]
[ 60.992174] Possible unsafe locking scenario:
[ 60.992174]
[ 60.992174] CPU0 CPU1
[ 60.992174] ---- ----
[ 60.992174] lock(&(&bclink->lock)->rlock);
[ 60.992174] lock(&(&n_ptr->lock)->rlock);
[ 60.992174] lock(&(&bclink->lock)->rlock);
[ 60.992174] lock(&(&n_ptr->lock)->rlock);
[ 60.992174]
[ 60.992174] *** DEADLOCK ***
[ 60.992174]
[ 60.992174] 3 locks held by swapper/3/0:
[ 60.992174] #0: (rcu_read_lock){......}, at: [<ffffffff81646791>] __netif_receive_skb_core+0x71/0x980
[ 60.992174] #1: (rcu_read_lock){......}, at: [<ffffffffa0002c35>] tipc_l2_rcv_msg+0x5/0xd0 [tipc]
[ 60.992174] #2: (&(&bclink->lock)->rlock){+.-...}, at: [<ffffffffa00004be>] tipc_bclink_lock+0x8e/0xa0 [tipc]
[ 60.992174]
The correct the sequence of grabbing n_ptr->lock and bclink->lock
should be that the former is first held and the latter is then taken,
which exactly happened on CPU1. But especially when the retransmission
of broadcast link is failed, bclink->lock is first held in
tipc_bclink_rcv(), and n_ptr->lock is taken in link_retransmit_failure()
called by tipc_link_retransmit() subsequently, which is demonstrated on
CPU0. As a result, deadlock occurs.
If the order of holding the two locks happening on CPU0 is reversed, the
deadlock risk will be relieved. Therefore, the node lock taken in
link_retransmit_failure() originally is moved to tipc_bclink_rcv()
so that it's obtained before bclink lock. But the precondition of
the adjustment of node lock is that responding to bclink reset event
must be moved from tipc_bclink_unlock() to tipc_node_unlock().
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-26 13:10:23 +03:00
if ( flags & TIPC_BCAST_RESET )
2015-07-31 01:24:16 +03:00
tipc_node_reset_links ( node ) ;
2014-05-05 04:56:12 +04:00
}
2014-11-20 12:29:17 +03:00
/* Caller should hold node lock for the passed node */
2014-11-24 13:10:29 +03:00
static int __tipc_nl_add_node ( struct tipc_nl_msg * msg , struct tipc_node * node )
2014-11-20 12:29:17 +03:00
{
void * hdr ;
struct nlattr * attrs ;
2015-02-09 11:50:03 +03:00
hdr = genlmsg_put ( msg - > skb , msg - > portid , msg - > seq , & tipc_genl_family ,
2014-11-20 12:29:17 +03:00
NLM_F_MULTI , TIPC_NL_NODE_GET ) ;
if ( ! hdr )
return - EMSGSIZE ;
attrs = nla_nest_start ( msg - > skb , TIPC_NLA_NODE ) ;
if ( ! attrs )
goto msg_full ;
if ( nla_put_u32 ( msg - > skb , TIPC_NLA_NODE_ADDR , node - > addr ) )
goto attr_msg_full ;
if ( tipc_node_is_up ( node ) )
if ( nla_put_flag ( msg - > skb , TIPC_NLA_NODE_UP ) )
goto attr_msg_full ;
nla_nest_end ( msg - > skb , attrs ) ;
genlmsg_end ( msg - > skb , hdr ) ;
return 0 ;
attr_msg_full :
nla_nest_cancel ( msg - > skb , attrs ) ;
msg_full :
genlmsg_cancel ( msg - > skb , hdr ) ;
return - EMSGSIZE ;
}
2015-07-16 23:54:24 +03:00
static struct tipc_link * tipc_node_select_link ( struct tipc_node * n , int sel ,
int * bearer_id ,
struct tipc_media_addr * * maddr )
{
int id = n - > active_links [ sel & 1 ] ;
if ( unlikely ( id < 0 ) )
return NULL ;
* bearer_id = id ;
* maddr = & n - > links [ id ] . maddr ;
return n - > links [ id ] . link ;
}
/**
 * tipc_node_xmit() - general link level function for message sending
 * @net: the applicable net namespace
 * @list: chain of buffers containing message
 * @dnode: address of destination node
 * @selector: a number used for deterministic link selection
 *
 * The link is selected and fed under the node lock; the resulting packets
 * are handed to the bearer only after the lock has been released.
 * If no link could carry the message and @dnode is the own node, the
 * chain is delivered locally through tipc_sk_rcv() instead.
 *
 * Consumes the buffer chain, except when returning -ELINKCONG
 * Returns 0 if success, otherwise errno: -ELINKCONG, -EHOSTUNREACH, -EMSGSIZE
 * (any other error from tipc_link_xmit() is passed through unchanged)
 */
int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
		   u32 dnode, int selector)
{
	struct tipc_media_addr *maddr;
	struct sk_buff_head xmitq;
	struct tipc_link *link;
	struct tipc_node *peer;
	int bearer_id;
	int rc = -EHOSTUNREACH;

	__skb_queue_head_init(&xmitq);

	peer = tipc_node_find(net, dnode);
	if (unlikely(!peer))
		goto try_local;

	tipc_node_lock(peer);
	link = tipc_node_select_link(peer, selector, &bearer_id, &maddr);
	if (likely(link))
		rc = tipc_link_xmit(link, list, &xmitq);
	tipc_node_unlock(peer);

	/* -ENOBUFS from the link makes us take that link down; this is
	 * done after the node lock has been dropped.
	 */
	if (unlikely(rc == -ENOBUFS))
		tipc_node_link_down(peer, bearer_id, false);
	tipc_node_put(peer);

	if (likely(!rc)) {
		tipc_bearer_xmit(net, bearer_id, &xmitq, maddr);
		return 0;
	}

try_local:
	if (likely(in_own_node(net, dnode))) {
		tipc_sk_rcv(net, list);
		return 0;
	}
	return rc;
}
/* tipc_node_xmit_skb(): send single buffer to destination
 * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
 * messages, which will not be rejected.
 * The only exception is datagram messages rerouted after secondary
 * lookup, which are rare and safe to dispose of anyway.
 * Always returns 0, whatever tipc_node_xmit() reported.
 * TODO: Return real return value, and let callers use
 *       tipc_wait_for_sendpkt() where applicable
 */
int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
		       u32 selector)
{
	struct sk_buff_head chain;

	/* Wrap the lone buffer in a one-element chain for tipc_node_xmit() */
	skb_queue_head_init(&chain);
	__skb_queue_tail(&chain, skb);

	/* tipc_node_xmit() leaves the buffer to us only on link congestion */
	if (tipc_node_xmit(net, &chain, dnode, selector) == -ELINKCONG)
		kfree_skb(skb);

	return 0;
}
2015-07-31 01:24:19 +03:00
/**
* tipc_node_check_state - check and if necessary update node state
* @ skb : TIPC packet
* @ bearer_id : identity of bearer delivering the packet
* Returns true if state is ok , otherwise consumes buffer and returns false
2015-07-31 01:24:16 +03:00
*/
2015-07-31 01:24:19 +03:00
static bool tipc_node_check_state ( struct tipc_node * n , struct sk_buff * skb ,
2015-07-31 01:24:21 +03:00
int bearer_id , struct sk_buff_head * xmitq )
2015-07-31 01:24:16 +03:00
{
struct tipc_msg * hdr = buf_msg ( skb ) ;
2015-07-31 01:24:19 +03:00
int usr = msg_user ( hdr ) ;
int mtyp = msg_type ( hdr ) ;
2015-07-31 01:24:16 +03:00
u16 oseqno = msg_seqno ( hdr ) ;
2015-07-31 01:24:19 +03:00
u16 iseqno = msg_seqno ( msg_get_wrapped ( hdr ) ) ;
u16 exp_pkts = msg_msgcnt ( hdr ) ;
u16 rcv_nxt , syncpt , dlv_nxt ;
int state = n - > state ;
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 09:12:56 +03:00
struct tipc_link * l , * tnl , * pl = NULL ;
2015-07-31 01:24:23 +03:00
struct tipc_media_addr * maddr ;
int i , pb_id ;
2015-07-31 01:24:16 +03:00
2015-07-31 01:24:19 +03:00
l = n - > links [ bearer_id ] . link ;
if ( ! l )
return false ;
rcv_nxt = l - > rcv_nxt ;
2015-07-31 01:24:16 +03:00
2015-07-31 01:24:19 +03:00
if ( likely ( ( state = = SELF_UP_PEER_UP ) & & ( usr ! = TUNNEL_PROTOCOL ) ) )
return true ;
2015-07-31 01:24:16 +03:00
2015-07-31 01:24:19 +03:00
/* Find parallel link, if any */
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
if ( ( i ! = bearer_id ) & & n - > links [ i ] . link ) {
pl = n - > links [ i ] . link ;
break ;
}
}
2015-07-31 01:24:16 +03:00
2015-07-31 01:24:19 +03:00
/* Update node accesibility if applicable */
if ( state = = SELF_UP_PEER_COMING ) {
if ( ! tipc_link_is_up ( l ) )
return true ;
if ( ! msg_peer_link_is_up ( hdr ) )
return true ;
tipc_node_fsm_evt ( n , PEER_ESTABL_CONTACT_EVT ) ;
}
if ( state = = SELF_DOWN_PEER_LEAVING ) {
if ( msg_peer_node_is_up ( hdr ) )
return false ;
tipc_node_fsm_evt ( n , PEER_LOST_CONTACT_EVT ) ;
}
/* Ignore duplicate packets */
tipc: eliminate risk of stalled link synchronization
In commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level")
we introduced a new mechanism for performing link failover and
synchronization. We have now detected a bug in this mechanism.
During link synchronization we use the arrival of any packet on
the tunnel link to trig a check for whether it has reached the
synchronization point or not. This has turned out to be too
permissive, since it may cause an arriving non-last SYNCH packet to
end the synch state, just to see the next SYNCH packet initiate a
new synch state with a new, higher synch point. This is not fatal,
but should be avoided, because it may significantly extend the
synchronization period, while at the same time we are not allowed
to send NACKs if packets are lost. In the worst case, a low-traffic
user may see its traffic stall until a LINK_PROTOCOL state message
trigs the link to leave synchronization state.
At the same time, LINK_PROTOCOL packets which happen to have a (non-
valid) sequence number lower than the tunnel link's rcv_nxt value will
be consistently dropped, and will never be able to resolve the situation
described above.
We fix this by exempting LINK_PROTOCOL packets from the sequence number
check, as they should be. We also reduce (but don't completely
eliminate) the risk of entering multiple synchronization states by only
allowing the (logically) first SYNCH packet to initiate a synchronization
state. This works independently of actual packet arrival order.
Fixes: commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-13 19:41:51 +03:00
if ( ( usr ! = LINK_PROTOCOL ) & & less ( oseqno , rcv_nxt ) )
2015-07-31 01:24:19 +03:00
return true ;
/* Initiate or update failover mode if applicable */
if ( ( usr = = TUNNEL_PROTOCOL ) & & ( mtyp = = FAILOVER_MSG ) ) {
syncpt = oseqno + exp_pkts - 1 ;
2015-07-31 01:24:23 +03:00
if ( pl & & tipc_link_is_up ( pl ) ) {
pb_id = pl - > bearer_id ;
__tipc_node_link_down ( n , & pb_id , xmitq , & maddr ) ;
tipc_skb_queue_splice_tail_init ( pl - > inputq , l - > inputq ) ;
}
2015-07-31 01:24:19 +03:00
/* If pkts arrive out of order, use lowest calculated syncpt */
if ( less ( syncpt , n - > sync_point ) )
n - > sync_point = syncpt ;
}
/* Open parallel link when tunnel link reaches synch point */
tipc: eliminate risk of premature link setup during failover
When a link goes down, and there is still a working link towards its
destination node, a failover is initiated, and the failed link is not
allowed to re-establish until that procedure is finished. To ensure
this, the concerned link endpoints are set to state LINK_FAILINGOVER,
and the node endpoints to NODE_FAILINGOVER during the failover period.
However, if the link reset is due to a disabled bearer, the corres-
ponding link endpoint is deleted, and only the node endpoint knows
about the ongoing failover. Now, if the disabled bearer is re-enabled
during the failover period, the discovery mechanism may create a new
link endpoint that is ready to be established, despite that this is not
permitted. This situation may cause both the ongoing failover and any
subsequent link synchronization to fail.
In this commit, we ensure that a newly created link goes directly to
state LINK_FAILINGOVER if the corresponding node state is
NODE_FAILINGOVER. This eliminates the problem described above.
Furthermore, we tighten the criteria for which packets are allowed
to end a failover state in the function tipc_node_check_state().
By checking that the receiving link is up and running, instead of just
checking that it is not in failover mode, we eliminate the risk that
protocol packets from the re-created link may cause the failover to
be prematurely terminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 09:12:54 +03:00
if ( ( n - > state = = NODE_FAILINGOVER ) & & tipc_link_is_up ( l ) ) {
2015-07-31 01:24:21 +03:00
if ( ! more ( rcv_nxt , n - > sync_point ) )
return true ;
2015-07-31 01:24:19 +03:00
tipc_node_fsm_evt ( n , NODE_FAILOVER_END_EVT ) ;
if ( pl )
2015-07-31 01:24:21 +03:00
tipc_link_fsm_evt ( pl , LINK_FAILOVER_END_EVT ) ;
2015-07-31 01:24:19 +03:00
return true ;
}
2015-08-20 09:12:55 +03:00
/* No synching needed if only one link */
if ( ! pl | | ! tipc_link_is_up ( pl ) )
return true ;
tipc: eliminate risk of stalled link synchronization
In commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level")
we introduced a new mechanism for performing link failover and
synchronization. We have now detected a bug in this mechanism.
During link synchronization we use the arrival of any packet on
the tunnel link to trig a check for whether it has reached the
synchronization point or not. This has turned out to be too
permissive, since it may cause an arriving non-last SYNCH packet to
end the synch state, just to see the next SYNCH packet initiate a
new synch state with a new, higher synch point. This is not fatal,
but should be avoided, because it may significantly extend the
synchronization period, while at the same time we are not allowed
to send NACKs if packets are lost. In the worst case, a low-traffic
user may see its traffic stall until a LINK_PROTOCOL state message
trigs the link to leave synchronization state.
At the same time, LINK_PROTOCOL packets which happen to have a (non-
valid) sequence number lower than the tunnel link's rcv_nxt value will
be consistently dropped, and will never be able to resolve the situation
described above.
We fix this by exempting LINK_PROTOCOL packets from the sequence number
check, as they should be. We also reduce (but don't completely
eliminate) the risk of entering multiple synchronization states by only
allowing the (logically) first SYNCH packet to initiate a synchronization
state. This works independently of actual packet arrival order.
Fixes: commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-13 19:41:51 +03:00
/* Initiate synch mode if applicable */
if ( ( usr = = TUNNEL_PROTOCOL ) & & ( mtyp = = SYNCH_MSG ) & & ( oseqno = = 1 ) ) {
2015-07-31 01:24:19 +03:00
syncpt = iseqno + exp_pkts - 1 ;
2015-07-31 01:24:21 +03:00
if ( ! tipc_link_is_up ( l ) ) {
tipc_link_fsm_evt ( l , LINK_ESTABLISH_EVT ) ;
2015-07-31 01:24:23 +03:00
__tipc_node_link_up ( n , bearer_id , xmitq ) ;
2015-07-31 01:24:21 +03:00
}
2015-07-31 01:24:19 +03:00
if ( n - > state = = SELF_UP_PEER_UP ) {
n - > sync_point = syncpt ;
2015-07-31 01:24:21 +03:00
tipc_link_fsm_evt ( l , LINK_SYNCH_BEGIN_EVT ) ;
2015-07-31 01:24:19 +03:00
tipc_node_fsm_evt ( n , NODE_SYNCH_BEGIN_EVT ) ;
}
if ( less ( syncpt , n - > sync_point ) )
n - > sync_point = syncpt ;
2015-07-31 01:24:16 +03:00
}
2015-07-31 01:24:19 +03:00
/* Open tunnel link when parallel link reaches synch point */
2015-07-31 01:24:21 +03:00
if ( ( n - > state = = NODE_SYNCHING ) & & tipc_link_is_synching ( l ) ) {
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 09:12:56 +03:00
if ( tipc_link_is_synching ( l ) ) {
tnl = l ;
} else {
tnl = pl ;
pl = l ;
}
2015-08-20 09:12:55 +03:00
dlv_nxt = pl - > rcv_nxt - mod ( skb_queue_len ( pl - > inputq ) ) ;
if ( more ( dlv_nxt , n - > sync_point ) ) {
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 09:12:56 +03:00
tipc_link_fsm_evt ( tnl , LINK_SYNCH_END_EVT ) ;
2015-07-31 01:24:19 +03:00
tipc_node_fsm_evt ( n , NODE_SYNCH_END_EVT ) ;
return true ;
}
tipc: fix stale link problem during synchronization
Recent changes to the link synchronization means that we can now just
drop packets arriving on the synchronizing link before the synch point
is reached. This has lead to significant simplifications to the
implementation, but also turns out to have a flip side that we need
to consider.
Under unlucky circumstances, the two endpoints may end up
repeatedly dropping each other's packets, while immediately
asking for retransmission of the same packets, just to drop
them once more. This pattern will eventually be broken when
the synch point is reached on the other link, but before that,
the endpoints may have arrived at the retransmission limit
(stale counter) that indicates that the link should be broken.
We see this happen at rare occasions.
The fix for this is to not ask for retransmissions when a link is in
state LINK_SYNCHING. The fact that the link has reached this state
means that it has already received the first SYNCH packet, and that it
knows the synch point. Hence, it doesn't need any more packets until the
other link has reached the synch point, whereafter it can go ahead and
ask for the missing packets.
However, because of the reduced traffic on the synching link that
follows this change, it may now take longer to discover that the
synch point has been reached. We compensate for this by letting all
packets, on any of the links, trig a check for synchronization
termination. This is possible because the packets themselves don't
contain any information that is needed for discovering this condition.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-20 09:12:56 +03:00
if ( l = = pl )
return true ;
2015-07-31 01:24:19 +03:00
if ( ( usr = = TUNNEL_PROTOCOL ) & & ( mtyp = = SYNCH_MSG ) )
return true ;
if ( usr = = LINK_PROTOCOL )
return true ;
return false ;
}
return true ;
2015-07-31 01:24:16 +03:00
}
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
/**
* tipc_rcv - process TIPC packets / messages arriving from off - node
* @ net : the applicable net namespace
* @ skb : TIPC packet
* @ bearer : pointer to bearer message arrived on
*
* Invoked with no locks held . Bearer pointer must point to a valid bearer
* structure ( i . e . cannot be NULL ) , but bearer can be inactive .
*/
void tipc_rcv ( struct net * net , struct sk_buff * skb , struct tipc_bearer * b )
{
struct sk_buff_head xmitq ;
struct tipc_node * n ;
2015-07-31 01:24:19 +03:00
struct tipc_msg * hdr = buf_msg ( skb ) ;
int usr = msg_user ( hdr ) ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
int bearer_id = b - > identity ;
2015-07-31 01:24:19 +03:00
struct tipc_link_entry * le ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
int rc = 0 ;
__skb_queue_head_init ( & xmitq ) ;
/* Ensure message is well-formed */
if ( unlikely ( ! tipc_msg_validate ( skb ) ) )
goto discard ;
/* Handle arrival of a non-unicast link packet */
if ( unlikely ( msg_non_seq ( hdr ) ) ) {
2015-07-31 01:24:16 +03:00
if ( usr = = LINK_CONFIG )
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
tipc_disc_rcv ( net , skb , b ) ;
else
tipc_bclink_rcv ( net , skb ) ;
return ;
}
/* Locate neighboring node that sent packet */
n = tipc_node_find ( net , msg_prevnode ( hdr ) ) ;
if ( unlikely ( ! n ) )
goto discard ;
2015-07-31 01:24:19 +03:00
le = & n - > links [ bearer_id ] ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
2015-07-31 01:24:19 +03:00
tipc_node_lock ( n ) ;
2015-07-31 01:24:16 +03:00
2015-07-31 01:24:19 +03:00
/* Is reception permitted at the moment ? */
if ( ! tipc_node_filter_pkt ( n , hdr ) )
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
goto unlock ;
2015-07-31 01:24:19 +03:00
if ( unlikely ( msg_user ( hdr ) = = LINK_PROTOCOL ) )
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
tipc_bclink_sync_state ( n , hdr ) ;
2015-07-31 01:24:23 +03:00
/* Release acked broadcast packets */
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
if ( unlikely ( n - > bclink . acked ! = msg_bcast_ack ( hdr ) ) )
tipc_bclink_acknowledge ( n , msg_bcast_ack ( hdr ) ) ;
2015-07-31 01:24:19 +03:00
/* Check and if necessary update node state */
2015-07-31 01:24:21 +03:00
if ( likely ( tipc_node_check_state ( n , skb , bearer_id , & xmitq ) ) ) {
2015-07-31 01:24:19 +03:00
rc = tipc_link_rcv ( le - > link , skb , & xmitq ) ;
skb = NULL ;
}
2015-07-31 01:24:23 +03:00
unlock :
tipc_node_unlock ( n ) ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
if ( unlikely ( rc & TIPC_LINK_UP_EVT ) )
2015-07-31 01:24:19 +03:00
tipc_node_link_up ( n , bearer_id , & xmitq ) ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
if ( unlikely ( rc & TIPC_LINK_DOWN_EVT ) )
2015-07-31 01:24:23 +03:00
tipc_node_link_down ( n , bearer_id , false ) ;
2015-07-31 01:24:19 +03:00
2015-07-31 01:24:24 +03:00
if ( unlikely ( ! skb_queue_empty ( & n - > bclink . namedq ) ) )
tipc_named_rcv ( net , & n - > bclink . namedq ) ;
2015-07-31 01:24:19 +03:00
if ( ! skb_queue_empty ( & le - > inputq ) )
tipc_sk_rcv ( net , & le - > inputq ) ;
if ( ! skb_queue_empty ( & xmitq ) )
tipc_bearer_xmit ( net , bearer_id , & xmitq , & le - > maddr ) ;
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 23:54:31 +03:00
tipc_node_put ( n ) ;
discard :
kfree_skb ( skb ) ;
}
2014-11-20 12:29:17 +03:00
/*
 * tipc_nl_node_dump - netlink dump callback that adds node entries to @skb
 * @skb: buffer the entries are added to ; its socket selects the struct net
 * @cb: netlink callback context , also used to carry resume state
 *
 * Walks tn->node_list and calls __tipc_nl_add_node() for each node .
 * Resume state is kept in @cb between invocations :
 *   cb->args[0] - "done" flag ; once set , the next call returns 0 at once
 *   cb->args[1] - address of the node for which __tipc_nl_add_node()
 *                 failed ; the next call restarts at that node
 *                 ( 0 means start from the head of the list )
 *
 * Returns 0 if the dump was already completed , -EPIPE if the node to
 * resume from can no longer be found , otherwise skb->len .
 */
int tipc_nl_node_dump ( struct sk_buff * skb , struct netlink_callback * cb )
{
int err ;
2015-01-09 10:27:05 +03:00
struct net * net = sock_net ( skb - > sk ) ;
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
2014-11-20 12:29:17 +03:00
/* Resume state saved by the previous invocation ( see the out : label ) */
int done = cb - > args [ 0 ] ;
int last_addr = cb - > args [ 1 ] ;
struct tipc_node * node ;
struct tipc_nl_msg msg ;
/* A previous call already walked the whole list : nothing left to add */
if ( done )
return 0 ;
msg . skb = skb ;
msg . portid = NETLINK_CB ( cb - > skb ) . portid ;
msg . seq = cb - > nlh - > nlmsg_seq ;
/* The lookup and the list walk below both run inside this RCU read - side
 * section ; every exit path calls rcu_read_unlock ( ) .
 */
rcu_read_lock ( ) ;
2015-03-26 13:10:24 +03:00
if ( last_addr ) {
/* Only an existence check : the pointer is not used after the put .
 * NOTE(review): presumably tipc_node_put ( ) drops a reference taken by
 * tipc_node_find ( ) - confirm against their definitions .
 */
node = tipc_node_find ( net , last_addr ) ;
if ( ! node ) {
rcu_read_unlock ( ) ;
/* We never set seq or call nl_dump_check_consistent ( ) .
 * Setting prev_seq here therefore makes the consistency
 * check in the netlink callback handler fail , so the
 * NLMSG_DONE message carries the NLM_F_DUMP_INTR flag
 * if the node state changed while we had released the
 * lock .
 */
cb - > prev_seq = 1 ;
return - EPIPE ;
}
tipc_node_put ( node ) ;
2014-11-20 12:29:17 +03:00
}
2015-01-09 10:27:05 +03:00
list_for_each_entry_rcu ( node , & tn - > node_list , list ) {
2014-11-20 12:29:17 +03:00
/* When resuming , skip entries until the saved address is reached .
 * That node itself is not skipped : clearing last_addr falls through
 * to the add below , so the node that failed last time is retried .
 */
if ( last_addr ) {
if ( node - > addr = = last_addr )
last_addr = 0 ;
else
continue ;
}
/* The node lock is held only around the add for this one node */
tipc_node_lock ( node ) ;
err = __tipc_nl_add_node ( & msg , node ) ;
if ( err ) {
/* Remember where to restart ; done stays 0 on this path */
last_addr = node - > addr ;
tipc_node_unlock ( node ) ;
goto out ;
}
tipc_node_unlock ( node ) ;
}
/* Reached the end of the list without an add failure */
done = 1 ;
out :
/* Save the resume state for the next invocation */
cb - > args [ 0 ] = done ;
cb - > args [ 1 ] = last_addr ;
rcu_read_unlock ( ) ;
return skb - > len ;
}