2006-01-02 19:04:38 +01:00
/*
* net / tipc / node . h : Include file for TIPC node management routines
2007-02-09 23:25:21 +09:00
*
2015-02-05 08:36:44 -05:00
* Copyright ( c ) 2000 - 2006 , 2014 - 2015 , Ericsson AB
2014-03-27 12:54:36 +08:00
* Copyright ( c ) 2005 , 2010 - 2014 , Wind River Systems
2006-01-02 19:04:38 +01:00
* All rights reserved .
*
2006-01-11 13:30:43 +01:00
* Redistribution and use in source and binary forms , with or without
2006-01-02 19:04:38 +01:00
* modification , are permitted provided that the following conditions are met :
*
2006-01-11 13:30:43 +01:00
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
2006-01-02 19:04:38 +01:00
*
2006-01-11 13:30:43 +01:00
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
2006-01-02 19:04:38 +01:00
* POSSIBILITY OF SUCH DAMAGE .
*/
# ifndef _TIPC_NODE_H
# define _TIPC_NODE_H
2010-12-31 18:59:19 +00:00
# include "addr.h"
# include "net.h"
2006-01-02 19:04:38 +01:00
# include "bearer.h"
2014-06-25 20:41:33 -05:00
# include "msg.h"
2006-01-02 19:04:38 +01:00
2015-01-09 15:27:05 +08:00
/* Out-of-range value for node signature */
#define INVALID_NODE_SIG	0x10000

/* Marks an active_links[] slot that currently holds no bearer id.
 * Parenthesized so the negative literal stays one operand wherever the
 * macro is expanded.
 */
#define INVALID_BEARER_ID	(-1)
2014-05-08 08:54:39 +08:00
/* Flags used to take different actions according to flag type.
 * Each flag is a distinct bit so several can be combined in one mask.
 * TIPC_NOTIFY_NODE_DOWN: notify node is down
 * TIPC_NOTIFY_NODE_UP: notify node is up
 * TIPC_NOTIFY_LINK_UP: notify link is up (meaning taken from the name;
 *                      the consumers are not part of this header)
 * TIPC_NOTIFY_LINK_DOWN: notify link is down (same caveat as above)
 */
enum {
	TIPC_NOTIFY_NODE_DOWN		= (1 << 3),
	TIPC_NOTIFY_NODE_UP		= (1 << 4),
	TIPC_NOTIFY_LINK_UP		= (1 << 6),
	TIPC_NOTIFY_LINK_DOWN		= (1 << 7)
};
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 11:00:51 -04:00
2015-10-22 08:51:40 -04:00
/* Optional capabilities supported by this code version.
 * Each capability is a single bit.
 */
enum {
	TIPC_BCAST_SYNCH	= (1 << 1)
};

/* All capability bits defined above; currently just TIPC_BCAST_SYNCH */
#define TIPC_NODE_CAPABILITIES	TIPC_BCAST_SYNCH
2015-07-16 16:54:19 -04:00
struct tipc_link_entry {
struct tipc_link * link ;
u32 mtu ;
2015-07-16 16:54:21 -04:00
struct sk_buff_head inputq ;
2015-07-16 16:54:20 -04:00
struct tipc_media_addr maddr ;
2015-07-16 16:54:19 -04:00
} ;
2015-10-22 08:51:41 -04:00
/**
 * struct tipc_bclink_entry - broadcast link slot of a node
 * @link: link associated with this entry
 * @inputq1: socket buffer queue (role not visible in this header)
 * @arrvq: socket buffer queue (presumably an arrival queue; confirm
 *         against its users)
 * @inputq2: socket buffer queue (role not visible in this header)
 * @namedq: socket buffer queue (presumably for name table messages;
 *          confirm against its users)
 */
struct tipc_bclink_entry {
	struct tipc_link *link;
	struct sk_buff_head inputq1;
	struct sk_buff_head arrvq;
	struct sk_buff_head inputq2;
	struct sk_buff_head namedq;
};
2006-01-02 19:04:38 +01:00
/**
2008-09-02 23:38:32 -07:00
* struct tipc_node - TIPC node structure
2006-01-02 19:04:38 +01:00
* @ addr : network address of node
2015-03-26 18:10:24 +08:00
* @ ref : reference counter to node object
2006-01-02 19:04:38 +01:00
* @ lock : spinlock governing access to structure
2015-01-09 15:27:05 +08:00
* @ net : the applicable net namespace
2011-02-25 18:42:52 -05:00
* @ hash : links to adjacent nodes in unsorted hash chain
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 08:36:41 -05:00
* @ inputq : pointer to input queue containing messages for msg event
* @ namedq : pointer to name table input queue with name table messages
2015-07-16 16:54:22 -04:00
* @ active_links : bearer ids of active links , used as index into links [ ] array
2015-07-16 16:54:19 -04:00
* @ links : array containing references to all links to node
2014-05-08 08:54:39 +08:00
* @ action_flags : bit mask of different types of node actions
2015-07-30 18:24:19 -04:00
* @ state : connectivity state vs peer node
* @ sync_point : sequence number where synch / failover is finished
2014-05-05 08:56:10 +08:00
* @ list : links to adjacent nodes in sorted list of cluster ' s nodes
* @ working_links : number of working links to node ( both active and standby )
2006-01-02 19:04:38 +01:00
* @ link_cnt : number of links to node
2015-03-13 16:08:05 -04:00
* @ capabilities : bitmap , indicating peer node ' s functional capabilities
2011-10-28 16:26:41 -04:00
* @ signature : node instance identifier
2014-10-20 14:44:25 +08:00
* @ link_id : local and remote bearer ids of changing link , if any
2014-11-26 11:41:45 +08:00
* @ publ_list : list of publications
2014-03-27 12:54:37 +08:00
* @ rcu : rcu struct for tipc_node
2006-01-02 19:04:38 +01:00
*/
2008-09-02 23:38:32 -07:00
struct tipc_node {
2006-01-02 19:04:38 +01:00
u32 addr ;
2015-03-26 18:10:24 +08:00
struct kref kref ;
2006-01-02 19:04:38 +01:00
spinlock_t lock ;
2015-01-09 15:27:05 +08:00
struct net * net ;
2011-02-25 18:42:52 -05:00
struct hlist_node hash ;
2015-07-16 16:54:22 -04:00
int active_links [ 2 ] ;
2015-07-16 16:54:19 -04:00
struct tipc_link_entry links [ MAX_BEARERS ] ;
2015-10-22 08:51:41 -04:00
struct tipc_bclink_entry bc_entry ;
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 08:36:41 -05:00
int action_flags ;
2014-05-05 08:56:10 +08:00
struct list_head list ;
2015-07-16 16:54:30 -04:00
int state ;
2015-07-30 18:24:19 -04:00
u16 sync_point ;
2006-01-02 19:04:38 +01:00
int link_cnt ;
2015-03-13 16:08:05 -04:00
u16 working_links ;
u16 capabilities ;
2011-10-28 16:26:41 -04:00
u32 signature ;
2014-10-20 14:44:25 +08:00
u32 link_id ;
2014-11-26 11:41:45 +08:00
struct list_head publ_list ;
tipc: use message to abort connections when losing contact to node
In the current implementation, each 'struct tipc_node' instance keeps
a linked list of those ports/sockets that are connected to the node
represented by that struct. The purpose of this is to let the node
object know which sockets to alert when it loses contact with its peer
node, i.e., which sockets need to have their connections aborted.
This entails an unwanted direct reference from the node structure
back to the port/socket structure, and a need to grab port_lock
when we have to make an upcall to the port. We want to get rid of
this unecessary BH entry point into the socket, and also eliminate
its use of port_lock.
In this commit, we instead let the node struct keep list of "connected
socket" structs, which each represents a connected socket, but is
allocated independently by the node at the moment of connection. If
the node loses contact with its peer node, the list is traversed, and
a "connection abort" message is created for each entry in the list. The
message is sent to it respective connected socket using the ordinary
data path, and the receiving socket aborts its connections upon reception
of the message.
This enables us to get rid of the direct reference from 'struct node' to
´struct port', and another unwanted BH access point to the latter.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-22 18:09:08 -04:00
struct list_head conn_sks ;
2015-07-16 16:54:29 -04:00
unsigned long keepalive_intv ;
struct timer_list timer ;
2014-03-27 12:54:37 +08:00
struct rcu_head rcu ;
2006-01-02 19:04:38 +01:00
} ;
2015-01-09 15:27:05 +08:00
struct tipc_node * tipc_node_find ( struct net * net , u32 addr ) ;
2015-03-26 18:10:24 +08:00
void tipc_node_put ( struct tipc_node * node ) ;
2015-01-09 15:27:05 +08:00
void tipc_node_stop ( struct net * net ) ;
2015-07-30 18:24:22 -04:00
void tipc_node_check_dest ( struct net * net , u32 onode ,
struct tipc_bearer * bearer ,
u16 capabilities , u32 signature ,
struct tipc_media_addr * maddr ,
bool * respond , bool * dupl_addr ) ;
2015-07-30 18:24:16 -04:00
void tipc_node_delete_links ( struct net * net , int bearer_id ) ;
2011-12-29 20:58:42 -05:00
void tipc_node_attach_link ( struct tipc_node * n_ptr , struct tipc_link * l_ptr ) ;
void tipc_node_detach_link ( struct tipc_node * n_ptr , struct tipc_link * l_ptr ) ;
2015-07-16 16:54:19 -04:00
bool tipc_node_is_up ( struct tipc_node * n ) ;
2015-01-09 15:27:05 +08:00
int tipc_node_get_linkname ( struct net * net , u32 bearer_id , u32 node ,
char * linkname , size_t len ) ;
2014-05-05 08:56:12 +08:00
void tipc_node_unlock ( struct tipc_node * node ) ;
2015-07-16 16:54:24 -04:00
int tipc_node_xmit ( struct net * net , struct sk_buff_head * list , u32 dnode ,
int selector ) ;
int tipc_node_xmit_skb ( struct net * net , struct sk_buff * skb , u32 dest ,
u32 selector ) ;
2015-01-09 15:27:05 +08:00
int tipc_node_add_conn ( struct net * net , u32 dnode , u32 port , u32 peer_port ) ;
void tipc_node_remove_conn ( struct net * net , u32 dnode , u32 port ) ;
2014-11-20 10:29:17 +01:00
int tipc_nl_node_dump ( struct sk_buff * skb , struct netlink_callback * cb ) ;
2014-05-05 08:56:12 +08:00
static inline void tipc_node_lock ( struct tipc_node * node )
2006-01-02 19:04:38 +01:00
{
2014-05-05 08:56:12 +08:00
spin_lock_bh ( & node - > lock ) ;
2006-01-02 19:04:38 +01:00
}
2015-07-16 16:54:19 -04:00
static inline struct tipc_link * node_active_link ( struct tipc_node * n , int sel )
2014-06-25 20:41:33 -05:00
{
2015-07-16 16:54:22 -04:00
int bearer_id = n - > active_links [ sel & 1 ] ;
if ( unlikely ( bearer_id = = INVALID_BEARER_ID ) )
return NULL ;
2014-06-25 20:41:33 -05:00
2015-07-16 16:54:22 -04:00
return n - > links [ bearer_id ] . link ;
2015-07-16 16:54:19 -04:00
}
2014-06-25 20:41:33 -05:00
2015-07-16 16:54:22 -04:00
/**
 * tipc_node_get_mtu - look up the MTU towards a node
 * @net: net namespace passed on to tipc_node_find()
 * @addr: address of the node to look up
 * @sel: selector; only its lowest bit is used, choosing between the two
 *       active_links[] slots of the node
 *
 * Returns the mtu stored in the links[] slot of the selected active bearer.
 * Falls back to MAX_MSG_SIZE if the node is not found or the selected
 * active_links[] slot is INVALID_BEARER_ID.
 */
static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
{
	struct tipc_node *n;
	int bearer_id;
	unsigned int mtu = MAX_MSG_SIZE;

	n = tipc_node_find(net, addr);
	if (unlikely(!n))
		return mtu;

	bearer_id = n->active_links[sel & 1];
	if (likely(bearer_id != INVALID_BEARER_ID))
		mtu = n->links[bearer_id].mtu;

	/* Pairs with the successful tipc_node_find() above */
	tipc_node_put(n);
	return mtu;
}
2006-01-02 19:04:38 +01:00
# endif