2006-01-02 21:04:38 +03:00
/*
* net / tipc / link . c : TIPC link code
2007-02-09 17:25:21 +03:00
*
2014-01-08 02:02:41 +04:00
* Copyright ( c ) 1996 - 2007 , 2012 - 2014 , Ericsson AB
2013-06-17 18:54:42 +04:00
* Copyright ( c ) 2004 - 2007 , 2010 - 2013 , Wind River Systems
2006-01-02 21:04:38 +03:00
* All rights reserved .
*
2006-01-11 15:30:43 +03:00
* Redistribution and use in source and binary forms , with or without
2006-01-02 21:04:38 +03:00
* modification , are permitted provided that the following conditions are met :
*
2006-01-11 15:30:43 +03:00
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
2006-01-02 21:04:38 +03:00
*
2006-01-11 15:30:43 +03:00
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
2006-01-02 21:04:38 +03:00
* POSSIBILITY OF SUCH DAMAGE .
*/
# include "core.h"
# include "link.h"
# include "port.h"
# include "name_distr.h"
# include "discover.h"
# include "config.h"
2013-06-17 18:54:48 +04:00
# include <linux/pkt_sched.h>
2012-06-29 08:16:37 +04:00
/*
* Error message prefixes
*/
static const char * link_co_err = " Link changeover error, " ;
static const char * link_rst_msg = " Resetting link " ;
static const char * link_unk_evt = " Unknown link event " ;
2006-01-02 21:04:38 +03:00
2008-06-05 04:29:39 +04:00
/*
* Out - of - range value for link session numbers
*/
/* One above 0xffff, the mask applied to session numbers in this file, so it cannot equal any masked 16-bit session value. */
# define INVALID_SESSION 0x10000
2007-02-09 17:25:21 +03:00
/*
* Link state events :
2006-01-02 21:04:38 +03:00
*/
/* Event codes passed to link_state_event() in addition to the protocol message types it also accepts. */
# define STARTING_EVT 856384768 /* link processing trigger; the only event accepted before the link is started */
# define TRAFFIC_MSG_EVT 560815u /* NOTE(review): presumably raised when a traffic message is received from the peer - confirm against the receive path */
# define TIMEOUT_EVT 560817u /* link timer expired */
2007-02-09 17:25:21 +03:00
/*
* The following two ' message types ' are really just implementation
* data conveniently stored in the message header .
2006-01-02 21:04:38 +03:00
* They must not be considered part of the protocol
*/
/* NOTE(review): the code that stores and tests these two values is not visible in this part of the file. */
# define OPEN_MSG 0
# define CLOSED_MSG 1
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* State value stored in ' exp_msg_count '
*/
/* Written to exp_msg_count by tipc_link_reset() when an active link goes down while the node still has another active link. */
# define START_CHANGEOVER 100000u
2011-12-30 05:58:42 +04:00
static void link_handle_out_of_seq_msg ( struct tipc_link * l_ptr ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ) ;
2011-12-30 05:58:42 +04:00
static void link_recv_proto_msg ( struct tipc_link * l_ptr , struct sk_buff * buf ) ;
2014-01-08 02:02:41 +04:00
static int tipc_link_tunnel_rcv ( struct tipc_link * * l_ptr ,
struct sk_buff * * buf ) ;
2011-12-30 05:58:42 +04:00
static void link_set_supervision_props ( struct tipc_link * l_ptr , u32 tolerance ) ;
2011-01-07 19:43:40 +03:00
static int link_send_sections_long ( struct tipc_port * sender ,
2006-01-02 21:04:38 +03:00
struct iovec const * msg_sect ,
2013-10-18 09:23:15 +04:00
unsigned int len , u32 destnode ) ;
2011-12-30 05:58:42 +04:00
static void link_state_event ( struct tipc_link * l_ptr , u32 event ) ;
static void link_reset_statistics ( struct tipc_link * l_ptr ) ;
static void link_print ( struct tipc_link * l_ptr , const char * str ) ;
static int link_send_long_buf ( struct tipc_link * l_ptr , struct sk_buff * buf ) ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
/*
 * Broadcast-link synchronisation helpers; their definitions are not visible
 * in this part of the file.
 * NOTE(review): presumably these send and receive the unicast
 * BCAST_PROTOCOL/STATE message that tells a newly added peer the sequence
 * number of the next broadcast packet - confirm against the definitions.
 */
static void tipc_link_send_sync ( struct tipc_link * l ) ;
static void tipc_link_recv_sync ( struct tipc_node * n , struct sk_buff * buf ) ;
2010-10-13 17:20:35 +04:00
2006-01-02 21:04:38 +03:00
/*
2006-03-21 09:37:04 +03:00
* Simple link routines
2006-01-02 21:04:38 +03:00
*/
2006-03-21 09:37:04 +03:00
/* Round @i up to the next multiple of four (values already aligned are unchanged). */
static unsigned int align(unsigned int i)
{
	const unsigned int low_bits = 3u;

	return (i + low_bits) & ~low_bits;
}
2011-12-30 05:58:42 +04:00
static void link_init_max_pkt ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
u32 max_pkt ;
2007-02-09 17:25:21 +03:00
2011-01-07 21:00:11 +03:00
max_pkt = ( l_ptr - > b_ptr - > mtu & ~ 3 ) ;
2006-01-02 21:04:38 +03:00
if ( max_pkt > MAX_MSG_SIZE )
max_pkt = MAX_MSG_SIZE ;
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt_target = max_pkt ;
2006-01-02 21:04:38 +03:00
if ( l_ptr - > max_pkt_target < MAX_PKT_DEFAULT )
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
2007-02-09 17:25:21 +03:00
else
2006-01-02 21:04:38 +03:00
l_ptr - > max_pkt = MAX_PKT_DEFAULT ;
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt_probes = 0 ;
2006-01-02 21:04:38 +03:00
}
2011-12-30 05:58:42 +04:00
static u32 link_next_sent ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
if ( l_ptr - > next_out )
2011-10-25 00:03:12 +04:00
return buf_seqno ( l_ptr - > next_out ) ;
2006-01-02 21:04:38 +03:00
return mod ( l_ptr - > next_out_no ) ;
}
2011-12-30 05:58:42 +04:00
/* Sequence number immediately before link_next_sent(), wrapped with mod(). */
static u32 link_last_sent(struct tipc_link *l_ptr)
{
	u32 upcoming = link_next_sent(l_ptr);

	return mod(upcoming - 1);
}
/*
2006-03-21 09:37:04 +03:00
* Simple non - static link routines ( i . e . referenced outside this file )
2006-01-02 21:04:38 +03:00
*/
2011-12-30 05:58:42 +04:00
/*
 * Returns non-zero when @l_ptr is in either of the two "working" states,
 * and 0 for a NULL link or any other state.
 */
int tipc_link_is_up(struct tipc_link *l_ptr)
{
	return l_ptr ?
	       (link_working_working(l_ptr) || link_working_unknown(l_ptr)) : 0;
}
2011-12-30 05:58:42 +04:00
int tipc_link_is_active ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2010-09-23 00:43:57 +04:00
return ( l_ptr - > owner - > active_links [ 0 ] = = l_ptr ) | |
( l_ptr - > owner - > active_links [ 1 ] = = l_ptr ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_timeout - handle expiration of link timer
* @ l_ptr : pointer to link
2007-02-09 17:25:21 +03:00
*
2006-01-18 02:38:21 +03:00
* This routine must not grab " tipc_net_lock " to avoid a potential deadlock conflict
* with tipc_link_delete ( ) . ( There is no risk that the node will be deleted by
* another thread because tipc_link_delete ( ) always cancels the link timer before
* tipc_node_delete ( ) is called . )
2006-01-02 21:04:38 +03:00
*/
2011-12-30 05:58:42 +04:00
static void link_timeout ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2006-01-18 02:38:21 +03:00
tipc_node_lock ( l_ptr - > owner ) ;
2006-01-02 21:04:38 +03:00
/* update counters used in statistical profiling of send traffic */
l_ptr - > stats . accu_queue_sz + = l_ptr - > out_queue_size ;
l_ptr - > stats . queue_sz_counts + + ;
if ( l_ptr - > first_out ) {
struct tipc_msg * msg = buf_msg ( l_ptr - > first_out ) ;
u32 length = msg_size ( msg ) ;
2009-11-30 03:55:45 +03:00
if ( ( msg_user ( msg ) = = MSG_FRAGMENTER ) & &
( msg_type ( msg ) = = FIRST_FRAGMENT ) ) {
2006-01-02 21:04:38 +03:00
length = msg_size ( msg_get_wrapped ( msg ) ) ;
}
if ( length ) {
l_ptr - > stats . msg_lengths_total + = length ;
l_ptr - > stats . msg_length_counts + + ;
if ( length < = 64 )
l_ptr - > stats . msg_length_profile [ 0 ] + + ;
else if ( length < = 256 )
l_ptr - > stats . msg_length_profile [ 1 ] + + ;
else if ( length < = 1024 )
l_ptr - > stats . msg_length_profile [ 2 ] + + ;
else if ( length < = 4096 )
l_ptr - > stats . msg_length_profile [ 3 ] + + ;
else if ( length < = 16384 )
l_ptr - > stats . msg_length_profile [ 4 ] + + ;
else if ( length < = 32768 )
l_ptr - > stats . msg_length_profile [ 5 ] + + ;
else
l_ptr - > stats . msg_length_profile [ 6 ] + + ;
}
}
/* do all other link processing performed on a periodic basis */
link_state_event ( l_ptr , TIMEOUT_EVT ) ;
if ( l_ptr - > next_out )
2006-01-18 02:38:21 +03:00
tipc_link_push_queue ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( l_ptr - > owner ) ;
2006-01-02 21:04:38 +03:00
}
2011-12-30 05:58:42 +04:00
/* Start the link's timer with the interval @time (handed unchanged to k_start_timer()). */
static void link_set_timer(struct tipc_link *l_ptr, u32 time)
{
	k_start_timer(&l_ptr->timer, time);
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_create - create a new link
2011-02-28 19:32:27 +03:00
* @ n_ptr : pointer to associated node
2006-01-02 21:04:38 +03:00
* @ b_ptr : pointer to associated bearer
* @ media_addr : media address to use when sending messages over link
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns pointer to link .
*/
2011-12-30 05:58:42 +04:00
/*
 * Allocates and initialises a link between this node and @n_ptr over @b_ptr,
 * attaches it to the node and to the bearer's link list, and delivers
 * STARTING_EVT to the link state machine.  Returns NULL when the node already
 * has two links, already has a link on this bearer, or allocation fails.
 */
struct tipc_link * tipc_link_create ( struct tipc_node * n_ptr ,
2011-02-28 19:32:27 +03:00
struct tipc_bearer * b_ptr ,
2006-01-18 02:38:21 +03:00
const struct tipc_media_addr * media_addr )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg ;
char * if_name ;
2011-02-28 19:32:27 +03:00
char addr_string [ 16 ] ;
u32 peer = n_ptr - > addr ;
/* A node may own at most two links */
if ( n_ptr - > link_cnt > = 2 ) {
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2012-06-29 08:16:37 +04:00
pr_err ( " Attempt to establish third link to %s \n " , addr_string ) ;
2011-02-28 19:32:27 +03:00
return NULL ;
}
/* ... and at most one link per bearer (slot indexed by bearer identity) */
if ( n_ptr - > links [ b_ptr - > identity ] ) {
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2012-06-29 08:16:37 +04:00
pr_err ( " Attempt to establish second link on <%s> to %s \n " ,
b_ptr - > name , addr_string ) ;
2011-02-28 19:32:27 +03:00
return NULL ;
}
2006-01-02 21:04:38 +03:00
2006-07-22 01:51:30 +04:00
/* Zeroed allocation: every field not set below starts out as 0/NULL */
l_ptr = kzalloc ( sizeof ( * l_ptr ) , GFP_ATOMIC ) ;
2006-01-02 21:04:38 +03:00
if ( ! l_ptr ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " Link creation failed, no memory \n " ) ;
2006-01-02 21:04:38 +03:00
return NULL ;
}
l_ptr - > addr = peer ;
2011-01-07 21:00:11 +03:00
/*
 * Interface name is the part of the bearer name after the separator.
 * NOTE(review): the strchr() result is used without a NULL check, so this
 * relies on the bearer name always containing the separator - confirm.
 */
if_name = strchr ( b_ptr - > name , ' : ' ) + 1 ;
2011-04-07 17:28:47 +04:00
/* Link name: own address and interface, then peer address with a placeholder interface */
sprintf ( l_ptr - > name , " %u.%u.%u:%s-%u.%u.%u:unknown " ,
2006-01-02 21:04:38 +03:00
tipc_zone ( tipc_own_addr ) , tipc_cluster ( tipc_own_addr ) ,
2007-02-09 17:25:21 +03:00
tipc_node ( tipc_own_addr ) ,
2006-01-02 21:04:38 +03:00
if_name ,
tipc_zone ( peer ) , tipc_cluster ( peer ) , tipc_node ( peer ) ) ;
2011-04-07 17:28:47 +04:00
/* note: peer i/f name is updated by reset/activate message */
2006-01-02 21:04:38 +03:00
memcpy ( & l_ptr - > media_addr , media_addr , sizeof ( * media_addr ) ) ;
2011-02-28 19:32:27 +03:00
l_ptr - > owner = n_ptr ;
2006-01-02 21:04:38 +03:00
l_ptr - > checkpoint = 1 ;
2011-04-07 17:43:27 +04:00
l_ptr - > peer_session = INVALID_SESSION ;
2006-01-02 21:04:38 +03:00
l_ptr - > b_ptr = b_ptr ;
2011-10-18 19:34:29 +04:00
link_set_supervision_props ( l_ptr , b_ptr - > tolerance ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
/* Pre-build the protocol message header in the buffer embedded in the link */
l_ptr - > pmsg = ( struct tipc_msg * ) & l_ptr - > proto_msg ;
msg = l_ptr - > pmsg ;
2010-05-11 18:30:12 +04:00
tipc_msg_init ( msg , LINK_PROTOCOL , RESET_MSG , INT_H_SIZE , l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( msg , sizeof ( l_ptr - > proto_msg ) ) ;
2008-06-05 04:29:39 +04:00
/* Initial session number: low 16 bits of tipc_random */
msg_set_session ( msg , ( tipc_random & 0xffff ) ) ;
2006-01-02 21:04:38 +03:00
msg_set_bearer_id ( msg , b_ptr - > identity ) ;
/* Own interface name is carried in the protocol message data area */
strcpy ( ( char * ) msg_data ( msg ) , if_name ) ;
l_ptr - > priority = b_ptr - > priority ;
2011-10-18 19:34:29 +04:00
tipc_link_set_queue_limits ( l_ptr , b_ptr - > window ) ;
2006-01-02 21:04:38 +03:00
link_init_max_pkt ( l_ptr ) ;
l_ptr - > next_out_no = 1 ;
INIT_LIST_HEAD ( & l_ptr - > waiting_ports ) ;
link_reset_statistics ( l_ptr ) ;
2011-02-28 19:32:27 +03:00
tipc_node_attach_link ( n_ptr , l_ptr ) ;
2006-01-02 21:04:38 +03:00
2014-01-08 02:02:41 +04:00
/* link_timeout() is the timer handler and receives this link as its argument */
k_init_timer ( & l_ptr - > timer , ( Handler ) link_timeout ,
( unsigned long ) l_ptr ) ;
2007-07-26 11:05:07 +04:00
list_add_tail ( & l_ptr - > link_list , & b_ptr - > links ) ;
2014-01-08 02:02:44 +04:00
/* Hand the finished link to the state machine */
link_state_event ( l_ptr , STARTING_EVT ) ;
2006-01-02 21:04:38 +03:00
return l_ptr ;
}
2007-02-09 17:25:21 +03:00
/**
2006-01-18 02:38:21 +03:00
* tipc_link_delete - delete a link
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
2007-02-09 17:25:21 +03:00
*
2006-01-18 02:38:21 +03:00
* Note : ' tipc_net_lock ' is write_locked , bearer is locked .
2006-01-02 21:04:38 +03:00
* This routine must not grab the node lock until after link timer cancellation
2007-02-09 17:25:21 +03:00
* to avoid a potential deadlock situation .
2006-01-02 21:04:38 +03:00
*/
2011-12-30 05:58:42 +04:00
void tipc_link_delete ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
if ( ! l_ptr ) {
2012-06-29 08:16:37 +04:00
pr_err ( " Attempt to delete non-existent link \n " ) ;
2006-01-02 21:04:38 +03:00
return ;
}
k_cancel_timer ( & l_ptr - > timer ) ;
2007-02-09 17:25:21 +03:00
2006-01-18 02:38:21 +03:00
tipc_node_lock ( l_ptr - > owner ) ;
tipc_link_reset ( l_ptr ) ;
tipc_node_detach_link ( l_ptr - > owner , l_ptr ) ;
2014-01-08 02:02:44 +04:00
tipc_link_purge_queues ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
list_del_init ( & l_ptr - > link_list ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( l_ptr - > owner ) ;
2006-01-02 21:04:38 +03:00
k_term_timer ( & l_ptr - > timer ) ;
kfree ( l_ptr ) ;
}
/**
2007-02-09 17:25:21 +03:00
* link_schedule_port - schedule port for deferred sending
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
* @ origport : reference to sending port
* @ sz : amount of data to be sent
2007-02-09 17:25:21 +03:00
*
* Schedules port for renewed sending of messages after link congestion
2006-01-02 21:04:38 +03:00
* has abated .
*/
2011-12-30 05:58:42 +04:00
static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
{
	struct tipc_port *port;

	spin_lock_bh(&tipc_port_list_lock);

	port = tipc_port_lock(origport);
	if (port) {
		/* Only queue ports that have a wakeup callback and are not
		 * already on a wait list.
		 */
		if (port->wakeup && list_empty(&port->wait_list)) {
			port->congested = 1;
			/* Number of max_pkt sized packets needed to carry sz bytes */
			port->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
			list_add_tail(&port->wait_list, &l_ptr->waiting_ports);
			l_ptr->stats.link_congs++;
		}
		tipc_port_unlock(port);
	}

	spin_unlock_bh(&tipc_port_list_lock);

	/* The caller is told the link is congested in every case */
	return -ELINKCONG;
}
2011-12-30 05:58:42 +04:00
void tipc_link_wakeup_ports ( struct tipc_link * l_ptr , int all )
2006-01-02 21:04:38 +03:00
{
2011-01-07 19:43:40 +03:00
struct tipc_port * p_ptr ;
struct tipc_port * temp_p_ptr ;
2006-01-02 21:04:38 +03:00
int win = l_ptr - > queue_limit [ 0 ] - l_ptr - > out_queue_size ;
if ( all )
win = 100000 ;
if ( win < = 0 )
return ;
2006-01-18 02:38:21 +03:00
if ( ! spin_trylock_bh ( & tipc_port_list_lock ) )
2006-01-02 21:04:38 +03:00
return ;
if ( link_congested ( l_ptr ) )
goto exit ;
2007-02-09 17:25:21 +03:00
list_for_each_entry_safe ( p_ptr , temp_p_ptr , & l_ptr - > waiting_ports ,
2006-01-02 21:04:38 +03:00
wait_list ) {
if ( win < = 0 )
break ;
list_del_init ( & p_ptr - > wait_list ) ;
2011-01-07 19:43:40 +03:00
spin_lock_bh ( p_ptr - > lock ) ;
p_ptr - > congested = 0 ;
p_ptr - > wakeup ( p_ptr ) ;
2006-01-02 21:04:38 +03:00
win - = p_ptr - > waiting_pkts ;
2011-01-07 19:43:40 +03:00
spin_unlock_bh ( p_ptr - > lock ) ;
2006-01-02 21:04:38 +03:00
}
exit :
2006-01-18 02:38:21 +03:00
spin_unlock_bh ( & tipc_port_list_lock ) ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* link_release_outqueue - purge link ' s outbound message queue
* @ l_ptr : pointer to link
*/
2011-12-30 05:58:42 +04:00
static void link_release_outqueue ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2013-12-11 08:45:38 +04:00
kfree_skb_list ( l_ptr - > first_out ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > first_out = NULL ;
l_ptr - > out_queue_size = 0 ;
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_reset_fragments - purge link ' s inbound message fragments queue
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
*/
2011-12-30 05:58:42 +04:00
void tipc_link_reset_fragments ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
kfree_skb ( l_ptr - > reasm_head ) ;
l_ptr - > reasm_head = NULL ;
l_ptr - > reasm_tail = NULL ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2014-01-08 02:02:44 +04:00
* tipc_link_purge_queues - purge all pkt queues associated with link
2006-01-02 21:04:38 +03:00
* @ l_ptr : pointer to link
*/
2014-01-08 02:02:44 +04:00
void tipc_link_purge_queues ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2013-12-11 08:45:38 +04:00
kfree_skb_list ( l_ptr - > oldest_deferred_in ) ;
kfree_skb_list ( l_ptr - > first_out ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset_fragments ( l_ptr ) ;
2011-11-04 21:24:29 +04:00
kfree_skb ( l_ptr - > proto_msg_queue ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > proto_msg_queue = NULL ;
}
2011-12-30 05:58:42 +04:00
/*
 * Puts the link into RESET_UNKNOWN.  The session number bump, peer session
 * invalidation and max packet re-initialisation always happen; the queue
 * cleanup and counter reset are skipped when the link was already in
 * RESET_UNKNOWN or RESET_RESET.
 */
void tipc_link_reset ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
/* Snapshot what is needed later, before any state is changed */
u32 prev_state = l_ptr - > state ;
u32 checkpoint = l_ptr - > next_in_no ;
2006-06-26 10:52:50 +04:00
int was_active_link = tipc_link_is_active ( l_ptr ) ;
2007-02-09 17:25:21 +03:00
2008-06-05 04:29:39 +04:00
/* Advance our own session number, wrapping at 16 bits */
msg_set_session ( l_ptr - > pmsg , ( ( msg_session ( l_ptr - > pmsg ) + 1 ) & 0xffff ) ) ;
2006-01-02 21:04:38 +03:00
2008-06-05 04:29:39 +04:00
/* Link is down, accept any session */
l_ptr - > peer_session = INVALID_SESSION ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
/* Prepare for max packet size negotiation */
2006-01-02 21:04:38 +03:00
link_init_max_pkt ( l_ptr ) ;
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
/* Already reset before this call: nothing further to tear down */
if ( ( prev_state = = RESET_UNKNOWN ) | | ( prev_state = = RESET_RESET ) )
return ;
2006-01-18 02:38:21 +03:00
tipc_node_link_down ( l_ptr - > owner , l_ptr ) ;
tipc_bearer_remove_dest ( l_ptr - > b_ptr , l_ptr - > addr ) ;
2010-10-12 18:25:58 +04:00
2014-01-08 02:02:42 +04:00
/*
 * This link was active and the node still has an active link left:
 * remember where reception stood and flag the start of a changeover.
 */
if ( was_active_link & & tipc_node_active_links ( l_ptr - > owner ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > reset_checkpoint = checkpoint ;
l_ptr - > exp_msg_count = START_CHANGEOVER ;
}
/* Clean up all queues: */
link_release_outqueue ( l_ptr ) ;
2011-11-04 21:24:29 +04:00
kfree_skb ( l_ptr - > proto_msg_queue ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > proto_msg_queue = NULL ;
2013-12-11 08:45:38 +04:00
/* oldest_deferred_in is set to NULL further down, after the wakeup call */
kfree_skb_list ( l_ptr - > oldest_deferred_in ) ;
2006-01-02 21:04:38 +03:00
/* Release every port still waiting on this link */
if ( ! list_empty ( & l_ptr - > waiting_ports ) )
2006-01-18 02:38:21 +03:00
tipc_link_wakeup_ports ( l_ptr , 1 ) ;
2006-01-02 21:04:38 +03:00
/* Return send/receive bookkeeping to the values set at link creation */
l_ptr - > retransm_queue_head = 0 ;
l_ptr - > retransm_queue_size = 0 ;
l_ptr - > last_out = NULL ;
l_ptr - > first_out = NULL ;
l_ptr - > next_out = NULL ;
l_ptr - > unacked_window = 0 ;
l_ptr - > checkpoint = 1 ;
l_ptr - > next_out_no = 1 ;
l_ptr - > deferred_inqueue_sz = 0 ;
l_ptr - > oldest_deferred_in = NULL ;
l_ptr - > newest_deferred_in = NULL ;
l_ptr - > fsm_msg_cnt = 0 ;
l_ptr - > stale_count = 0 ;
link_reset_statistics ( l_ptr ) ;
}
2014-02-14 02:29:06 +04:00
void tipc_link_reset_list ( struct tipc_bearer * b_ptr )
{
struct tipc_link * l_ptr ;
list_for_each_entry ( l_ptr , & b_ptr - > links , link_list ) {
struct tipc_node * n_ptr = l_ptr - > owner ;
spin_lock_bh ( & n_ptr - > lock ) ;
tipc_link_reset ( l_ptr ) ;
spin_unlock_bh ( & n_ptr - > lock ) ;
}
}
2006-01-02 21:04:38 +03:00
2011-12-30 05:58:42 +04:00
static void link_activate ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
2006-06-26 10:52:50 +04:00
l_ptr - > next_in_no = l_ptr - > stats . recv_info = 1 ;
2006-01-18 02:38:21 +03:00
tipc_node_link_up ( l_ptr - > owner , l_ptr ) ;
tipc_bearer_add_dest ( l_ptr - > b_ptr , l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_state_event - link finite state machine
* @ l_ptr : pointer to link
* @ event : state machine event to process
*/
2012-04-15 09:58:06 +04:00
static void link_state_event ( struct tipc_link * l_ptr , unsigned int event )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * other ;
2006-01-02 21:04:38 +03:00
u32 cont_intv = l_ptr - > continuity_interval ;
if ( ! l_ptr - > started & & ( event ! = STARTING_EVT ) )
return ; /* Not yet. */
2013-12-11 08:45:44 +04:00
/* Check whether changeover is going on */
if ( l_ptr - > exp_msg_count ) {
2010-12-31 21:59:35 +03:00
if ( event = = TIMEOUT_EVT )
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
2013-12-11 08:45:44 +04:00
return ;
2006-01-02 21:04:38 +03:00
}
switch ( l_ptr - > state ) {
case WORKING_WORKING :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
break ;
case TIMEOUT_EVT :
if ( l_ptr - > next_in_no ! = l_ptr - > checkpoint ) {
l_ptr - > checkpoint = l_ptr - > next_in_no ;
2006-01-18 02:38:21 +03:00
if ( tipc_bclink_acks_missing ( l_ptr - > owner ) ) {
2007-02-09 17:25:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG ,
2006-01-18 02:38:21 +03:00
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
} else if ( l_ptr - > max_pkt < l_ptr - > max_pkt_target ) {
2007-02-09 17:25:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG ,
2006-01-18 02:38:21 +03:00
1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
}
link_set_timer ( l_ptr , cont_intv ) ;
break ;
}
l_ptr - > state = WORKING_UNKNOWN ;
l_ptr - > fsm_msg_cnt = 0 ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv / 4 ) ;
break ;
case RESET_MSG :
2012-06-29 08:16:37 +04:00
pr_info ( " %s<%s>, requested by peer \n " , link_rst_msg ,
l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , ACTIVATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in WW state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case WORKING_UNKNOWN :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
2012-06-29 08:16:37 +04:00
pr_info ( " %s<%s>, requested by peer while probing \n " ,
link_rst_msg , l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , ACTIVATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case TIMEOUT_EVT :
if ( l_ptr - > next_in_no ! = l_ptr - > checkpoint ) {
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
l_ptr - > checkpoint = l_ptr - > next_in_no ;
2006-01-18 02:38:21 +03:00
if ( tipc_bclink_acks_missing ( l_ptr - > owner ) ) {
tipc_link_send_proto_msg ( l_ptr , STATE_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
}
link_set_timer ( l_ptr , cont_intv ) ;
} else if ( l_ptr - > fsm_msg_cnt < l_ptr - > abort_limit ) {
2007-02-09 17:25:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG ,
2006-01-18 02:38:21 +03:00
1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv / 4 ) ;
} else { /* Link has failed */
2012-06-29 08:16:37 +04:00
pr_warn ( " %s<%s>, peer not responding \n " ,
link_rst_msg , l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > state = RESET_UNKNOWN ;
l_ptr - > fsm_msg_cnt = 0 ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , RESET_MSG ,
0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
}
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in WU state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case RESET_UNKNOWN :
switch ( event ) {
case TRAFFIC_MSG_EVT :
break ;
case ACTIVATE_MSG :
other = l_ptr - > owner - > active_links [ 0 ] ;
2010-12-31 21:59:27 +03:00
if ( other & & link_working_unknown ( other ) )
2006-01-02 21:04:38 +03:00
break ;
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_activate ( l_ptr ) ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
if ( l_ptr - > owner - > working_links = = 1 )
tipc_link_send_sync ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
l_ptr - > state = RESET_RESET ;
l_ptr - > fsm_msg_cnt = 0 ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , ACTIVATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case STARTING_EVT :
l_ptr - > started = 1 ;
/* fall through */
case TIMEOUT_EVT :
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , RESET_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in RU state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
case RESET_RESET :
switch ( event ) {
case TRAFFIC_MSG_EVT :
case ACTIVATE_MSG :
other = l_ptr - > owner - > active_links [ 0 ] ;
2010-12-31 21:59:27 +03:00
if ( other & & link_working_unknown ( other ) )
2006-01-02 21:04:38 +03:00
break ;
l_ptr - > state = WORKING_WORKING ;
l_ptr - > fsm_msg_cnt = 0 ;
link_activate ( l_ptr ) ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG , 1 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
if ( l_ptr - > owner - > working_links = = 1 )
tipc_link_send_sync ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
link_set_timer ( l_ptr , cont_intv ) ;
break ;
case RESET_MSG :
break ;
case TIMEOUT_EVT :
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , ACTIVATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > fsm_msg_cnt + + ;
link_set_timer ( l_ptr , cont_intv ) ;
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " %s%u in RR state \n " , link_unk_evt , event ) ;
2006-01-02 21:04:38 +03:00
}
break ;
default :
2012-06-29 08:16:37 +04:00
pr_err ( " Unknown link state %u/%u \n " , l_ptr - > state , event ) ;
2006-01-02 21:04:38 +03:00
}
}
/*
* link_bundle_buf ( ) : Append contents of a buffer to
2007-02-09 17:25:21 +03:00
* the tail of an existing one .
2006-01-02 21:04:38 +03:00
*/
2013-06-17 18:54:47 +04:00
static int link_bundle_buf ( struct tipc_link * l_ptr , struct sk_buff * bundler ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf )
{
struct tipc_msg * bundler_msg = buf_msg ( bundler ) ;
struct tipc_msg * msg = buf_msg ( buf ) ;
u32 size = msg_size ( msg ) ;
2006-06-29 23:32:46 +04:00
u32 bundle_size = msg_size ( bundler_msg ) ;
u32 to_pos = align ( bundle_size ) ;
u32 pad = to_pos - bundle_size ;
2006-01-02 21:04:38 +03:00
if ( msg_user ( bundler_msg ) ! = MSG_BUNDLER )
return 0 ;
if ( msg_type ( bundler_msg ) ! = OPEN_MSG )
return 0 ;
2006-06-29 23:32:46 +04:00
if ( skb_tailroom ( bundler ) < ( pad + size ) )
2006-01-02 21:04:38 +03:00
return 0 ;
2010-05-11 18:30:10 +04:00
if ( l_ptr - > max_pkt < ( to_pos + size ) )
2006-07-04 06:39:36 +04:00
return 0 ;
2006-01-02 21:04:38 +03:00
2006-06-29 23:32:46 +04:00
skb_put ( bundler , pad + size ) ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data_offset ( bundler , to_pos , buf - > data , size ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( bundler_msg , to_pos + size ) ;
msg_set_msgcnt ( bundler_msg , msg_msgcnt ( bundler_msg ) + 1 ) ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_bundled + + ;
return 1 ;
}
2011-12-30 05:58:42 +04:00
static void link_add_to_outqueue ( struct tipc_link * l_ptr ,
2006-03-21 09:37:04 +03:00
struct sk_buff * buf ,
struct tipc_msg * msg )
2006-01-02 21:04:38 +03:00
{
u32 ack = mod ( l_ptr - > next_in_no - 1 ) ;
u32 seqno = mod ( l_ptr - > next_out_no + + ) ;
msg_set_word ( msg , 2 , ( ( ack < < 16 ) | seqno ) ) ;
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
buf - > next = NULL ;
if ( l_ptr - > first_out ) {
l_ptr - > last_out - > next = buf ;
l_ptr - > last_out = buf ;
} else
l_ptr - > first_out = l_ptr - > last_out = buf ;
2011-01-18 23:02:50 +03:00
2006-01-02 21:04:38 +03:00
l_ptr - > out_queue_size + + ;
2011-01-18 23:02:50 +03:00
if ( l_ptr - > out_queue_size > l_ptr - > stats . max_queue_sz )
l_ptr - > stats . max_queue_sz = l_ptr - > out_queue_size ;
2006-01-02 21:04:38 +03:00
}
2011-12-30 05:58:42 +04:00
/*
 * link_add_chain_to_outqueue(): append a NULL-terminated chain of buffers
 * (linked through ->next) to the link's out queue, tagging every message
 * with the given long message number.
 *
 * If the link has no pending 'next_out' buffer, the head of the chain
 * becomes the next buffer to be pushed.
 */
static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
				       struct sk_buff *buf_chain,
				       u32 long_msgno)
{
	struct sk_buff *following;
	struct tipc_msg *hdr;

	if (!l_ptr->next_out)
		l_ptr->next_out = buf_chain;

	for (; buf_chain; buf_chain = following) {
		/* Save successor first: queueing resets the buffer's ->next */
		following = buf_chain->next;
		hdr = buf_msg(buf_chain);
		msg_set_long_msgno(hdr, long_msgno);
		link_add_to_outqueue(l_ptr, buf_chain, hdr);
	}
}
2007-02-09 17:25:21 +03:00
/*
* tipc_link_send_buf ( ) is the ' full path ' for messages , called from
2006-01-02 21:04:38 +03:00
* inside TIPC when the ' fast path ' in tipc_send_buf
* has failed , and from link_send ( )
*/
2011-12-30 05:58:42 +04:00
int tipc_link_send_buf ( struct tipc_link * l_ptr , struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
u32 size = msg_size ( msg ) ;
u32 dsz = msg_data_sz ( msg ) ;
u32 queue_size = l_ptr - > out_queue_size ;
2010-05-11 18:30:12 +04:00
u32 imp = tipc_msg_tot_importance ( msg ) ;
2006-01-02 21:04:38 +03:00
u32 queue_limit = l_ptr - > queue_limit [ imp ] ;
2010-05-11 18:30:10 +04:00
u32 max_packet = l_ptr - > max_pkt ;
2006-01-02 21:04:38 +03:00
/* Match msg importance against queue limits: */
if ( unlikely ( queue_size > = queue_limit ) ) {
if ( imp < = TIPC_CRITICAL_IMPORTANCE ) {
2011-04-19 18:17:58 +04:00
link_schedule_port ( l_ptr , msg_origport ( msg ) , size ) ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2011-04-19 18:17:58 +04:00
return - ELINKCONG ;
2006-01-02 21:04:38 +03:00
}
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
if ( imp > CONN_MANAGER ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %s<%s>, send queue full " , link_rst_msg ,
l_ptr - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
}
return dsz ;
}
/* Fragmentation needed ? */
if ( size > max_packet )
2010-10-13 17:20:35 +04:00
return link_send_long_buf ( l_ptr , buf ) ;
2006-01-02 21:04:38 +03:00
2012-04-30 23:29:02 +04:00
/* Packet can be queued or sent. */
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
if ( likely ( ! link_congested ( l_ptr ) ) ) {
2006-01-02 21:04:38 +03:00
link_add_to_outqueue ( l_ptr , buf , msg ) ;
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
l_ptr - > unacked_window = 0 ;
2006-01-02 21:04:38 +03:00
return dsz ;
}
2012-04-30 23:29:02 +04:00
/* Congestion: can message be bundled ? */
2006-01-02 21:04:38 +03:00
if ( ( msg_user ( msg ) ! = CHANGEOVER_PROTOCOL ) & &
( msg_user ( msg ) ! = MSG_FRAGMENTER ) ) {
/* Try adding message to an existing bundle */
2007-02-09 17:25:21 +03:00
if ( l_ptr - > next_out & &
2012-11-15 07:34:45 +04:00
link_bundle_buf ( l_ptr , l_ptr - > last_out , buf ) )
2006-01-02 21:04:38 +03:00
return dsz ;
/* Try creating a new bundle */
if ( size < = max_packet * 2 / 3 ) {
2010-10-13 17:20:35 +04:00
struct sk_buff * bundler = tipc_buf_acquire ( max_packet ) ;
2006-01-02 21:04:38 +03:00
struct tipc_msg bundler_hdr ;
if ( bundler ) {
2010-05-11 18:30:12 +04:00
tipc_msg_init ( & bundler_hdr , MSG_BUNDLER , OPEN_MSG ,
2008-06-05 04:37:34 +04:00
INT_H_SIZE , l_ptr - > addr ) ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( bundler , & bundler_hdr ,
INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
skb_trim ( bundler , INT_H_SIZE ) ;
link_bundle_buf ( l_ptr , bundler , buf ) ;
buf = bundler ;
msg = buf_msg ( buf ) ;
l_ptr - > stats . sent_bundles + + ;
}
}
}
if ( ! l_ptr - > next_out )
l_ptr - > next_out = buf ;
link_add_to_outqueue ( l_ptr , buf , msg ) ;
return dsz ;
}
2007-02-09 17:25:21 +03:00
/*
* tipc_link_send ( ) : same as tipc_link_send_buf ( ) , but the link to use has
2006-01-02 21:04:38 +03:00
* not been selected yet , and the the owner node is not locked
* Called by TIPC internal users , e . g . the name distributor
*/
2006-01-18 02:38:21 +03:00
int tipc_link_send ( struct sk_buff * buf , u32 dest , u32 selector )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2006-01-02 21:04:38 +03:00
int res = - ELINKCONG ;
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2010-12-31 21:59:18 +03:00
n_ptr = tipc_node_find ( dest ) ;
2006-01-02 21:04:38 +03:00
if ( n_ptr ) {
2006-01-18 02:38:21 +03:00
tipc_node_lock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr = n_ptr - > active_links [ selector & 1 ] ;
2010-12-31 21:59:35 +03:00
if ( l_ptr )
2006-01-18 02:38:21 +03:00
res = tipc_link_send_buf ( l_ptr , buf ) ;
2010-12-31 21:59:35 +03:00
else
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
} else {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
tipc: introduce message to synchronize broadcast link
Upon establishing a first link between two nodes, there is
currently a risk that the two endpoints will disagree on exactly
which sequence number reception and acknowledging of broadcast
packets should start.
The following scenarios may happen:
1: Node A sends an ACTIVATE message to B, telling it to start acking
packets from sequence number N.
2: Node A sends out broadcast N, but does not expect an acknowledge
from B, since B is not yet in its broadcast receiver's list.
3: Node A receives ACK for N from all nodes except B, and releases
packet N.
4: Node B receives the ACTIVATE, activates its link endpoint, and
stores the value N as sequence number of first expected packet.
5: Node B sends a NAME_DISTR message to A.
6: Node A receives the NAME_DISTR message, and activates its endpoint.
At this moment B is added to A's broadcast receiver's set.
Node A also sets sequence number 0 as the first broadcast packet
to be received from B.
7: Node A sends broadcast N+1.
8: B receives N+1, determines there is a gap in the sequence, since
it is expecting N, and sends a NACK for N back to A.
9: Node A has already released N, so no retransmission is possible.
The broadcast link in direction A->B is stale.
In addition to, or instead of, 7-9 above, the following may happen:
10: Node B sends broadcast M > 0 to A.
11: Node A receives M, falsely decides there must be a gap, since
it is expecting packet 0, and asks for retransmission of packets
[0,M-1].
12: Node B has already released these packets, so the broadcast
link is stale in direction B->A.
We solve this problem by introducing a new unicast message type,
BCAST_PROTOCOL/STATE, to convey the sequence number of the next
sent broadcast packet to the other endpoint, at exactly the moment
that endpoint is added to the own node's broadcast receivers list,
and before any other unicast messages are permitted to be sent.
Furthermore, we don't allow any node to start receiving and
processing broadcast packets until this new synchronization
message has been received.
To maintain backwards compatibility, we still open up for
broadcast reception if we receive a NAME_DISTR message without
any preceding broadcast sync message. In this case, we must
assume that the other end has an older code version, and will
never send out the new synchronization message. Hence, for mixed
old and new nodes, the issue arising in 7-12 of the above may
happen with the same probability as before.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 09:51:31 +04:00
/*
* tipc_link_send_sync - synchronize broadcast link endpoints .
*
* Give a newly added peer node the sequence number where it should
* start receiving and acking broadcast packets .
*
* Called with node locked
*/
static void tipc_link_send_sync ( struct tipc_link * l )
{
struct sk_buff * buf ;
struct tipc_msg * msg ;
buf = tipc_buf_acquire ( INT_H_SIZE ) ;
if ( ! buf )
return ;
msg = buf_msg ( buf ) ;
tipc_msg_init ( msg , BCAST_PROTOCOL , STATE_MSG , INT_H_SIZE , l - > addr ) ;
msg_set_last_bcast ( msg , l - > owner - > bclink . acked ) ;
link_add_chain_to_outqueue ( l , buf , 0 ) ;
tipc_link_push_queue ( l ) ;
}
/*
* tipc_link_recv_sync - synchronize broadcast link endpoints .
* Receive the sequence number where we should start receiving and
* acking broadcast packets from a newly added peer node , and open
* up for reception of such packets .
*
* Called with node locked
*/
static void tipc_link_recv_sync ( struct tipc_node * n , struct sk_buff * buf )
{
struct tipc_msg * msg = buf_msg ( buf ) ;
n - > bclink . last_sent = n - > bclink . last_in = msg_last_bcast ( msg ) ;
n - > bclink . recv_permitted = true ;
kfree_skb ( buf ) ;
}
/*
 * tipc_link_send_names - send name table entries to new neighbor
 *
 * Send routine for bulk delivery of name table messages when contact
 * with a new neighbor occurs. No link congestion checking is performed
 * because name table messages *must* be delivered. The messages must be
 * small enough not to require fragmentation.
 *
 * The list entries are accessed as struct sk_buff through pointer casts.
 * If the destination node or its first active link cannot be found, the
 * messages still on the list are freed.
 *
 * Called without any locks held.
 */
void tipc_link_send_names(struct list_head *message_list, u32 dest)
{
	struct tipc_node *n_ptr;
	struct tipc_link *l_ptr;
	struct sk_buff *head;
	struct sk_buff *tail;
	struct sk_buff *skb;
	struct sk_buff *tmp;

	if (list_empty(message_list))
		return;

	read_lock_bh(&tipc_net_lock);
	n_ptr = tipc_node_find(dest);
	if (!n_ptr)
		goto unlock_net;

	tipc_node_lock(n_ptr);
	l_ptr = n_ptr->active_links[0];
	if (l_ptr) {
		head = (struct sk_buff *)message_list->next;
		tail = (struct sk_buff *)message_list->prev;

		/* convert circular list to linear list */
		tail->next = NULL;
		link_add_chain_to_outqueue(l_ptr, head, 0);
		tipc_link_push_queue(l_ptr);

		/* Buffers now belong to the link; leave the list empty */
		INIT_LIST_HEAD(message_list);
	}
	tipc_node_unlock(n_ptr);

unlock_net:
	read_unlock_bh(&tipc_net_lock);

	/* discard the messages if they couldn't be sent */
	list_for_each_safe(skb, tmp, ((struct sk_buff *)message_list)) {
		list_del((struct list_head *)skb);
		kfree_skb(skb);
	}
}
2007-02-09 17:25:21 +03:00
/*
* link_send_buf_fast : Entry for data messages where the
2006-01-02 21:04:38 +03:00
* destination link is known and the header is complete ,
* inclusive total message length . Very time critical .
* Link is locked . Returns user data length .
*/
2011-12-30 05:58:42 +04:00
static int link_send_buf_fast ( struct tipc_link * l_ptr , struct sk_buff * buf ,
2006-03-21 09:37:04 +03:00
u32 * used_max_pkt )
2006-01-02 21:04:38 +03:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
int res = msg_data_sz ( msg ) ;
if ( likely ( ! link_congested ( l_ptr ) ) ) {
2010-05-11 18:30:10 +04:00
if ( likely ( msg_size ( msg ) < = l_ptr - > max_pkt ) ) {
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
link_add_to_outqueue ( l_ptr , buf , msg ) ;
tipc_bearer_send ( l_ptr - > b_ptr , buf ,
& l_ptr - > media_addr ) ;
l_ptr - > unacked_window = 0 ;
return res ;
}
else
2010-05-11 18:30:10 +04:00
* used_max_pkt = l_ptr - > max_pkt ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
return tipc_link_send_buf ( l_ptr , buf ) ; /* All other cases */
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/*
* tipc_link_send_sections_fast : Entry for messages where the
2006-01-02 21:04:38 +03:00
* destination processor is known and the header is complete ,
2007-02-09 17:25:21 +03:00
* except for total message length .
2006-01-02 21:04:38 +03:00
* Returns user data length or errno .
*/
2011-01-07 19:43:40 +03:00
int tipc_link_send_sections_fast ( struct tipc_port * sender ,
2006-01-18 02:38:21 +03:00
struct iovec const * msg_sect ,
2013-10-18 09:23:15 +04:00
unsigned int len , u32 destaddr )
2006-01-02 21:04:38 +03:00
{
2011-01-07 19:43:40 +03:00
struct tipc_msg * hdr = & sender - > phdr ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2006-01-02 21:04:38 +03:00
int res ;
u32 selector = msg_origport ( hdr ) & 1 ;
again :
/*
* Try building message using port ' s max_pkt hint .
* ( Must not hold any locks while building message . )
*/
2013-10-18 09:23:15 +04:00
res = tipc_msg_build ( hdr , msg_sect , len , sender - > max_pkt , & buf ) ;
2013-06-17 18:54:49 +04:00
/* Exit if build request was invalid */
if ( unlikely ( res < 0 ) )
return res ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2010-12-31 21:59:18 +03:00
node = tipc_node_find ( destaddr ) ;
2006-01-02 21:04:38 +03:00
if ( likely ( node ) ) {
2006-01-18 02:38:21 +03:00
tipc_node_lock ( node ) ;
2006-01-02 21:04:38 +03:00
l_ptr = node - > active_links [ selector ] ;
if ( likely ( l_ptr ) ) {
if ( likely ( buf ) ) {
res = link_send_buf_fast ( l_ptr , buf ,
2011-01-07 19:43:40 +03:00
& sender - > max_pkt ) ;
2006-01-02 21:04:38 +03:00
exit :
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
/* Exit if link (or bearer) is congested */
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
if ( link_congested ( l_ptr ) ) {
2006-01-02 21:04:38 +03:00
res = link_schedule_port ( l_ptr ,
2011-01-07 19:43:40 +03:00
sender - > ref , res ) ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* Message size exceeds max_pkt hint ; update hint ,
* then re - try fast path or fragment the message
*/
2011-01-07 19:43:40 +03:00
sender - > max_pkt = l_ptr - > max_pkt ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
2011-01-07 19:43:40 +03:00
if ( ( msg_hdr_sz ( hdr ) + res ) < = sender - > max_pkt )
2006-01-02 21:04:38 +03:00
goto again ;
2013-10-18 09:23:15 +04:00
return link_send_sections_long ( sender , msg_sect , len ,
tipc: Avoid recomputation of outgoing message length
Rework TIPC's message sending routines to take advantage of the total
amount of data value passed to it by the kernel socket infrastructure.
This change eliminates the need for TIPC to compute the size of outgoing
messages itself, as well as the check for an oversize message in
tipc_msg_build(). In addition, this change warrants an explanation:
- res = send_packet(NULL, sock, &my_msg, 0);
+ res = send_packet(NULL, sock, &my_msg, bytes_to_send);
Previously, the final argument to send_packet() was ignored (since the
amount of data being sent was recalculated by a lower-level routine)
and we could just pass in a dummy value (0). Now that the
recalculation is being eliminated, the argument value being passed to
send_packet() is significant and we have to supply the actual amount
of data we want to send.
Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-04-21 19:42:07 +04:00
destaddr ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
/* Couldn't find a link to the destination node */
if ( buf )
return tipc_reject_msg ( buf , TIPC_ERR_NO_NODE ) ;
if ( res > = 0 )
2013-10-18 09:23:15 +04:00
return tipc_port_reject_sections ( sender , hdr , msg_sect ,
len , TIPC_ERR_NO_NODE ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
2007-02-09 17:25:21 +03:00
/*
* link_send_sections_long ( ) : Entry for long messages where the
2006-01-02 21:04:38 +03:00
* destination node is known and the header is complete ,
2007-02-09 17:25:21 +03:00
* inclusive total message length .
2006-01-02 21:04:38 +03:00
* Link and bearer congestion status have been checked to be ok ,
* and are ignored if they change .
*
* Note that fragments do not use the full link MTU so that they won ' t have
* to undergo refragmentation if link changeover causes them to be sent
* over another link with an additional tunnel header added as prefix .
* ( Refragmentation will still occur if the other link has a smaller MTU . )
*
* Returns user data length or errno .
*/
2011-01-07 19:43:40 +03:00
static int link_send_sections_long ( struct tipc_port * sender ,
2006-01-02 21:04:38 +03:00
struct iovec const * msg_sect ,
2013-10-18 09:23:15 +04:00
unsigned int len , u32 destaddr )
2006-01-02 21:04:38 +03:00
{
/*
 * Two phases:
 *  1) build a chain of fragment buffers, copying the port header and the
 *     user data sections into them;
 *  2) look up the destination node, pick a link and append the chain to
 *     that link's send queue.
 * Returns dsz (== len) on success, -ENOMEM / -EFAULT on failure, or the
 * value of tipc_port_reject_sections() when no node/link is available.
 */
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2011-01-07 19:43:40 +03:00
struct tipc_msg * hdr = & sender - > phdr ;
2013-10-18 09:23:15 +04:00
u32 dsz = len ;
2010-12-31 21:59:32 +03:00
u32 max_pkt , fragm_sz , rest ;
2006-01-02 21:04:38 +03:00
struct tipc_msg fragm_hdr ;
2010-12-31 21:59:32 +03:00
struct sk_buff * buf , * buf_chain , * prev ;
u32 fragm_crs , fragm_rest , hsz , sect_rest ;
2013-10-18 09:23:16 +04:00
const unchar __user * sect_crs ;
2006-01-02 21:04:38 +03:00
int curr_sect ;
u32 fragm_no ;
2013-06-17 18:54:50 +04:00
int res = 0 ;
2006-01-02 21:04:38 +03:00
/*
 * Restart point: reached again when the selected link's max_pkt turns out
 * to be smaller than the max_pkt the chain was built with (see the
 * l_ptr->max_pkt check further down). All cursors are re-initialised here.
 */
again :
fragm_no = 1 ;
2011-01-07 19:43:40 +03:00
max_pkt = sender - > max_pkt - INT_H_SIZE ;
2006-01-02 21:04:38 +03:00
/* leave room for tunnel header in case of link changeover */
2007-02-09 17:25:21 +03:00
fragm_sz = max_pkt - INT_H_SIZE ;
2006-01-02 21:04:38 +03:00
/* leave room for fragmentation header in each fragment */
rest = dsz ;
fragm_crs = 0 ;
fragm_rest = 0 ;
sect_rest = 0 ;
2006-03-21 09:36:47 +03:00
sect_crs = NULL ;
2006-01-02 21:04:38 +03:00
curr_sect = - 1 ;
2012-04-30 23:29:02 +04:00
/* Prepare reusable fragment header */
2010-05-11 18:30:12 +04:00
tipc_msg_init ( & fragm_hdr , MSG_FRAGMENTER , FIRST_FRAGMENT ,
2008-06-05 04:37:34 +04:00
INT_H_SIZE , msg_destnode ( hdr ) ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( & fragm_hdr , max_pkt ) ;
msg_set_fragm_no ( & fragm_hdr , 1 ) ;
2012-04-30 23:29:02 +04:00
/* Prepare header of first fragment */
2010-10-13 17:20:35 +04:00
buf_chain = buf = tipc_buf_acquire ( max_pkt ) ;
2006-01-02 21:04:38 +03:00
if ( ! buf )
return - ENOMEM ;
buf - > next = NULL ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , & fragm_hdr , INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
hsz = msg_hdr_sz ( hdr ) ;
2007-03-31 18:55:19 +04:00
/* the first fragment carries the port's message header right after the fragment header */
skb_copy_to_linear_data_offset ( buf , INT_H_SIZE , hdr , hsz ) ;
2006-01-02 21:04:38 +03:00
2012-04-30 23:29:02 +04:00
/* Chop up message */
2006-01-02 21:04:38 +03:00
fragm_crs = INT_H_SIZE + hsz ;
fragm_rest = fragm_sz - hsz ;
do { /* For all sections */
u32 sz ;
/* current section is used up (or none selected yet): move to the next iovec entry */
/* NOTE(review): curr_sect is not bounds-checked here; this appears to rely on len not exceeding the sum of the iov_len values - confirm against callers */
if ( ! sect_rest ) {
sect_rest = msg_sect [ + + curr_sect ] . iov_len ;
2013-10-18 09:23:16 +04:00
sect_crs = msg_sect [ curr_sect ] . iov_base ;
2006-01-02 21:04:38 +03:00
}
/* copy as much as fits: bounded by what is left of the section and of the fragment */
if ( sect_rest < fragm_rest )
sz = sect_rest ;
else
sz = fragm_rest ;
2013-06-17 18:54:43 +04:00
if ( copy_from_user ( buf - > data + fragm_crs , sect_crs , sz ) ) {
2013-06-17 18:54:50 +04:00
res = - EFAULT ;
2006-01-02 21:04:38 +03:00
/* shared failure exit (also entered via goto on allocation failure below): free every fragment built so far and return res */
error :
2013-12-11 08:45:38 +04:00
kfree_skb_list ( buf_chain ) ;
2013-06-17 18:54:50 +04:00
return res ;
2013-06-17 18:54:43 +04:00
}
2006-01-02 21:04:38 +03:00
/* advance the section cursor, the fragment cursor and the overall remaining count */
sect_crs + = sz ;
sect_rest - = sz ;
fragm_crs + = sz ;
fragm_rest - = sz ;
rest - = sz ;
if ( ! fragm_rest & & rest ) {
/* Initiate new fragment: */
/* if the remaining data fits in one fragment, it is the last one and is sized to fit exactly */
if ( rest < = fragm_sz ) {
fragm_sz = rest ;
2010-12-31 21:59:32 +03:00
msg_set_type ( & fragm_hdr , LAST_FRAGMENT ) ;
2006-01-02 21:04:38 +03:00
} else {
msg_set_type ( & fragm_hdr , FRAGMENT ) ;
}
msg_set_size ( & fragm_hdr , fragm_sz + INT_H_SIZE ) ;
msg_set_fragm_no ( & fragm_hdr , + + fragm_no ) ;
prev = buf ;
2010-10-13 17:20:35 +04:00
buf = tipc_buf_acquire ( fragm_sz + INT_H_SIZE ) ;
2013-06-17 18:54:50 +04:00
if ( ! buf ) {
res = - ENOMEM ;
2006-01-02 21:04:38 +03:00
goto error ;
2013-06-17 18:54:50 +04:00
}
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
/* link the new fragment to the tail of the chain and stamp it with the updated fragment header */
buf - > next = NULL ;
2006-01-02 21:04:38 +03:00
prev - > next = buf ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , & fragm_hdr , INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
fragm_crs = INT_H_SIZE ;
fragm_rest = fragm_sz ;
}
2010-12-31 21:59:32 +03:00
} while ( rest > 0 ) ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* Now we have a buffer chain . Select a link and check
* that packet size is still OK
*/
2010-12-31 21:59:18 +03:00
node = tipc_node_find ( destaddr ) ;
2006-01-02 21:04:38 +03:00
if ( likely ( node ) ) {
2006-01-18 02:38:21 +03:00
tipc_node_lock ( node ) ;
2011-01-07 19:43:40 +03:00
/* choose one of the two active link slots from the low bit of the sender's port reference */
l_ptr = node - > active_links [ sender - > ref & 1 ] ;
2006-01-02 21:04:38 +03:00
if ( ! l_ptr ) {
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
2006-01-02 21:04:38 +03:00
goto reject ;
}
2010-05-11 18:30:10 +04:00
/* link's packet limit is below the one used for fragmenting: store the link's limit in the port, drop the chain and rebuild it from 'again' */
if ( l_ptr - > max_pkt < max_pkt ) {
2011-01-07 19:43:40 +03:00
sender - > max_pkt = l_ptr - > max_pkt ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
2013-12-11 08:45:38 +04:00
kfree_skb_list ( buf_chain ) ;
2006-01-02 21:04:38 +03:00
goto again ;
}
} else {
/* no node, or (via goto) no active link: free the chain; the return value comes from tipc_port_reject_sections() */
reject :
2013-12-11 08:45:38 +04:00
kfree_skb_list ( buf_chain ) ;
2013-10-18 09:23:15 +04:00
return tipc_port_reject_sections ( sender , hdr , msg_sect ,
len , TIPC_ERR_NO_NODE ) ;
2006-01-02 21:04:38 +03:00
}
2011-04-21 19:50:42 +04:00
/* Append chain of fragments to send queue & send them */
2011-04-17 19:44:24 +04:00
l_ptr - > long_msg_seq_no + + ;
2011-04-21 19:50:42 +04:00
link_add_chain_to_outqueue ( l_ptr , buf_chain , l_ptr - > long_msg_seq_no ) ;
l_ptr - > stats . sent_fragments + = fragm_no ;
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_fragmented + + ;
2006-01-18 02:38:21 +03:00
tipc_link_push_queue ( l_ptr ) ;
tipc_node_unlock ( node ) ;
2006-01-02 21:04:38 +03:00
/* success: report the user data length that was queued */
return dsz ;
}
2007-02-09 17:25:21 +03:00
/*
2006-01-18 02:38:21 +03:00
* tipc_link_push_packet : Push one unsent packet to the media
2006-01-02 21:04:38 +03:00
*/
2014-01-05 01:47:48 +04:00
static u32 tipc_link_push_packet ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
/*
 * Tries, in this order: a pending retransmission, the deferred protocol
 * message, then the next unsent data message. At most one buffer is handed
 * to tipc_bearer_send() per call.
 * Returns 0 when a buffer was sent, 1 when there was nothing to send (or
 * the send window check for data messages failed).
 */
struct sk_buff * buf = l_ptr - > first_out ;
u32 r_q_size = l_ptr - > retransm_queue_size ;
u32 r_q_head = l_ptr - > retransm_queue_head ;
/* Step to position where retransmission failed, if any, */
/* consider that buffers may have been released in meantime */
if ( r_q_size & & buf ) {
2007-02-09 17:25:21 +03:00
/* end of the retransmit range, capped at the last sequence number actually sent */
u32 last = lesser ( mod ( r_q_head + r_q_size ) ,
2006-01-02 21:04:38 +03:00
link_last_sent ( l_ptr ) ) ;
2011-10-25 00:03:12 +04:00
u32 first = buf_seqno ( buf ) ;
2006-01-02 21:04:38 +03:00
/* walk forward from the queue head until reaching the recorded retransmit head (buf may become NULL) */
while ( buf & & less ( first , r_q_head ) ) {
first = mod ( first + 1 ) ;
buf = buf - > next ;
}
/* re-base the retransmit window on where the walk actually ended */
l_ptr - > retransm_queue_head = r_q_head = first ;
l_ptr - > retransm_queue_size = r_q_size = mod ( last - first ) ;
}
/* Continue retransmission now, if there is anything: */
2010-03-15 10:58:45 +03:00
if ( r_q_size & & buf ) {
2006-01-02 21:04:38 +03:00
/* refresh the ack fields before resending */
msg_set_ack ( buf_msg ( buf ) , mod ( l_ptr - > next_in_no - 1 ) ) ;
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( buf_msg ( buf ) , l_ptr - > owner - > bclink . last_in ) ;
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
l_ptr - > retransm_queue_head = mod ( + + r_q_head ) ;
l_ptr - > retransm_queue_size = - - r_q_size ;
l_ptr - > stats . retransmitted + + ;
return 0 ;
2006-01-02 21:04:38 +03:00
}
/* Send deferred protocol message, if any: */
buf = l_ptr - > proto_msg_queue ;
if ( buf ) {
msg_set_ack ( buf_msg ( buf ) , mod ( l_ptr - > next_in_no - 1 ) ) ;
2010-12-31 21:59:32 +03:00
msg_set_bcast_ack ( buf_msg ( buf ) , l_ptr - > owner - > bclink . last_in ) ;
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
l_ptr - > unacked_window = 0 ;
/* the deferred protocol message is released after sending and the slot is cleared */
kfree_skb ( buf ) ;
l_ptr - > proto_msg_queue = NULL ;
return 0 ;
2006-01-02 21:04:38 +03:00
}
/* Send one deferred data message, if send window not full: */
buf = l_ptr - > next_out ;
if ( buf ) {
struct tipc_msg * msg = buf_msg ( buf ) ;
u32 next = msg_seqno ( msg ) ;
2011-10-25 00:03:12 +04:00
u32 first = buf_seqno ( l_ptr - > first_out ) ;
2006-01-02 21:04:38 +03:00
/* distance from the oldest queued buffer must stay below queue_limit[0] */
if ( mod ( next - first ) < l_ptr - > queue_limit [ 0 ] ) {
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
/* NOTE(review): presumably marks a sent bundle as closed so nothing more is appended to it - confirm against the bundling code */
if ( msg_user ( msg ) = = MSG_BUNDLER )
msg_set_type ( msg , CLOSED_MSG ) ;
l_ptr - > next_out = buf - > next ;
return 0 ;
2006-01-02 21:04:38 +03:00
}
}
2012-11-15 07:34:45 +04:00
/* nothing was sent */
return 1 ;
2006-01-02 21:04:38 +03:00
}
/*
* tipc_link_push_queue ( ) : push out the unsent messages of a link where
* congestion has abated . Node is locked
*/
2011-12-30 05:58:42 +04:00
void tipc_link_push_queue ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
u32 res ;
/* keep pushing one packet at a time until tipc_link_push_packet() returns non-zero, i.e. reports that it sent nothing */
do {
2006-01-18 02:38:21 +03:00
res = tipc_link_push_packet ( l_ptr ) ;
2008-07-15 09:44:01 +04:00
} while ( ! res ) ;
2006-01-02 21:04:38 +03:00
}
2006-06-26 10:40:01 +04:00
/*
 * link_reset_all - reset every link to the node with the given address
 * addr: node address, passed as unsigned long and cast to u32 for the lookup
 *
 * Holds tipc_net_lock (read) and the node lock while resetting. Returns
 * silently if the node can no longer be found.
 */
static void link_reset_all ( unsigned long addr )
{
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2006-06-26 10:40:01 +04:00
char addr_string [ 16 ] ;
u32 i ;
read_lock_bh ( & tipc_net_lock ) ;
n_ptr = tipc_node_find ( ( u32 ) addr ) ;
if ( ! n_ptr ) {
read_unlock_bh ( & tipc_net_lock ) ;
return ; /* node no longer exists */
}
tipc_node_lock ( n_ptr ) ;
2012-06-29 08:16:37 +04:00
pr_warn ( " Resetting all links to %s \n " ,
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ) ;
2006-06-26 10:40:01 +04:00
/* visit every bearer slot; empty slots are skipped */
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
if ( n_ptr - > links [ i ] ) {
2010-12-31 21:59:27 +03:00
link_print ( n_ptr - > links [ i ] , " Resetting link \n " ) ;
2006-06-26 10:40:01 +04:00
tipc_link_reset ( n_ptr - > links [ i ] ) ;
}
}
/* release in reverse order of acquisition */
tipc_node_unlock ( n_ptr ) ;
read_unlock_bh ( & tipc_net_lock ) ;
}
2011-12-30 05:58:42 +04:00
/*
 * link_retransmit_failure - handle a buffer that could not be retransmitted
 * l_ptr: link on which the failure occurred
 * buf: the buffer that was being retransmitted
 *
 * A link with a non-zero addr is reset directly. Otherwise the link is
 * treated as the broadcast link: diagnostics are logged and link_reset_all()
 * is scheduled through tipc_k_signal() for the node returned by
 * tipc_bclink_retransmit_to().
 */
static void link_retransmit_failure ( struct tipc_link * l_ptr ,
2013-06-17 18:54:47 +04:00
struct sk_buff * buf )
2006-06-26 10:40:01 +04:00
{
struct tipc_msg * msg = buf_msg ( buf ) ;
2012-06-29 08:16:37 +04:00
pr_warn ( " Retransmission failure on link <%s> \n " , l_ptr - > name ) ;
2006-06-26 10:40:01 +04:00
if ( l_ptr - > addr ) {
/* Handle failure on standard link */
2010-12-31 21:59:27 +03:00
link_print ( l_ptr , " Resetting link \n " ) ;
2006-06-26 10:40:01 +04:00
tipc_link_reset ( l_ptr ) ;
} else {
/* Handle failure on broadcast link */
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2006-06-26 10:40:01 +04:00
char addr_string [ 16 ] ;
2012-06-29 08:16:37 +04:00
pr_info ( " Msg seq number: %u, " , msg_seqno ( msg ) ) ;
pr_cont ( " Outstanding acks: %lu \n " ,
( unsigned long ) TIPC_SKB_CB ( buf ) - > handle ) ;
2006-10-04 03:25:34 +04:00
2011-01-18 21:53:16 +03:00
/* NOTE(review): n_ptr is locked and dereferenced without a NULL check - confirm tipc_bclink_retransmit_to() cannot return NULL here */
n_ptr = tipc_bclink_retransmit_to ( ) ;
2006-06-26 10:40:01 +04:00
tipc_node_lock ( n_ptr ) ;
2010-05-11 18:30:12 +04:00
tipc_addr_string_fill ( addr_string , n_ptr - > addr ) ;
2012-06-29 08:16:37 +04:00
pr_info ( " Broadcast link info for %s \n " , addr_string ) ;
2012-11-16 09:51:30 +04:00
pr_info ( " Reception permitted: %d, Acked: %u \n " ,
n_ptr - > bclink . recv_permitted ,
2012-06-29 08:16:37 +04:00
n_ptr - > bclink . acked ) ;
pr_info ( " Last in: %u, Oos state: %u, Last sent: %u \n " ,
n_ptr - > bclink . last_in ,
n_ptr - > bclink . oos_state ,
n_ptr - > bclink . last_sent ) ;
2006-06-26 10:40:01 +04:00
/* the reset of the node's links is handed to tipc_k_signal() rather than done inline; only the node address is passed along */
tipc_k_signal ( ( Handler ) link_reset_all , ( unsigned long ) n_ptr - > addr ) ;
tipc_node_unlock ( n_ptr ) ;
/* start the stale-retransmit count over */
l_ptr - > stale_count = 0 ;
}
}
2011-12-30 05:58:42 +04:00
/*
 * tipc_link_retransmit - resend buffers from a link's send queue
 * l_ptr: link to retransmit on
 * buf: first buffer to resend; NULL makes the call a no-op
 * retransmits: maximum number of buffers to resend
 *
 * If the same sequence number is asked for more than 100 times in a row,
 * the request is treated as a retransmission failure instead.
 */
void tipc_link_retransmit ( struct tipc_link * l_ptr , struct sk_buff * buf ,
2006-01-18 02:38:21 +03:00
u32 retransmits )
2006-01-02 21:04:38 +03:00
{
struct tipc_msg * msg ;
2006-06-26 10:40:01 +04:00
if ( ! buf )
return ;
msg = buf_msg ( buf ) ;
2007-02-09 17:25:21 +03:00
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
/* Detect repeated retransmit failures */
/* same sequence number as the previous request: count it, and give up once the count exceeds 100 */
if ( l_ptr - > last_retransmitted = = msg_seqno ( msg ) ) {
if ( + + l_ptr - > stale_count > 100 ) {
link_retransmit_failure ( l_ptr , buf ) ;
return ;
2006-06-26 10:40:01 +04:00
}
} else {
tipc: remove interface state mirroring in bearer
struct 'tipc_bearer' is a generic representation of the underlying
media type, and exists in a one-to-one relationship to each interface
TIPC is using. The struct contains a 'blocked' flag that mirrors the
operational and execution state of the represented interface, and is
updated through notification calls from the latter. The users of
tipc_bearer are checking this flag before each attempt to send a
packet via the interface.
This state mirroring serves no purpose in the current code base. TIPC
links will not discover a media failure any faster through this
mechanism, and in reality the flag only adds overhead at packet
sending and reception.
Furthermore, the fact that the flag needs to be protected by a spinlock
aggregated into tipc_bearer has turned out to cause a serious and
completely unnecessary deadlock problem.
CPU0 CPU1
---- ----
Time 0: bearer_disable() link_timeout()
Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue()
Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr)
Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock)
Time 4: del_timer_sync(&req->timer)
I.e., del_timer_sync() on CPU0 never returns, because the timer handler
on CPU1 is waiting for the bearer lock.
We eliminate the 'blocked' flag from struct tipc_bearer, along with all
tests on this flag. This not only resolves the deadlock, but also
simplifies and speeds up the data path execution of TIPC. It also fits
well into our ongoing effort to make the locking policy simpler and
more manageable.
An effect of this change is that we can get rid of functions such as
tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer().
We replace the latter with a new function, tipc_reset_bearer(), which
resets all links associated to the bearer immediately after an
interface goes down.
A user might notice one slight change in link behaviour after this
change. When an interface goes down, (e.g. through a NETDEV_DOWN
event) all attached links will be reset immediately, instead of
leaving it to each link to detect the failure through a timer-driven
mechanism. We consider this an improvement, and see no obvious risks
with the new behavior.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 19:08:00 +04:00
/* a different sequence number: remember it and restart the stale counter */
l_ptr - > last_retransmitted = msg_seqno ( msg ) ;
l_ptr - > stale_count = 1 ;
2006-01-02 21:04:38 +03:00
}
2006-06-26 10:40:01 +04:00
2010-03-15 10:58:45 +03:00
/* resend until the budget is used up, the first not-yet-sent buffer (next_out) is reached, or the list ends */
while ( retransmits & & ( buf ! = l_ptr - > next_out ) & & buf ) {
2006-01-02 21:04:38 +03:00
msg = buf_msg ( buf ) ;
/* refresh the ack fields before each resend */
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
buf = buf - > next ;
retransmits - - ;
l_ptr - > stats . retransmitted + + ;
2006-01-02 21:04:38 +03:00
}
2006-06-26 10:40:01 +04:00
2006-01-02 21:04:38 +03:00
/* clear any recorded pending-retransmission range */
l_ptr - > retransm_queue_head = l_ptr - > retransm_queue_size = 0 ;
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* link_insert_deferred_queue - insert deferred messages back into receive chain
*/
2011-12-30 05:58:42 +04:00
static struct sk_buff * link_insert_deferred_queue ( struct tipc_link * l_ptr ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf )
{
/*
 * buf is the head of the chain still to be processed. Returns the head the
 * caller should continue with: either buf unchanged, or the deferred queue
 * with buf appended behind it.
 */
u32 seq_no ;
/* nothing deferred: chain is returned untouched */
if ( l_ptr - > oldest_deferred_in = = NULL )
return buf ;
2011-10-25 00:03:12 +04:00
seq_no = buf_seqno ( l_ptr - > oldest_deferred_in ) ;
2006-01-02 21:04:38 +03:00
/* only splice when the oldest deferred buffer is exactly the next expected sequence number */
if ( seq_no = = mod ( l_ptr - > next_in_no ) ) {
/* deferred queue goes in front; the incoming chain is hooked onto its tail */
l_ptr - > newest_deferred_in - > next = buf ;
buf = l_ptr - > oldest_deferred_in ;
/* mark the deferred queue empty (newest_deferred_in is left as is) */
l_ptr - > oldest_deferred_in = NULL ;
l_ptr - > deferred_inqueue_sz = 0 ;
}
return buf ;
}
2008-04-16 06:04:54 +04:00
/**
* link_recv_buf_validate - validate basic format of received message
*
* This routine ensures a TIPC message has an acceptable header , and at least
* as much data as the header indicates it should . The routine also ensures
* that the entire message header is stored in the main fragment of the message
* buffer , to simplify future access to message header fields .
*
* Note : Having extra info present in the message header or data areas is OK .
* TIPC will ignore the excess , under the assumption that it is optional info
* introduced by a later release of the protocol .
*/
static int link_recv_buf_validate ( struct sk_buff * buf )
{
/*
 * Returns 0 when the buffer fails validation and must be discarded;
 * otherwise returns the result of pskb_may_pull() for the header size.
 */
/* minimum header size for each data message type, indexed by msg_type(); never written, hence const */
static const u32 min_data_hdr_size [ 8 ] = {
2011-05-31 23:03:18 +04:00
SHORT_H_SIZE , MCAST_H_SIZE , NAMED_H_SIZE , BASIC_H_SIZE ,
2008-04-16 06:04:54 +04:00
MAX_H_SIZE , MAX_H_SIZE , MAX_H_SIZE , MAX_H_SIZE
} ;
struct tipc_msg * msg ;
u32 tipc_hdr [ 2 ] ;
u32 size ;
u32 hdr_size ;
u32 min_hdr_size ;
/* too short to hold even the smallest header */
if ( unlikely ( buf - > len < MIN_H_SIZE ) )
return 0 ;
/* fetch the first sizeof(tipc_hdr) bytes through skb_header_pointer(), with tipc_hdr supplied as its local buffer */
msg = skb_header_pointer ( buf , 0 , sizeof ( tipc_hdr ) , tipc_hdr ) ;
if ( msg = = NULL )
return 0 ;
if ( unlikely ( msg_version ( msg ) ! = TIPC_VERSION ) )
return 0 ;
size = msg_size ( msg ) ;
hdr_size = msg_hdr_sz ( msg ) ;
/* data messages have a per-type minimum; everything else must have at least INT_H_SIZE */
min_hdr_size = msg_isdata ( msg ) ?
min_data_hdr_size [ msg_type ( msg ) ] : INT_H_SIZE ;
/* reject: header shorter than the minimum, message shorter than its header, buffer shorter than the message, or data part above TIPC_MAX_USER_MSG_SIZE */
if ( unlikely ( ( hdr_size < min_hdr_size ) | |
( size < hdr_size ) | |
( buf - > len < size ) | |
( size - hdr_size > TIPC_MAX_USER_MSG_SIZE ) ) )
return 0 ;
/* pskb_may_pull() is asked for hdr_size bytes; its result is the return value */
return pskb_may_pull ( buf , hdr_size ) ;
}
2010-08-17 15:00:07 +04:00
/**
2014-01-08 02:02:41 +04:00
* tipc_rcv - process TIPC packets / messages arriving from off - node
2010-08-17 15:00:07 +04:00
* @ head : pointer to message buffer chain
* @ b_ptr : pointer to bearer message arrived on
*
* Invoked with no locks held . Bearer pointer must point to a valid bearer
* structure ( i . e . cannot be NULL ) , but bearer can be inactive .
*/
2014-01-08 02:02:41 +04:00
void tipc_rcv ( struct sk_buff * head , struct tipc_bearer * b_ptr )
2006-01-02 21:04:38 +03:00
{
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
while ( head ) {
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2006-01-02 21:04:38 +03:00
struct sk_buff * crs ;
struct sk_buff * buf = head ;
2008-04-16 06:04:54 +04:00
struct tipc_msg * msg ;
u32 seq_no ;
u32 ackd ;
2006-01-02 21:04:38 +03:00
u32 released = 0 ;
int type ;
head = head - > next ;
tipc: correctly unlink packets from deferred packet queue
When we pull a received packet from a link's 'deferred packets' queue
for processing, its 'next' pointer is not cleared, and still refers to
the next packet in that queue, if any. This is incorrect, but caused
no harm before commit 40ba3cdf542a469aaa9083fa041656e59b109b90 ("tipc:
message reassembly using fragment chain") was introduced. After that
commit, it may sometimes lead to the following oops:
general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC
Modules linked in: tipc
CPU: 4 PID: 0 Comm: swapper/4 Tainted: G W 3.13.0-rc2+ #6
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
task: ffff880017af4880 ti: ffff880017aee000 task.ti: ffff880017aee000
RIP: 0010:[<ffffffff81710694>] [<ffffffff81710694>] skb_try_coalesce+0x44/0x3d0
RSP: 0018:ffff880016603a78 EFLAGS: 00010212
RAX: 6b6b6b6bd6d6d6d6 RBX: ffff880013106ac0 RCX: ffff880016603ad0
RDX: ffff880016603ad7 RSI: ffff88001223ed00 RDI: ffff880013106ac0
RBP: ffff880016603ab8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001223ed00
R13: ffff880016603ad0 R14: 000000000000058c R15: ffff880012297650
FS: 0000000000000000(0000) GS:ffff880016600000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 000000000805b000 CR3: 0000000011f5d000 CR4: 00000000000006e0
Stack:
ffff880016603a88 ffffffff810a38ed ffff880016603aa8 ffff88001223ed00
0000000000000001 ffff880012297648 ffff880016603b68 ffff880012297650
ffff880016603b08 ffffffffa0006c51 ffff880016603b08 00ffffffa00005fc
Call Trace:
<IRQ>
[<ffffffff810a38ed>] ? trace_hardirqs_on+0xd/0x10
[<ffffffffa0006c51>] tipc_link_recv_fragment+0xd1/0x1b0 [tipc]
[<ffffffffa0007214>] tipc_recv_msg+0x4e4/0x920 [tipc]
[<ffffffffa00016f0>] ? tipc_l2_rcv_msg+0x40/0x250 [tipc]
[<ffffffffa000177c>] tipc_l2_rcv_msg+0xcc/0x250 [tipc]
[<ffffffffa00016f0>] ? tipc_l2_rcv_msg+0x40/0x250 [tipc]
[<ffffffff8171e65b>] __netif_receive_skb_core+0x80b/0xd00
[<ffffffff8171df94>] ? __netif_receive_skb_core+0x144/0xd00
[<ffffffff8171eb76>] __netif_receive_skb+0x26/0x70
[<ffffffff8171ed6d>] netif_receive_skb+0x2d/0x200
[<ffffffff8171fe70>] napi_gro_receive+0xb0/0x130
[<ffffffff815647c2>] e1000_clean_rx_irq+0x2c2/0x530
[<ffffffff81565986>] e1000_clean+0x266/0x9c0
[<ffffffff81985f7b>] ? notifier_call_chain+0x2b/0x160
[<ffffffff8171f971>] net_rx_action+0x141/0x310
[<ffffffff81051c1b>] __do_softirq+0xeb/0x480
[<ffffffff819817bb>] ? _raw_spin_unlock+0x2b/0x40
[<ffffffff810b8c42>] ? handle_fasteoi_irq+0x72/0x100
[<ffffffff81052346>] irq_exit+0x96/0xc0
[<ffffffff8198cbc3>] do_IRQ+0x63/0xe0
[<ffffffff81981def>] common_interrupt+0x6f/0x6f
<EOI>
This happens when the last fragment of a message has passed through the
receiving link's 'deferred packets' queue, and at least one other
packet was added to that queue while it was there. After the fragment
chain with the complete message has been successfully delivered to the
receiving socket, it is released. Since 'next' pointer of the last
fragment in the released chain now is non-NULL, we get the crash shown
above.
We fix this by clearing the 'next' pointer of all received packets,
including those being pulled from the 'deferred' queue, before they
undergo any further processing.
Fixes: 40ba3cdf542a4 ("tipc: message reassembly using fragment chain")
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reported-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-01-08 00:51:36 +04:00
buf - > next = NULL ;
2008-04-16 06:04:54 +04:00
2010-08-17 15:00:07 +04:00
/* Ensure bearer is still enabled */
if ( unlikely ( ! b_ptr - > active ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2010-08-17 15:00:07 +04:00
2008-04-16 06:04:54 +04:00
/* Ensure message is well-formed */
if ( unlikely ( ! link_recv_buf_validate ( buf ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2006-01-02 21:04:38 +03:00
2008-04-16 06:03:23 +04:00
/* Ensure message data is a single contiguous unit */
2011-11-04 21:24:29 +04:00
if ( unlikely ( skb_linearize ( buf ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2008-04-16 06:03:23 +04:00
2008-04-16 06:04:54 +04:00
/* Handle arrival of a non-unicast link message */
msg = buf_msg ( buf ) ;
2006-01-02 21:04:38 +03:00
if ( unlikely ( msg_non_seq ( msg ) ) ) {
2008-06-05 04:32:35 +04:00
if ( msg_user ( msg ) = = LINK_CONFIG )
tipc_disc_recv_msg ( buf , b_ptr ) ;
else
tipc_bclink_recv_pkt ( buf ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2007-02-09 17:25:21 +03:00
2011-04-05 23:15:04 +04:00
/* Discard unicast link messages destined for another node */
2006-06-26 10:39:31 +04:00
if ( unlikely ( ! msg_short ( msg ) & &
( msg_destnode ( msg ) ! = tipc_own_addr ) ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2007-02-09 17:25:21 +03:00
2010-08-17 15:00:16 +04:00
/* Locate neighboring node that sent message */
2006-01-18 02:38:21 +03:00
n_ptr = tipc_node_find ( msg_prevnode ( msg ) ) ;
2006-01-02 21:04:38 +03:00
if ( unlikely ( ! n_ptr ) )
2013-10-30 07:26:57 +04:00
goto discard ;
2006-01-18 02:38:21 +03:00
tipc_node_lock ( n_ptr ) ;
2008-04-16 06:04:54 +04:00
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Locate unicast link endpoint that should handle message */
l_ptr = n_ptr - > links [ b_ptr - > identity ] ;
2013-10-30 07:26:57 +04:00
if ( unlikely ( ! l_ptr ) )
goto unlock_discard ;
2010-08-17 15:00:16 +04:00
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Verify that communication with node is currently allowed */
if ( ( n_ptr - > block_setup & WAIT_PEER_DOWN ) & &
msg_user ( msg ) = = LINK_PROTOCOL & &
( msg_type ( msg ) = = RESET_MSG | |
msg_type ( msg ) = = ACTIVATE_MSG ) & &
! msg_redundant_link ( msg ) )
n_ptr - > block_setup & = ~ WAIT_PEER_DOWN ;
2013-10-30 07:26:57 +04:00
if ( n_ptr - > block_setup )
goto unlock_discard ;
2008-04-16 06:04:54 +04:00
/* Validate message sequence number info */
seq_no = msg_seqno ( msg ) ;
ackd = msg_ack ( msg ) ;
/* Release acked messages */
2012-11-16 09:51:30 +04:00
if ( n_ptr - > bclink . recv_permitted )
2011-10-24 23:26:24 +04:00
tipc_bclink_acknowledge ( n_ptr , msg_bcast_ack ( msg ) ) ;
2006-01-02 21:04:38 +03:00
crs = l_ptr - > first_out ;
2007-02-09 17:25:21 +03:00
while ( ( crs ! = l_ptr - > next_out ) & &
2011-10-25 00:03:12 +04:00
less_eq ( buf_seqno ( crs ) , ackd ) ) {
2006-01-02 21:04:38 +03:00
struct sk_buff * next = crs - > next ;
2011-11-04 21:24:29 +04:00
kfree_skb ( crs ) ;
2006-01-02 21:04:38 +03:00
crs = next ;
released + + ;
}
if ( released ) {
l_ptr - > first_out = crs ;
l_ptr - > out_queue_size - = released ;
}
2008-04-16 06:04:54 +04:00
/* Try sending any messages link endpoint has pending */
2006-01-02 21:04:38 +03:00
if ( unlikely ( l_ptr - > next_out ) )
2006-01-18 02:38:21 +03:00
tipc_link_push_queue ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
if ( unlikely ( ! list_empty ( & l_ptr - > waiting_ports ) ) )
2006-01-18 02:38:21 +03:00
tipc_link_wakeup_ports ( l_ptr , 0 ) ;
2006-01-02 21:04:38 +03:00
if ( unlikely ( + + l_ptr - > unacked_window > = TIPC_MIN_LINK_WIN ) ) {
l_ptr - > stats . sent_acks + + ;
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
}
2008-04-16 06:04:54 +04:00
/* Now (finally!) process the incoming message */
2006-01-02 21:04:38 +03:00
protocol_check :
2013-10-30 07:26:57 +04:00
if ( unlikely ( ! link_working_working ( l_ptr ) ) ) {
if ( msg_user ( msg ) = = LINK_PROTOCOL ) {
link_recv_proto_msg ( l_ptr , buf ) ;
head = link_insert_deferred_queue ( l_ptr , head ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2013-10-30 07:26:57 +04:00
/* Traffic message. Conditionally activate link */
link_state_event ( l_ptr , TRAFFIC_MSG_EVT ) ;
if ( link_working_working ( l_ptr ) ) {
/* Re-insert buffer in front of queue */
buf - > next = head ;
head = buf ;
tipc_node_unlock ( n_ptr ) ;
continue ;
}
goto unlock_discard ;
}
/* Link is now in state WORKING_WORKING */
if ( unlikely ( seq_no ! = mod ( l_ptr - > next_in_no ) ) ) {
2006-01-02 21:04:38 +03:00
link_handle_out_of_seq_msg ( l_ptr , buf ) ;
head = link_insert_deferred_queue ( l_ptr , head ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2013-10-30 07:26:57 +04:00
l_ptr - > next_in_no + + ;
if ( unlikely ( l_ptr - > oldest_deferred_in ) )
2006-01-02 21:04:38 +03:00
head = link_insert_deferred_queue ( l_ptr , head ) ;
2013-10-30 07:26:57 +04:00
deliver :
if ( likely ( msg_isdata ( msg ) ) ) {
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2013-10-30 07:26:57 +04:00
tipc_port_recv_msg ( buf ) ;
2006-01-02 21:04:38 +03:00
continue ;
}
2013-10-30 07:26:57 +04:00
switch ( msg_user ( msg ) ) {
int ret ;
case MSG_BUNDLER :
l_ptr - > stats . recv_bundles + + ;
l_ptr - > stats . recv_bundled + = msg_msgcnt ( msg ) ;
tipc_node_unlock ( n_ptr ) ;
tipc_link_recv_bundle ( buf ) ;
continue ;
case NAME_DISTRIBUTOR :
n_ptr - > bclink . recv_permitted = true ;
tipc_node_unlock ( n_ptr ) ;
tipc_named_recv ( buf ) ;
continue ;
case BCAST_PROTOCOL :
tipc_link_recv_sync ( n_ptr , buf ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
continue ;
2013-10-30 07:26:57 +04:00
case CONN_MANAGER :
tipc_node_unlock ( n_ptr ) ;
tipc_port_recv_proto_msg ( buf ) ;
continue ;
case MSG_FRAGMENTER :
l_ptr - > stats . recv_fragments + + ;
2014-02-14 02:29:05 +04:00
ret = tipc_link_frag_rcv ( & l_ptr - > reasm_head ,
& l_ptr - > reasm_tail ,
& buf ) ;
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
if ( ret = = LINK_REASM_COMPLETE ) {
2013-10-30 07:26:57 +04:00
l_ptr - > stats . recv_fragmented + + ;
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
msg = buf_msg ( buf ) ;
2013-10-30 07:26:57 +04:00
goto deliver ;
}
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations, since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
if ( ret = = LINK_REASM_ERROR )
2013-11-06 12:28:07 +04:00
tipc_link_reset ( l_ptr ) ;
2013-11-06 12:28:05 +04:00
tipc_node_unlock ( n_ptr ) ;
continue ;
2013-10-30 07:26:57 +04:00
case CHANGEOVER_PROTOCOL :
type = msg_type ( msg ) ;
2014-01-08 02:02:41 +04:00
if ( tipc_link_tunnel_rcv ( & l_ptr , & buf ) ) {
2013-10-30 07:26:57 +04:00
msg = buf_msg ( buf ) ;
seq_no = msg_seqno ( msg ) ;
if ( type = = ORIGINAL_MSG )
goto deliver ;
goto protocol_check ;
}
break ;
default :
kfree_skb ( buf ) ;
buf = NULL ;
break ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2013-10-30 07:26:57 +04:00
tipc_net_route_msg ( buf ) ;
continue ;
unlock_discard :
tipc_node_unlock ( n_ptr ) ;
discard :
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
}
2012-07-10 14:55:09 +04:00
/**
2011-10-25 18:44:35 +04:00
* tipc_link_defer_pkt - Add out - of - sequence message to deferred reception queue
*
* Returns increase in queue length ( i . e . 0 or 1 )
2006-01-02 21:04:38 +03:00
*/
2011-10-25 18:44:35 +04:00
u32 tipc_link_defer_pkt ( struct sk_buff * * head , struct sk_buff * * tail ,
2006-01-18 02:38:21 +03:00
struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
2011-10-25 18:44:35 +04:00
struct sk_buff * queue_buf ;
struct sk_buff * * prev ;
2011-10-25 00:03:12 +04:00
u32 seq_no = buf_seqno ( buf ) ;
2006-01-02 21:04:38 +03:00
buf - > next = NULL ;
/* Empty queue ? */
if ( * head = = NULL ) {
* head = * tail = buf ;
return 1 ;
}
/* Last ? */
2011-10-25 00:03:12 +04:00
if ( less ( buf_seqno ( * tail ) , seq_no ) ) {
2006-01-02 21:04:38 +03:00
( * tail ) - > next = buf ;
* tail = buf ;
return 1 ;
}
2011-10-25 18:44:35 +04:00
/* Locate insertion point in queue, then insert; discard if duplicate */
prev = head ;
queue_buf = * head ;
for ( ; ; ) {
u32 curr_seqno = buf_seqno ( queue_buf ) ;
2006-01-02 21:04:38 +03:00
2011-10-25 18:44:35 +04:00
if ( seq_no = = curr_seqno ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2011-10-25 18:44:35 +04:00
return 0 ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 18:44:35 +04:00
if ( less ( seq_no , curr_seqno ) )
2006-01-02 21:04:38 +03:00
break ;
2011-10-25 18:44:35 +04:00
prev = & queue_buf - > next ;
queue_buf = queue_buf - > next ;
}
2006-01-02 21:04:38 +03:00
2011-10-25 18:44:35 +04:00
buf - > next = queue_buf ;
* prev = buf ;
return 1 ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 18:44:35 +04:00
/*
2006-01-02 21:04:38 +03:00
* link_handle_out_of_seq_msg - handle arrival of out - of - sequence packet
*/
2011-12-30 05:58:42 +04:00
static void link_handle_out_of_seq_msg ( struct tipc_link * l_ptr ,
2006-01-02 21:04:38 +03:00
struct sk_buff * buf )
{
2011-10-25 00:03:12 +04:00
u32 seq_no = buf_seqno ( buf ) ;
2006-01-02 21:04:38 +03:00
if ( likely ( msg_user ( buf_msg ( buf ) ) = = LINK_PROTOCOL ) ) {
link_recv_proto_msg ( l_ptr , buf ) ;
return ;
}
/* Record OOS packet arrival (force mismatch on next timeout) */
l_ptr - > checkpoint - - ;
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* Discard packet if a duplicate ; otherwise add it to deferred queue
* and notify peer of gap as per protocol specification
*/
if ( less ( seq_no , mod ( l_ptr - > next_in_no ) ) ) {
l_ptr - > stats . duplicates + + ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
return ;
}
2006-01-18 02:38:21 +03:00
if ( tipc_link_defer_pkt ( & l_ptr - > oldest_deferred_in ,
& l_ptr - > newest_deferred_in , buf ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > deferred_inqueue_sz + + ;
l_ptr - > stats . deferred_recv + + ;
if ( ( l_ptr - > deferred_inqueue_sz % 16 ) = = 1 )
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG , 0 , 0 , 0 , 0 , 0 ) ;
2006-01-02 21:04:38 +03:00
} else
l_ptr - > stats . duplicates + + ;
}
/*
* Send protocol message to the other endpoint .
*/
2011-12-30 05:58:42 +04:00
void tipc_link_send_proto_msg ( struct tipc_link * l_ptr , u32 msg_typ ,
2013-06-17 18:54:47 +04:00
int probe_msg , u32 gap , u32 tolerance ,
u32 priority , u32 ack_mtu )
2006-01-02 21:04:38 +03:00
{
2006-03-21 09:36:47 +03:00
struct sk_buff * buf = NULL ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg = l_ptr - > pmsg ;
2007-02-09 17:25:21 +03:00
u32 msg_size = sizeof ( l_ptr - > proto_msg ) ;
2011-02-28 23:30:20 +03:00
int r_flag ;
2006-01-02 21:04:38 +03:00
2011-10-25 19:20:26 +04:00
/* Discard any previous message that was deferred due to congestion */
if ( l_ptr - > proto_msg_queue ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( l_ptr - > proto_msg_queue ) ;
2011-10-25 19:20:26 +04:00
l_ptr - > proto_msg_queue = NULL ;
}
2013-12-11 08:45:44 +04:00
/* Don't send protocol message during link changeover */
if ( l_ptr - > exp_msg_count )
2006-01-02 21:04:38 +03:00
return ;
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
/* Abort non-RESET send if communication with node is prohibited */
if ( ( l_ptr - > owner - > block_setup ) & & ( msg_typ ! = RESET_MSG ) )
return ;
2011-10-25 19:20:26 +04:00
/* Create protocol message with "out-of-sequence" sequence number */
2006-01-02 21:04:38 +03:00
msg_set_type ( msg , msg_typ ) ;
msg_set_net_plane ( msg , l_ptr - > b_ptr - > net_plane ) ;
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2006-01-18 02:38:21 +03:00
msg_set_last_bcast ( msg , tipc_bclink_get_last_sent ( ) ) ;
2006-01-02 21:04:38 +03:00
if ( msg_typ = = STATE_MSG ) {
u32 next_sent = mod ( l_ptr - > next_out_no ) ;
2006-01-18 02:38:21 +03:00
if ( ! tipc_link_is_up ( l_ptr ) )
2006-01-02 21:04:38 +03:00
return ;
if ( l_ptr - > next_out )
2011-10-25 00:03:12 +04:00
next_sent = buf_seqno ( l_ptr - > next_out ) ;
2006-01-02 21:04:38 +03:00
msg_set_next_sent ( msg , next_sent ) ;
if ( l_ptr - > oldest_deferred_in ) {
2011-10-25 00:03:12 +04:00
u32 rec = buf_seqno ( l_ptr - > oldest_deferred_in ) ;
2006-01-02 21:04:38 +03:00
gap = mod ( rec - mod ( l_ptr - > next_in_no ) ) ;
}
msg_set_seq_gap ( msg , gap ) ;
if ( gap )
l_ptr - > stats . sent_nacks + + ;
msg_set_link_tolerance ( msg , tolerance ) ;
msg_set_linkprio ( msg , priority ) ;
msg_set_max_pkt ( msg , ack_mtu ) ;
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ;
msg_set_probe ( msg , probe_msg ! = 0 ) ;
2007-02-09 17:25:21 +03:00
if ( probe_msg ) {
2006-01-02 21:04:38 +03:00
u32 mtu = l_ptr - > max_pkt ;
2007-02-09 17:25:21 +03:00
if ( ( mtu < l_ptr - > max_pkt_target ) & &
2006-01-02 21:04:38 +03:00
link_working_working ( l_ptr ) & &
l_ptr - > fsm_msg_cnt ) {
msg_size = ( mtu + ( l_ptr - > max_pkt_target - mtu ) / 2 + 2 ) & ~ 3 ;
2007-02-09 17:25:21 +03:00
if ( l_ptr - > max_pkt_probes = = 10 ) {
l_ptr - > max_pkt_target = ( msg_size - 4 ) ;
l_ptr - > max_pkt_probes = 0 ;
2006-01-02 21:04:38 +03:00
msg_size = ( mtu + ( l_ptr - > max_pkt_target - mtu ) / 2 + 2 ) & ~ 3 ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > max_pkt_probes + + ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_probes + + ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
l_ptr - > stats . sent_states + + ;
} else { /* RESET_MSG or ACTIVATE_MSG */
msg_set_ack ( msg , mod ( l_ptr - > reset_checkpoint - 1 ) ) ;
msg_set_seq_gap ( msg , 0 ) ;
msg_set_next_sent ( msg , 1 ) ;
2011-01-18 23:15:34 +03:00
msg_set_probe ( msg , 0 ) ;
2006-01-02 21:04:38 +03:00
msg_set_link_tolerance ( msg , l_ptr - > tolerance ) ;
msg_set_linkprio ( msg , l_ptr - > priority ) ;
msg_set_max_pkt ( msg , l_ptr - > max_pkt_target ) ;
}
2011-02-28 23:30:20 +03:00
r_flag = ( l_ptr - > owner - > working_links > tipc_link_is_up ( l_ptr ) ) ;
msg_set_redundant_link ( msg , r_flag ) ;
2006-01-02 21:04:38 +03:00
msg_set_linkprio ( msg , l_ptr - > priority ) ;
2011-10-25 19:20:26 +04:00
msg_set_size ( msg , msg_size ) ;
2006-01-02 21:04:38 +03:00
msg_set_seqno ( msg , mod ( l_ptr - > next_out_no + ( 0xffff / 2 ) ) ) ;
2010-10-13 17:20:35 +04:00
buf = tipc_buf_acquire ( msg_size ) ;
2006-01-02 21:04:38 +03:00
if ( ! buf )
return ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , msg , sizeof ( l_ptr - > proto_msg ) ) ;
2013-06-17 18:54:48 +04:00
buf - > priority = TC_PRIO_CONTROL ;
2006-01-02 21:04:38 +03:00
2012-11-15 07:34:45 +04:00
tipc_bearer_send ( l_ptr - > b_ptr , buf , & l_ptr - > media_addr ) ;
2011-10-25 19:20:26 +04:00
l_ptr - > unacked_window = 0 ;
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
/*
* Receive protocol message :
2007-02-09 17:25:21 +03:00
* Note that network plane id propagates through the network , and may
* change at any time . The node with lowest address rules
2006-01-02 21:04:38 +03:00
*/
2011-12-30 05:58:42 +04:00
static void link_recv_proto_msg ( struct tipc_link * l_ptr , struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
u32 rec_gap = 0 ;
u32 max_pkt_info ;
2007-02-09 17:25:21 +03:00
u32 max_pkt_ack ;
2006-01-02 21:04:38 +03:00
u32 msg_tol ;
struct tipc_msg * msg = buf_msg ( buf ) ;
2013-12-11 08:45:44 +04:00
/* Discard protocol message during link changeover */
if ( l_ptr - > exp_msg_count )
2006-01-02 21:04:38 +03:00
goto exit ;
/* record unnumbered packet arrival (force mismatch on next timeout) */
l_ptr - > checkpoint - - ;
if ( l_ptr - > b_ptr - > net_plane ! = msg_net_plane ( msg ) )
if ( tipc_own_addr > msg_prevnode ( msg ) )
l_ptr - > b_ptr - > net_plane = msg_net_plane ( msg ) ;
switch ( msg_type ( msg ) ) {
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
case RESET_MSG :
2008-06-05 04:29:39 +04:00
if ( ! link_working_unknown ( l_ptr ) & &
( l_ptr - > peer_session ! = INVALID_SESSION ) ) {
2011-04-07 17:54:43 +04:00
if ( less_eq ( msg_session ( msg ) , l_ptr - > peer_session ) )
break ; /* duplicate or old reset: ignore */
2006-01-02 21:04:38 +03:00
}
tipc: Ensure both nodes recognize loss of contact between them
Enhances TIPC to ensure that a node that loses contact with a
neighboring node does not allow contact to be re-established until
it sees that its peer has also recognized the loss of contact.
Previously, nodes that were connected by two or more links could
encounter a situation in which node A would lose contact with node B
on all of its links, purge its name table of names published by B,
and then fail to repopulate those names once contact with B was restored.
This would happen because B was able to re-establish one or more links
so quickly that it never reached a point where it had no links to A --
meaning that B never saw a loss of contact with A, and consequently
didn't re-publish its names to A.
This problem is now prevented by enhancing the cleanup done by TIPC
following a loss of contact with a neighboring node to ensure that
node A ignores all messages sent by B until it receives a LINK_PROTOCOL
message that indicates B has lost contact with A, thereby preventing
the (re)establishment of links between the nodes. The loss of contact
is recognized when a RESET or ACTIVATE message is received that has
a "redundant link exists" field of 0, indicating that B's sending link
endpoint is in a reset state and that B has no other working links.
Additionally, TIPC now suppresses the sending of (most) link protocol
messages to a neighboring node while it is cleaning up after an earlier
loss of contact with that node. This stops the peer node from prematurely
activating its link endpoint, which would prevent TIPC from later
activating its own end. TIPC still allows outgoing RESET messages to
occur during cleanup, to avoid problems if its own node recognizes
the loss of contact first and tries to notify the peer of the situation.
Finally, TIPC now recognizes an impending loss of contact with a peer node
as soon as it receives a RESET message on a working link that is the
peer's only link to the node, and ensures that the link protocol
suppression mentioned above goes into effect right away -- that is,
even before its own link endpoints have failed. This is necessary to
ensure correct operation when there are redundant links between the nodes,
since otherwise TIPC would send an ACTIVATE message upon receiving a RESET
on its first link and only begin suppressing when a RESET on its second
link was received, instead of initiating suppression with the first RESET
message as it needs to.
Note: The reworked cleanup code also eliminates a check that prevented
a link endpoint's discovery object from responding to incoming messages
while stale name table entries are being purged. This check is now
unnecessary and would have slowed down re-establishment of communication
between the nodes in some situations.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-05-27 19:00:51 +04:00
if ( ! msg_redundant_link ( msg ) & & ( link_working_working ( l_ptr ) | |
link_working_unknown ( l_ptr ) ) ) {
/*
* peer has lost contact - - don ' t allow peer ' s links
* to reactivate before we recognize loss & clean up
*/
l_ptr - > owner - > block_setup = WAIT_NODE_DOWN ;
}
2011-10-26 18:55:16 +04:00
link_state_event ( l_ptr , RESET_MSG ) ;
2006-01-02 21:04:38 +03:00
/* fall thru' */
case ACTIVATE_MSG :
/* Update link settings according other endpoint's values */
strcpy ( ( strrchr ( l_ptr - > name , ' : ' ) + 1 ) , ( char * ) msg_data ( msg ) ) ;
2010-12-31 21:59:33 +03:00
msg_tol = msg_link_tolerance ( msg ) ;
if ( msg_tol > l_ptr - > tolerance )
2006-01-02 21:04:38 +03:00
link_set_supervision_props ( l_ptr , msg_tol ) ;
if ( msg_linkprio ( msg ) > l_ptr - > priority )
l_ptr - > priority = msg_linkprio ( msg ) ;
max_pkt_info = msg_max_pkt ( msg ) ;
2007-02-09 17:25:21 +03:00
if ( max_pkt_info ) {
2006-01-02 21:04:38 +03:00
if ( max_pkt_info < l_ptr - > max_pkt_target )
l_ptr - > max_pkt_target = max_pkt_info ;
if ( l_ptr - > max_pkt > l_ptr - > max_pkt_target )
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
} else {
2007-02-09 17:25:21 +03:00
l_ptr - > max_pkt = l_ptr - > max_pkt_target ;
2006-01-02 21:04:38 +03:00
}
2011-10-25 20:19:05 +04:00
/* Synchronize broadcast link info, if not done previously */
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
if ( ! tipc_node_is_up ( l_ptr - > owner ) ) {
l_ptr - > owner - > bclink . last_sent =
l_ptr - > owner - > bclink . last_in =
msg_last_bcast ( msg ) ;
l_ptr - > owner - > bclink . oos_state = 0 ;
}
2011-10-25 20:19:05 +04:00
2006-01-02 21:04:38 +03:00
l_ptr - > peer_session = msg_session ( msg ) ;
l_ptr - > peer_bearer_id = msg_bearer_id ( msg ) ;
2011-10-26 18:55:16 +04:00
if ( msg_type ( msg ) = = ACTIVATE_MSG )
link_state_event ( l_ptr , ACTIVATE_MSG ) ;
2006-01-02 21:04:38 +03:00
break ;
case STATE_MSG :
2010-12-31 21:59:33 +03:00
msg_tol = msg_link_tolerance ( msg ) ;
if ( msg_tol )
2006-01-02 21:04:38 +03:00
link_set_supervision_props ( l_ptr , msg_tol ) ;
2007-02-09 17:25:21 +03:00
if ( msg_linkprio ( msg ) & &
2006-01-02 21:04:38 +03:00
( msg_linkprio ( msg ) ! = l_ptr - > priority ) ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %s<%s>, priority change %u->%u \n " ,
link_rst_msg , l_ptr - > name , l_ptr - > priority ,
msg_linkprio ( msg ) ) ;
2006-01-02 21:04:38 +03:00
l_ptr - > priority = msg_linkprio ( msg ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( l_ptr ) ; /* Enforce change to take effect */
2006-01-02 21:04:38 +03:00
break ;
}
link_state_event ( l_ptr , TRAFFIC_MSG_EVT ) ;
l_ptr - > stats . recv_states + + ;
if ( link_reset_unknown ( l_ptr ) )
break ;
if ( less_eq ( mod ( l_ptr - > next_in_no ) , msg_next_sent ( msg ) ) ) {
2007-02-09 17:25:21 +03:00
rec_gap = mod ( msg_next_sent ( msg ) -
2006-01-02 21:04:38 +03:00
mod ( l_ptr - > next_in_no ) ) ;
}
max_pkt_ack = msg_max_pkt ( msg ) ;
2007-02-09 17:25:21 +03:00
if ( max_pkt_ack > l_ptr - > max_pkt ) {
l_ptr - > max_pkt = max_pkt_ack ;
l_ptr - > max_pkt_probes = 0 ;
}
2006-01-02 21:04:38 +03:00
max_pkt_ack = 0 ;
2007-02-09 17:25:21 +03:00
if ( msg_probe ( msg ) ) {
2006-01-02 21:04:38 +03:00
l_ptr - > stats . recv_probes + + ;
2010-12-31 21:59:35 +03:00
if ( msg_size ( msg ) > sizeof ( l_ptr - > proto_msg ) )
2007-02-09 17:25:21 +03:00
max_pkt_ack = msg_size ( msg ) ;
}
2006-01-02 21:04:38 +03:00
/* Protocol message before retransmits, reduce loss risk */
2012-11-16 09:51:30 +04:00
if ( l_ptr - > owner - > bclink . recv_permitted )
tipc: Major redesign of broadcast link ACK/NACK algorithms
Completely redesigns broadcast link ACK and NACK mechanisms to prevent
spurious retransmit requests in dual LAN networks, and to prevent the
broadcast link from stalling due to the failure of a receiving node to
acknowledge receiving a broadcast message or request its retransmission.
Note: These changes only impact the timing of when ACK and NACK messages
are sent, and not the basic broadcast link protocol itself, so inter-
operability with nodes using the "classic" algorithms is maintained.
The revised algorithms are as follows:
1) An explicit ACK message is still sent after receiving 16 in-sequence
messages, and implicit ACK information continues to be carried in other
unicast link message headers (including link state messages). However,
the timing of explicit ACKs is now based on the receiving node's absolute
network address rather than its relative network address to ensure that
the failure of another node does not delay the ACK beyond its 16 message
target.
2) A NACK message is now typically sent only when a message gap persists
for two consecutive incoming link state messages; this ensures that a
suspected gap is not confirmed until both LANs in a dual LAN network have
had an opportunity to deliver the message, thereby preventing spurious NACKs.
A NACK message can also be generated by the arrival of a single link state
message, if the deferred queue is so big that the current message gap
cannot be the result of "normal" mis-ordering due to the use of dual LANs
(or one LAN using a bonded interface). Since link state messages typically
arrive at different nodes at different times the problem of multiple nodes
issuing identical NACKs simultaneously is inherently avoided.
3) Nodes continue to "peek" at NACK messages sent by other nodes. If
another node requests retransmission of a message gap suspected (but not
yet confirmed) by the peeking node, the peeking node forgets about the
gap and does not generate a duplicate retransmit request. (If the peeking
node subsequently fails to receive the lost message, later link state
messages will cause it to rediscover and confirm the gap and send another
NACK.)
4) Message gap "equality" is now determined by the start of the gap only.
This is sufficient to deal with the most common cases of message loss,
and eliminates the need for complex end of gap computations.
5) A peeking node no longer tries to determine whether it should send a
complementary NACK, since the most common cases of message loss don't
require it to be sent. Consequently, the node no longer examines the
"broadcast tag" field of a NACK message when peeking.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-27 22:17:53 +04:00
tipc_bclink_update_link_state ( l_ptr - > owner ,
msg_last_bcast ( msg ) ) ;
2006-01-02 21:04:38 +03:00
if ( rec_gap | | ( msg_probe ( msg ) ) ) {
2006-01-18 02:38:21 +03:00
tipc_link_send_proto_msg ( l_ptr , STATE_MSG ,
0 , rec_gap , 0 , 0 , max_pkt_ack ) ;
2006-01-02 21:04:38 +03:00
}
if ( msg_seq_gap ( msg ) ) {
l_ptr - > stats . recv_nacks + + ;
2006-01-18 02:38:21 +03:00
tipc_link_retransmit ( l_ptr , l_ptr - > first_out ,
msg_seq_gap ( msg ) ) ;
2006-01-02 21:04:38 +03:00
}
break ;
}
exit :
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
2014-01-08 02:02:41 +04:00
/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to
* a different bearer . Owner node is locked .
2006-01-02 21:04:38 +03:00
*/
2014-01-08 02:02:41 +04:00
static void tipc_link_tunnel_xmit ( struct tipc_link * l_ptr ,
struct tipc_msg * tunnel_hdr ,
struct tipc_msg * msg ,
u32 selector )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * tunnel ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
u32 length = msg_size ( msg ) ;
tunnel = l_ptr - > owner - > active_links [ selector & 1 ] ;
2006-06-26 10:52:50 +04:00
if ( ! tipc_link_is_up ( tunnel ) ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %stunnel link no longer available \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
2006-06-26 10:52:50 +04:00
}
2006-01-02 21:04:38 +03:00
msg_set_size ( tunnel_hdr , length + INT_H_SIZE ) ;
2010-10-13 17:20:35 +04:00
buf = tipc_buf_acquire ( length + INT_H_SIZE ) ;
2006-06-26 10:52:50 +04:00
if ( ! buf ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send tunnel msg \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
2006-06-26 10:52:50 +04:00
}
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , tunnel_hdr , INT_H_SIZE ) ;
skb_copy_to_linear_data_offset ( buf , INT_H_SIZE , msg , length ) ;
2006-01-18 02:38:21 +03:00
tipc_link_send_buf ( tunnel , buf ) ;
2006-01-02 21:04:38 +03:00
}
2014-01-08 02:02:41 +04:00
/* tipc_link_failover_send_queue(): A link has gone down, but a second
* link is still active . We can do failover . Tunnel the failing link ' s
* whole send queue via the remaining link . This way , we don ' t lose
* any packets , and sequence order is preserved for subsequent traffic
* sent over the remaining link . Owner node is locked .
2006-01-02 21:04:38 +03:00
*/
2014-01-08 02:02:41 +04:00
void tipc_link_failover_send_queue ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
u32 msgcount = l_ptr - > out_queue_size ;
struct sk_buff * crs = l_ptr - > first_out ;
2011-12-30 05:58:42 +04:00
struct tipc_link * tunnel = l_ptr - > owner - > active_links [ 0 ] ;
2006-01-02 21:04:38 +03:00
struct tipc_msg tunnel_hdr ;
2006-06-26 10:52:50 +04:00
int split_bundles ;
2006-01-02 21:04:38 +03:00
if ( ! tunnel )
return ;
2010-05-11 18:30:12 +04:00
tipc_msg_init ( & tunnel_hdr , CHANGEOVER_PROTOCOL ,
2008-06-05 04:37:34 +04:00
ORIGINAL_MSG , INT_H_SIZE , l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
msg_set_bearer_id ( & tunnel_hdr , l_ptr - > peer_bearer_id ) ;
msg_set_msgcnt ( & tunnel_hdr , msgcount ) ;
2006-06-26 10:51:37 +04:00
2006-01-02 21:04:38 +03:00
if ( ! l_ptr - > first_out ) {
struct sk_buff * buf ;
2010-10-13 17:20:35 +04:00
buf = tipc_buf_acquire ( INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
if ( buf ) {
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( buf , & tunnel_hdr , INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( & tunnel_hdr , INT_H_SIZE ) ;
2006-01-18 02:38:21 +03:00
tipc_link_send_buf ( tunnel , buf ) ;
2006-01-02 21:04:38 +03:00
} else {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send changeover msg \n " ,
link_co_err ) ;
2006-01-02 21:04:38 +03:00
}
return ;
}
2006-06-26 10:51:37 +04:00
2007-02-09 17:25:21 +03:00
split_bundles = ( l_ptr - > owner - > active_links [ 0 ] ! =
2006-06-26 10:52:50 +04:00
l_ptr - > owner - > active_links [ 1 ] ) ;
2006-01-02 21:04:38 +03:00
while ( crs ) {
struct tipc_msg * msg = buf_msg ( crs ) ;
if ( ( msg_user ( msg ) = = MSG_BUNDLER ) & & split_bundles ) {
struct tipc_msg * m = msg_get_wrapped ( msg ) ;
2010-12-31 21:59:32 +03:00
unchar * pos = ( unchar * ) m ;
2006-01-02 21:04:38 +03:00
2007-08-03 06:28:06 +04:00
msgcount = msg_msgcnt ( msg ) ;
2006-01-02 21:04:38 +03:00
while ( msgcount - - ) {
2010-12-31 21:59:32 +03:00
msg_set_seqno ( m , msg_seqno ( msg ) ) ;
2014-01-08 02:02:41 +04:00
tipc_link_tunnel_xmit ( l_ptr , & tunnel_hdr , m ,
msg_link_selector ( m ) ) ;
2006-01-02 21:04:38 +03:00
pos + = align ( msg_size ( m ) ) ;
m = ( struct tipc_msg * ) pos ;
}
} else {
2014-01-08 02:02:41 +04:00
tipc_link_tunnel_xmit ( l_ptr , & tunnel_hdr , msg ,
msg_link_selector ( msg ) ) ;
2006-01-02 21:04:38 +03:00
}
crs = crs - > next ;
}
}
2014-01-08 02:02:41 +04:00
/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a
* duplicate of the first link ' s send queue via the new link . This way , we
* are guaranteed that currently queued packets from a socket are delivered
* before future traffic from the same socket , even if this is using the
* new link . The last arriving copy of each duplicate packet is dropped at
* the receiving end by the regular protocol check , so packet cardinality
* and sequence order is preserved per sender / receiver socket pair .
* Owner node is locked .
*/
void tipc_link_dup_send_queue ( struct tipc_link * l_ptr ,
struct tipc_link * tunnel )
2006-01-02 21:04:38 +03:00
{
struct sk_buff * iter ;
struct tipc_msg tunnel_hdr ;
2010-05-11 18:30:12 +04:00
tipc_msg_init ( & tunnel_hdr , CHANGEOVER_PROTOCOL ,
2008-06-05 04:37:34 +04:00
DUPLICATE_MSG , INT_H_SIZE , l_ptr - > addr ) ;
2006-01-02 21:04:38 +03:00
msg_set_msgcnt ( & tunnel_hdr , l_ptr - > out_queue_size ) ;
msg_set_bearer_id ( & tunnel_hdr , l_ptr - > peer_bearer_id ) ;
iter = l_ptr - > first_out ;
while ( iter ) {
struct sk_buff * outbuf ;
struct tipc_msg * msg = buf_msg ( iter ) ;
u32 length = msg_size ( msg ) ;
if ( msg_user ( msg ) = = MSG_BUNDLER )
msg_set_type ( msg , CLOSED_MSG ) ;
msg_set_ack ( msg , mod ( l_ptr - > next_in_no - 1 ) ) ; /* Update */
2007-02-09 17:25:21 +03:00
msg_set_bcast_ack ( msg , l_ptr - > owner - > bclink . last_in ) ;
2006-01-02 21:04:38 +03:00
msg_set_size ( & tunnel_hdr , length + INT_H_SIZE ) ;
2010-10-13 17:20:35 +04:00
outbuf = tipc_buf_acquire ( length + INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
if ( outbuf = = NULL ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sunable to send duplicate msg \n " ,
link_co_err ) ;
2006-01-02 21:04:38 +03:00
return ;
}
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( outbuf , & tunnel_hdr , INT_H_SIZE ) ;
skb_copy_to_linear_data_offset ( outbuf , INT_H_SIZE , iter - > data ,
length ) ;
2006-01-18 02:38:21 +03:00
tipc_link_send_buf ( tunnel , outbuf ) ;
if ( ! tipc_link_is_up ( l_ptr ) )
2006-01-02 21:04:38 +03:00
return ;
iter = iter - > next ;
}
}
/**
 * buf_extract - copy an embedded TIPC message out of an enclosing buffer
 * @skb: encapsulating message buffer
 * @from_pos: byte offset in @skb's data at which the embedded message starts
 *
 * The number of bytes copied is taken from the embedded message's own size
 * field. The encapsulating buffer itself is left unchanged.
 *
 * Returns a newly acquired buffer holding the embedded message, or NULL if
 * no buffer could be acquired.
 */
static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
{
	struct tipc_msg *inner = (struct tipc_msg *)(skb->data + from_pos);
	u32 inner_len = msg_size(inner);
	struct sk_buff *copy = tipc_buf_acquire(inner_len);

	if (!copy)
		return NULL;

	skb_copy_to_linear_data(copy, inner, inner_len);
	return copy;
}
2014-01-08 02:02:41 +04:00
/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent
* via other link as result of a failover ( ORIGINAL_MSG ) or
* a new active link ( DUPLICATE_MSG ) . Failover packets are
* returned to the active link for delivery upwards .
* Owner node is locked .
2006-01-02 21:04:38 +03:00
*/
2014-01-08 02:02:41 +04:00
static int tipc_link_tunnel_rcv ( struct tipc_link * * l_ptr ,
struct sk_buff * * buf )
2006-01-02 21:04:38 +03:00
{
struct sk_buff * tunnel_buf = * buf ;
2011-12-30 05:58:42 +04:00
struct tipc_link * dest_link ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg ;
struct tipc_msg * tunnel_msg = buf_msg ( tunnel_buf ) ;
u32 msg_typ = msg_type ( tunnel_msg ) ;
u32 msg_count = msg_msgcnt ( tunnel_msg ) ;
2013-05-06 12:28:41 +04:00
u32 bearer_id = msg_bearer_id ( tunnel_msg ) ;
2006-01-02 21:04:38 +03:00
2013-05-06 12:28:41 +04:00
if ( bearer_id > = MAX_BEARERS )
goto exit ;
dest_link = ( * l_ptr ) - > owner - > links [ bearer_id ] ;
2010-12-31 21:59:25 +03:00
if ( ! dest_link )
2006-01-02 21:04:38 +03:00
goto exit ;
2006-06-26 10:51:37 +04:00
if ( dest_link = = * l_ptr ) {
2012-06-29 08:16:37 +04:00
pr_err ( " Unexpected changeover message on link <%s> \n " ,
( * l_ptr ) - > name ) ;
2006-06-26 10:51:37 +04:00
goto exit ;
}
2006-01-02 21:04:38 +03:00
* l_ptr = dest_link ;
msg = msg_get_wrapped ( tunnel_msg ) ;
if ( msg_typ = = DUPLICATE_MSG ) {
2010-12-31 21:59:25 +03:00
if ( less ( msg_seqno ( msg ) , mod ( dest_link - > next_in_no ) ) )
2006-01-02 21:04:38 +03:00
goto exit ;
2010-12-31 21:59:32 +03:00
* buf = buf_extract ( tunnel_buf , INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
if ( * buf = = NULL ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sduplicate msg dropped \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
2011-11-04 21:24:29 +04:00
kfree_skb ( tunnel_buf ) ;
2006-01-02 21:04:38 +03:00
return 1 ;
}
/* First original message ?: */
2006-01-18 02:38:21 +03:00
if ( tipc_link_is_up ( dest_link ) ) {
2012-06-29 08:16:37 +04:00
pr_info ( " %s<%s>, changeover initiated by peer \n " , link_rst_msg ,
dest_link - > name ) ;
2006-01-18 02:38:21 +03:00
tipc_link_reset ( dest_link ) ;
2006-01-02 21:04:38 +03:00
dest_link - > exp_msg_count = msg_count ;
if ( ! msg_count )
goto exit ;
} else if ( dest_link - > exp_msg_count = = START_CHANGEOVER ) {
dest_link - > exp_msg_count = msg_count ;
if ( ! msg_count )
goto exit ;
}
/* Receive original message */
if ( dest_link - > exp_msg_count = = 0 ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " %sgot too many tunnelled messages \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
dest_link - > exp_msg_count - - ;
if ( less ( msg_seqno ( msg ) , dest_link - > reset_checkpoint ) ) {
goto exit ;
} else {
* buf = buf_extract ( tunnel_buf , INT_H_SIZE ) ;
if ( * buf ! = NULL ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( tunnel_buf ) ;
2006-01-02 21:04:38 +03:00
return 1 ;
} else {
2012-06-29 08:16:37 +04:00
pr_warn ( " %soriginal msg dropped \n " , link_co_err ) ;
2006-01-02 21:04:38 +03:00
}
}
exit :
2006-03-21 09:36:47 +03:00
* buf = NULL ;
2011-11-04 21:24:29 +04:00
kfree_skb ( tunnel_buf ) ;
2006-01-02 21:04:38 +03:00
return 0 ;
}
/*
* Bundler functionality :
*/
2006-01-18 02:38:21 +03:00
void tipc_link_recv_bundle ( struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
u32 msgcount = msg_msgcnt ( buf_msg ( buf ) ) ;
u32 pos = INT_H_SIZE ;
struct sk_buff * obuf ;
while ( msgcount - - ) {
obuf = buf_extract ( buf , pos ) ;
if ( obuf = = NULL ) {
2012-06-29 08:16:37 +04:00
pr_warn ( " Link unable to unbundle message(s) \n " ) ;
2006-06-26 10:52:17 +04:00
break ;
2007-04-21 04:09:22 +04:00
}
2006-01-02 21:04:38 +03:00
pos + = align ( msg_size ( buf_msg ( obuf ) ) ) ;
2006-01-18 02:38:21 +03:00
tipc_net_route_msg ( obuf ) ;
2006-01-02 21:04:38 +03:00
}
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
}
/*
* Fragmentation / defragmentation :
*/
2007-02-09 17:25:21 +03:00
/*
2010-10-13 17:20:35 +04:00
* link_send_long_buf : Entry for buffers needing fragmentation .
2007-02-09 17:25:21 +03:00
* The buffer is complete , inclusive total message length .
2006-01-02 21:04:38 +03:00
* Returns user data length .
*/
2011-12-30 05:58:42 +04:00
static int link_send_long_buf ( struct tipc_link * l_ptr , struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
2011-04-17 21:06:23 +04:00
struct sk_buff * buf_chain = NULL ;
struct sk_buff * buf_chain_tail = ( struct sk_buff * ) & buf_chain ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * inmsg = buf_msg ( buf ) ;
struct tipc_msg fragm_hdr ;
u32 insize = msg_size ( inmsg ) ;
u32 dsz = msg_data_sz ( inmsg ) ;
unchar * crs = buf - > data ;
u32 rest = insize ;
2010-05-11 18:30:10 +04:00
u32 pack_sz = l_ptr - > max_pkt ;
2006-01-02 21:04:38 +03:00
u32 fragm_sz = pack_sz - INT_H_SIZE ;
2011-04-17 21:06:23 +04:00
u32 fragm_no = 0 ;
2008-06-05 04:36:58 +04:00
u32 destaddr ;
2006-01-02 21:04:38 +03:00
if ( msg_short ( inmsg ) )
destaddr = l_ptr - > addr ;
2008-06-05 04:36:58 +04:00
else
destaddr = msg_destnode ( inmsg ) ;
2006-01-02 21:04:38 +03:00
/* Prepare reusable fragment header: */
2010-05-11 18:30:12 +04:00
tipc_msg_init ( & fragm_hdr , MSG_FRAGMENTER , FIRST_FRAGMENT ,
2008-06-05 04:37:34 +04:00
INT_H_SIZE , destaddr ) ;
2006-01-02 21:04:38 +03:00
/* Chop up message: */
while ( rest > 0 ) {
struct sk_buff * fragm ;
if ( rest < = fragm_sz ) {
fragm_sz = rest ;
msg_set_type ( & fragm_hdr , LAST_FRAGMENT ) ;
}
2010-10-13 17:20:35 +04:00
fragm = tipc_buf_acquire ( fragm_sz + INT_H_SIZE ) ;
2006-01-02 21:04:38 +03:00
if ( fragm = = NULL ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2013-12-11 08:45:38 +04:00
kfree_skb_list ( buf_chain ) ;
2011-04-17 21:06:23 +04:00
return - ENOMEM ;
2006-01-02 21:04:38 +03:00
}
msg_set_size ( & fragm_hdr , fragm_sz + INT_H_SIZE ) ;
2011-04-17 21:06:23 +04:00
fragm_no + + ;
msg_set_fragm_no ( & fragm_hdr , fragm_no ) ;
2007-03-31 18:55:19 +04:00
skb_copy_to_linear_data ( fragm , & fragm_hdr , INT_H_SIZE ) ;
skb_copy_to_linear_data_offset ( fragm , INT_H_SIZE , crs ,
fragm_sz ) ;
2011-04-17 21:06:23 +04:00
buf_chain_tail - > next = fragm ;
buf_chain_tail = fragm ;
2006-01-02 21:04:38 +03:00
rest - = fragm_sz ;
crs + = fragm_sz ;
msg_set_type ( & fragm_hdr , FRAGMENT ) ;
}
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2011-04-17 21:06:23 +04:00
/* Append chain of fragments to send queue & send them */
l_ptr - > long_msg_seq_no + + ;
link_add_chain_to_outqueue ( l_ptr , buf_chain , l_ptr - > long_msg_seq_no ) ;
l_ptr - > stats . sent_fragments + = fragm_no ;
l_ptr - > stats . sent_fragmented + + ;
tipc_link_push_queue ( l_ptr ) ;
2006-01-02 21:04:38 +03:00
return dsz ;
}
2014-02-14 02:29:05 +04:00
/* tipc_link_frag_rcv(): Called with node lock on. Returns
2006-01-02 21:04:38 +03:00
* the reassembled buffer if message is complete .
*/
2014-02-14 02:29:05 +04:00
int tipc_link_frag_rcv ( struct sk_buff * * head , struct sk_buff * * tail ,
struct sk_buff * * fbuf )
2006-01-02 21:04:38 +03:00
{
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
struct sk_buff * frag = * fbuf ;
struct tipc_msg * msg = buf_msg ( frag ) ;
u32 fragid = msg_type ( msg ) ;
bool headstolen ;
int delta ;
skb_pull ( frag , msg_hdr_sz ( msg ) ) ;
if ( fragid = = FIRST_FRAGMENT ) {
if ( * head | | skb_unclone ( frag , GFP_ATOMIC ) )
goto out_free ;
* head = frag ;
skb_frag_list_init ( * head ) ;
2014-02-14 02:29:05 +04:00
* fbuf = NULL ;
2006-01-02 21:04:38 +03:00
return 0 ;
2013-11-13 12:35:11 +04:00
} else if ( * head & &
skb_try_coalesce ( * head , frag , & headstolen , & delta ) ) {
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
kfree_skb_partial ( frag , headstolen ) ;
} else {
if ( ! * head )
goto out_free ;
if ( ! skb_has_frag_list ( * head ) )
skb_shinfo ( * head ) - > frag_list = frag ;
else
( * tail ) - > next = frag ;
* tail = frag ;
( * head ) - > truesize + = frag - > truesize ;
}
if ( fragid = = LAST_FRAGMENT ) {
* fbuf = * head ;
* tail = * head = NULL ;
return LINK_REASM_COMPLETE ;
2006-01-02 21:04:38 +03:00
}
2014-02-14 02:29:05 +04:00
* fbuf = NULL ;
2006-01-02 21:04:38 +03:00
return 0 ;
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
out_free :
pr_warn_ratelimited ( " Link unable to reassemble fragmented message \n " ) ;
kfree_skb ( * fbuf ) ;
2014-02-14 02:29:05 +04:00
* fbuf = NULL ;
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 12:28:06 +04:00
return LINK_REASM_ERROR ;
2006-01-02 21:04:38 +03:00
}
2011-12-30 05:58:42 +04:00
/* link_set_supervision_props(): derive a link's supervision settings
 * from a tolerance value.
 * @l_ptr: link to update
 * @tolerance: new tolerance; silently ignored when it lies outside
 *             [TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL]
 *
 * The continuity interval is a quarter of the tolerance, capped at 500.
 * The abort limit is the tolerance divided by a quarter of that interval.
 */
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
{
	u32 quarter;

	if (tolerance < TIPC_MIN_LINK_TOL || tolerance > TIPC_MAX_LINK_TOL)
		return;

	quarter = tolerance / 4;

	l_ptr->tolerance = tolerance;
	if (quarter > 500)
		l_ptr->continuity_interval = 500;
	else
		l_ptr->continuity_interval = quarter;
	l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4);
}
2011-12-30 05:58:42 +04:00
/* tipc_link_set_queue_limits(): fill in a link's queue limit table.
 * @l_ptr: link to update
 * @window: base limit; only the four limits for data messages from this
 *          node are scaled from it, all other limits are fixed values
 */
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
{
	u32 third = window / 3;

	/* Data messages from this node, inclusive FIRST_FRAGM */
	l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window;
	l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = third * 4;
	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = third * 5;
	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = third * 6;

	/* Transiting data messages, inclusive FIRST_FRAGM */
	l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300;
	l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600;
	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;

	/* Internal message users */
	l_ptr->queue_limit[CONN_MANAGER] = 1200;
	l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
	l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;

	/* FRAGMENT and LAST_FRAGMENT packets */
	l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
}
/**
* link_find_link - locate link by name
2012-07-10 14:55:09 +04:00
* @ name : ptr to link name string
* @ node : ptr to area to be filled with ptr to associated node
2007-02-09 17:25:21 +03:00
*
2006-01-18 02:38:21 +03:00
* Caller must hold ' tipc_net_lock ' to ensure node and bearer are not deleted ;
2006-01-02 21:04:38 +03:00
* this also prevents link deletion .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns pointer to link ( or 0 if invalid link name ) .
*/
2011-12-30 05:58:42 +04:00
static struct tipc_link * link_find_link ( const char * name ,
struct tipc_node * * node )
2006-01-02 21:04:38 +03:00
{
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2013-10-18 09:23:21 +04:00
struct tipc_node * n_ptr ;
int i ;
2006-01-02 21:04:38 +03:00
2013-10-18 09:23:21 +04:00
list_for_each_entry ( n_ptr , & tipc_node_list , list ) {
for ( i = 0 ; i < MAX_BEARERS ; i + + ) {
l_ptr = n_ptr - > links [ i ] ;
if ( l_ptr & & ! strcmp ( l_ptr - > name , name ) )
goto found ;
}
}
l_ptr = NULL ;
n_ptr = NULL ;
found :
* node = n_ptr ;
2006-01-02 21:04:38 +03:00
return l_ptr ;
}
2011-10-18 19:34:29 +04:00
/**
 * link_value_is_valid -- validate proposed link tolerance/priority/window
 *
 * @cmd: value type (TIPC_CMD_SET_LINK_*)
 * @new_value: the new value
 *
 * Any @cmd other than the three TIPC_CMD_SET_LINK_* commands handled here
 * is reported as invalid.
 *
 * Returns 1 if value is within range, 0 if not.
 */
static int link_value_is_valid(u16 cmd, u32 new_value)
{
	switch (cmd) {
	case TIPC_CMD_SET_LINK_TOL:
		if (new_value < TIPC_MIN_LINK_TOL)
			return 0;
		return new_value <= TIPC_MAX_LINK_TOL;
	case TIPC_CMD_SET_LINK_PRI:
		return new_value <= TIPC_MAX_LINK_PRI;
	case TIPC_CMD_SET_LINK_WINDOW:
		if (new_value < TIPC_MIN_LINK_WIN)
			return 0;
		return new_value <= TIPC_MAX_LINK_WIN;
	default:
		return 0;
	}
}
/**
 * link_cmd_set_value - change priority/tolerance/window for link/bearer/media
 * @name: ptr to link, bearer, or media name
 * @new_value: new value of link, bearer, or media setting
 * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
 *
 * @name is tried as a link name first, then as a bearer name, and finally
 * as a media name; the first match is the one that gets updated.
 *
 * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
 *
 * Returns 0 if value updated, -EINVAL if @cmd is not recognised, and
 * -ENODEV if @name matches no link, bearer, or media.
 */
static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
{
	struct tipc_node *node;
	struct tipc_link *l_ptr;
	struct tipc_bearer *b_ptr;
	struct tipc_media *m_ptr;
	int res = 0;

	l_ptr = link_find_link(name, &node);
	if (l_ptr) {
		/*
		 * acquire node lock for tipc_link_send_proto_msg().
		 * see "TIPC locking policy" in net.c.
		 */
		tipc_node_lock(node);
		if (cmd == TIPC_CMD_SET_LINK_TOL) {
			link_set_supervision_props(l_ptr, new_value);
			tipc_link_send_proto_msg(l_ptr,
				STATE_MSG, 0, 0, new_value, 0, 0);
		} else if (cmd == TIPC_CMD_SET_LINK_PRI) {
			l_ptr->priority = new_value;
			tipc_link_send_proto_msg(l_ptr,
				STATE_MSG, 0, 0, 0, new_value, 0);
		} else if (cmd == TIPC_CMD_SET_LINK_WINDOW) {
			tipc_link_set_queue_limits(l_ptr, new_value);
		} else {
			res = -EINVAL;
		}
		tipc_node_unlock(node);
		return res;
	}

	b_ptr = tipc_bearer_find(name);
	if (b_ptr) {
		switch (cmd) {
		case TIPC_CMD_SET_LINK_TOL:
			b_ptr->tolerance = new_value;
			return 0;
		case TIPC_CMD_SET_LINK_PRI:
			b_ptr->priority = new_value;
			return 0;
		case TIPC_CMD_SET_LINK_WINDOW:
			b_ptr->window = new_value;
			return 0;
		default:
			return -EINVAL;
		}
	}

	m_ptr = tipc_media_find(name);
	if (!m_ptr)
		return -ENODEV;

	switch (cmd) {
	case TIPC_CMD_SET_LINK_TOL:
		m_ptr->tolerance = new_value;
		return 0;
	case TIPC_CMD_SET_LINK_PRI:
		m_ptr->priority = new_value;
		return 0;
	case TIPC_CMD_SET_LINK_WINDOW:
		m_ptr->window = new_value;
		return 0;
	default:
		return -EINVAL;
	}
}
2007-02-09 17:25:21 +03:00
struct sk_buff * tipc_link_cmd_config ( const void * req_tlv_area , int req_tlv_space ,
2006-01-18 02:38:21 +03:00
u16 cmd )
2006-01-02 21:04:38 +03:00
{
struct tipc_link_config * args ;
2007-02-09 17:25:21 +03:00
u32 new_value ;
int res ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_CONFIG ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
args = ( struct tipc_link_config * ) TLV_DATA ( req_tlv_area ) ;
new_value = ntohl ( args - > value ) ;
2011-10-18 19:34:29 +04:00
if ( ! link_value_is_valid ( cmd , new_value ) )
return tipc_cfg_reply_error_string (
" cannot change, value invalid " ) ;
2006-01-18 02:38:21 +03:00
if ( ! strcmp ( args - > name , tipc_bclink_name ) ) {
2006-01-02 21:04:38 +03:00
if ( ( cmd = = TIPC_CMD_SET_LINK_WINDOW ) & &
2006-01-18 02:38:21 +03:00
( tipc_bclink_set_queue_limits ( new_value ) = = 0 ) )
return tipc_cfg_reply_none ( ) ;
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_NOT_SUPPORTED
2006-01-18 02:38:21 +03:00
" (cannot change setting on broadcast link) " ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2011-10-18 19:34:29 +04:00
res = link_cmd_set_value ( args - > name , new_value , cmd ) ;
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
if ( res )
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( " cannot change link setting " ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
/**
* link_reset_statistics - reset link statistics
* @ l_ptr : pointer to link
*/
2011-12-30 05:58:42 +04:00
static void link_reset_statistics ( struct tipc_link * l_ptr )
2006-01-02 21:04:38 +03:00
{
memset ( & l_ptr - > stats , 0 , sizeof ( l_ptr - > stats ) ) ;
l_ptr - > stats . sent_info = l_ptr - > next_out_no ;
l_ptr - > stats . recv_info = l_ptr - > next_in_no ;
}
2006-01-18 02:38:21 +03:00
struct sk_buff * tipc_link_cmd_reset_stats ( const void * req_tlv_area , int req_tlv_space )
2006-01-02 21:04:38 +03:00
{
char * link_name ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_NAME ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
link_name = ( char * ) TLV_DATA ( req_tlv_area ) ;
2006-01-18 02:38:21 +03:00
if ( ! strcmp ( link_name , tipc_bclink_name ) ) {
if ( tipc_bclink_reset_stats ( ) )
return tipc_cfg_reply_error_string ( " link not found " ) ;
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2007-02-09 17:25:21 +03:00
l_ptr = link_find_link ( link_name , & node ) ;
2006-01-02 21:04:38 +03:00
if ( ! l_ptr ) {
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
return tipc_cfg_reply_error_string ( " link not found " ) ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
tipc_node_lock ( node ) ;
2006-01-02 21:04:38 +03:00
link_reset_statistics ( l_ptr ) ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
read_unlock_bh ( & tipc_net_lock ) ;
return tipc_cfg_reply_none ( ) ;
2006-01-02 21:04:38 +03:00
}
/**
 * percent - convert count to a percentage of total (rounding up or down)
 * @count: value to express as a percentage
 * @total: value that corresponds to 100%
 *
 * Returns count as a percentage of total, rounded to the nearest integer,
 * or 0 when total is 0. The arithmetic is done in u32, so the intermediate
 * product count * 100 wraps for very large counts.
 */
static u32 percent(u32 count, u32 total)
{
	/* Never divide by zero, even if a caller forgets to clamp total */
	if (!total)
		return 0;

	/* Adding half the divisor before dividing rounds to nearest */
	return (count * 100 + (total / 2)) / total;
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_stats - print link statistics
2006-01-02 21:04:38 +03:00
* @ name : link name
* @ buf : print buffer area
* @ buf_size : size of print buffer area
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns length of print buffer data string ( or 0 if error )
*/
2006-01-18 02:38:21 +03:00
static int tipc_link_stats ( const char * name , char * buf , const u32 buf_size )
2006-01-02 21:04:38 +03:00
{
2012-06-29 08:50:23 +04:00
struct tipc_link * l ;
struct tipc_stats * s ;
2008-09-03 10:38:32 +04:00
struct tipc_node * node ;
2006-01-02 21:04:38 +03:00
char * status ;
u32 profile_total = 0 ;
2012-06-29 08:50:23 +04:00
int ret ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
if ( ! strcmp ( name , tipc_bclink_name ) )
return tipc_bclink_stats ( buf , buf_size ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2012-06-29 08:50:23 +04:00
l = link_find_link ( name , & node ) ;
if ( ! l ) {
2006-01-18 02:38:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
return 0 ;
}
2006-01-18 02:38:21 +03:00
tipc_node_lock ( node ) ;
2012-06-29 08:50:23 +04:00
s = & l - > stats ;
2006-01-02 21:04:38 +03:00
2012-06-29 08:50:23 +04:00
if ( tipc_link_is_active ( l ) )
2006-01-02 21:04:38 +03:00
status = " ACTIVE " ;
2012-06-29 08:50:23 +04:00
else if ( tipc_link_is_up ( l ) )
2006-01-02 21:04:38 +03:00
status = " STANDBY " ;
else
status = " DEFUNCT " ;
2012-06-29 08:50:23 +04:00
ret = tipc_snprintf ( buf , buf_size , " Link <%s> \n "
" %s MTU:%u Priority:%u Tolerance:%u ms "
" Window:%u packets \n " ,
l - > name , status , l - > max_pkt , l - > priority ,
l - > tolerance , l - > queue_limit [ 0 ] ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" RX packets:%u fragments:%u/%u bundles:%u/%u \n " ,
l - > next_in_no - s - > recv_info , s - > recv_fragments ,
s - > recv_fragmented , s - > recv_bundles ,
s - > recv_bundled ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX packets:%u fragments:%u/%u bundles:%u/%u \n " ,
l - > next_out_no - s - > sent_info , s - > sent_fragments ,
s - > sent_fragmented , s - > sent_bundles ,
s - > sent_bundled ) ;
profile_total = s - > msg_length_counts ;
2006-01-02 21:04:38 +03:00
if ( ! profile_total )
profile_total = 1 ;
2012-06-29 08:50:23 +04:00
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX profile sample:%u packets average:%u octets \n "
" 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
" -16384:%u%% -32768:%u%% -66000:%u%% \n " ,
s - > msg_length_counts ,
s - > msg_lengths_total / profile_total ,
percent ( s - > msg_length_profile [ 0 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 1 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 2 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 3 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 4 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 5 ] , profile_total ) ,
percent ( s - > msg_length_profile [ 6 ] , profile_total ) ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" RX states:%u probes:%u naks:%u defs:%u "
" dups:%u \n " , s - > recv_states , s - > recv_probes ,
s - > recv_nacks , s - > deferred_recv , s - > duplicates ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
" TX states:%u probes:%u naks:%u acks:%u "
" dups:%u \n " , s - > sent_states , s - > sent_probes ,
s - > sent_nacks , s - > sent_acks , s - > retransmitted ) ;
ret + = tipc_snprintf ( buf + ret , buf_size - ret ,
2012-11-15 07:34:45 +04:00
" Congestion link:%u Send queue "
" max:%u avg:%u \n " , s - > link_congs ,
2012-06-29 08:50:23 +04:00
s - > max_queue_sz , s - > queue_sz_counts ?
( s - > accu_queue_sz / s - > queue_sz_counts ) : 0 ) ;
2006-01-02 21:04:38 +03:00
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( node ) ;
read_unlock_bh ( & tipc_net_lock ) ;
2012-06-29 08:50:23 +04:00
return ret ;
2006-01-02 21:04:38 +03:00
}
2006-01-18 02:38:21 +03:00
struct sk_buff * tipc_link_cmd_show_stats ( const void * req_tlv_area , int req_tlv_space )
2006-01-02 21:04:38 +03:00
{
struct sk_buff * buf ;
struct tlv_desc * rep_tlv ;
int str_len ;
2012-06-29 08:50:23 +04:00
int pb_len ;
char * pb ;
2006-01-02 21:04:38 +03:00
if ( ! TLV_CHECK ( req_tlv_area , req_tlv_space , TIPC_TLV_LINK_NAME ) )
2006-01-18 02:38:21 +03:00
return tipc_cfg_reply_error_string ( TIPC_CFG_TLV_ERROR ) ;
2006-01-02 21:04:38 +03:00
2012-06-29 08:50:23 +04:00
buf = tipc_cfg_reply_alloc ( TLV_SPACE ( ULTRA_STRING_MAX_LEN ) ) ;
2006-01-02 21:04:38 +03:00
if ( ! buf )
return NULL ;
rep_tlv = ( struct tlv_desc * ) buf - > data ;
2012-06-29 08:50:23 +04:00
pb = TLV_DATA ( rep_tlv ) ;
pb_len = ULTRA_STRING_MAX_LEN ;
2006-01-18 02:38:21 +03:00
str_len = tipc_link_stats ( ( char * ) TLV_DATA ( req_tlv_area ) ,
2012-06-29 08:50:23 +04:00
pb , pb_len ) ;
2006-01-02 21:04:38 +03:00
if ( ! str_len ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2007-02-09 17:25:21 +03:00
return tipc_cfg_reply_error_string ( " link not found " ) ;
2006-01-02 21:04:38 +03:00
}
2012-06-29 08:50:23 +04:00
str_len + = 1 ; /* for "\0" */
2006-01-02 21:04:38 +03:00
skb_put ( buf , TLV_SPACE ( str_len ) ) ;
TLV_SET ( rep_tlv , TIPC_TLV_ULTRA_STRING , NULL , str_len ) ;
return buf ;
}
/**
2006-01-18 02:38:21 +03:00
* tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
2006-01-02 21:04:38 +03:00
* @ dest : network address of destination node
* @ selector : used to select from set of active links
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* If no active link can be found , uses default maximum packet size .
*/
2006-01-18 02:38:21 +03:00
u32 tipc_link_get_max_pkt ( u32 dest , u32 selector )
2006-01-02 21:04:38 +03:00
{
2008-09-03 10:38:32 +04:00
struct tipc_node * n_ptr ;
2011-12-30 05:58:42 +04:00
struct tipc_link * l_ptr ;
2006-01-02 21:04:38 +03:00
u32 res = MAX_PKT_DEFAULT ;
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
if ( dest = = tipc_own_addr )
return MAX_MSG_SIZE ;
2007-02-09 17:25:21 +03:00
read_lock_bh ( & tipc_net_lock ) ;
2010-12-31 21:59:18 +03:00
n_ptr = tipc_node_find ( dest ) ;
2006-01-02 21:04:38 +03:00
if ( n_ptr ) {
2006-01-18 02:38:21 +03:00
tipc_node_lock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
l_ptr = n_ptr - > active_links [ selector & 1 ] ;
if ( l_ptr )
2010-05-11 18:30:10 +04:00
res = l_ptr - > max_pkt ;
2006-01-18 02:38:21 +03:00
tipc_node_unlock ( n_ptr ) ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
read_unlock_bh ( & tipc_net_lock ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
2011-12-30 05:58:42 +04:00
static void link_print ( struct tipc_link * l_ptr , const char * str )
2006-01-02 21:04:38 +03:00
{
2012-07-12 03:27:56 +04:00
pr_info ( " %s Link %x<%s>: " , str , l_ptr - > addr , l_ptr - > b_ptr - > name ) ;
2010-12-31 21:59:27 +03:00
2006-01-02 21:04:38 +03:00
if ( link_working_unknown ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :WU \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_reset_reset ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :RR \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_reset_unknown ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :RU \n " ) ;
2010-12-31 21:59:27 +03:00
else if ( link_working_working ( l_ptr ) )
2012-07-12 03:27:56 +04:00
pr_cont ( " :WW \n " ) ;
else
pr_cont ( " \n " ) ;
2006-01-02 21:04:38 +03:00
}