2015-03-05 12:23:49 +03:00
/* net/tipc/udp_media.c: IP bearer support for TIPC
*
* Copyright ( c ) 2015 , Ericsson AB
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are met :
*
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
*
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE .
*/
# include <linux/socket.h>
# include <linux/ip.h>
# include <linux/udp.h>
# include <linux/inet.h>
# include <linux/inetdevice.h>
# include <linux/igmp.h>
# include <linux/kernel.h>
# include <linux/workqueue.h>
# include <linux/list.h>
# include <net/sock.h>
# include <net/ip.h>
# include <net/udp_tunnel.h>
2019-03-22 16:06:09 +03:00
# include <net/ipv6_stubs.h>
2015-03-05 12:23:49 +03:00
# include <linux/tipc_netlink.h>
# include "core.h"
2018-03-22 22:42:52 +03:00
# include "addr.h"
# include "net.h"
2015-03-05 12:23:49 +03:00
# include "bearer.h"
2016-03-04 19:04:42 +03:00
# include "netlink.h"
2016-08-26 11:52:53 +03:00
# include "msg.h"
2020-09-18 16:18:19 +03:00
# include "udp_media.h"
2015-03-05 12:23:49 +03:00
/* IANA assigned UDP port */
# define UDP_PORT_DEFAULT 6118
2016-03-14 11:43:52 +03:00
/* Minimum skb headroom, in bytes, that tipc_udp_send_msg ensures before transmit */
# define UDP_MIN_HEADROOM 48
tipc: conditionally expand buffer headroom over udp tunnel
In commit d999297c3dbbe ("tipc: reduce locking scope during packet reception")
we altered the packet retransmission function. Since then, when
restransmitting packets, we create a clone of the original buffer
using __pskb_copy(skb, MIN_H_SIZE), where MIN_H_SIZE is the size of
the area we want to have copied, but also the smallest possible TIPC
packet size. The value of MIN_H_SIZE is 24.
Unfortunately, __pskb_copy() also has the effect that the headroom
of the cloned buffer takes the size MIN_H_SIZE. This is too small
for carrying the packet over the UDP tunnel bearer, which requires
a minimum headroom of 28 bytes. A change to just use pskb_copy()
lets the clone inherit the original headroom of 80 bytes, but also
assumes that the copied data area is of at least that size, something
that is not always the case. So that is not a viable solution.
We now fix this by adding a check for sufficient headroom in the
transmit function of udp_media.c, and expanding it when necessary.
Fixes: commit d999297c3dbbe ("tipc: reduce locking scope during packet reception")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-19 18:43:11 +03:00
2015-03-05 12:23:49 +03:00
/**
* struct udp_media_addr - IP / UDP addressing information
* @ proto : ethernet protocol id ( ETH_P_IP or ETH_P_IPV6 ) , selects the union member
* @ port : UDP port number
* @ ipv4 : IPv4 address , used when proto is ETH_P_IP
* @ ipv6 : IPv6 address , used when proto is ETH_P_IPV6
*
* This is the bearer level originating address used in neighbor discovery
* messages , and all fields should be in network byte order
*
* The structure is copied byte - wise into and out of the TIPC media address
* value and compared with memcmp , so its layout must not change .
*/
struct udp_media_addr {
__be16 proto ;
2016-06-27 14:34:07 +03:00
__be16 port ;
2015-03-05 12:23:49 +03:00
union {
struct in_addr ipv4 ;
struct in6_addr ipv6 ;
} ;
} ;
2016-08-26 11:52:53 +03:00
/* struct udp_replicast - container for UDP remote addresses
 *
 * addr      : remote IP / UDP address a replicated packet is sent to
 * dst_cache : route cache handed to tipc_udp_xmit for this remote
 * rcu       : RCU head ; presumably used for deferred freeing - the user is
 *             not visible in this part of the file , TODO confirm
 * list      : linkage into the owning bearer ' s rcast . list
 */
struct udp_replicast {
struct udp_media_addr addr ;
2019-06-20 14:03:41 +03:00
struct dst_cache dst_cache ;
2016-08-26 11:52:53 +03:00
struct rcu_head rcu ;
struct list_head list ;
} ;
2015-03-05 12:23:49 +03:00
/**
* struct udp_bearer - ip / udp bearer data structure
* @ bearer : associated generic tipc bearer
* @ ubsock : bearer associated socket
* @ ifindex : local address scope
* @ work : used to schedule deferred work on a bearer
* @ rcast : head of the replicast peer list ; its dst_cache is the route
*           cache used for non - replicast transmissions
*/
struct udp_bearer {
struct tipc_bearer __rcu * bearer ;
struct socket * ubsock ;
u32 ifindex ;
struct work_struct work ;
2016-08-26 11:52:53 +03:00
struct udp_replicast rcast ;
2015-03-05 12:23:49 +03:00
} ;
2016-08-26 11:52:52 +03:00
static int tipc_udp_is_mcast_addr ( struct udp_media_addr * addr )
{
if ( ntohs ( addr - > proto ) = = ETH_P_IP )
return ipv4_is_multicast ( addr - > ipv4 . s_addr ) ;
# if IS_ENABLED(CONFIG_IPV6)
else
return ipv6_addr_is_multicast ( & addr - > ipv6 ) ;
# endif
return 0 ;
}
2015-03-05 12:23:49 +03:00
/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
static void tipc_udp_media_addr_set ( struct tipc_media_addr * addr ,
struct udp_media_addr * ua )
{
memset ( addr , 0 , sizeof ( struct tipc_media_addr ) ) ;
addr - > media_id = TIPC_MEDIA_TYPE_UDP ;
memcpy ( addr - > value , ua , sizeof ( struct udp_media_addr ) ) ;
2016-08-26 11:52:52 +03:00
if ( tipc_udp_is_mcast_addr ( ua ) )
2017-01-18 21:50:50 +03:00
addr - > broadcast = TIPC_BROADCAST_SUPPORT ;
2015-03-05 12:23:49 +03:00
}
/* tipc_udp_addr2str - convert ip/udp address to string */
static int tipc_udp_addr2str ( struct tipc_media_addr * a , char * buf , int size )
{
struct udp_media_addr * ua = ( struct udp_media_addr * ) & a - > value ;
if ( ntohs ( ua - > proto ) = = ETH_P_IP )
2016-06-27 14:34:07 +03:00
snprintf ( buf , size , " %pI4:%u " , & ua - > ipv4 , ntohs ( ua - > port ) ) ;
2015-03-05 12:23:49 +03:00
else if ( ntohs ( ua - > proto ) = = ETH_P_IPV6 )
2016-06-27 14:34:07 +03:00
snprintf ( buf , size , " %pI6:%u " , & ua - > ipv6 , ntohs ( ua - > port ) ) ;
2015-03-05 12:23:49 +03:00
else
pr_err ( " Invalid UDP media address \n " ) ;
return 0 ;
}
/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */
static int tipc_udp_msg2addr ( struct tipc_bearer * b , struct tipc_media_addr * a ,
char * msg )
{
struct udp_media_addr * ua ;
ua = ( struct udp_media_addr * ) ( msg + TIPC_MEDIA_ADDR_OFFSET ) ;
if ( msg [ TIPC_MEDIA_TYPE_OFFSET ] ! = TIPC_MEDIA_TYPE_UDP )
return - EINVAL ;
tipc_udp_media_addr_set ( a , ua ) ;
return 0 ;
}
/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */
static int tipc_udp_addr2msg ( char * msg , struct tipc_media_addr * a )
{
memset ( msg , 0 , TIPC_MEDIA_INFO_SIZE ) ;
msg [ TIPC_MEDIA_TYPE_OFFSET ] = TIPC_MEDIA_TYPE_UDP ;
memcpy ( msg + TIPC_MEDIA_ADDR_OFFSET , a - > value ,
sizeof ( struct udp_media_addr ) ) ;
return 0 ;
}
/* tipc_send_msg - enqueue a send request */
2016-08-26 11:52:51 +03:00
static int tipc_udp_xmit ( struct net * net , struct sk_buff * skb ,
struct udp_bearer * ub , struct udp_media_addr * src ,
2019-06-20 14:03:41 +03:00
struct udp_media_addr * dst , struct dst_cache * cache )
2015-03-05 12:23:49 +03:00
{
2020-05-21 21:29:58 +03:00
struct dst_entry * ndst ;
2015-03-05 12:23:49 +03:00
int ttl , err = 0 ;
2020-05-21 21:29:58 +03:00
local_bh_disable ( ) ;
ndst = dst_cache_get ( cache ) ;
2015-03-09 12:19:31 +03:00
if ( dst - > proto = = htons ( ETH_P_IP ) ) {
2019-06-20 14:03:41 +03:00
struct rtable * rt = ( struct rtable * ) ndst ;
if ( ! rt ) {
struct flowi4 fl = {
. daddr = dst - > ipv4 . s_addr ,
. saddr = src - > ipv4 . s_addr ,
. flowi4_mark = skb - > mark ,
. flowi4_proto = IPPROTO_UDP
} ;
rt = ip_route_output_key ( net , & fl ) ;
if ( IS_ERR ( rt ) ) {
err = PTR_ERR ( rt ) ;
goto tx_error ;
}
dst_cache_set_ip4 ( cache , & rt - > dst , fl . saddr ) ;
2015-03-05 12:23:49 +03:00
}
2016-03-03 16:20:40 +03:00
2015-03-05 12:23:49 +03:00
ttl = ip4_dst_hoplimit ( & rt - > dst ) ;
2015-12-25 01:34:54 +03:00
udp_tunnel_xmit_skb ( rt , ub - > ubsock - > sk , skb , src - > ipv4 . s_addr ,
2016-06-27 14:34:07 +03:00
dst - > ipv4 . s_addr , 0 , ttl , 0 , src - > port ,
dst - > port , false , true ) ;
2015-03-05 12:23:49 +03:00
# if IS_ENABLED(CONFIG_IPV6)
} else {
2019-06-20 14:03:41 +03:00
if ( ! ndst ) {
struct flowi6 fl6 = {
. flowi6_oif = ub - > ifindex ,
. daddr = dst - > ipv6 ,
. saddr = src - > ipv6 ,
. flowi6_proto = IPPROTO_UDP
} ;
2019-12-04 17:35:53 +03:00
ndst = ipv6_stub - > ipv6_dst_lookup_flow ( net ,
ub - > ubsock - > sk ,
& fl6 , NULL ) ;
if ( IS_ERR ( ndst ) ) {
err = PTR_ERR ( ndst ) ;
2019-06-20 14:03:41 +03:00
goto tx_error ;
2019-12-04 17:35:53 +03:00
}
2019-06-20 14:03:41 +03:00
dst_cache_set_ip6 ( cache , ndst , & fl6 . saddr ) ;
}
2015-03-05 12:23:49 +03:00
ttl = ip6_dst_hoplimit ( ndst ) ;
2019-06-17 16:34:15 +03:00
err = udp_tunnel6_xmit_skb ( ndst , ub - > ubsock - > sk , skb , NULL ,
& src - > ipv6 , & dst - > ipv6 , 0 , ttl , 0 ,
src - > port , dst - > port , false ) ;
2015-03-05 12:23:49 +03:00
# endif
}
2020-05-21 21:29:58 +03:00
local_bh_enable ( ) ;
2015-03-05 12:23:49 +03:00
return err ;
tx_error :
2020-05-21 21:29:58 +03:00
local_bh_enable ( ) ;
2015-10-22 15:51:45 +03:00
kfree_skb ( skb ) ;
2015-03-05 12:23:49 +03:00
return err ;
}
2016-08-26 11:52:51 +03:00
static int tipc_udp_send_msg ( struct net * net , struct sk_buff * skb ,
struct tipc_bearer * b ,
struct tipc_media_addr * addr )
{
struct udp_media_addr * src = ( struct udp_media_addr * ) & b - > addr . value ;
struct udp_media_addr * dst = ( struct udp_media_addr * ) & addr - > value ;
2016-08-26 11:52:53 +03:00
struct udp_replicast * rcast ;
2016-08-26 11:52:51 +03:00
struct udp_bearer * ub ;
int err = 0 ;
if ( skb_headroom ( skb ) < UDP_MIN_HEADROOM ) {
err = pskb_expand_head ( skb , UDP_MIN_HEADROOM , 0 , GFP_ATOMIC ) ;
if ( err )
2016-08-26 11:52:53 +03:00
goto out ;
2016-08-26 11:52:51 +03:00
}
skb_set_inner_protocol ( skb , htons ( ETH_P_TIPC ) ) ;
2019-07-01 19:54:55 +03:00
ub = rcu_dereference ( b - > media_ptr ) ;
2016-08-26 11:52:51 +03:00
if ( ! ub ) {
err = - ENODEV ;
2016-08-26 11:52:53 +03:00
goto out ;
2016-08-26 11:52:51 +03:00
}
2017-01-18 21:50:50 +03:00
if ( addr - > broadcast ! = TIPC_REPLICAST_SUPPORT )
2019-06-20 14:03:41 +03:00
return tipc_udp_xmit ( net , skb , ub , src , dst ,
& ub - > rcast . dst_cache ) ;
2016-08-26 11:52:51 +03:00
2016-08-26 11:52:53 +03:00
/* Replicast, send an skb to each configured IP address */
list_for_each_entry_rcu ( rcast , & ub - > rcast . list , list ) {
struct sk_buff * _skb ;
_skb = pskb_copy ( skb , GFP_ATOMIC ) ;
if ( ! _skb ) {
err = - ENOMEM ;
goto out ;
}
2019-06-20 14:03:41 +03:00
err = tipc_udp_xmit ( net , _skb , ub , src , & rcast - > addr ,
& rcast - > dst_cache ) ;
2018-12-10 23:45:45 +03:00
if ( err )
2016-08-26 11:52:53 +03:00
goto out ;
}
err = 0 ;
out :
2016-08-26 11:52:51 +03:00
kfree_skb ( skb ) ;
return err ;
}
2016-08-26 11:52:54 +03:00
static bool tipc_udp_is_known_peer ( struct tipc_bearer * b ,
struct udp_media_addr * addr )
{
struct udp_replicast * rcast , * tmp ;
struct udp_bearer * ub ;
ub = rcu_dereference_rtnl ( b - > media_ptr ) ;
if ( ! ub ) {
pr_err_ratelimited ( " UDP bearer instance not found \n " ) ;
return false ;
}
list_for_each_entry_safe ( rcast , tmp , & ub - > rcast . list , list ) {
if ( ! memcmp ( & rcast - > addr , addr , sizeof ( struct udp_media_addr ) ) )
return true ;
}
return false ;
}
2016-08-26 11:52:53 +03:00
static int tipc_udp_rcast_add ( struct tipc_bearer * b ,
struct udp_media_addr * addr )
{
struct udp_replicast * rcast ;
struct udp_bearer * ub ;
ub = rcu_dereference_rtnl ( b - > media_ptr ) ;
if ( ! ub )
return - ENODEV ;
rcast = kmalloc ( sizeof ( * rcast ) , GFP_ATOMIC ) ;
if ( ! rcast )
return - ENOMEM ;
2019-06-20 14:03:41 +03:00
if ( dst_cache_init ( & rcast - > dst_cache , GFP_ATOMIC ) ) {
kfree ( rcast ) ;
return - ENOMEM ;
}
2016-08-26 11:52:53 +03:00
memcpy ( & rcast - > addr , addr , sizeof ( struct udp_media_addr ) ) ;
if ( ntohs ( addr - > proto ) = = ETH_P_IP )
pr_info ( " New replicast peer: %pI4 \n " , & rcast - > addr . ipv4 ) ;
# if IS_ENABLED(CONFIG_IPV6)
else if ( ntohs ( addr - > proto ) = = ETH_P_IPV6 )
pr_info ( " New replicast peer: %pI6 \n " , & rcast - > addr . ipv6 ) ;
# endif
2017-01-18 21:50:50 +03:00
b - > bcast_addr . broadcast = TIPC_REPLICAST_SUPPORT ;
2016-08-26 11:52:53 +03:00
list_add_rcu ( & rcast - > list , & ub - > rcast . list ) ;
return 0 ;
}
2016-08-26 11:52:54 +03:00
static int tipc_udp_rcast_disc ( struct tipc_bearer * b , struct sk_buff * skb )
{
struct udp_media_addr src = { 0 } ;
struct udp_media_addr * dst ;
dst = ( struct udp_media_addr * ) & b - > bcast_addr . value ;
if ( tipc_udp_is_mcast_addr ( dst ) )
return 0 ;
src . port = udp_hdr ( skb ) - > source ;
if ( ip_hdr ( skb ) - > version = = 4 ) {
struct iphdr * iphdr = ip_hdr ( skb ) ;
src . proto = htons ( ETH_P_IP ) ;
src . ipv4 . s_addr = iphdr - > saddr ;
if ( ipv4_is_multicast ( iphdr - > daddr ) )
return 0 ;
# if IS_ENABLED(CONFIG_IPV6)
} else if ( ip_hdr ( skb ) - > version = = 6 ) {
struct ipv6hdr * iphdr = ipv6_hdr ( skb ) ;
src . proto = htons ( ETH_P_IPV6 ) ;
src . ipv6 = iphdr - > saddr ;
if ( ipv6_addr_is_multicast ( & iphdr - > daddr ) )
return 0 ;
# endif
} else {
return 0 ;
}
if ( likely ( tipc_udp_is_known_peer ( b , & src ) ) )
return 0 ;
return tipc_udp_rcast_add ( b , & src ) ;
}
2015-03-05 12:23:49 +03:00
/* tipc_udp_recv - read data from bearer socket */
static int tipc_udp_recv ( struct sock * sk , struct sk_buff * skb )
{
struct udp_bearer * ub ;
struct tipc_bearer * b ;
2016-08-26 11:52:54 +03:00
struct tipc_msg * hdr ;
int err ;
2015-03-05 12:23:49 +03:00
ub = rcu_dereference_sk_user_data ( sk ) ;
if ( ! ub ) {
pr_err_ratelimited ( " Failed to get UDP bearer reference " ) ;
2016-08-26 11:52:54 +03:00
goto out ;
2015-03-05 12:23:49 +03:00
}
skb_pull ( skb , sizeof ( struct udphdr ) ) ;
2016-08-26 11:52:54 +03:00
hdr = buf_msg ( skb ) ;
2019-04-23 19:24:46 +03:00
b = rcu_dereference ( ub - > bearer ) ;
2016-08-26 11:52:54 +03:00
if ( ! b )
2019-04-23 19:24:46 +03:00
goto out ;
2015-03-05 12:23:49 +03:00
2016-08-16 18:53:50 +03:00
if ( b & & test_bit ( 0 , & b - > up ) ) {
2019-11-08 08:05:11 +03:00
TIPC_SKB_CB ( skb ) - > flags = 0 ;
2015-03-05 12:23:49 +03:00
tipc_rcv ( sock_net ( sk ) , skb , b ) ;
return 0 ;
}
2016-08-26 11:52:54 +03:00
if ( unlikely ( msg_user ( hdr ) = = LINK_CONFIG ) ) {
err = tipc_udp_rcast_disc ( b , skb ) ;
if ( err )
2019-04-23 19:24:46 +03:00
goto out ;
2016-08-26 11:52:54 +03:00
}
out :
2015-03-05 12:23:49 +03:00
kfree_skb ( skb ) ;
return 0 ;
}
static int enable_mcast ( struct udp_bearer * ub , struct udp_media_addr * remote )
{
int err = 0 ;
struct ip_mreqn mreqn ;
struct sock * sk = ub - > ubsock - > sk ;
if ( ntohs ( remote - > proto ) = = ETH_P_IP ) {
mreqn . imr_multiaddr = remote - > ipv4 ;
mreqn . imr_ifindex = ub - > ifindex ;
ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop}
in favor of their inner __ ones, which doesn't grab rtnl.
As these functions need to operate on a locked socket, we can't be
grabbing rtnl by then. It's too late and doing so causes reversed
locking.
So this patch:
- move rtnl handling to callers instead while already fixing some
reversed locking situations, like on vxlan and ipvs code.
- renames __ ones to not have the __ mark:
__ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group
__ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop}
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 20:50:43 +03:00
err = ip_mc_join_group ( sk , & mreqn ) ;
2015-03-19 22:47:58 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2015-03-05 12:23:49 +03:00
} else {
2015-03-19 22:47:58 +03:00
err = ipv6_stub - > ipv6_sock_mc_join ( sk , ub - > ifindex ,
& remote - > ipv6 ) ;
# endif
2015-03-05 12:23:49 +03:00
}
return err ;
}
2016-08-26 11:52:55 +03:00
static int __tipc_nl_add_udp_addr ( struct sk_buff * skb ,
struct udp_media_addr * addr , int nla_t )
{
if ( ntohs ( addr - > proto ) = = ETH_P_IP ) {
struct sockaddr_in ip4 ;
2016-10-13 11:06:06 +03:00
memset ( & ip4 , 0 , sizeof ( ip4 ) ) ;
2016-08-26 11:52:55 +03:00
ip4 . sin_family = AF_INET ;
ip4 . sin_port = addr - > port ;
ip4 . sin_addr . s_addr = addr - > ipv4 . s_addr ;
if ( nla_put ( skb , nla_t , sizeof ( ip4 ) , & ip4 ) )
return - EMSGSIZE ;
# if IS_ENABLED(CONFIG_IPV6)
} else if ( ntohs ( addr - > proto ) = = ETH_P_IPV6 ) {
struct sockaddr_in6 ip6 ;
2016-10-13 11:06:06 +03:00
memset ( & ip6 , 0 , sizeof ( ip6 ) ) ;
2016-08-26 11:52:55 +03:00
ip6 . sin6_family = AF_INET6 ;
ip6 . sin6_port = addr - > port ;
memcpy ( & ip6 . sin6_addr , & addr - > ipv6 , sizeof ( struct in6_addr ) ) ;
if ( nla_put ( skb , nla_t , sizeof ( ip6 ) , & ip6 ) )
return - EMSGSIZE ;
# endif
}
return 0 ;
}
2016-08-26 11:52:56 +03:00
int tipc_udp_nl_dump_remoteip ( struct sk_buff * skb , struct netlink_callback * cb )
{
u32 bid = cb - > args [ 0 ] ;
u32 skip_cnt = cb - > args [ 1 ] ;
u32 portid = NETLINK_CB ( cb - > skb ) . portid ;
struct udp_replicast * rcast , * tmp ;
struct tipc_bearer * b ;
struct udp_bearer * ub ;
void * hdr ;
int err ;
int i ;
if ( ! bid & & ! skip_cnt ) {
2019-10-05 21:04:39 +03:00
struct nlattr * * attrs = genl_dumpit_info ( cb ) - > attrs ;
2016-08-26 11:52:56 +03:00
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * battrs [ TIPC_NLA_BEARER_MAX + 1 ] ;
char * bname ;
if ( ! attrs [ TIPC_NLA_BEARER ] )
return - EINVAL ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 15:07:28 +03:00
err = nla_parse_nested_deprecated ( battrs , TIPC_NLA_BEARER_MAX ,
attrs [ TIPC_NLA_BEARER ] ,
tipc_nl_bearer_policy , NULL ) ;
2016-08-26 11:52:56 +03:00
if ( err )
return err ;
if ( ! battrs [ TIPC_NLA_BEARER_NAME ] )
return - EINVAL ;
bname = nla_data ( battrs [ TIPC_NLA_BEARER_NAME ] ) ;
rtnl_lock ( ) ;
b = tipc_bearer_find ( net , bname ) ;
if ( ! b ) {
rtnl_unlock ( ) ;
return - EINVAL ;
}
bid = b - > identity ;
} else {
struct net * net = sock_net ( skb - > sk ) ;
struct tipc_net * tn = net_generic ( net , tipc_net_id ) ;
rtnl_lock ( ) ;
b = rtnl_dereference ( tn - > bearer_list [ bid ] ) ;
if ( ! b ) {
rtnl_unlock ( ) ;
return - EINVAL ;
}
}
2019-07-01 19:54:55 +03:00
ub = rtnl_dereference ( b - > media_ptr ) ;
2016-08-26 11:52:56 +03:00
if ( ! ub ) {
rtnl_unlock ( ) ;
return - EINVAL ;
}
i = 0 ;
list_for_each_entry_safe ( rcast , tmp , & ub - > rcast . list , list ) {
if ( i < skip_cnt )
goto count ;
hdr = genlmsg_put ( skb , portid , cb - > nlh - > nlmsg_seq ,
& tipc_genl_family , NLM_F_MULTI ,
TIPC_NL_BEARER_GET ) ;
if ( ! hdr )
goto done ;
err = __tipc_nl_add_udp_addr ( skb , & rcast - > addr ,
TIPC_NLA_UDP_REMOTE ) ;
if ( err ) {
genlmsg_cancel ( skb , hdr ) ;
goto done ;
}
genlmsg_end ( skb , hdr ) ;
count :
i + + ;
}
done :
rtnl_unlock ( ) ;
cb - > args [ 0 ] = bid ;
cb - > args [ 1 ] = i ;
return skb - > len ;
}
2016-08-26 11:52:55 +03:00
int tipc_udp_nl_add_bearer_data ( struct tipc_nl_msg * msg , struct tipc_bearer * b )
{
struct udp_media_addr * src = ( struct udp_media_addr * ) & b - > addr . value ;
struct udp_media_addr * dst ;
struct udp_bearer * ub ;
struct nlattr * nest ;
2019-07-01 19:54:55 +03:00
ub = rtnl_dereference ( b - > media_ptr ) ;
2016-08-26 11:52:55 +03:00
if ( ! ub )
return - ENODEV ;
2019-04-26 12:13:06 +03:00
nest = nla_nest_start_noflag ( msg - > skb , TIPC_NLA_BEARER_UDP_OPTS ) ;
2016-08-26 11:52:55 +03:00
if ( ! nest )
goto msg_full ;
if ( __tipc_nl_add_udp_addr ( msg - > skb , src , TIPC_NLA_UDP_LOCAL ) )
goto msg_full ;
dst = ( struct udp_media_addr * ) & b - > bcast_addr . value ;
if ( __tipc_nl_add_udp_addr ( msg - > skb , dst , TIPC_NLA_UDP_REMOTE ) )
goto msg_full ;
if ( ! list_empty ( & ub - > rcast . list ) ) {
if ( nla_put_flag ( msg - > skb , TIPC_NLA_UDP_MULTI_REMOTEIP ) )
goto msg_full ;
}
nla_nest_end ( msg - > skb , nest ) ;
return 0 ;
msg_full :
nla_nest_cancel ( msg - > skb , nest ) ;
return - EMSGSIZE ;
}
2015-03-05 12:23:49 +03:00
/**
2016-08-26 11:52:50 +03:00
* tipc_parse_udp_addr - build udp media address from netlink data
2020-07-13 02:15:14 +03:00
* @ nla : netlink attribute containing sockaddr storage aligned address
2016-08-26 11:52:50 +03:00
* @ addr : tipc media address to fill with address , port and protocol type
* @ scope_id : IPv6 scope id pointer , not NULL indicates it ' s required
2015-03-05 12:23:49 +03:00
*/
2016-08-26 11:52:50 +03:00
static int tipc_parse_udp_addr ( struct nlattr * nla , struct udp_media_addr * addr ,
u32 * scope_id )
2015-03-05 12:23:49 +03:00
{
2016-08-26 11:52:50 +03:00
struct sockaddr_storage sa ;
2015-03-05 12:23:49 +03:00
2016-08-26 11:52:50 +03:00
nla_memcpy ( & sa , nla , sizeof ( sa ) ) ;
if ( sa . ss_family = = AF_INET ) {
struct sockaddr_in * ip4 = ( struct sockaddr_in * ) & sa ;
addr - > proto = htons ( ETH_P_IP ) ;
addr - > port = ip4 - > sin_port ;
addr - > ipv4 . s_addr = ip4 - > sin_addr . s_addr ;
2015-03-05 12:23:49 +03:00
return 0 ;
# if IS_ENABLED(CONFIG_IPV6)
2016-08-26 11:52:50 +03:00
} else if ( sa . ss_family = = AF_INET6 ) {
struct sockaddr_in6 * ip6 = ( struct sockaddr_in6 * ) & sa ;
addr - > proto = htons ( ETH_P_IPV6 ) ;
addr - > port = ip6 - > sin6_port ;
memcpy ( & addr - > ipv6 , & ip6 - > sin6_addr , sizeof ( struct in6_addr ) ) ;
/* Scope ID is only interesting for local addresses */
if ( scope_id ) {
int atype ;
atype = ipv6_addr_type ( & ip6 - > sin6_addr ) ;
if ( __ipv6_addr_needs_scope_id ( atype ) & &
! ip6 - > sin6_scope_id ) {
return - EINVAL ;
}
* scope_id = ip6 - > sin6_scope_id ? : 0 ;
}
2015-03-05 12:23:49 +03:00
return 0 ;
# endif
}
return - EADDRNOTAVAIL ;
}
2016-08-26 11:52:53 +03:00
int tipc_udp_nl_bearer_add ( struct tipc_bearer * b , struct nlattr * attr )
{
int err ;
struct udp_media_addr addr = { 0 } ;
struct nlattr * opts [ TIPC_NLA_UDP_MAX + 1 ] ;
struct udp_media_addr * dst ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 15:07:28 +03:00
if ( nla_parse_nested_deprecated ( opts , TIPC_NLA_UDP_MAX , attr , tipc_nl_udp_policy , NULL ) )
2016-08-26 11:52:53 +03:00
return - EINVAL ;
if ( ! opts [ TIPC_NLA_UDP_REMOTE ] )
return - EINVAL ;
err = tipc_parse_udp_addr ( opts [ TIPC_NLA_UDP_REMOTE ] , & addr , NULL ) ;
if ( err )
return err ;
dst = ( struct udp_media_addr * ) & b - > bcast_addr . value ;
if ( tipc_udp_is_mcast_addr ( dst ) ) {
pr_err ( " Can't add remote ip to TIPC UDP multicast bearer \n " ) ;
return - EINVAL ;
}
2016-08-26 11:52:54 +03:00
if ( tipc_udp_is_known_peer ( b , & addr ) )
return 0 ;
2016-08-26 11:52:53 +03:00
return tipc_udp_rcast_add ( b , & addr ) ;
}
2015-03-05 12:23:49 +03:00
/**
* tipc_udp_enable - callback to create a new udp bearer instance
* @ net : network namespace
* @ b : pointer to generic tipc_bearer
* @ attrs : netlink bearer configuration
*
* validate the bearer parameters and initialize the udp bearer
* rtnl_lock should be held
*/
static int tipc_udp_enable ( struct net * net , struct tipc_bearer * b ,
struct nlattr * attrs [ ] )
{
int err = - EINVAL ;
struct udp_bearer * ub ;
2016-08-26 11:52:53 +03:00
struct udp_media_addr remote = { 0 } ;
2015-03-05 12:23:49 +03:00
struct udp_media_addr local = { 0 } ;
struct udp_port_cfg udp_conf = { 0 } ;
2015-03-09 12:19:31 +03:00
struct udp_tunnel_sock_cfg tuncfg = { NULL } ;
2016-08-26 11:52:50 +03:00
struct nlattr * opts [ TIPC_NLA_UDP_MAX + 1 ] ;
2018-03-22 22:42:52 +03:00
u8 node_id [ NODE_ID_LEN ] = { 0 , } ;
2020-08-17 09:30:49 +03:00
struct net_device * dev ;
2018-10-11 04:43:08 +03:00
int rmcast = 0 ;
2015-03-05 12:23:49 +03:00
ub = kzalloc ( sizeof ( * ub ) , GFP_ATOMIC ) ;
if ( ! ub )
return - ENOMEM ;
2016-08-26 11:52:53 +03:00
INIT_LIST_HEAD ( & ub - > rcast . list ) ;
2016-08-26 11:52:50 +03:00
if ( ! attrs [ TIPC_NLA_BEARER_UDP_OPTS ] )
goto err ;
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 15:07:28 +03:00
if ( nla_parse_nested_deprecated ( opts , TIPC_NLA_UDP_MAX , attrs [ TIPC_NLA_BEARER_UDP_OPTS ] , tipc_nl_udp_policy , NULL ) )
2016-08-26 11:52:50 +03:00
goto err ;
if ( ! opts [ TIPC_NLA_UDP_LOCAL ] | | ! opts [ TIPC_NLA_UDP_REMOTE ] ) {
pr_err ( " Invalid UDP bearer configuration " ) ;
2016-09-10 03:56:55 +03:00
err = - EINVAL ;
goto err ;
2016-08-26 11:52:50 +03:00
}
err = tipc_parse_udp_addr ( opts [ TIPC_NLA_UDP_LOCAL ] , & local ,
& ub - > ifindex ) ;
if ( err )
goto err ;
2016-08-26 11:52:53 +03:00
err = tipc_parse_udp_addr ( opts [ TIPC_NLA_UDP_REMOTE ] , & remote , NULL ) ;
2015-03-05 12:23:49 +03:00
if ( err )
goto err ;
2018-12-11 02:23:30 +03:00
if ( remote . proto ! = local . proto ) {
err = - EINVAL ;
goto err ;
}
2018-10-11 04:43:08 +03:00
/* Checking remote ip address */
rmcast = tipc_udp_is_mcast_addr ( & remote ) ;
2018-03-22 22:42:52 +03:00
/* Autoconfigure own node identity if needed */
if ( ! tipc_own_id ( net ) ) {
memcpy ( node_id , local . ipv6 . in6_u . u6_addr8 , 16 ) ;
tipc_net_init ( net , node_id , 0 ) ;
}
if ( ! tipc_own_id ( net ) ) {
pr_warn ( " Failed to set node id, please configure manually \n " ) ;
2018-03-26 17:32:44 +03:00
err = - EINVAL ;
goto err ;
2018-03-22 22:42:52 +03:00
}
2015-03-05 12:23:49 +03:00
b - > bcast_addr . media_id = TIPC_MEDIA_TYPE_UDP ;
2017-01-18 21:50:50 +03:00
b - > bcast_addr . broadcast = TIPC_BROADCAST_SUPPORT ;
2015-03-05 12:23:49 +03:00
rcu_assign_pointer ( b - > media_ptr , ub ) ;
rcu_assign_pointer ( ub - > bearer , b ) ;
tipc_udp_media_addr_set ( & b - > addr , & local ) ;
2015-03-09 12:19:31 +03:00
if ( local . proto = = htons ( ETH_P_IP ) ) {
2015-03-05 12:23:49 +03:00
dev = __ip_dev_find ( net , local . ipv4 . s_addr , false ) ;
if ( ! dev ) {
err = - ENODEV ;
goto err ;
}
udp_conf . family = AF_INET ;
2018-10-11 04:43:08 +03:00
/* Switch to use ANY to receive packets from group */
if ( rmcast )
udp_conf . local_ip . s_addr = htonl ( INADDR_ANY ) ;
else
udp_conf . local_ip . s_addr = local . ipv4 . s_addr ;
2015-03-05 12:23:49 +03:00
udp_conf . use_udp_checksums = false ;
ub - > ifindex = dev - > ifindex ;
2016-12-02 11:33:41 +03:00
if ( tipc_mtu_bad ( dev , sizeof ( struct iphdr ) +
sizeof ( struct udphdr ) ) ) {
err = - EINVAL ;
goto err ;
}
2018-04-19 12:06:18 +03:00
b - > mtu = b - > media - > mtu ;
2015-03-05 12:23:49 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2015-03-09 12:19:31 +03:00
} else if ( local . proto = = htons ( ETH_P_IPV6 ) ) {
2020-08-17 09:30:49 +03:00
dev = ub - > ifindex ? __dev_get_by_index ( net , ub - > ifindex ) : NULL ;
dev = ipv6_dev_find ( net , & local . ipv6 , dev ) ;
2020-08-03 18:34:47 +03:00
if ( ! dev ) {
err = - ENODEV ;
goto err ;
}
2015-03-05 12:23:49 +03:00
udp_conf . family = AF_INET6 ;
udp_conf . use_udp6_tx_checksums = true ;
udp_conf . use_udp6_rx_checksums = true ;
2018-10-11 04:43:08 +03:00
if ( rmcast )
udp_conf . local_ip6 = in6addr_any ;
else
udp_conf . local_ip6 = local . ipv6 ;
2020-08-03 18:34:47 +03:00
ub - > ifindex = dev - > ifindex ;
2015-03-05 12:23:49 +03:00
b - > mtu = 1280 ;
# endif
} else {
err = - EAFNOSUPPORT ;
goto err ;
}
2016-06-27 14:34:07 +03:00
udp_conf . local_udp_port = local . port ;
2015-03-05 12:23:49 +03:00
err = udp_sock_create ( net , & udp_conf , & ub - > ubsock ) ;
if ( err )
goto err ;
tuncfg . sk_user_data = ub ;
tuncfg . encap_type = 1 ;
tuncfg . encap_rcv = tipc_udp_recv ;
tuncfg . encap_destroy = NULL ;
setup_udp_tunnel_sock ( net , ub - > ubsock , & tuncfg ) ;
2019-06-20 14:03:41 +03:00
err = dst_cache_init ( & ub - > rcast . dst_cache , GFP_ATOMIC ) ;
if ( err )
2019-07-01 19:57:19 +03:00
goto free ;
2019-06-20 14:03:41 +03:00
2016-08-26 11:52:53 +03:00
/*
 * The bcast media address port is used for all peers and the ip
 * is used if it's a multicast address.
 */
memcpy ( & b - > bcast_addr . value , & remote , sizeof ( remote ) ) ;
2018-10-11 04:43:08 +03:00
if ( rmcast )
2016-08-26 11:52:53 +03:00
err = enable_mcast ( ub , & remote ) ;
else
err = tipc_udp_rcast_add ( b , & remote ) ;
if ( err )
2019-07-01 19:57:19 +03:00
goto free ;
2016-08-26 11:52:52 +03:00
2015-03-05 12:23:49 +03:00
return 0 ;
2019-07-01 19:57:19 +03:00
free :
2019-06-20 14:03:41 +03:00
dst_cache_destroy ( & ub - > rcast . dst_cache ) ;
2019-07-01 19:57:19 +03:00
udp_tunnel_sock_release ( ub - > ubsock ) ;
err :
2015-03-05 12:23:49 +03:00
kfree ( ub ) ;
return err ;
}
/* cleanup_bearer - break the socket/bearer association */
static void cleanup_bearer ( struct work_struct * work )
{
struct udp_bearer * ub = container_of ( work , struct udp_bearer , work ) ;
2016-08-26 11:52:53 +03:00
struct udp_replicast * rcast , * tmp ;
list_for_each_entry_safe ( rcast , tmp , & ub - > rcast . list , list ) {
2019-06-20 14:03:41 +03:00
dst_cache_destroy ( & rcast - > dst_cache ) ;
2016-08-26 11:52:53 +03:00
list_del_rcu ( & rcast - > list ) ;
kfree_rcu ( rcast , rcu ) ;
}
2015-03-05 12:23:49 +03:00
2019-06-20 14:03:41 +03:00
dst_cache_destroy ( & ub - > rcast . dst_cache ) ;
2019-07-01 19:57:19 +03:00
udp_tunnel_sock_release ( ub - > ubsock ) ;
2015-03-05 12:23:49 +03:00
synchronize_net ( ) ;
kfree ( ub ) ;
}
/* tipc_udp_disable - detach bearer from socket */
static void tipc_udp_disable ( struct tipc_bearer * b )
{
struct udp_bearer * ub ;
2019-07-01 19:54:55 +03:00
ub = rtnl_dereference ( b - > media_ptr ) ;
2015-03-05 12:23:49 +03:00
if ( ! ub ) {
pr_err ( " UDP bearer instance not found \n " ) ;
return ;
}
2019-07-01 19:57:19 +03:00
sock_set_flag ( ub - > ubsock - > sk , SOCK_DEAD ) ;
2015-03-05 12:23:49 +03:00
RCU_INIT_POINTER ( ub - > bearer , NULL ) ;
/* sock_release need to be done outside of rtnl lock */
INIT_WORK ( & ub - > work , cleanup_bearer ) ;
schedule_work ( & ub - > work ) ;
}
struct tipc_media udp_media_info = {
. send_msg = tipc_udp_send_msg ,
. enable_media = tipc_udp_enable ,
. disable_media = tipc_udp_disable ,
. addr2str = tipc_udp_addr2str ,
. addr2msg = tipc_udp_addr2msg ,
. msg2addr = tipc_udp_msg2addr ,
. priority = TIPC_DEF_LINK_PRI ,
. tolerance = TIPC_DEF_LINK_TOL ,
tipc: introduce variable window congestion control
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering both 'slow
start', 'congestion avoidance', and 'fast recovery' modes.
- We introduce hard lower and upper window limits per link, still
different and configurable per bearer type.
- We introduce a 'slow start theshold' variable, initially set to
the maximum window size.
- We let a link start at the minimum congestion window, i.e. in slow
start mode, and then let is grow rapidly (+1 per rceived ACK) until
it reaches the slow start threshold and enters congestion avoidance
mode.
- In congestion avoidance mode we increment the congestion window for
each window-size number of acked packets, up to a possible maximum
equal to the configured maximum window.
- For each non-duplicate NACK received, we drop back to fast recovery
mode, by setting the both the slow start threshold to and the
congestion window to (current_congestion_window / 2).
- If the timeout handler finds that the transmit queue has not moved
since the previous timeout, it drops the link back to slow start
and forces a probe containing the last sent sequence number to the
sent to the peer, so that this can discover the stale situation.
This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.
This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.
Suggested-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-10 02:52:46 +03:00
. min_win = TIPC_DEF_LINK_WIN ,
. max_win = TIPC_DEF_LINK_WIN ,
2018-04-19 12:06:18 +03:00
. mtu = TIPC_DEF_LINK_UDP_MTU ,
2015-03-05 12:23:49 +03:00
. type_id = TIPC_MEDIA_TYPE_UDP ,
. hwaddr_len = 0 ,
. name = " udp "
} ;