2019-05-27 08:55:01 +02:00
// SPDX-License-Identifier: GPL-2.0-or-later
2012-08-10 00:51:50 +00:00
/*
* GRE over IPv6 protocol decoder .
*
* Authors : Dmitry Kozlov ( xeb @ mail . ru )
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/capability.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/slab.h>
# include <linux/uaccess.h>
# include <linux/skbuff.h>
# include <linux/netdevice.h>
# include <linux/in.h>
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/if_arp.h>
# include <linux/init.h>
# include <linux/in6.h>
# include <linux/inetdevice.h>
# include <linux/igmp.h>
# include <linux/netfilter_ipv4.h>
# include <linux/etherdevice.h>
# include <linux/if_ether.h>
# include <linux/hash.h>
# include <linux/if_tunnel.h>
# include <linux/ip6_tunnel.h>
# include <net/sock.h>
# include <net/ip.h>
2013-03-25 14:49:35 +00:00
# include <net/ip_tunnels.h>
2012-08-10 00:51:50 +00:00
# include <net/icmp.h>
# include <net/protocol.h>
# include <net/addrconf.h>
# include <net/arp.h>
# include <net/checksum.h>
# include <net/dsfield.h>
# include <net/inet_ecn.h>
# include <net/xfrm.h>
# include <net/net_namespace.h>
# include <net/netns/generic.h>
# include <net/rtnetlink.h>
# include <net/ipv6.h>
# include <net/ip6_fib.h>
# include <net/ip6_route.h>
# include <net/ip6_tunnel.h>
2016-04-29 17:12:17 -07:00
# include <net/gre.h>
2017-11-30 11:51:29 -08:00
# include <net/erspan.h>
2017-12-01 15:26:08 -08:00
# include <net/dst_metadata.h>
2012-08-10 00:51:50 +00:00
2012-09-25 11:02:48 +00:00
/* Boolean module parameter (mode 0644, default true). Its value is handed
 * to ip6_tnl_rcv() by the receive paths in this file.
 */
static bool log_ecn_error = true ;
module_param ( log_ecn_error , bool , 0644 ) ;
MODULE_PARM_DESC ( log_ecn_error , " Log packets received with corrupted ECN " ) ;
2016-08-10 11:03:35 +02:00
/* Each tunnel hash table has 1 << IP6_GRE_HASH_SIZE_SHIFT (i.e. 32) buckets. */
# define IP6_GRE_HASH_SIZE_SHIFT 5
# define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
2012-08-10 00:51:50 +00:00
netns: make struct pernet_operations::id unsigned int
Make struct pernet_operations::id unsigned.
There are 2 reasons to do so:
1)
This field is really an index into a zero-based array and
thus is an unsigned entity. Using a negative value is an out-of-bounds
access by definition.
2)
On x86_64 unsigned 32-bit data which are mixed with pointers
via array indexing or offsets added or subtracted to pointers
are preferred to signed 32-bit data.
"int" being used as an array index needs to be sign-extended
to 64-bit before being used.
void f(long *p, int i)
{
g(p[i]);
}
roughly translates to
movsx rsi, esi
mov rdi, [rsi+...]
call g
MOVSX is 3 byte instruction which isn't necessary if the variable is
unsigned because x86_64 is zero extending by default.
Now, there is net_generic() function which, you guessed it right, uses
"int" as an array index:
static inline void *net_generic(const struct net *net, int id)
{
...
ptr = ng->ptr[id - 1];
...
}
And this function is used a lot, so those sign extensions add up.
Patch snipes ~1730 bytes on allyesconfig kernel (without all junk
messing with code generation):
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
Unfortunately some functions actually grow bigger.
This is a seemingly random artefact of code generation with register
allocator being used differently. gcc decides that some variable
needs to live in new r8+ registers and every access now requires REX
prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be
used which is longer than [r8]
However, overall balance is in negative direction:
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
function old new delta
nfsd4_lock 3886 3959 +73
tipc_link_build_proto_msg 1096 1140 +44
mac80211_hwsim_new_radio 2776 2808 +32
tipc_mon_rcv 1032 1058 +26
svcauth_gss_legacy_init 1413 1429 +16
tipc_bcbase_select_primary 379 392 +13
nfsd4_exchange_id 1247 1260 +13
nfsd4_setclientid_confirm 782 793 +11
...
put_client_renew_locked 494 480 -14
ip_set_sockfn_get 730 716 -14
geneve_sock_add 829 813 -16
nfsd4_sequence_done 721 703 -18
nlmclnt_lookup_host 708 686 -22
nfsd4_lockt 1085 1063 -22
nfs_get_client 1077 1050 -27
tcf_bpf_init 1106 1076 -30
nfsd4_encode_fattr 5997 5930 -67
Total: Before=154856051, After=154854321, chg -0.00%
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 04:58:21 +03:00
/* Id passed to net_generic() to fetch this module's struct ip6gre_net. */
static unsigned int ip6gre_net_id __read_mostly ;
2012-08-10 00:51:50 +00:00
/* Per-network-namespace state, obtained with net_generic(net, ip6gre_net_id). */
struct ip6gre_net {
2016-08-10 11:03:35 +02:00
/* Four hash tables of IP6_GRE_HASH_SIZE buckets; the first index encodes
 * which of (remote, local) are set, see __ip6gre_bucket().
 */
struct ip6_tnl __rcu * tunnels [ 4 ] [ IP6_GRE_HASH_SIZE ] ;
2012-08-10 00:51:50 +00:00
2017-12-01 15:26:08 -08:00
/* Set by ip6gre_tunnel_link_md() for a tunnel with parms.collect_md. */
struct ip6_tnl __rcu * collect_md_tun ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
/* Set by ip6erspan_tunnel_link_md() for a tunnel with parms.collect_md. */
struct ip6_tnl __rcu * collect_md_tun_erspan ;
2012-08-10 00:51:50 +00:00
/* Last-resort device returned by ip6gre_tunnel_lookup(); may be NULL. */
struct net_device * fb_tunnel_dev ;
} ;
/* Forward declarations: rtnl link ops objects and helpers referenced by the
 * functions below.
 */
static struct rtnl_link_ops ip6gre_link_ops __read_mostly ;
2014-04-22 10:15:24 +02:00
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly ;
2017-11-30 11:51:29 -08:00
static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly ;
2012-08-10 00:51:50 +00:00
static int ip6gre_tunnel_init ( struct net_device * dev ) ;
static void ip6gre_tunnel_setup ( struct net_device * dev ) ;
static void ip6gre_tunnel_link ( struct ip6gre_net * ign , struct ip6_tnl * t ) ;
static void ip6gre_tnl_link_config ( struct ip6_tnl * t , int set_mtu ) ;
2018-05-17 16:36:51 +02:00
static void ip6erspan_tnl_link_config ( struct ip6_tnl * t , int set_mtu ) ;
2012-08-10 00:51:50 +00:00
/* Tunnel hash table */
/*
4 hash tables :
3 : ( remote , local )
2 : ( remote , * )
1 : ( * , local )
0 : ( * , * )
We require an exact key match, i.e. if a key is present in the packet
it will match only a tunnel with the same key; if it is not present,
it will match only a keyless tunnel.
All keyless packets that do not match a configured keyless tunnel
will match the fallback tunnel.
*/
2016-08-10 11:03:35 +02:00
/* Fold a GRE key into a bucket index in [0, IP6_GRE_HASH_SIZE). The argument
 * is parenthesized so that compound expressions are cast and shifted as a
 * whole rather than only their first operand.
 */
#define HASH_KEY(key) \
	(((__force u32)(key) ^ ((__force u32)(key) >> 4)) & (IP6_GRE_HASH_SIZE - 1))
2012-08-10 00:51:50 +00:00
static u32 HASH_ADDR ( const struct in6_addr * addr )
{
u32 hash = ipv6_addr_hash ( addr ) ;
2016-08-10 11:03:35 +02:00
return hash_32 ( hash , IP6_GRE_HASH_SIZE_SHIFT ) ;
2012-08-10 00:51:50 +00:00
}
/* Shorthand for the four hash tables of struct ip6gre_net; the suffix names
 * the addresses that are set (r = remote, l = local, wc = neither).
 */
# define tunnels_r_l tunnels[3]
# define tunnels_r tunnels[2]
# define tunnels_l tunnels[1]
# define tunnels_wc tunnels[0]
/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip6_tnl * ip6gre_tunnel_lookup ( struct net_device * dev ,
const struct in6_addr * remote , const struct in6_addr * local ,
__be32 key , __be16 gre_proto )
{
struct net * net = dev_net ( dev ) ;
int link = dev - > ifindex ;
unsigned int h0 = HASH_ADDR ( remote ) ;
unsigned int h1 = HASH_KEY ( key ) ;
struct ip6_tnl * t , * cand = NULL ;
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
2017-11-30 11:51:29 -08:00
int dev_type = ( gre_proto = = htons ( ETH_P_TEB ) | |
2018-03-09 07:34:40 -08:00
gre_proto = = htons ( ETH_P_ERSPAN ) | |
gre_proto = = htons ( ETH_P_ERSPAN2 ) ) ?
2012-08-10 00:51:50 +00:00
ARPHRD_ETHER : ARPHRD_IP6GRE ;
int score , cand_score = 4 ;
2012-11-11 21:52:34 +00:00
for_each_ip_tunnel_rcu ( t , ign - > tunnels_r_l [ h0 ^ h1 ] ) {
2012-08-10 00:51:50 +00:00
if ( ! ipv6_addr_equal ( local , & t - > parms . laddr ) | |
! ipv6_addr_equal ( remote , & t - > parms . raddr ) | |
key ! = t - > parms . i_key | |
! ( t - > dev - > flags & IFF_UP ) )
continue ;
if ( t - > dev - > type ! = ARPHRD_IP6GRE & &
t - > dev - > type ! = dev_type )
continue ;
score = 0 ;
if ( t - > parms . link ! = link )
score | = 1 ;
if ( t - > dev - > type ! = dev_type )
score | = 2 ;
if ( score = = 0 )
return t ;
if ( score < cand_score ) {
cand = t ;
cand_score = score ;
}
}
2012-11-11 21:52:34 +00:00
for_each_ip_tunnel_rcu ( t , ign - > tunnels_r [ h0 ^ h1 ] ) {
2012-08-10 00:51:50 +00:00
if ( ! ipv6_addr_equal ( remote , & t - > parms . raddr ) | |
key ! = t - > parms . i_key | |
! ( t - > dev - > flags & IFF_UP ) )
continue ;
if ( t - > dev - > type ! = ARPHRD_IP6GRE & &
t - > dev - > type ! = dev_type )
continue ;
score = 0 ;
if ( t - > parms . link ! = link )
score | = 1 ;
if ( t - > dev - > type ! = dev_type )
score | = 2 ;
if ( score = = 0 )
return t ;
if ( score < cand_score ) {
cand = t ;
cand_score = score ;
}
}
2012-11-11 21:52:34 +00:00
for_each_ip_tunnel_rcu ( t , ign - > tunnels_l [ h1 ] ) {
2012-08-10 00:51:50 +00:00
if ( ( ! ipv6_addr_equal ( local , & t - > parms . laddr ) & &
( ! ipv6_addr_equal ( local , & t - > parms . raddr ) | |
! ipv6_addr_is_multicast ( local ) ) ) | |
key ! = t - > parms . i_key | |
! ( t - > dev - > flags & IFF_UP ) )
continue ;
if ( t - > dev - > type ! = ARPHRD_IP6GRE & &
t - > dev - > type ! = dev_type )
continue ;
score = 0 ;
if ( t - > parms . link ! = link )
score | = 1 ;
if ( t - > dev - > type ! = dev_type )
score | = 2 ;
if ( score = = 0 )
return t ;
if ( score < cand_score ) {
cand = t ;
cand_score = score ;
}
}
2012-11-11 21:52:34 +00:00
for_each_ip_tunnel_rcu ( t , ign - > tunnels_wc [ h1 ] ) {
2012-08-10 00:51:50 +00:00
if ( t - > parms . i_key ! = key | |
! ( t - > dev - > flags & IFF_UP ) )
continue ;
if ( t - > dev - > type ! = ARPHRD_IP6GRE & &
t - > dev - > type ! = dev_type )
continue ;
score = 0 ;
if ( t - > parms . link ! = link )
score | = 1 ;
if ( t - > dev - > type ! = dev_type )
score | = 2 ;
if ( score = = 0 )
return t ;
if ( score < cand_score ) {
cand = t ;
cand_score = score ;
}
}
2015-03-29 14:00:05 +01:00
if ( cand )
2012-08-10 00:51:50 +00:00
return cand ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
if ( gre_proto = = htons ( ETH_P_ERSPAN ) | |
gre_proto = = htons ( ETH_P_ERSPAN2 ) )
t = rcu_dereference ( ign - > collect_md_tun_erspan ) ;
else
t = rcu_dereference ( ign - > collect_md_tun ) ;
2017-12-01 15:26:08 -08:00
if ( t & & t - > dev - > flags & IFF_UP )
return t ;
2012-08-10 00:51:50 +00:00
dev = ign - > fb_tunnel_dev ;
net: do not create fallback tunnels for non-default namespaces
fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0,
ip6tnl0, ip6gre0) are automatically created when the corresponding
module is loaded.
These tunnels are also automatically created when a new network
namespace is created, at a great cost.
In many cases, netns are used for isolation purposes, and these
extra network devices are a waste of resources. We are using
thousands of netns per host, and hit the netns creation/delete
bottleneck a lot. (Many thanks to Kirill for recent work on this)
Add a new sysctl so that we can opt-out from this automatic creation.
Note that these tunnels are still created for the initial namespace,
to be the least intrusive for typical setups.
Tested:
lpk43:~# cat add_del_unshare.sh
for i in `seq 1 40`
do
(for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) &
done
wait
lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m37.521s
user 0m0.886s
sys 7m7.084s
lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m4.761s
user 0m0.851s
sys 1m8.343s
lpk43:~#
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-08 12:51:41 -08:00
if ( dev & & dev - > flags & IFF_UP )
2012-08-10 00:51:50 +00:00
return netdev_priv ( dev ) ;
return NULL ;
}
/*
 * Return the hash bucket that a tunnel with parameters @p belongs to.
 *
 * The table index has bit 0 set when a local address is configured and
 * bit 1 set when a non-multicast remote address is configured; only in the
 * latter case is the remote address mixed into the bucket hash.
 */
static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
		const struct __ip6_tnl_parm *p)
{
	const struct in6_addr *raddr = &p->raddr;
	const struct in6_addr *laddr = &p->laddr;
	unsigned int bucket = HASH_KEY(p->i_key);
	int table = ipv6_addr_any(laddr) ? 0 : 1;

	if (!ipv6_addr_any(raddr) && !ipv6_addr_is_multicast(raddr)) {
		table |= 2;
		bucket ^= HASH_ADDR(raddr);
	}

	return &ign->tunnels[table][bucket];
}
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate functions {ip6gre,ip6erspan}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
static void ip6gre_tunnel_link_md ( struct ip6gre_net * ign , struct ip6_tnl * t )
{
if ( t - > parms . collect_md )
rcu_assign_pointer ( ign - > collect_md_tun , t ) ;
}
static void ip6erspan_tunnel_link_md ( struct ip6gre_net * ign , struct ip6_tnl * t )
{
if ( t - > parms . collect_md )
rcu_assign_pointer ( ign - > collect_md_tun_erspan , t ) ;
}
static void ip6gre_tunnel_unlink_md ( struct ip6gre_net * ign , struct ip6_tnl * t )
{
if ( t - > parms . collect_md )
rcu_assign_pointer ( ign - > collect_md_tun , NULL ) ;
}
static void ip6erspan_tunnel_unlink_md ( struct ip6gre_net * ign ,
struct ip6_tnl * t )
{
if ( t - > parms . collect_md )
rcu_assign_pointer ( ign - > collect_md_tun_erspan , NULL ) ;
}
2012-08-10 00:51:50 +00:00
/* Hash bucket for an existing tunnel, derived from its own parameters. */
static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
		const struct ip6_tnl *t)
{
	const struct __ip6_tnl_parm *parms = &t->parms;

	return __ip6gre_bucket(ign, parms);
}
static void ip6gre_tunnel_link ( struct ip6gre_net * ign , struct ip6_tnl * t )
{
struct ip6_tnl __rcu * * tp = ip6gre_bucket ( ign , t ) ;
rcu_assign_pointer ( t - > next , rtnl_dereference ( * tp ) ) ;
rcu_assign_pointer ( * tp , t ) ;
}
static void ip6gre_tunnel_unlink ( struct ip6gre_net * ign , struct ip6_tnl * t )
{
struct ip6_tnl __rcu * * tp ;
struct ip6_tnl * iter ;
for ( tp = ip6gre_bucket ( ign , t ) ;
( iter = rtnl_dereference ( * tp ) ) ! = NULL ;
tp = & iter - > next ) {
if ( t = = iter ) {
rcu_assign_pointer ( * tp , t - > next ) ;
break ;
}
}
}
static struct ip6_tnl * ip6gre_tunnel_find ( struct net * net ,
const struct __ip6_tnl_parm * parms ,
int type )
{
const struct in6_addr * remote = & parms - > raddr ;
const struct in6_addr * local = & parms - > laddr ;
__be32 key = parms - > i_key ;
int link = parms - > link ;
struct ip6_tnl * t ;
struct ip6_tnl __rcu * * tp ;
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
for ( tp = __ip6gre_bucket ( ign , parms ) ;
( t = rtnl_dereference ( * tp ) ) ! = NULL ;
tp = & t - > next )
if ( ipv6_addr_equal ( local , & t - > parms . laddr ) & &
ipv6_addr_equal ( remote , & t - > parms . raddr ) & &
key = = t - > parms . i_key & &
link = = t - > parms . link & &
type = = t - > dev - > type )
break ;
return t ;
}
static struct ip6_tnl * ip6gre_tunnel_locate ( struct net * net ,
const struct __ip6_tnl_parm * parms , int create )
{
struct ip6_tnl * t , * nt ;
struct net_device * dev ;
char name [ IFNAMSIZ ] ;
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
t = ip6gre_tunnel_find ( net , parms , ARPHRD_IP6GRE ) ;
2014-09-22 10:07:26 +02:00
if ( t & & create )
return NULL ;
2012-08-10 00:51:50 +00:00
if ( t | | ! create )
return t ;
2018-04-05 06:39:29 -07:00
if ( parms - > name [ 0 ] ) {
if ( ! dev_valid_name ( parms - > name ) )
return NULL ;
2012-08-10 00:51:50 +00:00
strlcpy ( name , parms - > name , IFNAMSIZ ) ;
2018-04-05 06:39:29 -07:00
} else {
2012-08-10 00:51:50 +00:00
strcpy ( name , " ip6gre%d " ) ;
2018-04-05 06:39:29 -07:00
}
net: set name_assign_type in alloc_netdev()
Extend alloc_netdev{,_mq{,s}}() to take name_assign_type as argument, and convert
all users to pass NET_NAME_UNKNOWN.
Coccinelle patch:
@@
expression sizeof_priv, name, setup, txqs, rxqs, count;
@@
(
-alloc_netdev_mqs(sizeof_priv, name, setup, txqs, rxqs)
+alloc_netdev_mqs(sizeof_priv, name, NET_NAME_UNKNOWN, setup, txqs, rxqs)
|
-alloc_netdev_mq(sizeof_priv, name, setup, count)
+alloc_netdev_mq(sizeof_priv, name, NET_NAME_UNKNOWN, setup, count)
|
-alloc_netdev(sizeof_priv, name, setup)
+alloc_netdev(sizeof_priv, name, NET_NAME_UNKNOWN, setup)
)
v9: move comments here from the wrong commit
Signed-off-by: Tom Gundersen <teg@jklm.no>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-14 16:37:24 +02:00
dev = alloc_netdev ( sizeof ( * t ) , name , NET_NAME_UNKNOWN ,
ip6gre_tunnel_setup ) ;
2012-08-10 00:51:50 +00:00
if ( ! dev )
return NULL ;
dev_net_set ( dev , net ) ;
nt = netdev_priv ( dev ) ;
nt - > parms = * parms ;
dev - > rtnl_link_ops = & ip6gre_link_ops ;
nt - > dev = dev ;
2013-08-13 17:51:12 +02:00
nt - > net = dev_net ( dev ) ;
2012-08-10 00:51:50 +00:00
if ( register_netdevice ( dev ) < 0 )
goto failed_free ;
ip6_gre: init dev->mtu and dev->hard_header_len correctly
Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path,
call common GRE functions") moved dev->mtu initialization
from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a
result, the previously set values, before ndo_init(), are
reset in the following cases:
* rtnl_create_link() can update dev->mtu from IFLA_MTU
parameter.
* ip6gre_tnl_link_config() is invoked before ndo_init() in
netlink and ioctl setup, so ndo_init() can reset MTU
adjustments with the lower device MTU as well, dev->mtu
and dev->hard_header_len.
Not applicable for ip6gretap because it has one more call
to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init().
Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]'
parameter if a user sets it manually on a device creation,
and fix the second one by moving ip6gre_tnl_link_config()
call after register_netdevice().
Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions")
Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-01-18 20:51:12 +03:00
ip6gre_tnl_link_config ( nt , 1 ) ;
2012-08-10 00:51:50 +00:00
/* Can use a lockless transmit, unless we generate output sequences */
2016-05-09 17:12:12 -07:00
if ( ! ( nt - > parms . o_flags & TUNNEL_SEQ ) )
2012-08-10 00:51:50 +00:00
dev - > features | = NETIF_F_LLTX ;
dev_hold ( dev ) ;
ip6gre_tunnel_link ( ign , nt ) ;
return nt ;
failed_free :
free_netdev ( dev ) ;
return NULL ;
}
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate functions {ip6gre,ip6erspan}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
static void ip6erspan_tunnel_uninit ( struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct ip6gre_net * ign = net_generic ( t - > net , ip6gre_net_id ) ;
ip6erspan_tunnel_unlink_md ( ign , t ) ;
ip6gre_tunnel_unlink ( ign , t ) ;
dst_cache_reset ( & t - > dst_cache ) ;
dev_put ( dev ) ;
}
2012-08-10 00:51:50 +00:00
static void ip6gre_tunnel_uninit ( struct net_device * dev )
{
2014-04-22 10:15:24 +02:00
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct ip6gre_net * ign = net_generic ( t - > net , ip6gre_net_id ) ;
2012-08-10 00:51:50 +00:00
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6gre_tunnel_unlink_md ( ign , t ) ;
2014-04-22 10:15:24 +02:00
ip6gre_tunnel_unlink ( ign , t ) ;
2016-02-12 15:43:54 +01:00
dst_cache_reset ( & t - > dst_cache ) ;
2012-08-10 00:51:50 +00:00
dev_put ( dev ) ;
}
2018-11-08 12:19:21 +01:00
/*
 * ICMPv6 error handler for GRE over IPv6.
 *
 * Parses the GRE header found at @offset and looks up the tunnel it belongs
 * to. Returns -EINVAL if the header cannot be parsed, -ENOENT if no tunnel
 * matches, and 0 otherwise. Only destination-unreachable errors other than
 * port-unreachable and hop-limit-exceeded errors (plus ICMPv6 types not
 * listed in the switch) reach the err_count/err_time bookkeeping at the end;
 * every other handled case returns straight away.
 */
static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	struct net *net = dev_net(skb->dev);
	const struct ipv6hdr *ipv6h;
	struct tnl_ptk_info tpi;
	struct ip6_tnl *t;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6), offset) < 0)
		return -EINVAL;

	ipv6h = (const struct ipv6hdr *)skb->data;
	/* daddr/saddr are passed as remote/local respectively */
	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
				 tpi.key, tpi.proto);
	if (!t)
		return -ENOENT;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		net_dbg_ratelimited(" %s: Path to destination invalid or inactive! \n ",
				    t->parms.name);
		if (code == ICMPV6_PORT_UNREACH)
			return 0;
		break;
	case ICMPV6_TIME_EXCEED:
		if (code != ICMPV6_EXC_HOPLIMIT)
			return 0;
		net_dbg_ratelimited(" %s: Too small hop limit or routing loop in tunnel! \n ",
				    t->parms.name);
		break;
	case ICMPV6_PARAMPROB: {
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 teli = 0;

		if (code == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		if (teli && teli == be32_to_cpu(info) - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *)&skb->data[teli];
			if (tel->encap_limit == 0) {
				net_dbg_ratelimited(" %s: Too small encapsulation limit or routing loop in tunnel! \n ",
						    t->parms.name);
			}
		} else {
			net_dbg_ratelimited(" %s: Recipient unable to parse tunneled packet! \n ",
					    t->parms.name);
		}
		return 0;
	}
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
		return 0;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		return 0;
	}

	/* Restart the error count once the previous error is older than
	 * IP6TUNNEL_ERR_TIMEO
	 */
	if (!time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
		t->err_count = 1;
	else
		t->err_count++;
	t->err_time = jiffies;

	return 0;
}
2016-04-29 17:12:17 -07:00
static int ip6gre_rcv ( struct sk_buff * skb , const struct tnl_ptk_info * tpi )
2012-08-10 00:51:50 +00:00
{
const struct ipv6hdr * ipv6h ;
struct ip6_tnl * tunnel ;
ipv6h = ipv6_hdr ( skb ) ;
tunnel = ip6gre_tunnel_lookup ( skb - > dev ,
2016-04-29 17:12:17 -07:00
& ipv6h - > saddr , & ipv6h - > daddr , tpi - > key ,
tpi - > proto ) ;
2012-08-10 00:51:50 +00:00
if ( tunnel ) {
2017-12-01 15:26:08 -08:00
if ( tunnel - > parms . collect_md ) {
struct metadata_dst * tun_dst ;
__be64 tun_id ;
__be16 flags ;
flags = tpi - > flags ;
tun_id = key32_to_tunnel_id ( tpi - > key ) ;
tun_dst = ipv6_tun_rx_dst ( skb , flags , tun_id , 0 ) ;
if ( ! tun_dst )
return PACKET_REJECT ;
ip6_tnl_rcv ( tunnel , skb , tpi , tun_dst , log_ecn_error ) ;
} else {
ip6_tnl_rcv ( tunnel , skb , tpi , NULL , log_ecn_error ) ;
}
2012-08-10 00:51:50 +00:00
2016-04-29 17:12:17 -07:00
return PACKET_RCVD ;
}
2012-09-25 11:02:48 +00:00
2016-04-29 17:12:17 -07:00
return PACKET_REJECT ;
}
2012-09-25 11:02:48 +00:00
2019-01-16 19:38:05 +01:00
static int ip6erspan_rcv ( struct sk_buff * skb ,
2019-04-06 17:16:53 +02:00
struct tnl_ptk_info * tpi ,
int gre_hdr_len )
2017-11-30 11:51:29 -08:00
{
2017-12-13 16:38:55 -08:00
struct erspan_base_hdr * ershdr ;
2017-11-30 11:51:29 -08:00
const struct ipv6hdr * ipv6h ;
2018-02-05 13:35:34 -08:00
struct erspan_md2 * md2 ;
2017-11-30 11:51:29 -08:00
struct ip6_tnl * tunnel ;
2017-12-13 16:38:55 -08:00
u8 ver ;
2017-11-30 11:51:29 -08:00
2017-12-20 09:53:19 +08:00
ipv6h = ipv6_hdr ( skb ) ;
ershdr = ( struct erspan_base_hdr * ) skb - > data ;
2018-01-25 13:20:09 -08:00
ver = ershdr - > ver ;
2017-11-30 11:51:29 -08:00
tunnel = ip6gre_tunnel_lookup ( skb - > dev ,
& ipv6h - > saddr , & ipv6h - > daddr , tpi - > key ,
tpi - > proto ) ;
if ( tunnel ) {
2017-12-13 16:38:55 -08:00
int len = erspan_hdr_len ( ver ) ;
if ( unlikely ( ! pskb_may_pull ( skb , len ) ) )
2017-12-15 14:27:43 -08:00
return PACKET_REJECT ;
2017-12-13 16:38:55 -08:00
if ( __iptunnel_pull_header ( skb , len ,
2017-11-30 11:51:29 -08:00
htons ( ETH_P_TEB ) ,
false , false ) < 0 )
return PACKET_REJECT ;
2017-12-05 15:15:44 -08:00
if ( tunnel - > parms . collect_md ) {
2019-04-06 17:16:53 +02:00
struct erspan_metadata * pkt_md , * md ;
2017-12-05 15:15:44 -08:00
struct metadata_dst * tun_dst ;
struct ip_tunnel_info * info ;
2019-04-06 17:16:53 +02:00
unsigned char * gh ;
2017-12-05 15:15:44 -08:00
__be64 tun_id ;
__be16 flags ;
tpi - > flags | = TUNNEL_KEY ;
flags = tpi - > flags ;
tun_id = key32_to_tunnel_id ( tpi - > key ) ;
tun_dst = ipv6_tun_rx_dst ( skb , flags , tun_id ,
sizeof ( * md ) ) ;
if ( ! tun_dst )
return PACKET_REJECT ;
2019-04-06 17:16:53 +02:00
/* skb can be uncloned in __iptunnel_pull_header, so
* old pkt_md is no longer valid and we need to reset
* it
*/
gh = skb_network_header ( skb ) +
skb_network_header_len ( skb ) ;
pkt_md = ( struct erspan_metadata * ) ( gh + gre_hdr_len +
sizeof ( * ershdr ) ) ;
2017-12-05 15:15:44 -08:00
info = & tun_dst - > u . tun_info ;
md = ip_tunnel_info_opts ( info ) ;
2017-12-13 16:38:57 -08:00
md - > version = ver ;
2018-02-05 13:35:34 -08:00
md2 = & md - > u . md2 ;
memcpy ( md2 , pkt_md , ver = = 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE ) ;
2017-12-05 15:15:44 -08:00
info - > key . tun_flags | = TUNNEL_ERSPAN_OPT ;
info - > options_len = sizeof ( * md ) ;
ip6_tnl_rcv ( tunnel , skb , tpi , tun_dst , log_ecn_error ) ;
} else {
ip6_tnl_rcv ( tunnel , skb , tpi , NULL , log_ecn_error ) ;
}
2017-11-30 11:51:29 -08:00
return PACKET_RCVD ;
}
return PACKET_REJECT ;
}
2016-04-29 17:12:17 -07:00
static int gre_rcv ( struct sk_buff * skb )
{
struct tnl_ptk_info tpi ;
bool csum_err = false ;
int hdr_len ;
2012-09-25 11:02:48 +00:00
2016-06-15 06:24:00 -07:00
hdr_len = gre_parse_header ( skb , & tpi , & csum_err , htons ( ETH_P_IPV6 ) , 0 ) ;
2016-05-03 15:00:21 +02:00
if ( hdr_len < 0 )
2016-04-29 17:12:17 -07:00
goto drop ;
2012-08-10 00:51:50 +00:00
2016-04-29 17:12:17 -07:00
if ( iptunnel_pull_header ( skb , hdr_len , tpi . proto , false ) )
goto drop ;
2012-08-10 00:51:50 +00:00
2017-12-13 16:38:57 -08:00
if ( unlikely ( tpi . proto = = htons ( ETH_P_ERSPAN ) | |
tpi . proto = = htons ( ETH_P_ERSPAN2 ) ) ) {
2019-04-06 17:16:53 +02:00
if ( ip6erspan_rcv ( skb , & tpi , hdr_len ) = = PACKET_RCVD )
2017-11-30 11:51:29 -08:00
return 0 ;
2017-12-20 10:21:47 +08:00
goto out ;
2017-11-30 11:51:29 -08:00
}
2016-04-29 17:12:17 -07:00
if ( ip6gre_rcv ( skb , & tpi ) = = PACKET_RCVD )
2012-08-10 00:51:50 +00:00
return 0 ;
2017-12-20 10:21:47 +08:00
out :
2016-04-29 17:12:17 -07:00
icmpv6_send ( skb , ICMPV6_DEST_UNREACH , ICMPV6_PORT_UNREACH , 0 ) ;
2012-08-10 00:51:50 +00:00
drop :
kfree_skb ( skb ) ;
return 0 ;
}
2016-04-29 17:12:21 -07:00
/* Run iptunnel_handle_offloads() with the GRE GSO type matching @csum. */
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	int gso_type = csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;

	return iptunnel_handle_offloads(skb, gso_type);
}
2017-11-30 11:51:28 -08:00
/*
 * Fill in the transmit parameters for an inner IPv4 packet.
 *
 * @fl6 is initialised from the tunnel's cached flow; @dsfield and the flow
 * mark are taken from the inner packet or from the tunnel configuration
 * depending on the tunnel flags. @encap_limit is written only when the
 * tunnel does not ignore the encapsulation limit.
 */
static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
				     struct net_device *dev,
				     struct flowi6 *fl6, __u8 *dsfield,
				     int *encap_limit)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	const struct iphdr *inner = ip_hdr(skb);

	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		*encap_limit = tunnel->parms.encap_limit;

	memcpy(fl6, &tunnel->fl.u.ip6, sizeof(*fl6));

	*dsfield = (tunnel->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) ?
		   ipv4_get_dsfield(inner) :
		   ip6_tclass(tunnel->parms.flowinfo);

	fl6->flowi6_mark = (tunnel->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) ?
			   skb->mark : tunnel->parms.fwmark;

	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
}
/*
 * Fill in the transmit parameters for an IPv6 payload on a tunnel that is
 * not in collect_md mode.
 *
 * If the inner packet carries a tunnel encapsulation limit option, that
 * limit (minus one) is used; a limit of zero is answered with an ICMPv6
 * parameter problem and -1 is returned.  Otherwise the tunnel's own limit
 * is used unless the tunnel ignores encapsulation limits.
 *
 * @fl6 is initialised from the tunnel's flow template; @dsfield, the flow
 * label and the mark are taken from the inner packet or from the tunnel
 * configuration depending on the IP6_TNL_F_USE_ORIG_* flags.
 *
 * Returns 0 on success, -1 if the packet must be dropped.
 */
static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
				    struct net_device *dev,
				    struct flowi6 *fl6, __u8 *dsfield,
				    int *encap_limit)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	__u16 offset;

	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
	/*
	 * ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head, so
	 * the inner header pointer must only be fetched after the call;
	 * a pointer taken earlier could reference freed memory.
	 */
	ipv6h = ipv6_hdr(skb);

	if (offset > 0) {
		struct ipv6_tlv_tnl_enc_lim *tel;

		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
				    ICMPV6_HDR_FIELD, offset + 2);
			return -1;
		}
		*encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
		*encap_limit = t->parms.encap_limit;
	}

	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));

	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		*dsfield = ipv6_get_dsfield(ipv6h);
	else
		*dsfield = ip6_tclass(t->parms.flowinfo);

	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
		fl6->flowlabel |= ip6_flowlabel(ipv6h);

	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6->flowi6_mark = skb->mark;
	else
		fl6->flowi6_mark = t->parms.fwmark;

	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);

	return 0;
}
2016-04-29 17:12:21 -07:00
/*
 * Build the GRE header on @skb and hand the packet to ip6_tnl_xmit().
 *
 * @dsfield, @fl6 and @encap_limit are the values prepared by the caller
 * for the native (non collect_md) mode; in collect_md mode @fl6 and
 * @dsfield are rebuilt here from the skb's tunnel metadata.
 * @pmtu is passed through to ip6_tnl_xmit().
 * @proto is the GRE protocol to advertise, except on ARPHRD_ETHER devices
 * where ETH_P_TEB is always used.
 *
 * Returns the result of ip6_tnl_xmit(), -ENOMEM if headroom could not be
 * obtained, or -EINVAL if collect_md metadata is missing or unsuitable.
 * The skb is not freed here on error.
 */
static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
			       struct net_device *dev, __u8 dsfield,
			       struct flowi6 *fl6, int encap_limit,
			       __u32 *pmtu, __be16 proto)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	__be16 protocol;
	__be16 flags;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
		fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
	else
		fl6->daddr = tunnel->parms.raddr;

	/* Push GRE header. */
	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;

	if (tunnel->parms.collect_md) {
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info ||
			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -EINVAL;

		key = &tun_info->key;
		memset(fl6, 0, sizeof(*fl6));
		fl6->flowi6_proto = IPPROTO_GRE;
		fl6->daddr = key->u.ipv6.dst;
		fl6->flowlabel = key->label;
		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);

		dsfield = key->tos;
		flags = key->tun_flags &
			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
		tunnel->tun_hlen = gre_calc_hlen(flags);

		/*
		 * The GRE header length depends on the per-packet flags, so
		 * headroom can only be checked once tun_hlen is known;
		 * checking against the configured tunnel->hlen beforehand
		 * could leave too little room for gre_build_header().
		 */
		if (skb_cow_head(skb, dev->needed_headroom ?:
				 tunnel->tun_hlen + tunnel->encap_hlen))
			return -ENOMEM;

		gre_build_header(skb, tunnel->tun_hlen,
				 flags, protocol,
				 tunnel_id_to_key32(tun_info->key.tun_id),
				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
						      : 0);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
			return -ENOMEM;

		flags = tunnel->parms.o_flags;

		/*
		 * Post-increment so the first packet carries the initial
		 * o_seqno value, matching the collect_md branch above.
		 */
		gre_build_header(skb, tunnel->tun_hlen, flags,
				 protocol, tunnel->parms.o_key,
				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
						      : 0);
	}

	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
			    NEXTHDR_GRE);
}
static inline int ip6gre_xmit_ipv4 ( struct sk_buff * skb , struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
int encap_limit = - 1 ;
struct flowi6 fl6 ;
2017-12-01 15:26:08 -08:00
__u8 dsfield = 0 ;
2012-08-10 00:51:50 +00:00
__u32 mtu ;
int err ;
2016-02-22 12:58:05 +13:00
memset ( & ( IPCB ( skb ) - > opt ) , 0 , sizeof ( IPCB ( skb ) - > opt ) ) ;
2017-12-01 15:26:08 -08:00
if ( ! t - > parms . collect_md )
prepare_ip6gre_xmit_ipv4 ( skb , dev , & fl6 ,
& dsfield , & encap_limit ) ;
2016-11-04 02:23:43 +09:00
2016-04-29 17:12:21 -07:00
err = gre_handle_offloads ( skb , ! ! ( t - > parms . o_flags & TUNNEL_CSUM ) ) ;
if ( err )
return - 1 ;
err = __gre6_xmit ( skb , dev , dsfield , & fl6 , encap_limit , & mtu ,
skb - > protocol ) ;
2012-08-10 00:51:50 +00:00
if ( err ! = 0 ) {
/* XXX: send ICMP error even if DF is not set. */
if ( err = = - EMSGSIZE )
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_FRAG_NEEDED ,
htonl ( mtu ) ) ;
return - 1 ;
}
return 0 ;
}
static inline int ip6gre_xmit_ipv6 ( struct sk_buff * skb , struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct ipv6hdr * ipv6h = ipv6_hdr ( skb ) ;
int encap_limit = - 1 ;
struct flowi6 fl6 ;
2017-12-01 15:26:08 -08:00
__u8 dsfield = 0 ;
2012-08-10 00:51:50 +00:00
__u32 mtu ;
int err ;
if ( ipv6_addr_equal ( & t - > parms . raddr , & ipv6h - > saddr ) )
return - 1 ;
2017-12-01 15:26:08 -08:00
if ( ! t - > parms . collect_md & &
prepare_ip6gre_xmit_ipv6 ( skb , dev , & fl6 , & dsfield , & encap_limit ) )
2017-11-30 11:51:28 -08:00
return - 1 ;
2016-11-04 02:23:43 +09:00
2016-04-29 17:12:21 -07:00
if ( gre_handle_offloads ( skb , ! ! ( t - > parms . o_flags & TUNNEL_CSUM ) ) )
return - 1 ;
err = __gre6_xmit ( skb , dev , dsfield , & fl6 , encap_limit ,
& mtu , skb - > protocol ) ;
2012-08-10 00:51:50 +00:00
if ( err ! = 0 ) {
if ( err = = - EMSGSIZE )
icmpv6_send ( skb , ICMPV6_PKT_TOOBIG , 0 , mtu ) ;
return - 1 ;
}
return 0 ;
}
/**
2018-05-03 09:34:29 +08:00
* ip6gre_tnl_addr_conflict - compare packet addresses to tunnel ' s own
2012-08-10 00:51:50 +00:00
* @ t : the outgoing tunnel device
* @ hdr : IPv6 header from the incoming packet
*
* Description :
* Avoid trivial tunneling loop by checking that tunnel exit - point
* doesn ' t match source of incoming packet .
*
* Return :
* 1 if conflict ,
* 0 else
* */
static inline bool ip6gre_tnl_addr_conflict ( const struct ip6_tnl * t ,
const struct ipv6hdr * hdr )
{
return ipv6_addr_equal ( & t - > parms . raddr , & hdr - > saddr ) ;
}
static int ip6gre_xmit_other ( struct sk_buff * skb , struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
int encap_limit = - 1 ;
struct flowi6 fl6 ;
__u32 mtu ;
int err ;
if ( ! ( t - > parms . flags & IP6_TNL_F_IGN_ENCAP_LIMIT ) )
encap_limit = t - > parms . encap_limit ;
2017-12-01 15:26:08 -08:00
if ( ! t - > parms . collect_md )
memcpy ( & fl6 , & t - > fl . u . ip6 , sizeof ( fl6 ) ) ;
2012-08-10 00:51:50 +00:00
2016-04-29 17:12:21 -07:00
err = gre_handle_offloads ( skb , ! ! ( t - > parms . o_flags & TUNNEL_CSUM ) ) ;
if ( err )
return err ;
err = __gre6_xmit ( skb , dev , 0 , & fl6 , encap_limit , & mtu , skb - > protocol ) ;
2012-08-10 00:51:50 +00:00
return err ;
}
static netdev_tx_t ip6gre_tunnel_xmit ( struct sk_buff * skb ,
struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct net_device_stats * stats = & t - > dev - > stats ;
int ret ;
2018-12-30 17:24:36 -05:00
if ( ! pskb_inet_may_pull ( skb ) )
goto tx_err ;
2014-11-05 08:02:48 +01:00
if ( ! ip6_tnl_xmit_ctl ( t , & t - > parms . laddr , & t - > parms . raddr ) )
2013-02-06 03:24:02 +00:00
goto tx_err ;
2012-08-10 00:51:50 +00:00
switch ( skb - > protocol ) {
case htons ( ETH_P_IP ) :
ret = ip6gre_xmit_ipv4 ( skb , dev ) ;
break ;
case htons ( ETH_P_IPV6 ) :
ret = ip6gre_xmit_ipv6 ( skb , dev ) ;
break ;
default :
ret = ip6gre_xmit_other ( skb , dev ) ;
break ;
}
if ( ret < 0 )
goto tx_err ;
return NETDEV_TX_OK ;
tx_err :
stats - > tx_errors + + ;
stats - > tx_dropped + + ;
kfree_skb ( skb ) ;
return NETDEV_TX_OK ;
}
2017-11-30 11:51:29 -08:00
static netdev_tx_t ip6erspan_tunnel_xmit ( struct sk_buff * skb ,
struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct dst_entry * dst = skb_dst ( skb ) ;
struct net_device_stats * stats ;
bool truncate = false ;
int encap_limit = - 1 ;
__u8 dsfield = false ;
struct flowi6 fl6 ;
int err = - EINVAL ;
2019-01-14 18:10:06 +08:00
__be16 proto ;
2017-11-30 11:51:29 -08:00
__u32 mtu ;
2018-04-27 14:16:32 -07:00
int nhoff ;
2018-05-11 05:49:47 -07:00
int thoff ;
2017-11-30 11:51:29 -08:00
2018-12-30 17:24:36 -05:00
if ( ! pskb_inet_may_pull ( skb ) )
goto tx_err ;
2017-11-30 11:51:29 -08:00
if ( ! ip6_tnl_xmit_ctl ( t , & t - > parms . laddr , & t - > parms . raddr ) )
goto tx_err ;
if ( gre_handle_offloads ( skb , false ) )
goto tx_err ;
if ( skb - > len > dev - > mtu + dev - > hard_header_len ) {
pskb_trim ( skb , dev - > mtu + dev - > hard_header_len ) ;
truncate = true ;
}
2018-04-27 14:16:32 -07:00
nhoff = skb_network_header ( skb ) - skb_mac_header ( skb ) ;
if ( skb - > protocol = = htons ( ETH_P_IP ) & &
( ntohs ( ip_hdr ( skb ) - > tot_len ) > skb - > len - nhoff ) )
truncate = true ;
2018-05-11 05:49:47 -07:00
thoff = skb_transport_header ( skb ) - skb_mac_header ( skb ) ;
if ( skb - > protocol = = htons ( ETH_P_IPV6 ) & &
( ntohs ( ipv6_hdr ( skb ) - > payload_len ) > skb - > len - thoff ) )
truncate = true ;
2018-05-17 16:36:15 +02:00
if ( skb_cow_head ( skb , dev - > needed_headroom ? : t - > hlen ) )
2018-03-09 07:34:42 -08:00
goto tx_err ;
2017-11-30 11:51:29 -08:00
t - > parms . o_flags & = ~ TUNNEL_KEY ;
IPCB ( skb ) - > flags = 0 ;
2017-12-05 15:15:44 -08:00
/* For collect_md mode, derive fl6 from the tunnel key,
* for native mode , call prepare_ip6gre_xmit_ { ipv4 , ipv6 } .
*/
if ( t - > parms . collect_md ) {
struct ip_tunnel_info * tun_info ;
const struct ip_tunnel_key * key ;
struct erspan_metadata * md ;
2018-01-25 13:20:09 -08:00
__be32 tun_id ;
2017-12-05 15:15:44 -08:00
tun_info = skb_tunnel_info ( skb ) ;
if ( unlikely ( ! tun_info | |
! ( tun_info - > mode & IP_TUNNEL_INFO_TX ) | |
ip_tunnel_info_af ( tun_info ) ! = AF_INET6 ) )
return - EINVAL ;
key = & tun_info - > key ;
memset ( & fl6 , 0 , sizeof ( fl6 ) ) ;
fl6 . flowi6_proto = IPPROTO_GRE ;
fl6 . daddr = key - > u . ipv6 . dst ;
fl6 . flowlabel = key - > label ;
fl6 . flowi6_uid = sock_net_uid ( dev_net ( dev ) , NULL ) ;
dsfield = key - > tos ;
2018-06-26 21:39:36 -07:00
if ( ! ( tun_info - > key . tun_flags & TUNNEL_ERSPAN_OPT ) )
goto tx_err ;
2017-12-05 15:15:44 -08:00
md = ip_tunnel_info_opts ( tun_info ) ;
if ( ! md )
goto tx_err ;
2018-01-25 13:20:09 -08:00
tun_id = tunnel_id_to_key32 ( key - > tun_id ) ;
2017-12-13 16:38:57 -08:00
if ( md - > version = = 1 ) {
erspan_build_header ( skb ,
2018-01-25 13:20:09 -08:00
ntohl ( tun_id ) ,
2017-12-13 16:38:57 -08:00
ntohl ( md - > u . index ) , truncate ,
false ) ;
} else if ( md - > version = = 2 ) {
erspan_build_header_v2 ( skb ,
2018-01-25 13:20:09 -08:00
ntohl ( tun_id ) ,
md - > u . md2 . dir ,
get_hwid ( & md - > u . md2 ) ,
truncate , false ) ;
2018-03-09 07:34:41 -08:00
} else {
goto tx_err ;
2017-12-13 16:38:57 -08:00
}
2017-12-05 15:15:44 -08:00
} else {
switch ( skb - > protocol ) {
case htons ( ETH_P_IP ) :
memset ( & ( IPCB ( skb ) - > opt ) , 0 , sizeof ( IPCB ( skb ) - > opt ) ) ;
prepare_ip6gre_xmit_ipv4 ( skb , dev , & fl6 ,
& dsfield , & encap_limit ) ;
break ;
case htons ( ETH_P_IPV6 ) :
2018-12-30 17:24:36 -05:00
if ( ipv6_addr_equal ( & t - > parms . raddr , & ipv6_hdr ( skb ) - > saddr ) )
2017-12-05 15:15:44 -08:00
goto tx_err ;
if ( prepare_ip6gre_xmit_ipv6 ( skb , dev , & fl6 ,
& dsfield , & encap_limit ) )
goto tx_err ;
break ;
default :
memcpy ( & fl6 , & t - > fl . u . ip6 , sizeof ( fl6 ) ) ;
break ;
}
2017-12-13 16:38:57 -08:00
if ( t - > parms . erspan_ver = = 1 )
2018-01-25 13:20:09 -08:00
erspan_build_header ( skb , ntohl ( t - > parms . o_key ) ,
2017-12-13 16:38:57 -08:00
t - > parms . index ,
truncate , false ) ;
2018-05-16 17:24:32 -07:00
else if ( t - > parms . erspan_ver = = 2 )
2018-01-25 13:20:09 -08:00
erspan_build_header_v2 ( skb , ntohl ( t - > parms . o_key ) ,
2017-12-13 16:38:57 -08:00
t - > parms . dir ,
t - > parms . hwid ,
truncate , false ) ;
2018-05-16 17:24:32 -07:00
else
goto tx_err ;
2017-12-05 15:15:44 -08:00
fl6 . daddr = t - > parms . raddr ;
}
2017-11-30 11:51:29 -08:00
/* Push GRE header. */
2019-01-14 18:10:06 +08:00
proto = ( t - > parms . erspan_ver = = 1 ) ? htons ( ETH_P_ERSPAN )
: htons ( ETH_P_ERSPAN2 ) ;
gre_build_header ( skb , 8 , TUNNEL_SEQ , proto , 0 , htonl ( t - > o_seqno + + ) ) ;
2017-11-30 11:51:29 -08:00
/* TooBig packet may have updated dst->dev's mtu */
2017-12-05 15:15:44 -08:00
if ( ! t - > parms . collect_md & & dst & & dst_mtu ( dst ) > dst - > dev - > mtu )
2017-11-30 11:51:29 -08:00
dst - > ops - > update_pmtu ( dst , NULL , skb , dst - > dev - > mtu ) ;
err = ip6_tnl_xmit ( skb , dev , dsfield , & fl6 , encap_limit , & mtu ,
NEXTHDR_GRE ) ;
if ( err ! = 0 ) {
/* XXX: send ICMP error even if DF is not set. */
if ( err = = - EMSGSIZE ) {
if ( skb - > protocol = = htons ( ETH_P_IP ) )
icmp_send ( skb , ICMP_DEST_UNREACH ,
ICMP_FRAG_NEEDED , htonl ( mtu ) ) ;
else
icmpv6_send ( skb , ICMPV6_PKT_TOOBIG , 0 , mtu ) ;
}
goto tx_err ;
}
return NETDEV_TX_OK ;
tx_err :
stats = & t - > dev - > stats ;
stats - > tx_errors + + ;
stats - > tx_dropped + + ;
kfree_skb ( skb ) ;
return NETDEV_TX_OK ;
}
2018-05-17 16:36:27 +02:00
static void ip6gre_tnl_link_config_common ( struct ip6_tnl * t )
2012-08-10 00:51:50 +00:00
{
struct net_device * dev = t - > dev ;
struct __ip6_tnl_parm * p = & t - > parms ;
struct flowi6 * fl6 = & t - > fl . u . ip6 ;
if ( dev - > type ! = ARPHRD_ETHER ) {
memcpy ( dev - > dev_addr , & p - > laddr , sizeof ( struct in6_addr ) ) ;
memcpy ( dev - > broadcast , & p - > raddr , sizeof ( struct in6_addr ) ) ;
}
/* Set up flowi template */
fl6 - > saddr = p - > laddr ;
fl6 - > daddr = p - > raddr ;
fl6 - > flowi6_oif = p - > link ;
fl6 - > flowlabel = 0 ;
2016-05-21 18:17:35 +08:00
fl6 - > flowi6_proto = IPPROTO_GRE ;
2012-08-10 00:51:50 +00:00
if ( ! ( p - > flags & IP6_TNL_F_USE_ORIG_TCLASS ) )
fl6 - > flowlabel | = IPV6_TCLASS_MASK & p - > flowinfo ;
if ( ! ( p - > flags & IP6_TNL_F_USE_ORIG_FLOWLABEL ) )
fl6 - > flowlabel | = IPV6_FLOWLABEL_MASK & p - > flowinfo ;
p - > flags & = ~ ( IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | IP6_TNL_F_CAP_PER_PACKET ) ;
p - > flags | = ip6_tnl_get_cap ( t , & p - > laddr , & p - > raddr ) ;
if ( p - > flags & IP6_TNL_F_CAP_XMIT & &
p - > flags & IP6_TNL_F_CAP_RCV & & dev - > type ! = ARPHRD_ETHER )
dev - > flags | = IFF_POINTOPOINT ;
else
dev - > flags & = ~ IFF_POINTOPOINT ;
2018-05-17 16:36:27 +02:00
}
2012-08-10 00:51:50 +00:00
2018-05-17 16:36:27 +02:00
static void ip6gre_tnl_link_config_route ( struct ip6_tnl * t , int set_mtu ,
int t_hlen )
{
const struct __ip6_tnl_parm * p = & t - > parms ;
struct net_device * dev = t - > dev ;
2012-08-10 00:51:50 +00:00
if ( p - > flags & IP6_TNL_F_CAP_XMIT ) {
int strict = ( ipv6_addr_type ( & p - > raddr ) &
( IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL ) ) ;
2014-04-22 10:15:24 +02:00
struct rt6_info * rt = rt6_lookup ( t - > net ,
2012-08-10 00:51:50 +00:00
& p - > raddr , & p - > laddr ,
2018-03-02 08:32:17 -08:00
p - > link , NULL , strict ) ;
2012-08-10 00:51:50 +00:00
2015-03-29 14:00:04 +01:00
if ( ! rt )
2012-08-10 00:51:50 +00:00
return ;
if ( rt - > dst . dev ) {
net: ipv6_gre: Fix GRO to work on IPv6 over GRE tap
IPv6 GRO over GRE tap is not working while GRO is not set
over the native interface.
gro_list_prepare function updates the same_flow variable
of existing sessions to 1 if their mac headers match the one
of the incoming packet.
same_flow is used to filter out non-matching sessions and keep
potential ones for aggregation.
The number of bytes to compare should be the number of bytes
in the mac headers. In gro_list_prepare this number is set to
be skb->dev->hard_header_len. For GRE interfaces this hard_header_len
should be as it is set in the initialization process (when GRE is
created), it should not be overridden. But currently it is being overridden
by the value that is actually supposed to represent the needed_headroom.
Therefore, the number of bytes compared in order to decide whether the
the mac headers are the same is greater than the length of the headers.
As it's documented in netdevice.h, hard_header_len is the maximum
hardware header length, and needed_headroom is the extra headroom
the hardware may need.
hard_header_len is basically all the bytes received by the physical
till layer 3 header of the packet received by the interface.
For example, if the interface is a GRE tap then the needed_headroom
should be the total length of the following headers:
IP header of the physical, GRE header, mac header of GRE.
It is often used to calculate the MTU of the created interface.
This patch removes the override of the hard_header_len, and
assigns the calculated value to needed_headroom.
This way, the comparison in gro_list_prepare is really of
the mac headers, and if the packets have the same mac headers
the same_flow will be set to 1.
Performance testing: 45% higher bandwidth.
Measuring bandwidth of single-stream IPv4 TCP traffic over IPv6
GRE tap while GRO is not set on the native.
NIC: ConnectX-4LX
Before (GRO not working) : 7.2 Gbits/sec
After (GRO working): 10.5 Gbits/sec
Signed-off-by: Maria Pasechnik <mariap@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:46:30 +03:00
dev - > needed_headroom = rt - > dst . dev - > hard_header_len +
2016-05-09 17:12:08 -07:00
t_hlen ;
2012-08-10 00:51:50 +00:00
if ( set_mtu ) {
2016-05-09 17:12:08 -07:00
dev - > mtu = rt - > dst . dev - > mtu - t_hlen ;
2012-08-10 00:51:50 +00:00
if ( ! ( t - > parms . flags & IP6_TNL_F_IGN_ENCAP_LIMIT ) )
dev - > mtu - = 8 ;
2016-04-14 15:33:45 -04:00
if ( dev - > type = = ARPHRD_ETHER )
dev - > mtu - = ETH_HLEN ;
2012-08-10 00:51:50 +00:00
if ( dev - > mtu < IPV6_MIN_MTU )
dev - > mtu = IPV6_MIN_MTU ;
}
}
2012-10-29 00:13:19 +00:00
ip6_rt_put ( rt ) ;
2012-08-10 00:51:50 +00:00
}
}
2018-05-17 16:36:27 +02:00
static int ip6gre_calc_hlen ( struct ip6_tnl * tunnel )
{
int t_hlen ;
tunnel - > tun_hlen = gre_calc_hlen ( tunnel - > parms . o_flags ) ;
tunnel - > hlen = tunnel - > tun_hlen + tunnel - > encap_hlen ;
t_hlen = tunnel - > hlen + sizeof ( struct ipv6hdr ) ;
net: ipv6_gre: Fix GRO to work on IPv6 over GRE tap
IPv6 GRO over GRE tap is not working while GRO is not set
over the native interface.
gro_list_prepare function updates the same_flow variable
of existing sessions to 1 if their mac headers match the one
of the incoming packet.
same_flow is used to filter out non-matching sessions and keep
potential ones for aggregation.
The number of bytes to compare should be the number of bytes
in the mac headers. In gro_list_prepare this number is set to
be skb->dev->hard_header_len. For GRE interfaces this hard_header_len
should be as it is set in the initialization process (when GRE is
created), it should not be overridden. But currently it is being overridden
by the value that is actually supposed to represent the needed_headroom.
Therefore, the number of bytes compared in order to decide whether the
the mac headers are the same is greater than the length of the headers.
As it's documented in netdevice.h, hard_header_len is the maximum
hardware header length, and needed_headroom is the extra headroom
the hardware may need.
hard_header_len is basically all the bytes received by the physical
till layer 3 header of the packet received by the interface.
For example, if the interface is a GRE tap then the needed_headroom
should be the total length of the following headers:
IP header of the physical, GRE header, mac header of GRE.
It is often used to calculate the MTU of the created interface.
This patch removes the override of the hard_header_len, and
assigns the calculated value to needed_headroom.
This way, the comparison in gro_list_prepare is really of
the mac headers, and if the packets have the same mac headers
the same_flow will be set to 1.
Performance testing: 45% higher bandwidth.
Measuring bandwidth of single-stream IPv4 TCP traffic over IPv6
GRE tap while GRO is not set on the native.
NIC: ConnectX-4LX
Before (GRO not working) : 7.2 Gbits/sec
After (GRO working): 10.5 Gbits/sec
Signed-off-by: Maria Pasechnik <mariap@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:46:30 +03:00
tunnel - > dev - > needed_headroom = LL_MAX_HEADER + t_hlen ;
2018-05-17 16:36:27 +02:00
return t_hlen ;
}
/*
 * Full link (re)configuration: the route-independent part first, then the
 * header length calculation, then the route-dependent part.  @set_mtu is
 * forwarded to ip6gre_tnl_link_config_route().
 */
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
	int t_hlen;

	ip6gre_tnl_link_config_common(t);

	t_hlen = ip6gre_calc_hlen(t);
	ip6gre_tnl_link_config_route(t, set_mtu, t_hlen);
}
2018-05-17 16:36:33 +02:00
static void ip6gre_tnl_copy_tnl_parm ( struct ip6_tnl * t ,
const struct __ip6_tnl_parm * p )
2012-08-10 00:51:50 +00:00
{
t - > parms . laddr = p - > laddr ;
t - > parms . raddr = p - > raddr ;
t - > parms . flags = p - > flags ;
t - > parms . hop_limit = p - > hop_limit ;
t - > parms . encap_limit = p - > encap_limit ;
t - > parms . flowinfo = p - > flowinfo ;
t - > parms . link = p - > link ;
t - > parms . proto = p - > proto ;
t - > parms . i_key = p - > i_key ;
t - > parms . o_key = p - > o_key ;
t - > parms . i_flags = p - > i_flags ;
t - > parms . o_flags = p - > o_flags ;
2017-04-19 12:30:53 -04:00
t - > parms . fwmark = p - > fwmark ;
2019-01-10 11:17:42 +08:00
t - > parms . erspan_ver = p - > erspan_ver ;
t - > parms . index = p - > index ;
t - > parms . dir = p - > dir ;
t - > parms . hwid = p - > hwid ;
2016-02-12 15:43:54 +01:00
dst_cache_reset ( & t - > dst_cache ) ;
2018-05-17 16:36:33 +02:00
}
/*
 * Replace the tunnel's parameters with @p and redo the link
 * configuration; @set_mtu is forwarded to ip6gre_tnl_link_config().
 * Always returns 0.
 */
static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
			     int set_mtu)
{
	ip6gre_tnl_copy_tnl_parm(t, p);

	ip6gre_tnl_link_config(t, set_mtu);

	return 0;
}
static void ip6gre_tnl_parm_from_user ( struct __ip6_tnl_parm * p ,
const struct ip6_tnl_parm2 * u )
{
p - > laddr = u - > laddr ;
p - > raddr = u - > raddr ;
p - > flags = u - > flags ;
p - > hop_limit = u - > hop_limit ;
p - > encap_limit = u - > encap_limit ;
p - > flowinfo = u - > flowinfo ;
p - > link = u - > link ;
p - > i_key = u - > i_key ;
p - > o_key = u - > o_key ;
2016-05-09 17:12:09 -07:00
p - > i_flags = gre_flags_to_tnl_flags ( u - > i_flags ) ;
p - > o_flags = gre_flags_to_tnl_flags ( u - > o_flags ) ;
2012-08-10 00:51:50 +00:00
memcpy ( p - > name , u - > name , sizeof ( u - > name ) ) ;
}
static void ip6gre_tnl_parm_to_user ( struct ip6_tnl_parm2 * u ,
const struct __ip6_tnl_parm * p )
{
u - > proto = IPPROTO_GRE ;
u - > laddr = p - > laddr ;
u - > raddr = p - > raddr ;
u - > flags = p - > flags ;
u - > hop_limit = p - > hop_limit ;
u - > encap_limit = p - > encap_limit ;
u - > flowinfo = p - > flowinfo ;
u - > link = p - > link ;
u - > i_key = p - > i_key ;
u - > o_key = p - > o_key ;
2016-05-09 17:12:09 -07:00
u - > i_flags = gre_tnl_flags_to_gre_flags ( p - > i_flags ) ;
u - > o_flags = gre_tnl_flags_to_gre_flags ( p - > o_flags ) ;
2012-08-10 00:51:50 +00:00
memcpy ( u - > name , p - > name , sizeof ( u - > name ) ) ;
}
static int ip6gre_tunnel_ioctl ( struct net_device * dev ,
struct ifreq * ifr , int cmd )
{
int err = 0 ;
struct ip6_tnl_parm2 p ;
struct __ip6_tnl_parm p1 ;
2014-04-22 10:15:24 +02:00
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct net * net = t - > net ;
2012-08-10 00:51:50 +00:00
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
2016-04-29 17:12:17 -07:00
memset ( & p1 , 0 , sizeof ( p1 ) ) ;
2012-08-10 00:51:50 +00:00
switch ( cmd ) {
case SIOCGETTUNNEL :
if ( dev = = ign - > fb_tunnel_dev ) {
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) ) {
err = - EFAULT ;
break ;
}
ip6gre_tnl_parm_from_user ( & p1 , & p ) ;
t = ip6gre_tunnel_locate ( net , & p1 , 0 ) ;
2015-03-29 14:00:04 +01:00
if ( ! t )
2014-04-22 10:15:24 +02:00
t = netdev_priv ( dev ) ;
2012-08-10 00:51:50 +00:00
}
2013-05-09 21:56:37 +00:00
memset ( & p , 0 , sizeof ( p ) ) ;
2012-08-10 00:51:50 +00:00
ip6gre_tnl_parm_to_user ( & p , & t - > parms ) ;
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
err = - EFAULT ;
break ;
case SIOCADDTUNNEL :
case SIOCCHGTUNNEL :
err = - EPERM ;
net: Allow userns root to control ipv6
Allow an unpriviled user who has created a user namespace, and then
created a network namespace to effectively use the new network
namespace, by reducing capable(CAP_NET_ADMIN) and
capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns,
CAP_NET_ADMIN), or capable(net->user_ns, CAP_NET_RAW) calls.
Settings that merely control a single network device are allowed.
Either the network device is a logical network device where
restrictions make no difference or the network device is hardware NIC
that has been explicity moved from the initial network namespace.
In general policy and network stack state changes are allowed while
resource control is left unchanged.
Allow the SIOCSIFADDR ioctl to add ipv6 addresses.
Allow the SIOCDIFADDR ioctl to delete ipv6 addresses.
Allow the SIOCADDRT ioctl to add ipv6 routes.
Allow the SIOCDELRT ioctl to delete ipv6 routes.
Allow creation of ipv6 raw sockets.
Allow setting the IPV6_JOIN_ANYCAST socket option.
Allow setting the IPV6_FL_A_RENEW parameter of the IPV6_FLOWLABEL_MGR
socket option.
Allow setting the IPV6_TRANSPARENT socket option.
Allow setting the IPV6_HOPOPTS socket option.
Allow setting the IPV6_RTHDRDSTOPTS socket option.
Allow setting the IPV6_DSTOPTS socket option.
Allow setting the IPV6_IPSEC_POLICY socket option.
Allow setting the IPV6_XFRM_POLICY socket option.
Allow sending packets with the IPV6_2292HOPOPTS control message.
Allow sending packets with the IPV6_2292DSTOPTS control message.
Allow sending packets with the IPV6_RTHDRDSTOPTS control message.
Allow setting the multicast routing socket options on non multicast
routing sockets.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, and SIOCDELTUNNEL ioctls for
setting up, changing and deleting tunnels over ipv6.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL ioctls for
setting up, changing and deleting ipv6 over ipv4 tunnels.
Allow the SIOCADDPRL, SIOCDELPRL, SIOCCHGPRL ioctls for adding,
deleting, and changing the potential router list for ISATAP tunnels.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-16 03:03:06 +00:00
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
2012-08-10 00:51:50 +00:00
goto done ;
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
goto done ;
err = - EINVAL ;
if ( ( p . i_flags | p . o_flags ) & ( GRE_VERSION | GRE_ROUTING ) )
goto done ;
if ( ! ( p . i_flags & GRE_KEY ) )
p . i_key = 0 ;
if ( ! ( p . o_flags & GRE_KEY ) )
p . o_key = 0 ;
ip6gre_tnl_parm_from_user ( & p1 , & p ) ;
t = ip6gre_tunnel_locate ( net , & p1 , cmd = = SIOCADDTUNNEL ) ;
if ( dev ! = ign - > fb_tunnel_dev & & cmd = = SIOCCHGTUNNEL ) {
2015-03-29 14:00:05 +01:00
if ( t ) {
2012-08-10 00:51:50 +00:00
if ( t - > dev ! = dev ) {
err = - EEXIST ;
break ;
}
} else {
t = netdev_priv ( dev ) ;
ip6gre_tunnel_unlink ( ign , t ) ;
synchronize_net ( ) ;
ip6gre_tnl_change ( t , & p1 , 1 ) ;
ip6gre_tunnel_link ( ign , t ) ;
netdev_state_change ( dev ) ;
}
}
if ( t ) {
err = 0 ;
2013-05-09 21:56:37 +00:00
memset ( & p , 0 , sizeof ( p ) ) ;
2012-08-10 00:51:50 +00:00
ip6gre_tnl_parm_to_user ( & p , & t - > parms ) ;
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
err = - EFAULT ;
} else
err = ( cmd = = SIOCADDTUNNEL ? - ENOBUFS : - ENOENT ) ;
break ;
case SIOCDELTUNNEL :
err = - EPERM ;
net: Allow userns root to control ipv6
Allow an unpriviled user who has created a user namespace, and then
created a network namespace to effectively use the new network
namespace, by reducing capable(CAP_NET_ADMIN) and
capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns,
CAP_NET_ADMIN), or capable(net->user_ns, CAP_NET_RAW) calls.
Settings that merely control a single network device are allowed.
Either the network device is a logical network device where
restrictions make no difference or the network device is hardware NIC
that has been explicity moved from the initial network namespace.
In general policy and network stack state changes are allowed while
resource control is left unchanged.
Allow the SIOCSIFADDR ioctl to add ipv6 addresses.
Allow the SIOCDIFADDR ioctl to delete ipv6 addresses.
Allow the SIOCADDRT ioctl to add ipv6 routes.
Allow the SIOCDELRT ioctl to delete ipv6 routes.
Allow creation of ipv6 raw sockets.
Allow setting the IPV6_JOIN_ANYCAST socket option.
Allow setting the IPV6_FL_A_RENEW parameter of the IPV6_FLOWLABEL_MGR
socket option.
Allow setting the IPV6_TRANSPARENT socket option.
Allow setting the IPV6_HOPOPTS socket option.
Allow setting the IPV6_RTHDRDSTOPTS socket option.
Allow setting the IPV6_DSTOPTS socket option.
Allow setting the IPV6_IPSEC_POLICY socket option.
Allow setting the IPV6_XFRM_POLICY socket option.
Allow sending packets with the IPV6_2292HOPOPTS control message.
Allow sending packets with the IPV6_2292DSTOPTS control message.
Allow sending packets with the IPV6_RTHDRDSTOPTS control message.
Allow setting the multicast routing socket options on non multicast
routing sockets.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, and SIOCDELTUNNEL ioctls for
setting up, changing and deleting tunnels over ipv6.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL ioctls for
setting up, changing and deleting ipv6 over ipv4 tunnels.
Allow the SIOCADDPRL, SIOCDELPRL, SIOCCHGPRL ioctls for adding,
deleting, and changing the potential router list for ISATAP tunnels.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-16 03:03:06 +00:00
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
2012-08-10 00:51:50 +00:00
goto done ;
if ( dev = = ign - > fb_tunnel_dev ) {
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
goto done ;
err = - ENOENT ;
ip6gre_tnl_parm_from_user ( & p1 , & p ) ;
t = ip6gre_tunnel_locate ( net , & p1 , 0 ) ;
2015-03-29 14:00:04 +01:00
if ( ! t )
2012-08-10 00:51:50 +00:00
goto done ;
err = - EPERM ;
if ( t = = netdev_priv ( ign - > fb_tunnel_dev ) )
goto done ;
dev = t - > dev ;
}
unregister_netdevice ( dev ) ;
err = 0 ;
break ;
default :
err = - EINVAL ;
}
done :
return err ;
}
static int ip6gre_header ( struct sk_buff * skb , struct net_device * dev ,
2017-09-15 12:00:07 +08:00
unsigned short type , const void * daddr ,
const void * saddr , unsigned int len )
2012-08-10 00:51:50 +00:00
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
2017-09-15 12:00:07 +08:00
struct ipv6hdr * ipv6h ;
__be16 * p ;
2012-08-10 00:51:50 +00:00
2017-09-15 12:00:07 +08:00
ipv6h = skb_push ( skb , t - > hlen + sizeof ( * ipv6h ) ) ;
ip6_flow_hdr ( ipv6h , 0 , ip6_make_flowlabel ( dev_net ( dev ) , skb ,
t - > fl . u . ip6 . flowlabel ,
true , & t - > fl . u . ip6 ) ) ;
2012-08-10 00:51:50 +00:00
ipv6h - > hop_limit = t - > parms . hop_limit ;
ipv6h - > nexthdr = NEXTHDR_GRE ;
ipv6h - > saddr = t - > parms . laddr ;
ipv6h - > daddr = t - > parms . raddr ;
2017-09-15 12:00:07 +08:00
p = ( __be16 * ) ( ipv6h + 1 ) ;
p [ 0 ] = t - > parms . o_flags ;
p [ 1 ] = htons ( type ) ;
2012-08-10 00:51:50 +00:00
/*
* Set the source hardware address .
*/
if ( saddr )
memcpy ( & ipv6h - > saddr , saddr , sizeof ( struct in6_addr ) ) ;
if ( daddr )
memcpy ( & ipv6h - > daddr , daddr , sizeof ( struct in6_addr ) ) ;
if ( ! ipv6_addr_any ( & ipv6h - > daddr ) )
return t - > hlen ;
return - t - > hlen ;
}
/* Link-layer header operations: only header creation is provided. */
static const struct header_ops ip6gre_header_ops = {
	.create	= ip6gre_header,
};
static const struct net_device_ops ip6gre_netdev_ops = {
. ndo_init = ip6gre_tunnel_init ,
. ndo_uninit = ip6gre_tunnel_uninit ,
. ndo_start_xmit = ip6gre_tunnel_xmit ,
. ndo_do_ioctl = ip6gre_tunnel_ioctl ,
2016-04-29 17:12:21 -07:00
. ndo_change_mtu = ip6_tnl_change_mtu ,
2013-03-25 14:50:00 +00:00
. ndo_get_stats64 = ip_tunnel_get_stats64 ,
2015-04-02 17:07:01 +02:00
. ndo_get_iflink = ip6_tnl_get_iflink ,
2012-08-10 00:51:50 +00:00
} ;
static void ip6gre_dev_free ( struct net_device * dev )
{
2015-09-15 14:30:07 -07:00
struct ip6_tnl * t = netdev_priv ( dev ) ;
2018-05-07 10:45:27 +03:00
gro_cells_destroy ( & t - > gro_cells ) ;
2016-02-12 15:43:54 +01:00
dst_cache_destroy ( & t - > dst_cache ) ;
2012-08-10 00:51:50 +00:00
free_percpu ( dev - > tstats ) ;
}
/*
 * Setup callback for plain ip6gre devices; ip6gre_init_net() passes it to
 * alloc_netdev() for the fallback device.
 *
 * Installs ip6gre_netdev_ops, registers ip6gre_dev_free() as the private
 * destructor and sets needs_free_netdev, marks the device as ARPHRD_IP6GRE
 * with IFF_NOARP and an address length of sizeof(struct in6_addr), calls
 * netif_keep_dst() and assigns a random permanent address.
 */
static void ip6gre_tunnel_setup ( struct net_device * dev )
{
dev - > netdev_ops = & ip6gre_netdev_ops ;
net: Fix inconsistent teardown and release of private netdev state.
Network devices can allocate reasources and private memory using
netdev_ops->ndo_init(). However, the release of these resources
can occur in one of two different places.
Either netdev_ops->ndo_uninit() or netdev->destructor().
The decision of which operation frees the resources depends upon
whether it is necessary for all netdev refs to be released before it
is safe to perform the freeing.
netdev_ops->ndo_uninit() presumably can occur right after the
NETDEV_UNREGISTER notifier completes and the unicast and multicast
address lists are flushed.
netdev->destructor(), on the other hand, does not run until the
netdev references all go away.
Further complicating the situation is that netdev->destructor()
almost universally does also a free_netdev().
This creates a problem for the logic in register_netdevice().
Because all callers of register_netdevice() manage the freeing
of the netdev, and invoke free_netdev(dev) if register_netdevice()
fails.
If netdev_ops->ndo_init() succeeds, but something else fails inside
of register_netdevice(), it does call ndo_ops->ndo_uninit(). But
it is not able to invoke netdev->destructor().
This is because netdev->destructor() will do a free_netdev() and
then the caller of register_netdevice() will do the same.
However, this means that the resources that would normally be released
by netdev->destructor() will not be.
Over the years drivers have added local hacks to deal with this, by
invoking their destructor parts by hand when register_netdevice()
fails.
Many drivers do not try to deal with this, and instead we have leaks.
Let's close this hole by formalizing the distinction between what
private things need to be freed up by netdev->destructor() and whether
the driver needs unregister_netdevice() to perform the free_netdev().
netdev->priv_destructor() performs all actions to free up the private
resources that used to be freed by netdev->destructor(), except for
free_netdev().
netdev->needs_free_netdev is a boolean that indicates whether
free_netdev() should be done at the end of unregister_netdevice().
Now, register_netdevice() can sanely release all resources after
ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit()
and netdev->priv_destructor().
And at the end of unregister_netdevice(), we invoke
netdev->priv_destructor() and optionally call free_netdev().
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-08 12:52:56 -04:00
dev - > needs_free_netdev = true ;
dev - > priv_destructor = ip6gre_dev_free ;
2012-08-10 00:51:50 +00:00
dev - > type = ARPHRD_IP6GRE ;
2016-04-29 17:12:21 -07:00
2012-08-10 00:51:50 +00:00
dev - > flags | = IFF_NOARP ;
dev - > addr_len = sizeof ( struct in6_addr ) ;
2014-10-05 18:38:35 -07:00
netif_keep_dst ( dev ) ;
2017-01-26 16:59:18 +13:00
/* This perm addr will be used as interface identifier by IPv6 */
dev - > addr_assign_type = NET_ADDR_RANDOM ;
eth_random_addr ( dev - > perm_addr ) ;
2012-08-10 00:51:50 +00:00
}
2017-12-20 19:36:03 +03:00
/* Base offload feature set applied by ip6gre_tnl_init_features(). */
#define GRE6_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)
static void ip6gre_tnl_init_features ( struct net_device * dev )
{
struct ip6_tnl * nt = netdev_priv ( dev ) ;
dev - > features | = GRE6_FEATURES ;
dev - > hw_features | = GRE6_FEATURES ;
if ( ! ( nt - > parms . o_flags & TUNNEL_SEQ ) ) {
/* TCP offload with GRE SEQ is not supported, nor
* can we support 2 levels of outer headers requiring
* an update .
*/
if ( ! ( nt - > parms . o_flags & TUNNEL_CSUM ) | |
nt - > encap . type = = TUNNEL_ENCAP_NONE ) {
dev - > features | = NETIF_F_GSO_SOFTWARE ;
dev - > hw_features | = NETIF_F_GSO_SOFTWARE ;
}
/* Can use a lockless transmit, unless we generate
* output sequences
*/
dev - > features | = NETIF_F_LLTX ;
}
}
2015-09-15 14:30:05 -07:00
static int ip6gre_tunnel_init_common ( struct net_device * dev )
2012-08-10 00:51:50 +00:00
{
struct ip6_tnl * tunnel ;
2015-09-15 14:30:07 -07:00
int ret ;
2016-04-29 17:12:21 -07:00
int t_hlen ;
2012-08-10 00:51:50 +00:00
tunnel = netdev_priv ( dev ) ;
tunnel - > dev = dev ;
2013-08-13 17:51:12 +02:00
tunnel - > net = dev_net ( dev ) ;
2012-08-10 00:51:50 +00:00
strcpy ( tunnel - > parms . name , dev - > name ) ;
2015-09-15 14:30:05 -07:00
dev - > tstats = netdev_alloc_pcpu_stats ( struct pcpu_sw_netstats ) ;
if ( ! dev - > tstats )
return - ENOMEM ;
2016-02-12 15:43:54 +01:00
ret = dst_cache_init ( & tunnel - > dst_cache , GFP_KERNEL ) ;
2018-05-07 10:45:27 +03:00
if ( ret )
goto cleanup_alloc_pcpu_stats ;
ret = gro_cells_init ( & tunnel - > gro_cells , dev ) ;
if ( ret )
goto cleanup_dst_cache_init ;
2015-09-15 14:30:07 -07:00
2018-05-17 16:36:27 +02:00
t_hlen = ip6gre_calc_hlen ( tunnel ) ;
2016-05-09 17:12:08 -07:00
dev - > mtu = ETH_DATA_LEN - t_hlen ;
2016-05-21 18:17:34 +08:00
if ( dev - > type = = ARPHRD_ETHER )
dev - > mtu - = ETH_HLEN ;
2016-04-29 17:12:21 -07:00
if ( ! ( tunnel - > parms . flags & IP6_TNL_F_IGN_ENCAP_LIMIT ) )
dev - > mtu - = 8 ;
2017-12-01 15:26:08 -08:00
if ( tunnel - > parms . collect_md ) {
dev - > features | = NETIF_F_NETNS_LOCAL ;
netif_keep_dst ( dev ) ;
}
2017-12-20 19:36:03 +03:00
ip6gre_tnl_init_features ( dev ) ;
2017-12-01 15:26:08 -08:00
2015-09-15 14:30:05 -07:00
return 0 ;
2018-05-07 10:45:27 +03:00
cleanup_dst_cache_init :
dst_cache_destroy ( & tunnel - > dst_cache ) ;
cleanup_alloc_pcpu_stats :
free_percpu ( dev - > tstats ) ;
dev - > tstats = NULL ;
return ret ;
2015-09-15 14:30:05 -07:00
}
static int ip6gre_tunnel_init ( struct net_device * dev )
{
struct ip6_tnl * tunnel ;
int ret ;
ret = ip6gre_tunnel_init_common ( dev ) ;
if ( ret )
return ret ;
tunnel = netdev_priv ( dev ) ;
2017-12-01 15:26:08 -08:00
if ( tunnel - > parms . collect_md )
return 0 ;
2012-08-10 00:51:50 +00:00
memcpy ( dev - > dev_addr , & tunnel - > parms . laddr , sizeof ( struct in6_addr ) ) ;
memcpy ( dev - > broadcast , & tunnel - > parms . raddr , sizeof ( struct in6_addr ) ) ;
if ( ipv6_addr_any ( & tunnel - > parms . raddr ) )
dev - > header_ops = & ip6gre_header_ops ;
return 0 ;
}
static void ip6gre_fb_tunnel_init ( struct net_device * dev )
{
struct ip6_tnl * tunnel = netdev_priv ( dev ) ;
tunnel - > dev = dev ;
2013-08-13 17:51:12 +02:00
tunnel - > net = dev_net ( dev ) ;
2012-08-10 00:51:50 +00:00
strcpy ( tunnel - > parms . name , dev - > name ) ;
tunnel - > hlen = sizeof ( struct ipv6hdr ) + 4 ;
dev_hold ( dev ) ;
}
/* IPv6 protocol handler descriptor: receive and ICMPv6 error entry points. */
static struct inet6_protocol ip6gre_protocol __read_mostly = {
	.handler	= gre_rcv,
	.err_handler	= ip6gre_err,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};
2014-04-22 10:15:24 +02:00
static void ip6gre_destroy_tunnels ( struct net * net , struct list_head * head )
2012-08-10 00:51:50 +00:00
{
2014-04-22 10:15:24 +02:00
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
struct net_device * dev , * aux ;
2012-08-10 00:51:50 +00:00
int prio ;
2014-04-22 10:15:24 +02:00
for_each_netdev_safe ( net , dev , aux )
if ( dev - > rtnl_link_ops = = & ip6gre_link_ops | |
2017-11-30 11:51:29 -08:00
dev - > rtnl_link_ops = = & ip6gre_tap_ops | |
dev - > rtnl_link_ops = = & ip6erspan_tap_ops )
2014-04-22 10:15:24 +02:00
unregister_netdevice_queue ( dev , head ) ;
2012-08-10 00:51:50 +00:00
for ( prio = 0 ; prio < 4 ; prio + + ) {
int h ;
2016-08-10 11:03:35 +02:00
for ( h = 0 ; h < IP6_GRE_HASH_SIZE ; h + + ) {
2012-08-10 00:51:50 +00:00
struct ip6_tnl * t ;
t = rtnl_dereference ( ign - > tunnels [ prio ] [ h ] ) ;
2015-03-29 14:00:05 +01:00
while ( t ) {
2014-04-22 10:15:24 +02:00
/* If dev is in the same netns, it has already
* been added to the list by the previous loop .
*/
if ( ! net_eq ( dev_net ( t - > dev ) , net ) )
unregister_netdevice_queue ( t - > dev ,
head ) ;
2012-08-10 00:51:50 +00:00
t = rtnl_dereference ( t - > next ) ;
}
}
}
}
/*
 * Per-netns init: create and register the fallback tunnel device.
 *
 * Returns 0 without creating anything when net_has_fallback_tunnels(net)
 * is false.  Otherwise allocates the fallback netdev with
 * ip6gre_tunnel_setup(), binds it to @net, marks it NETIF_F_NETNS_LOCAL,
 * registers it and publishes its private data in ign->tunnels_wc[0].
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
 * register_netdev(); in the latter case the device is released with
 * free_netdev().
 */
static int __net_init ip6gre_init_net ( struct net * net )
{
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
int err ;
net: do not create fallback tunnels for non-default namespaces
fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0,
ip6tnl0, ip6gre0) are automatically created when the corresponding
module is loaded.
These tunnels are also automatically created when a new network
namespace is created, at a great cost.
In many cases, netns are used for isolation purposes, and these
extra network devices are a waste of resources. We are using
thousands of netns per host, and hit the netns creation/delete
bottleneck a lot. (Many thanks to Kirill for recent work on this)
Add a new sysctl so that we can opt-out from this automatic creation.
Note that these tunnels are still created for the initial namespace,
to be the least intrusive for typical setups.
Tested:
lpk43:~# cat add_del_unshare.sh
for i in `seq 1 40`
do
(for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) &
done
wait
lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m37.521s
user 0m0.886s
sys 7m7.084s
lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m4.761s
user 0m0.851s
sys 1m8.343s
lpk43:~#
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-08 12:51:41 -08:00
if ( ! net_has_fallback_tunnels ( net ) )
return 0 ;
2012-08-10 00:51:50 +00:00
ign - > fb_tunnel_dev = alloc_netdev ( sizeof ( struct ip6_tnl ) , " ip6gre0 " ,
net: set name_assign_type in alloc_netdev()
Extend alloc_netdev{,_mq{,s}}() to take name_assign_type as argument, and convert
all users to pass NET_NAME_UNKNOWN.
Coccinelle patch:
@@
expression sizeof_priv, name, setup, txqs, rxqs, count;
@@
(
-alloc_netdev_mqs(sizeof_priv, name, setup, txqs, rxqs)
+alloc_netdev_mqs(sizeof_priv, name, NET_NAME_UNKNOWN, setup, txqs, rxqs)
|
-alloc_netdev_mq(sizeof_priv, name, setup, count)
+alloc_netdev_mq(sizeof_priv, name, NET_NAME_UNKNOWN, setup, count)
|
-alloc_netdev(sizeof_priv, name, setup)
+alloc_netdev(sizeof_priv, name, NET_NAME_UNKNOWN, setup)
)
v9: move comments here from the wrong commit
Signed-off-by: Tom Gundersen <teg@jklm.no>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-14 16:37:24 +02:00
NET_NAME_UNKNOWN ,
ip6gre_tunnel_setup ) ;
2012-08-10 00:51:50 +00:00
if ( ! ign - > fb_tunnel_dev ) {
err = - ENOMEM ;
goto err_alloc_dev ;
}
dev_net_set ( ign - > fb_tunnel_dev , net ) ;
2014-04-22 10:15:24 +02:00
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe .
*/
ign - > fb_tunnel_dev - > features | = NETIF_F_NETNS_LOCAL ;
2012-08-10 00:51:50 +00:00
ip6gre_fb_tunnel_init ( ign - > fb_tunnel_dev ) ;
ign - > fb_tunnel_dev - > rtnl_link_ops = & ip6gre_link_ops ;
err = register_netdev ( ign - > fb_tunnel_dev ) ;
if ( err )
goto err_reg_dev ;
rcu_assign_pointer ( ign - > tunnels_wc [ 0 ] ,
netdev_priv ( ign - > fb_tunnel_dev ) ) ;
return 0 ;
err_reg_dev :
net: Fix inconsistent teardown and release of private netdev state.
Network devices can allocate reasources and private memory using
netdev_ops->ndo_init(). However, the release of these resources
can occur in one of two different places.
Either netdev_ops->ndo_uninit() or netdev->destructor().
The decision of which operation frees the resources depends upon
whether it is necessary for all netdev refs to be released before it
is safe to perform the freeing.
netdev_ops->ndo_uninit() presumably can occur right after the
NETDEV_UNREGISTER notifier completes and the unicast and multicast
address lists are flushed.
netdev->destructor(), on the other hand, does not run until the
netdev references all go away.
Further complicating the situation is that netdev->destructor()
almost universally does also a free_netdev().
This creates a problem for the logic in register_netdevice().
Because all callers of register_netdevice() manage the freeing
of the netdev, and invoke free_netdev(dev) if register_netdevice()
fails.
If netdev_ops->ndo_init() succeeds, but something else fails inside
of register_netdevice(), it does call ndo_ops->ndo_uninit(). But
it is not able to invoke netdev->destructor().
This is because netdev->destructor() will do a free_netdev() and
then the caller of register_netdevice() will do the same.
However, this means that the resources that would normally be released
by netdev->destructor() will not be.
Over the years drivers have added local hacks to deal with this, by
invoking their destructor parts by hand when register_netdevice()
fails.
Many drivers do not try to deal with this, and instead we have leaks.
Let's close this hole by formalizing the distinction between what
private things need to be freed up by netdev->destructor() and whether
the driver needs unregister_netdevice() to perform the free_netdev().
netdev->priv_destructor() performs all actions to free up the private
resources that used to be freed by netdev->destructor(), except for
free_netdev().
netdev->needs_free_netdev is a boolean that indicates whether
free_netdev() should be done at the end of unregister_netdevice().
Now, register_netdevice() can sanely release all resources after
ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit()
and netdev->priv_destructor().
And at the end of unregister_netdevice(), we invoke
netdev->priv_destructor() and optionally call free_netdev().
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-08 12:52:56 -04:00
free_netdev ( ign - > fb_tunnel_dev ) ;
2012-08-10 00:51:50 +00:00
err_alloc_dev :
return err ;
}
2017-09-19 16:27:08 -07:00
/*
 * Batched per-netns exit: under the RTNL lock, collect the tunnel devices
 * of every namespace on @net_list and unregister them in one go.
 */
static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
{
	LIST_HEAD(kill_list);
	struct net *net;

	rtnl_lock();

	list_for_each_entry(net, net_list, exit_list)
		ip6gre_destroy_tunnels(net, &kill_list);
	unregister_netdevice_many(&kill_list);

	rtnl_unlock();
}
static struct pernet_operations ip6gre_net_ops = {
. init = ip6gre_init_net ,
2017-09-19 16:27:08 -07:00
. exit_batch = ip6gre_exit_batch_net ,
2012-08-10 00:51:50 +00:00
. id = & ip6gre_net_id ,
. size = sizeof ( struct ip6gre_net ) ,
} ;
2017-06-25 23:56:01 +02:00
static int ip6gre_tunnel_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
2012-08-10 00:51:50 +00:00
{
__be16 flags ;
if ( ! data )
return 0 ;
flags = 0 ;
if ( data [ IFLA_GRE_IFLAGS ] )
flags | = nla_get_be16 ( data [ IFLA_GRE_IFLAGS ] ) ;
if ( data [ IFLA_GRE_OFLAGS ] )
flags | = nla_get_be16 ( data [ IFLA_GRE_OFLAGS ] ) ;
if ( flags & ( GRE_VERSION | GRE_ROUTING ) )
return - EINVAL ;
return 0 ;
}
2017-06-25 23:56:01 +02:00
static int ip6gre_tap_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
2012-08-10 00:51:50 +00:00
{
struct in6_addr daddr ;
if ( tb [ IFLA_ADDRESS ] ) {
if ( nla_len ( tb [ IFLA_ADDRESS ] ) ! = ETH_ALEN )
return - EINVAL ;
if ( ! is_valid_ether_addr ( nla_data ( tb [ IFLA_ADDRESS ] ) ) )
return - EADDRNOTAVAIL ;
}
if ( ! data )
goto out ;
if ( data [ IFLA_GRE_REMOTE ] ) {
2015-03-29 16:59:26 +02:00
daddr = nla_get_in6_addr ( data [ IFLA_GRE_REMOTE ] ) ;
2012-08-10 00:51:50 +00:00
if ( ipv6_addr_any ( & daddr ) )
return - EINVAL ;
}
out :
2017-06-25 23:56:01 +02:00
return ip6gre_tunnel_validate ( tb , data , extack ) ;
2012-08-10 00:51:50 +00:00
}
2017-11-30 11:51:29 -08:00
static int ip6erspan_tap_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
{
__be16 flags = 0 ;
2017-12-13 16:38:57 -08:00
int ret , ver = 0 ;
2017-11-30 11:51:29 -08:00
if ( ! data )
return 0 ;
ret = ip6gre_tap_validate ( tb , data , extack ) ;
if ( ret )
return ret ;
/* ERSPAN should only have GRE sequence and key flag */
if ( data [ IFLA_GRE_OFLAGS ] )
flags | = nla_get_be16 ( data [ IFLA_GRE_OFLAGS ] ) ;
if ( data [ IFLA_GRE_IFLAGS ] )
flags | = nla_get_be16 ( data [ IFLA_GRE_IFLAGS ] ) ;
if ( ! data [ IFLA_GRE_COLLECT_METADATA ] & &
flags ! = ( GRE_SEQ | GRE_KEY ) )
return - EINVAL ;
/* ERSPAN Session ID only has 10-bit. Since we reuse
* 32 - bit key field as ID , check it ' s range .
*/
if ( data [ IFLA_GRE_IKEY ] & &
( ntohl ( nla_get_be32 ( data [ IFLA_GRE_IKEY ] ) ) & ~ ID_MASK ) )
return - EINVAL ;
if ( data [ IFLA_GRE_OKEY ] & &
( ntohl ( nla_get_be32 ( data [ IFLA_GRE_OKEY ] ) ) & ~ ID_MASK ) )
return - EINVAL ;
2017-12-13 16:38:57 -08:00
if ( data [ IFLA_GRE_ERSPAN_VER ] ) {
ver = nla_get_u8 ( data [ IFLA_GRE_ERSPAN_VER ] ) ;
if ( ver ! = 1 & & ver ! = 2 )
2017-11-30 11:51:29 -08:00
return - EINVAL ;
}
2017-12-13 16:38:57 -08:00
if ( ver = = 1 ) {
if ( data [ IFLA_GRE_ERSPAN_INDEX ] ) {
u32 index = nla_get_u32 ( data [ IFLA_GRE_ERSPAN_INDEX ] ) ;
if ( index & ~ INDEX_MASK )
return - EINVAL ;
}
} else if ( ver = = 2 ) {
if ( data [ IFLA_GRE_ERSPAN_DIR ] ) {
u16 dir = nla_get_u8 ( data [ IFLA_GRE_ERSPAN_DIR ] ) ;
if ( dir & ~ ( DIR_MASK > > DIR_OFFSET ) )
return - EINVAL ;
}
if ( data [ IFLA_GRE_ERSPAN_HWID ] ) {
u16 hwid = nla_get_u16 ( data [ IFLA_GRE_ERSPAN_HWID ] ) ;
if ( hwid & ~ ( HWID_MASK > > HWID_OFFSET ) )
return - EINVAL ;
}
}
2017-11-30 11:51:29 -08:00
return 0 ;
}
2012-08-10 00:51:50 +00:00
2019-02-15 15:10:32 +01:00
static void ip6erspan_set_version ( struct nlattr * data [ ] ,
struct __ip6_tnl_parm * parms )
{
2019-02-20 09:23:03 +01:00
if ( ! data )
return ;
2019-02-15 15:10:32 +01:00
parms - > erspan_ver = 1 ;
if ( data [ IFLA_GRE_ERSPAN_VER ] )
parms - > erspan_ver = nla_get_u8 ( data [ IFLA_GRE_ERSPAN_VER ] ) ;
if ( parms - > erspan_ver = = 1 ) {
if ( data [ IFLA_GRE_ERSPAN_INDEX ] )
parms - > index = nla_get_u32 ( data [ IFLA_GRE_ERSPAN_INDEX ] ) ;
} else if ( parms - > erspan_ver = = 2 ) {
if ( data [ IFLA_GRE_ERSPAN_DIR ] )
parms - > dir = nla_get_u8 ( data [ IFLA_GRE_ERSPAN_DIR ] ) ;
if ( data [ IFLA_GRE_ERSPAN_HWID ] )
parms - > hwid = nla_get_u16 ( data [ IFLA_GRE_ERSPAN_HWID ] ) ;
}
}
2012-08-10 00:51:50 +00:00
static void ip6gre_netlink_parms ( struct nlattr * data [ ] ,
struct __ip6_tnl_parm * parms )
{
memset ( parms , 0 , sizeof ( * parms ) ) ;
if ( ! data )
return ;
if ( data [ IFLA_GRE_LINK ] )
parms - > link = nla_get_u32 ( data [ IFLA_GRE_LINK ] ) ;
if ( data [ IFLA_GRE_IFLAGS ] )
2016-05-09 17:12:09 -07:00
parms - > i_flags = gre_flags_to_tnl_flags (
nla_get_be16 ( data [ IFLA_GRE_IFLAGS ] ) ) ;
2012-08-10 00:51:50 +00:00
if ( data [ IFLA_GRE_OFLAGS ] )
2016-05-09 17:12:09 -07:00
parms - > o_flags = gre_flags_to_tnl_flags (
nla_get_be16 ( data [ IFLA_GRE_OFLAGS ] ) ) ;
2012-08-10 00:51:50 +00:00
if ( data [ IFLA_GRE_IKEY ] )
parms - > i_key = nla_get_be32 ( data [ IFLA_GRE_IKEY ] ) ;
if ( data [ IFLA_GRE_OKEY ] )
parms - > o_key = nla_get_be32 ( data [ IFLA_GRE_OKEY ] ) ;
if ( data [ IFLA_GRE_LOCAL ] )
2015-03-29 16:59:26 +02:00
parms - > laddr = nla_get_in6_addr ( data [ IFLA_GRE_LOCAL ] ) ;
2012-08-10 00:51:50 +00:00
if ( data [ IFLA_GRE_REMOTE ] )
2015-03-29 16:59:26 +02:00
parms - > raddr = nla_get_in6_addr ( data [ IFLA_GRE_REMOTE ] ) ;
2012-08-10 00:51:50 +00:00
if ( data [ IFLA_GRE_TTL ] )
parms - > hop_limit = nla_get_u8 ( data [ IFLA_GRE_TTL ] ) ;
if ( data [ IFLA_GRE_ENCAP_LIMIT ] )
parms - > encap_limit = nla_get_u8 ( data [ IFLA_GRE_ENCAP_LIMIT ] ) ;
if ( data [ IFLA_GRE_FLOWINFO ] )
2016-09-24 14:01:04 -04:00
parms - > flowinfo = nla_get_be32 ( data [ IFLA_GRE_FLOWINFO ] ) ;
2012-08-10 00:51:50 +00:00
if ( data [ IFLA_GRE_FLAGS ] )
parms - > flags = nla_get_u32 ( data [ IFLA_GRE_FLAGS ] ) ;
2017-04-19 12:30:53 -04:00
if ( data [ IFLA_GRE_FWMARK ] )
parms - > fwmark = nla_get_u32 ( data [ IFLA_GRE_FWMARK ] ) ;
2017-11-30 11:51:29 -08:00
2017-12-01 15:26:08 -08:00
if ( data [ IFLA_GRE_COLLECT_METADATA ] )
parms - > collect_md = true ;
2012-08-10 00:51:50 +00:00
}
static int ip6gre_tap_init ( struct net_device * dev )
{
2015-09-15 14:30:05 -07:00
int ret ;
2012-08-10 00:51:50 +00:00
2015-09-15 14:30:05 -07:00
ret = ip6gre_tunnel_init_common ( dev ) ;
if ( ret )
return ret ;
2012-08-10 00:51:50 +00:00
2016-06-08 20:15:43 +01:00
dev - > priv_flags | = IFF_LIVE_ADDR_CHANGE ;
2012-08-10 00:51:50 +00:00
return 0 ;
}
static const struct net_device_ops ip6gre_tap_netdev_ops = {
. ndo_init = ip6gre_tap_init ,
. ndo_uninit = ip6gre_tunnel_uninit ,
. ndo_start_xmit = ip6gre_tunnel_xmit ,
. ndo_set_mac_address = eth_mac_addr ,
. ndo_validate_addr = eth_validate_addr ,
2016-04-29 17:12:21 -07:00
. ndo_change_mtu = ip6_tnl_change_mtu ,
2013-03-25 14:50:00 +00:00
. ndo_get_stats64 = ip_tunnel_get_stats64 ,
2015-04-02 17:07:01 +02:00
. ndo_get_iflink = ip6_tnl_get_iflink ,
2012-08-10 00:51:50 +00:00
} ;
2018-05-17 16:36:51 +02:00
/*
 * Compute the ERSPAN tunnel header sizes.
 *
 * Sets tunnel->tun_hlen to 8 and tunnel->hlen to the sum of tun_hlen,
 * encap_hlen and erspan_hdr_len() for the configured ERSPAN version, then
 * sets the device needed_headroom to LL_MAX_HEADER plus the total length.
 *
 * Returns tunnel->hlen plus sizeof(struct ipv6hdr).
 */
static int ip6erspan_calc_hlen ( struct ip6_tnl * tunnel )
{
int t_hlen ;
tunnel - > tun_hlen = 8 ;
tunnel - > hlen = tunnel - > tun_hlen + tunnel - > encap_hlen +
erspan_hdr_len ( tunnel - > parms . erspan_ver ) ;
t_hlen = tunnel - > hlen + sizeof ( struct ipv6hdr ) ;
net: ipv6_gre: Fix GRO to work on IPv6 over GRE tap
IPv6 GRO over GRE tap is not working while GRO is not set
over the native interface.
gro_list_prepare function updates the same_flow variable
of existing sessions to 1 if their mac headers match the one
of the incoming packet.
same_flow is used to filter out non-matching sessions and keep
potential ones for aggregation.
The number of bytes to compare should be the number of bytes
in the mac headers. In gro_list_prepare this number is set to
be skb->dev->hard_header_len. For GRE interfaces this hard_header_len
should be as it is set in the initialization process (when GRE is
created), it should not be overridden. But currently it is being overridden
by the value that is actually supposed to represent the needed_headroom.
Therefore, the number of bytes compared in order to decide whether the
the mac headers are the same is greater than the length of the headers.
As it's documented in netdevice.h, hard_header_len is the maximum
hardware header length, and needed_headroom is the extra headroom
the hardware may need.
hard_header_len is basically all the bytes received by the physical
till layer 3 header of the packet received by the interface.
For example, if the interface is a GRE tap then the needed_headroom
should be the total length of the following headers:
IP header of the physical, GRE header, mac header of GRE.
It is often used to calculate the MTU of the created interface.
This patch removes the override of the hard_header_len, and
assigns the calculated value to needed_headroom.
This way, the comparison in gro_list_prepare is really of
the mac headers, and if the packets have the same mac headers
the same_flow will be set to 1.
Performance testing: 45% higher bandwidth.
Measuring bandwidth of single-stream IPv4 TCP traffic over IPv6
GRE tap while GRO is not set on the native.
NIC: ConnectX-4LX
Before (GRO not working) : 7.2 Gbits/sec
After (GRO working): 10.5 Gbits/sec
Signed-off-by: Maria Pasechnik <mariap@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:46:30 +03:00
tunnel - > dev - > needed_headroom = LL_MAX_HEADER + t_hlen ;
2018-05-17 16:36:51 +02:00
return t_hlen ;
}
2017-11-30 11:51:29 -08:00
static int ip6erspan_tap_init ( struct net_device * dev )
{
struct ip6_tnl * tunnel ;
int t_hlen ;
int ret ;
tunnel = netdev_priv ( dev ) ;
tunnel - > dev = dev ;
tunnel - > net = dev_net ( dev ) ;
strcpy ( tunnel - > parms . name , dev - > name ) ;
dev - > tstats = netdev_alloc_pcpu_stats ( struct pcpu_sw_netstats ) ;
if ( ! dev - > tstats )
return - ENOMEM ;
ret = dst_cache_init ( & tunnel - > dst_cache , GFP_KERNEL ) ;
2018-05-07 10:45:27 +03:00
if ( ret )
goto cleanup_alloc_pcpu_stats ;
ret = gro_cells_init ( & tunnel - > gro_cells , dev ) ;
if ( ret )
goto cleanup_dst_cache_init ;
2017-11-30 11:51:29 -08:00
2018-05-17 16:36:51 +02:00
t_hlen = ip6erspan_calc_hlen ( tunnel ) ;
2017-11-30 11:51:29 -08:00
dev - > mtu = ETH_DATA_LEN - t_hlen ;
if ( dev - > type = = ARPHRD_ETHER )
dev - > mtu - = ETH_HLEN ;
if ( ! ( tunnel - > parms . flags & IP6_TNL_F_IGN_ENCAP_LIMIT ) )
dev - > mtu - = 8 ;
dev - > priv_flags | = IFF_LIVE_ADDR_CHANGE ;
2018-05-17 16:36:51 +02:00
ip6erspan_tnl_link_config ( tunnel , 1 ) ;
2017-11-30 11:51:29 -08:00
return 0 ;
2018-05-07 10:45:27 +03:00
cleanup_dst_cache_init :
dst_cache_destroy ( & tunnel - > dst_cache ) ;
cleanup_alloc_pcpu_stats :
free_percpu ( dev - > tstats ) ;
dev - > tstats = NULL ;
return ret ;
2017-11-30 11:51:29 -08:00
}
/*
 * Device operations for ip6erspan devices: ERSPAN-specific init, uninit
 * and transmit hooks, Ethernet MAC address handling, and the same MTU,
 * stats and iflink hooks that ip6gre_tap_netdev_ops uses.
 */
static const struct net_device_ops ip6erspan_netdev_ops = {
. ndo_init = ip6erspan_tap_init ,
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
. ndo_uninit = ip6erspan_tunnel_uninit ,
2017-11-30 11:51:29 -08:00
. ndo_start_xmit = ip6erspan_tunnel_xmit ,
. ndo_set_mac_address = eth_mac_addr ,
. ndo_validate_addr = eth_validate_addr ,
. ndo_change_mtu = ip6_tnl_change_mtu ,
. ndo_get_stats64 = ip_tunnel_get_stats64 ,
. ndo_get_iflink = ip6_tnl_get_iflink ,
} ;
2012-08-10 00:51:50 +00:00
/*
 * Initialise a net_device as an ip6gre tap (Ethernet-style) device.
 *
 * Starts from ether_setup(), then sets max_mtu to 0, installs
 * ip6gre_tap_netdev_ops, registers ip6gre_dev_free() as the private
 * destructor and sets needs_free_netdev, marks the device
 * NETIF_F_NETNS_LOCAL, clears IFF_TX_SKB_SHARING, sets
 * IFF_LIVE_ADDR_CHANGE and calls netif_keep_dst().
 */
static void ip6gre_tap_setup ( struct net_device * dev )
{
ether_setup ( dev ) ;
2017-12-18 14:25:09 +08:00
dev - > max_mtu = 0 ;
2012-08-10 00:51:50 +00:00
dev - > netdev_ops = & ip6gre_tap_netdev_ops ;
net: Fix inconsistent teardown and release of private netdev state.
Network devices can allocate reasources and private memory using
netdev_ops->ndo_init(). However, the release of these resources
can occur in one of two different places.
Either netdev_ops->ndo_uninit() or netdev->destructor().
The decision of which operation frees the resources depends upon
whether it is necessary for all netdev refs to be released before it
is safe to perform the freeing.
netdev_ops->ndo_uninit() presumably can occur right after the
NETDEV_UNREGISTER notifier completes and the unicast and multicast
address lists are flushed.
netdev->destructor(), on the other hand, does not run until the
netdev references all go away.
Further complicating the situation is that netdev->destructor()
almost universally does also a free_netdev().
This creates a problem for the logic in register_netdevice().
Because all callers of register_netdevice() manage the freeing
of the netdev, and invoke free_netdev(dev) if register_netdevice()
fails.
If netdev_ops->ndo_init() succeeds, but something else fails inside
of register_netdevice(), it does call ndo_ops->ndo_uninit(). But
it is not able to invoke netdev->destructor().
This is because netdev->destructor() will do a free_netdev() and
then the caller of register_netdevice() will do the same.
However, this means that the resources that would normally be released
by netdev->destructor() will not be.
Over the years drivers have added local hacks to deal with this, by
invoking their destructor parts by hand when register_netdevice()
fails.
Many drivers do not try to deal with this, and instead we have leaks.
Let's close this hole by formalizing the distinction between what
private things need to be freed up by netdev->destructor() and whether
the driver needs unregister_netdevice() to perform the free_netdev().
netdev->priv_destructor() performs all actions to free up the private
resources that used to be freed by netdev->destructor(), except for
free_netdev().
netdev->needs_free_netdev is a boolean that indicates whether
free_netdev() should be done at the end of unregister_netdevice().
Now, register_netdevice() can sanely release all resources after
ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit()
and netdev->priv_destructor().
And at the end of unregister_netdevice(), we invoke
netdev->priv_destructor() and optionally call free_netdev().
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-08 12:52:56 -04:00
dev - > needs_free_netdev = true ;
dev - > priv_destructor = ip6gre_dev_free ;
2012-08-10 00:51:50 +00:00
dev - > features | = NETIF_F_NETNS_LOCAL ;
2016-02-17 15:32:53 +01:00
dev - > priv_flags & = ~ IFF_TX_SKB_SHARING ;
2016-06-08 20:15:43 +01:00
dev - > priv_flags | = IFF_LIVE_ADDR_CHANGE ;
2017-09-28 13:23:50 +08:00
netif_keep_dst ( dev ) ;
2012-08-10 00:51:50 +00:00
}
2016-05-18 09:06:19 -07:00
static bool ip6gre_netlink_encap_parms ( struct nlattr * data [ ] ,
struct ip_tunnel_encap * ipencap )
{
bool ret = false ;
memset ( ipencap , 0 , sizeof ( * ipencap ) ) ;
if ( ! data )
return ret ;
if ( data [ IFLA_GRE_ENCAP_TYPE ] ) {
ret = true ;
ipencap - > type = nla_get_u16 ( data [ IFLA_GRE_ENCAP_TYPE ] ) ;
}
if ( data [ IFLA_GRE_ENCAP_FLAGS ] ) {
ret = true ;
ipencap - > flags = nla_get_u16 ( data [ IFLA_GRE_ENCAP_FLAGS ] ) ;
}
if ( data [ IFLA_GRE_ENCAP_SPORT ] ) {
ret = true ;
ipencap - > sport = nla_get_be16 ( data [ IFLA_GRE_ENCAP_SPORT ] ) ;
}
if ( data [ IFLA_GRE_ENCAP_DPORT ] ) {
ret = true ;
ipencap - > dport = nla_get_be16 ( data [ IFLA_GRE_ENCAP_DPORT ] ) ;
}
return ret ;
}
2018-05-17 16:36:39 +02:00
/*
 * Common part of link creation for the ip6gre device types.
 *
 * Applies the encapsulation attributes through ip6_tnl_encap_setup() when
 * any are present, gives ARPHRD_ETHER devices a random MAC address if no
 * IFLA_ADDRESS was supplied, binds the private tunnel state to the device
 * and registers it.  After a successful registration a user supplied
 * IFLA_MTU is applied with ip6_tnl_change_mtu() and a reference is taken
 * on the device.
 *
 * Returns 0 on success, a negative error from ip6_tnl_encap_setup(), or
 * the error from register_netdevice().
 *
 * NOTE(review): the err declared inside the encap block shadows the
 * function-level err; the return value of ip6_tnl_change_mtu() is ignored;
 * src_net and extack are not used here.
 */
static int ip6gre_newlink_common ( struct net * src_net , struct net_device * dev ,
struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
2012-08-10 00:51:50 +00:00
{
struct ip6_tnl * nt ;
2016-05-18 09:06:19 -07:00
struct ip_tunnel_encap ipencap ;
2012-08-10 00:51:50 +00:00
int err ;
nt = netdev_priv ( dev ) ;
2016-05-18 09:06:19 -07:00
if ( ip6gre_netlink_encap_parms ( data , & ipencap ) ) {
int err = ip6_tnl_encap_setup ( nt , & ipencap ) ;
if ( err < 0 )
return err ;
}
2012-08-10 00:51:50 +00:00
if ( dev - > type = = ARPHRD_ETHER & & ! tb [ IFLA_ADDRESS ] )
eth_hw_addr_random ( dev ) ;
nt - > dev = dev ;
2013-08-13 17:51:12 +02:00
nt - > net = dev_net ( dev ) ;
2012-08-10 00:51:50 +00:00
err = register_netdevice ( dev ) ;
if ( err )
goto out ;
ip6_gre: init dev->mtu and dev->hard_header_len correctly
Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path,
call common GRE functions") moved dev->mtu initialization
from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a
result, the previously set values, before ndo_init(), are
reset in the following cases:
* rtnl_create_link() can update dev->mtu from IFLA_MTU
parameter.
* ip6gre_tnl_link_config() is invoked before ndo_init() in
netlink and ioctl setup, so ndo_init() can reset MTU
adjustments with the lower device MTU as well, dev->mtu
and dev->hard_header_len.
Not applicable for ip6gretap because it has one more call
to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init().
Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]'
parameter if a user sets it manually on a device creation,
and fix the second one by moving ip6gre_tnl_link_config()
call after register_netdevice().
Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions")
Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-01-18 20:51:12 +03:00
if ( tb [ IFLA_MTU ] )
ip6_tnl_change_mtu ( dev , nla_get_u32 ( tb [ IFLA_MTU ] ) ) ;
2012-08-10 00:51:50 +00:00
dev_hold ( dev ) ;
out :
return err ;
}
2018-05-17 16:36:39 +02:00
static int ip6gre_newlink ( struct net * src_net , struct net_device * dev ,
struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
{
struct ip6_tnl * nt = netdev_priv ( dev ) ;
struct net * net = dev_net ( dev ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
struct ip6gre_net * ign ;
int err ;
ip6gre_netlink_parms ( data , & nt - > parms ) ;
ign = net_generic ( net , ip6gre_net_id ) ;
if ( nt - > parms . collect_md ) {
if ( rtnl_dereference ( ign - > collect_md_tun ) )
return - EEXIST ;
} else {
if ( ip6gre_tunnel_find ( net , & nt - > parms , dev - > type ) )
return - EEXIST ;
}
2018-05-17 16:36:39 +02:00
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
err = ip6gre_newlink_common ( src_net , dev , tb , data , extack ) ;
2018-05-17 16:36:39 +02:00
if ( ! err ) {
ip6gre_tnl_link_config ( nt , ! tb [ IFLA_MTU ] ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6gre_tunnel_link_md ( ign , nt ) ;
2018-05-17 16:36:39 +02:00
ip6gre_tunnel_link ( net_generic ( net , ip6gre_net_id ) , nt ) ;
}
return err ;
}
2018-05-17 16:36:45 +02:00
static struct ip6_tnl *
ip6gre_changelink_common ( struct net_device * dev , struct nlattr * tb [ ] ,
struct nlattr * data [ ] , struct __ip6_tnl_parm * p_p ,
struct netlink_ext_ack * extack )
2012-08-10 00:51:50 +00:00
{
2014-04-22 10:15:24 +02:00
struct ip6_tnl * t , * nt = netdev_priv ( dev ) ;
struct net * net = nt - > net ;
2012-08-10 00:51:50 +00:00
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
2016-05-18 09:06:19 -07:00
struct ip_tunnel_encap ipencap ;
2012-08-10 00:51:50 +00:00
if ( dev = = ign - > fb_tunnel_dev )
2018-05-17 16:36:45 +02:00
return ERR_PTR ( - EINVAL ) ;
2012-08-10 00:51:50 +00:00
2016-05-18 09:06:19 -07:00
if ( ip6gre_netlink_encap_parms ( data , & ipencap ) ) {
int err = ip6_tnl_encap_setup ( nt , & ipencap ) ;
if ( err < 0 )
2018-05-17 16:36:45 +02:00
return ERR_PTR ( err ) ;
2016-05-18 09:06:19 -07:00
}
2018-05-17 16:36:45 +02:00
ip6gre_netlink_parms ( data , p_p ) ;
2012-08-10 00:51:50 +00:00
2018-05-17 16:36:45 +02:00
t = ip6gre_tunnel_locate ( net , p_p , 0 ) ;
2012-08-10 00:51:50 +00:00
if ( t ) {
if ( t - > dev ! = dev )
2018-05-17 16:36:45 +02:00
return ERR_PTR ( - EEXIST ) ;
2012-08-10 00:51:50 +00:00
} else {
t = nt ;
}
2018-05-17 16:36:45 +02:00
return t ;
}
static int ip6gre_changelink ( struct net_device * dev , struct nlattr * tb [ ] ,
struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
{
2019-01-09 10:57:21 +01:00
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct ip6gre_net * ign = net_generic ( t - > net , ip6gre_net_id ) ;
2018-05-17 16:36:45 +02:00
struct __ip6_tnl_parm p ;
t = ip6gre_changelink_common ( dev , tb , data , & p , extack ) ;
if ( IS_ERR ( t ) )
return PTR_ERR ( t ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6gre_tunnel_unlink_md ( ign , t ) ;
2015-12-03 17:21:50 +01:00
ip6gre_tunnel_unlink ( ign , t ) ;
ip6gre_tnl_change ( t , & p , ! tb [ IFLA_MTU ] ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6gre_tunnel_link_md ( ign , t ) ;
2015-12-03 17:21:50 +01:00
ip6gre_tunnel_link ( ign , t ) ;
2012-08-10 00:51:50 +00:00
return 0 ;
}
2014-04-14 17:11:38 +02:00
static void ip6gre_dellink ( struct net_device * dev , struct list_head * head )
{
struct net * net = dev_net ( dev ) ;
struct ip6gre_net * ign = net_generic ( net , ip6gre_net_id ) ;
if ( dev ! = ign - > fb_tunnel_dev )
unregister_netdevice_queue ( dev , head ) ;
}
2012-08-10 00:51:50 +00:00
static size_t ip6gre_get_size ( const struct net_device * dev )
{
return
/* IFLA_GRE_LINK */
nla_total_size ( 4 ) +
/* IFLA_GRE_IFLAGS */
nla_total_size ( 2 ) +
/* IFLA_GRE_OFLAGS */
nla_total_size ( 2 ) +
/* IFLA_GRE_IKEY */
nla_total_size ( 4 ) +
/* IFLA_GRE_OKEY */
nla_total_size ( 4 ) +
/* IFLA_GRE_LOCAL */
2012-11-09 05:34:56 +00:00
nla_total_size ( sizeof ( struct in6_addr ) ) +
2012-08-10 00:51:50 +00:00
/* IFLA_GRE_REMOTE */
2012-11-09 05:34:56 +00:00
nla_total_size ( sizeof ( struct in6_addr ) ) +
2012-08-10 00:51:50 +00:00
/* IFLA_GRE_TTL */
nla_total_size ( 1 ) +
/* IFLA_GRE_ENCAP_LIMIT */
nla_total_size ( 1 ) +
/* IFLA_GRE_FLOWINFO */
nla_total_size ( 4 ) +
/* IFLA_GRE_FLAGS */
nla_total_size ( 4 ) +
2016-05-18 09:06:19 -07:00
/* IFLA_GRE_ENCAP_TYPE */
nla_total_size ( 2 ) +
/* IFLA_GRE_ENCAP_FLAGS */
nla_total_size ( 2 ) +
/* IFLA_GRE_ENCAP_SPORT */
nla_total_size ( 2 ) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size ( 2 ) +
2017-12-01 15:26:08 -08:00
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size ( 0 ) +
2017-04-19 12:30:53 -04:00
/* IFLA_GRE_FWMARK */
nla_total_size ( 4 ) +
2017-11-30 11:51:29 -08:00
/* IFLA_GRE_ERSPAN_INDEX */
nla_total_size ( 4 ) +
2012-08-10 00:51:50 +00:00
0 ;
}
static int ip6gre_fill_info ( struct sk_buff * skb , const struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct __ip6_tnl_parm * p = & t - > parms ;
2019-01-28 22:23:49 +01:00
__be16 o_flags = p - > o_flags ;
2019-02-19 17:42:06 +01:00
if ( p - > erspan_ver = = 1 | | p - > erspan_ver = = 2 ) {
if ( ! p - > collect_md )
o_flags | = TUNNEL_KEY ;
if ( nla_put_u8 ( skb , IFLA_GRE_ERSPAN_VER , p - > erspan_ver ) )
goto nla_put_failure ;
if ( p - > erspan_ver = = 1 ) {
if ( nla_put_u32 ( skb , IFLA_GRE_ERSPAN_INDEX , p - > index ) )
goto nla_put_failure ;
} else {
if ( nla_put_u8 ( skb , IFLA_GRE_ERSPAN_DIR , p - > dir ) )
goto nla_put_failure ;
if ( nla_put_u16 ( skb , IFLA_GRE_ERSPAN_HWID , p - > hwid ) )
goto nla_put_failure ;
}
}
2012-08-10 00:51:50 +00:00
if ( nla_put_u32 ( skb , IFLA_GRE_LINK , p - > link ) | |
2016-05-09 17:12:09 -07:00
nla_put_be16 ( skb , IFLA_GRE_IFLAGS ,
gre_tnl_flags_to_gre_flags ( p - > i_flags ) ) | |
nla_put_be16 ( skb , IFLA_GRE_OFLAGS ,
2019-01-28 22:23:49 +01:00
gre_tnl_flags_to_gre_flags ( o_flags ) ) | |
2012-08-10 00:51:50 +00:00
nla_put_be32 ( skb , IFLA_GRE_IKEY , p - > i_key ) | |
nla_put_be32 ( skb , IFLA_GRE_OKEY , p - > o_key ) | |
2015-03-29 16:59:25 +02:00
nla_put_in6_addr ( skb , IFLA_GRE_LOCAL , & p - > laddr ) | |
nla_put_in6_addr ( skb , IFLA_GRE_REMOTE , & p - > raddr ) | |
2012-08-10 00:51:50 +00:00
nla_put_u8 ( skb , IFLA_GRE_TTL , p - > hop_limit ) | |
nla_put_u8 ( skb , IFLA_GRE_ENCAP_LIMIT , p - > encap_limit ) | |
nla_put_be32 ( skb , IFLA_GRE_FLOWINFO , p - > flowinfo ) | |
2017-04-19 12:30:53 -04:00
nla_put_u32 ( skb , IFLA_GRE_FLAGS , p - > flags ) | |
2019-02-19 17:42:06 +01:00
nla_put_u32 ( skb , IFLA_GRE_FWMARK , p - > fwmark ) )
2012-08-10 00:51:50 +00:00
goto nla_put_failure ;
2016-05-18 09:06:19 -07:00
if ( nla_put_u16 ( skb , IFLA_GRE_ENCAP_TYPE ,
t - > encap . type ) | |
nla_put_be16 ( skb , IFLA_GRE_ENCAP_SPORT ,
t - > encap . sport ) | |
nla_put_be16 ( skb , IFLA_GRE_ENCAP_DPORT ,
t - > encap . dport ) | |
nla_put_u16 ( skb , IFLA_GRE_ENCAP_FLAGS ,
t - > encap . flags ) )
goto nla_put_failure ;
2017-12-01 15:26:08 -08:00
if ( p - > collect_md ) {
if ( nla_put_flag ( skb , IFLA_GRE_COLLECT_METADATA ) )
goto nla_put_failure ;
}
2012-08-10 00:51:50 +00:00
return 0 ;
nla_put_failure :
return - EMSGSIZE ;
}
/*
 * Netlink attribute policy shared by the ip6gre, ip6gretap and ip6erspan
 * link kinds (all three rtnl_link_ops in this file point at it).
 *
 * IFLA_GRE_LOCAL / IFLA_GRE_REMOTE are described by length only, sized
 * like the saddr/daddr fields of struct ipv6hdr.
 * IFLA_GRE_ENCAP_SPORT / IFLA_GRE_ENCAP_DPORT are declared NLA_U16 here
 * but are read with nla_get_be16() by ip6gre_netlink_encap_parms().
 */
static const struct nla_policy ip6gre_policy [ IFLA_GRE_MAX + 1 ] = {
[ IFLA_GRE_LINK ] = { . type = NLA_U32 } ,
[ IFLA_GRE_IFLAGS ] = { . type = NLA_U16 } ,
[ IFLA_GRE_OFLAGS ] = { . type = NLA_U16 } ,
[ IFLA_GRE_IKEY ] = { . type = NLA_U32 } ,
[ IFLA_GRE_OKEY ] = { . type = NLA_U32 } ,
[ IFLA_GRE_LOCAL ] = { . len = FIELD_SIZEOF ( struct ipv6hdr , saddr ) } ,
[ IFLA_GRE_REMOTE ] = { . len = FIELD_SIZEOF ( struct ipv6hdr , daddr ) } ,
[ IFLA_GRE_TTL ] = { . type = NLA_U8 } ,
[ IFLA_GRE_ENCAP_LIMIT ] = { . type = NLA_U8 } ,
[ IFLA_GRE_FLOWINFO ] = { . type = NLA_U32 } ,
[ IFLA_GRE_FLAGS ] = { . type = NLA_U32 } ,
2016-05-18 09:06:19 -07:00
[ IFLA_GRE_ENCAP_TYPE ] = { . type = NLA_U16 } ,
[ IFLA_GRE_ENCAP_FLAGS ] = { . type = NLA_U16 } ,
[ IFLA_GRE_ENCAP_SPORT ] = { . type = NLA_U16 } ,
[ IFLA_GRE_ENCAP_DPORT ] = { . type = NLA_U16 } ,
2017-12-01 15:26:08 -08:00
[ IFLA_GRE_COLLECT_METADATA ] = { . type = NLA_FLAG } ,
2017-04-19 12:30:53 -04:00
[ IFLA_GRE_FWMARK ] = { . type = NLA_U32 } ,
2017-11-30 11:51:29 -08:00
[ IFLA_GRE_ERSPAN_INDEX ] = { . type = NLA_U32 } ,
2017-12-13 16:38:57 -08:00
[ IFLA_GRE_ERSPAN_VER ] = { . type = NLA_U8 } ,
[ IFLA_GRE_ERSPAN_DIR ] = { . type = NLA_U8 } ,
[ IFLA_GRE_ERSPAN_HWID ] = { . type = NLA_U16 } ,
2012-08-10 00:51:50 +00:00
} ;
2017-11-30 11:51:29 -08:00
static void ip6erspan_tap_setup ( struct net_device * dev )
{
ether_setup ( dev ) ;
dev - > netdev_ops = & ip6erspan_netdev_ops ;
dev - > needs_free_netdev = true ;
dev - > priv_destructor = ip6gre_dev_free ;
dev - > features | = NETIF_F_NETNS_LOCAL ;
dev - > priv_flags & = ~ IFF_TX_SKB_SHARING ;
dev - > priv_flags | = IFF_LIVE_ADDR_CHANGE ;
netif_keep_dst ( dev ) ;
}
2018-05-17 16:36:51 +02:00
static int ip6erspan_newlink ( struct net * src_net , struct net_device * dev ,
struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
{
struct ip6_tnl * nt = netdev_priv ( dev ) ;
struct net * net = dev_net ( dev ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
struct ip6gre_net * ign ;
int err ;
ip6gre_netlink_parms ( data , & nt - > parms ) ;
2019-02-15 15:10:32 +01:00
ip6erspan_set_version ( data , & nt - > parms ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ign = net_generic ( net , ip6gre_net_id ) ;
if ( nt - > parms . collect_md ) {
if ( rtnl_dereference ( ign - > collect_md_tun_erspan ) )
return - EEXIST ;
} else {
if ( ip6gre_tunnel_find ( net , & nt - > parms , dev - > type ) )
return - EEXIST ;
}
2018-05-17 16:36:51 +02:00
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
err = ip6gre_newlink_common ( src_net , dev , tb , data , extack ) ;
2018-05-17 16:36:51 +02:00
if ( ! err ) {
ip6erspan_tnl_link_config ( nt , ! tb [ IFLA_MTU ] ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6erspan_tunnel_link_md ( ign , nt ) ;
2018-05-17 16:36:51 +02:00
ip6gre_tunnel_link ( net_generic ( net , ip6gre_net_id ) , nt ) ;
}
return err ;
}
/*
 * Apply the link configuration of ERSPAN tunnel @t: the part common to all
 * ip6gre flavours, then the route-dependent part using the ERSPAN header
 * length. @set_mtu is passed through to the route step.
 */
static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
	int hlen;

	ip6gre_tnl_link_config_common(t);

	/* Computed after the common step, as in the original call order. */
	hlen = ip6erspan_calc_hlen(t);
	ip6gre_tnl_link_config_route(t, set_mtu, hlen);
}
/*
 * Copy the new parameters @p into ERSPAN tunnel @t and re-apply its link
 * configuration. @set_mtu is forwarded to ip6erspan_tnl_link_config().
 *
 * Always returns 0.
 */
static int ip6erspan_tnl_change(struct ip6_tnl *t,
				const struct __ip6_tnl_parm *p, int set_mtu)
{
	ip6gre_tnl_copy_tnl_parm(t, p);
	ip6erspan_tnl_link_config(t, set_mtu);

	return 0;
}
static int ip6erspan_changelink ( struct net_device * dev , struct nlattr * tb [ ] ,
struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
{
struct ip6gre_net * ign = net_generic ( dev_net ( dev ) , ip6gre_net_id ) ;
struct __ip6_tnl_parm p ;
struct ip6_tnl * t ;
t = ip6gre_changelink_common ( dev , tb , data , & p , extack ) ;
if ( IS_ERR ( t ) )
return PTR_ERR ( t ) ;
2019-02-15 15:10:32 +01:00
ip6erspan_set_version ( data , & p ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6gre_tunnel_unlink_md ( ign , t ) ;
2018-05-17 16:36:51 +02:00
ip6gre_tunnel_unlink ( ign , t ) ;
ip6erspan_tnl_change ( t , & p , ! tb [ IFLA_MTU ] ) ;
net: ip6_gre: fix tunnel metadata device sharing.
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails due to the 2nd tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-18 19:22:28 -07:00
ip6erspan_tunnel_link_md ( ign , t ) ;
2018-05-17 16:36:51 +02:00
ip6gre_tunnel_link ( ign , t ) ;
return 0 ;
}
2012-08-10 00:51:50 +00:00
/*
 * rtnl link operations for the ip6gre link kind.
 *
 * Of the three ops tables in this file, this is the only one that installs
 * ip6gre_dellink(), which skips the netns fallback tunnel device.
 */
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
. kind = " ip6gre " ,
. maxtype = IFLA_GRE_MAX ,
. policy = ip6gre_policy ,
. priv_size = sizeof ( struct ip6_tnl ) ,
. setup = ip6gre_tunnel_setup ,
. validate = ip6gre_tunnel_validate ,
. newlink = ip6gre_newlink ,
. changelink = ip6gre_changelink ,
2014-04-14 17:11:38 +02:00
. dellink = ip6gre_dellink ,
2012-08-10 00:51:50 +00:00
. get_size = ip6gre_get_size ,
. fill_info = ip6gre_fill_info ,
2015-01-15 15:11:17 +01:00
. get_link_net = ip6_tnl_get_link_net ,
2012-08-10 00:51:50 +00:00
} ;
/*
 * rtnl link operations for the ip6gretap link kind.
 *
 * Shares newlink/changelink/get_size/fill_info with ip6gre_link_ops but
 * uses the tap-specific setup and validate callbacks; no .dellink is set.
 */
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
. kind = " ip6gretap " ,
. maxtype = IFLA_GRE_MAX ,
. policy = ip6gre_policy ,
. priv_size = sizeof ( struct ip6_tnl ) ,
. setup = ip6gre_tap_setup ,
. validate = ip6gre_tap_validate ,
. newlink = ip6gre_newlink ,
. changelink = ip6gre_changelink ,
. get_size = ip6gre_get_size ,
. fill_info = ip6gre_fill_info ,
2015-01-20 15:15:43 +01:00
. get_link_net = ip6_tnl_get_link_net ,
2012-08-10 00:51:50 +00:00
} ;
2017-11-30 11:51:29 -08:00
/*
 * rtnl link operations for the ip6erspan link kind.
 *
 * Uses ERSPAN-specific setup, validate, newlink and changelink callbacks;
 * policy, get_size and fill_info are shared with the ip6gre kinds.
 */
static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
. kind = " ip6erspan " ,
. maxtype = IFLA_GRE_MAX ,
. policy = ip6gre_policy ,
. priv_size = sizeof ( struct ip6_tnl ) ,
. setup = ip6erspan_tap_setup ,
. validate = ip6erspan_tap_validate ,
2018-05-17 16:36:51 +02:00
. newlink = ip6erspan_newlink ,
. changelink = ip6erspan_changelink ,
2017-11-30 11:51:29 -08:00
. get_size = ip6gre_get_size ,
. fill_info = ip6gre_fill_info ,
. get_link_net = ip6_tnl_get_link_net ,
} ;
2012-08-10 00:51:50 +00:00
/*
 * And now the module code and kernel interface.
 */
/*
 * Module init: register, in order, the per-netns operations, the IPv6 GRE
 * protocol handler and the three rtnl link kinds (ip6gre, ip6gretap,
 * ip6erspan).
 *
 * On failure everything registered so far is torn down in reverse order
 * and the failing call's error is returned.
 */
static int __init ip6gre_init(void)
{
	int err;

	pr_info(" GRE over IPv6 tunneling driver \n ");

	err = register_pernet_device(&ip6gre_net_ops);
	if (err < 0)
		return err;

	err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
	if (err < 0) {
		pr_info(" %s: can't add protocol \n ", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ip6gre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ip6gre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&ip6erspan_tap_ops);
	if (err < 0)
		goto erspan_link_failed;

	return err;

	/* Unwind ladder: each label undoes the step before the failed one. */
erspan_link_failed:
	rtnl_link_unregister(&ip6gre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ip6gre_link_ops);
rtnl_link_failed:
	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
add_proto_failed:
	unregister_pernet_device(&ip6gre_net_ops);
	return err;
}
/*
 * Module exit: unregister the three rtnl link kinds, then the IPv6 GRE
 * protocol handler, and finally the per-netns operations.
 */
static void __exit ip6gre_fini(void)
{
	/* Link kinds first ... */
	rtnl_link_unregister(&ip6gre_tap_ops);
	rtnl_link_unregister(&ip6gre_link_ops);
	rtnl_link_unregister(&ip6erspan_tap_ops);

	/* ... then the protocol handler and the per-netns state. */
	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
	unregister_pernet_device(&ip6gre_net_ops);
}
/*
 * Module entry/exit hooks and metadata. One rtnl-link alias is declared
 * per link kind registered in ip6gre_init(), plus a netdev alias for the
 * ip6gre0 device name.
 */
module_init ( ip6gre_init ) ;
module_exit ( ip6gre_fini ) ;
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " D. Kozlov (xeb@mail.ru) " ) ;
MODULE_DESCRIPTION ( " GRE over IPv6 tunneling device " ) ;
MODULE_ALIAS_RTNL_LINK ( " ip6gre " ) ;
2014-09-24 11:03:00 +02:00
MODULE_ALIAS_RTNL_LINK ( " ip6gretap " ) ;
2017-12-13 16:38:57 -08:00
MODULE_ALIAS_RTNL_LINK ( " ip6erspan " ) ;
2012-08-10 00:51:50 +00:00
MODULE_ALIAS_NETDEV ( " ip6gre0 " ) ;