2013-08-19 10:07:34 +04:00
/*
* IPv6 virtual tunneling interface
*
* Copyright ( C ) 2013 secunet Security Networks AG
*
* Author :
* Steffen Klassert < steffen . klassert @ secunet . com >
*
* Based on :
* net / ipv6 / ip6_tunnel . c
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# include <linux/module.h>
# include <linux/capability.h>
# include <linux/errno.h>
# include <linux/types.h>
# include <linux/sockios.h>
# include <linux/icmp.h>
# include <linux/if.h>
# include <linux/in.h>
# include <linux/ip.h>
# include <linux/net.h>
# include <linux/in6.h>
# include <linux/netdevice.h>
# include <linux/if_arp.h>
# include <linux/icmpv6.h>
# include <linux/init.h>
# include <linux/route.h>
# include <linux/rtnetlink.h>
# include <linux/netfilter_ipv6.h>
# include <linux/slab.h>
# include <linux/hash.h>
# include <linux/uaccess.h>
# include <linux/atomic.h>
# include <net/icmp.h>
# include <net/ip.h>
# include <net/ip_tunnels.h>
# include <net/ipv6.h>
# include <net/ip6_route.h>
# include <net/addrconf.h>
# include <net/ip6_tunnel.h>
# include <net/xfrm.h>
# include <net/net_namespace.h>
# include <net/netns/generic.h>
2017-01-26 06:59:18 +03:00
# include <linux/etherdevice.h>
2013-08-19 10:07:34 +04:00
2016-08-10 12:03:35 +03:00
/* Number of hash bits used to pick a bucket for a (remote, local) pair. */
# define IP6_VTI_HASH_SIZE_SHIFT 5
/* Number of buckets: 2^IP6_VTI_HASH_SIZE_SHIFT. */
# define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
2013-08-19 10:07:34 +04:00
static u32 HASH ( const struct in6_addr * addr1 , const struct in6_addr * addr2 )
{
u32 hash = ipv6_addr_hash ( addr1 ) ^ ipv6_addr_hash ( addr2 ) ;
2016-08-10 12:03:35 +03:00
return hash_32 ( hash , IP6_VTI_HASH_SIZE_SHIFT ) ;
2013-08-19 10:07:34 +04:00
}
/*
 * Forward declarations for symbols that are used earlier in the file than
 * they are defined (device init/setup callbacks and the rtnl link ops).
 */
static int vti6_dev_init ( struct net_device * dev ) ;
static void vti6_dev_setup ( struct net_device * dev ) ;
static struct rtnl_link_ops vti6_link_ops __read_mostly ;
netns: make struct pernet_operations::id unsigned int
Make struct pernet_operations::id unsigned.
There are 2 reasons to do so:
1)
This field is really an index into a zero-based array and
thus is unsigned entity. Using negative value is out-of-bound
access by definition.
2)
On x86_64 unsigned 32-bit data which are mixed with pointers
via array indexing or offsets added or subtracted to pointers
are preferred to signed 32-bit data.
"int" being used as an array index needs to be sign-extended
to 64-bit before being used.
void f(long *p, int i)
{
g(p[i]);
}
roughly translates to
movsx rsi, esi
mov rdi, [rsi+...]
call g
MOVSX is 3 byte instruction which isn't necessary if the variable is
unsigned because x86_64 is zero extending by default.
Now, there is net_generic() function which, you guessed it right, uses
"int" as an array index:
static inline void *net_generic(const struct net *net, int id)
{
...
ptr = ng->ptr[id - 1];
...
}
And this function is used a lot, so those sign extensions add up.
Patch snipes ~1730 bytes on allyesconfig kernel (without all junk
messing with code generation):
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
Unfortunately some functions actually grow bigger.
This is a seemingly random artefact of code generation with register
allocator being used differently. gcc decides that some variable
needs to live in new r8+ registers and every access now requires REX
prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be
used which is longer than [r8]
However, overall balance is in negative direction:
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
function old new delta
nfsd4_lock 3886 3959 +73
tipc_link_build_proto_msg 1096 1140 +44
mac80211_hwsim_new_radio 2776 2808 +32
tipc_mon_rcv 1032 1058 +26
svcauth_gss_legacy_init 1413 1429 +16
tipc_bcbase_select_primary 379 392 +13
nfsd4_exchange_id 1247 1260 +13
nfsd4_setclientid_confirm 782 793 +11
...
put_client_renew_locked 494 480 -14
ip_set_sockfn_get 730 716 -14
geneve_sock_add 829 813 -16
nfsd4_sequence_done 721 703 -18
nlmclnt_lookup_host 708 686 -22
nfsd4_lockt 1085 1063 -22
nfs_get_client 1077 1050 -27
tcf_bpf_init 1106 1076 -30
nfsd4_encode_fattr 5997 5930 -67
Total: Before=154856051, After=154854321, chg -0.00%
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 04:58:21 +03:00
/* Key passed to net_generic() to fetch a namespace's struct vti6_net. */
static unsigned int vti6_net_id __read_mostly ;
2013-08-19 10:07:34 +04:00
/* Per network namespace vti6 state, obtained via net_generic(). */
struct vti6_net {
/* the vti6 tunnel fallback device */
struct net_device * fb_tnl_dev ;
/* lists for storing tunnels in use */
/* hash table of tunnel chains, indexed by HASH(remote, local) */
2016-08-10 12:03:35 +03:00
struct ip6_tnl __rcu * tnls_r_l [ IP6_VTI_HASH_SIZE ] ;
2013-08-19 10:07:34 +04:00
/* single slot holding the wildcard (fallback) tunnel */
struct ip6_tnl __rcu * tnls_wc [ 1 ] ;
/*
 * Indexed by priority in vti6_tnl_bucket(): 0 for fully unspecified
 * end-points, 1 for hashed end-points.  Presumably tnls[0] points at
 * tnls_wc and tnls[1] at tnls_r_l; the initialisation is not visible
 * here - TODO confirm.
 */
struct ip6_tnl __rcu * * tnls [ 2 ] ;
} ;
/*
 * Walk a tunnel chain beginning at @start using rcu_dereference().
 * The macro uses a variable named "t" (struct ip6_tnl *) from the
 * enclosing scope as its cursor; it does not declare one itself.
 */
# define for_each_vti6_tunnel_rcu(start) \
for ( t = rcu_dereference ( start ) ; t ; t = rcu_dereference ( t - > next ) )
/**
* vti6_tnl_lookup - fetch tunnel matching the end - point addresses
* @ net : network namespace
* @ remote : the address of the tunnel exit - point
* @ local : the address of the tunnel entry - point
*
* Return :
* tunnel matching given end - points if found ,
* else fallback tunnel if its device is up ,
* else % NULL
* */
static struct ip6_tnl *
vti6_tnl_lookup ( struct net * net , const struct in6_addr * remote ,
const struct in6_addr * local )
{
unsigned int hash = HASH ( remote , local ) ;
struct ip6_tnl * t ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
2014-11-20 12:01:49 +03:00
struct in6_addr any ;
2013-08-19 10:07:34 +04:00
for_each_vti6_tunnel_rcu ( ip6n - > tnls_r_l [ hash ] ) {
if ( ipv6_addr_equal ( local , & t - > parms . laddr ) & &
ipv6_addr_equal ( remote , & t - > parms . raddr ) & &
( t - > dev - > flags & IFF_UP ) )
return t ;
}
2014-11-20 12:01:49 +03:00
memset ( & any , 0 , sizeof ( any ) ) ;
hash = HASH ( & any , local ) ;
for_each_vti6_tunnel_rcu ( ip6n - > tnls_r_l [ hash ] ) {
if ( ipv6_addr_equal ( local , & t - > parms . laddr ) & &
( t - > dev - > flags & IFF_UP ) )
return t ;
}
hash = HASH ( remote , & any ) ;
for_each_vti6_tunnel_rcu ( ip6n - > tnls_r_l [ hash ] ) {
if ( ipv6_addr_equal ( remote , & t - > parms . raddr ) & &
( t - > dev - > flags & IFF_UP ) )
return t ;
}
2013-08-19 10:07:34 +04:00
t = rcu_dereference ( ip6n - > tnls_wc [ 0 ] ) ;
if ( t & & ( t - > dev - > flags & IFF_UP ) )
return t ;
return NULL ;
}
/**
 * vti6_tnl_bucket - get head of list matching given tunnel parameters
 *   @ip6n: per-namespace vti6 state holding the tunnel tables
 *   @p: parameters containing tunnel end-points
 *
 * Description:
 *   When both laddr and raddr in @p are unspecified the single slot of
 *   the priority-0 table is selected; otherwise the priority-1 table is
 *   indexed with HASH(raddr, laddr).
 *
 * Return: address of the list head pointer for @p
 **/
static struct ip6_tnl __rcu **
vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
{
	const struct in6_addr *raddr = &p->raddr;
	const struct in6_addr *laddr = &p->laddr;

	if (ipv6_addr_any(raddr) && ipv6_addr_any(laddr))
		return &ip6n->tnls[0][0];

	return &ip6n->tnls[1][HASH(raddr, laddr)];
}
static void
vti6_tnl_link ( struct vti6_net * ip6n , struct ip6_tnl * t )
{
struct ip6_tnl __rcu * * tp = vti6_tnl_bucket ( ip6n , & t - > parms ) ;
rcu_assign_pointer ( t - > next , rtnl_dereference ( * tp ) ) ;
rcu_assign_pointer ( * tp , t ) ;
}
static void
vti6_tnl_unlink ( struct vti6_net * ip6n , struct ip6_tnl * t )
{
struct ip6_tnl __rcu * * tp ;
struct ip6_tnl * iter ;
for ( tp = vti6_tnl_bucket ( ip6n , & t - > parms ) ;
( iter = rtnl_dereference ( * tp ) ) ! = NULL ;
tp = & iter - > next ) {
if ( t = = iter ) {
rcu_assign_pointer ( * tp , t - > next ) ;
break ;
}
}
}
/*
 * Release a tunnel device: free its per-cpu statistics, then the
 * net_device itself.  Installed as dev->destructor by vti6_dev_setup()
 * and also called directly on the vti6_tnl_create() error path.
 */
static void vti6_dev_free ( struct net_device * dev )
{
/* tstats must go first: it is reached through dev, which is freed next */
free_percpu ( dev - > tstats ) ;
free_netdev ( dev ) ;
}
static int vti6_tnl_create2 ( struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct net * net = dev_net ( dev ) ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
int err ;
2017-01-06 13:27:59 +03:00
dev - > rtnl_link_ops = & vti6_link_ops ;
2013-08-19 10:07:34 +04:00
err = register_netdevice ( dev ) ;
if ( err < 0 )
goto out ;
strcpy ( t - > parms . name , dev - > name ) ;
dev_hold ( dev ) ;
vti6_tnl_link ( ip6n , t ) ;
return 0 ;
out :
return err ;
}
static struct ip6_tnl * vti6_tnl_create ( struct net * net , struct __ip6_tnl_parm * p )
{
struct net_device * dev ;
struct ip6_tnl * t ;
char name [ IFNAMSIZ ] ;
int err ;
if ( p - > name [ 0 ] )
strlcpy ( name , p - > name , IFNAMSIZ ) ;
else
sprintf ( name , " ip6_vti%%d " ) ;
net: set name_assign_type in alloc_netdev()
Extend alloc_netdev{,_mq{,s}}() to take name_assign_type as argument, and convert
all users to pass NET_NAME_UNKNOWN.
Coccinelle patch:
@@
expression sizeof_priv, name, setup, txqs, rxqs, count;
@@
(
-alloc_netdev_mqs(sizeof_priv, name, setup, txqs, rxqs)
+alloc_netdev_mqs(sizeof_priv, name, NET_NAME_UNKNOWN, setup, txqs, rxqs)
|
-alloc_netdev_mq(sizeof_priv, name, setup, count)
+alloc_netdev_mq(sizeof_priv, name, NET_NAME_UNKNOWN, setup, count)
|
-alloc_netdev(sizeof_priv, name, setup)
+alloc_netdev(sizeof_priv, name, NET_NAME_UNKNOWN, setup)
)
v9: move comments here from the wrong commit
Signed-off-by: Tom Gundersen <teg@jklm.no>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-14 18:37:24 +04:00
dev = alloc_netdev ( sizeof ( * t ) , name , NET_NAME_UNKNOWN , vti6_dev_setup ) ;
2015-03-29 16:00:04 +03:00
if ( ! dev )
2013-08-19 10:07:34 +04:00
goto failed ;
dev_net_set ( dev , net ) ;
t = netdev_priv ( dev ) ;
t - > parms = * p ;
t - > net = dev_net ( dev ) ;
err = vti6_tnl_create2 ( dev ) ;
if ( err < 0 )
goto failed_free ;
return t ;
failed_free :
vti6_dev_free ( dev ) ;
failed :
return NULL ;
}
/**
* vti6_locate - find or create tunnel matching given parameters
* @ net : network namespace
* @ p : tunnel parameters
* @ create : ! = 0 if allowed to create new tunnel if no match found
*
* Description :
* vti6_locate ( ) first tries to locate an existing tunnel
* based on @ parms . If this is unsuccessful , but @ create is set a new
* tunnel device is created and registered for use .
*
* Return :
* matching tunnel or NULL
* */
static struct ip6_tnl * vti6_locate ( struct net * net , struct __ip6_tnl_parm * p ,
int create )
{
const struct in6_addr * remote = & p - > raddr ;
const struct in6_addr * local = & p - > laddr ;
struct ip6_tnl __rcu * * tp ;
struct ip6_tnl * t ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
for ( tp = vti6_tnl_bucket ( ip6n , p ) ;
( t = rtnl_dereference ( * tp ) ) ! = NULL ;
tp = & t - > next ) {
if ( ipv6_addr_equal ( local , & t - > parms . laddr ) & &
2014-09-22 12:07:25 +04:00
ipv6_addr_equal ( remote , & t - > parms . raddr ) ) {
if ( create )
return NULL ;
2013-08-19 10:07:34 +04:00
return t ;
2014-09-22 12:07:25 +04:00
}
2013-08-19 10:07:34 +04:00
}
if ( ! create )
return NULL ;
return vti6_tnl_create ( net , p ) ;
}
/**
* vti6_dev_uninit - tunnel device uninitializer
* @ dev : the device to be destroyed
*
* Description :
* vti6_dev_uninit ( ) removes tunnel from its list
* */
static void vti6_dev_uninit ( struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
2015-04-02 18:31:17 +03:00
struct vti6_net * ip6n = net_generic ( t - > net , vti6_net_id ) ;
2013-08-19 10:07:34 +04:00
if ( dev = = ip6n - > fb_tnl_dev )
RCU_INIT_POINTER ( ip6n - > tnls_wc [ 0 ] , NULL ) ;
else
vti6_tnl_unlink ( ip6n , t ) ;
dev_put ( dev ) ;
}
static int vti6_rcv ( struct sk_buff * skb )
{
struct ip6_tnl * t ;
const struct ipv6hdr * ipv6h = ipv6_hdr ( skb ) ;
rcu_read_lock ( ) ;
2014-11-24 00:28:43 +03:00
t = vti6_tnl_lookup ( dev_net ( skb - > dev ) , & ipv6h - > saddr , & ipv6h - > daddr ) ;
2015-03-29 16:00:05 +03:00
if ( t ) {
2013-08-19 10:07:34 +04:00
if ( t - > parms . proto ! = IPPROTO_IPV6 & & t - > parms . proto ! = 0 ) {
rcu_read_unlock ( ) ;
goto discard ;
}
if ( ! xfrm6_policy_check ( NULL , XFRM_POLICY_IN , skb ) ) {
rcu_read_unlock ( ) ;
return 0 ;
}
if ( ! ip6_tnl_rcv_ctl ( t , & ipv6h - > daddr , & ipv6h - > saddr ) ) {
t - > dev - > stats . rx_dropped + + ;
rcu_read_unlock ( ) ;
goto discard ;
}
rcu_read_unlock ( ) ;
2014-03-14 10:28:08 +04:00
2016-09-19 17:17:57 +03:00
return xfrm6_rcv_tnl ( skb , t ) ;
2013-08-19 10:07:34 +04:00
}
rcu_read_unlock ( ) ;
2014-03-14 10:28:08 +04:00
return - EINVAL ;
2013-08-19 10:07:34 +04:00
discard :
kfree_skb ( skb ) ;
return 0 ;
}
2014-03-14 10:28:08 +04:00
static int vti6_rcv_cb ( struct sk_buff * skb , int err )
{
unsigned short family ;
struct net_device * dev ;
struct pcpu_sw_netstats * tstats ;
struct xfrm_state * x ;
2016-09-07 21:40:38 +03:00
struct xfrm_mode * inner_mode ;
2014-03-14 10:28:08 +04:00
struct ip6_tnl * t = XFRM_TUNNEL_SKB_CB ( skb ) - > tunnel . ip6 ;
2015-05-27 17:16:54 +03:00
u32 orig_mark = skb - > mark ;
int ret ;
2014-03-14 10:28:08 +04:00
if ( ! t )
return 1 ;
dev = t - > dev ;
if ( err ) {
dev - > stats . rx_errors + + ;
dev - > stats . rx_dropped + + ;
return 0 ;
}
x = xfrm_input_state ( skb ) ;
2016-09-07 21:40:38 +03:00
inner_mode = x - > inner_mode ;
if ( x - > sel . family = = AF_UNSPEC ) {
inner_mode = xfrm_ip2inner_mode ( x , XFRM_MODE_SKB_CB ( skb ) - > protocol ) ;
if ( inner_mode = = NULL ) {
XFRM_INC_STATS ( dev_net ( skb - > dev ) ,
LINUX_MIB_XFRMINSTATEMODEERROR ) ;
return - EINVAL ;
}
}
family = inner_mode - > afinfo - > family ;
2014-03-14 10:28:08 +04:00
2015-05-27 17:16:54 +03:00
skb - > mark = be32_to_cpu ( t - > parms . i_key ) ;
ret = xfrm_policy_check ( NULL , XFRM_POLICY_IN , skb , family ) ;
skb - > mark = orig_mark ;
if ( ! ret )
2014-03-14 10:28:08 +04:00
return - EPERM ;
skb_scrub_packet ( skb , ! net_eq ( t - > net , dev_net ( skb - > dev ) ) ) ;
skb - > dev = dev ;
tstats = this_cpu_ptr ( dev - > tstats ) ;
u64_stats_update_begin ( & tstats - > syncp ) ;
tstats - > rx_packets + + ;
tstats - > rx_bytes + = skb - > len ;
u64_stats_update_end ( & tstats - > syncp ) ;
return 0 ;
}
2013-08-19 10:07:34 +04:00
/**
* vti6_addr_conflict - compare packet addresses to tunnel ' s own
* @ t : the outgoing tunnel device
* @ hdr : IPv6 header from the incoming packet
*
* Description :
* Avoid trivial tunneling loop by checking that tunnel exit - point
* doesn ' t match source of incoming packet .
*
* Return :
* 1 if conflict ,
* 0 else
* */
static inline bool
vti6_addr_conflict ( const struct ip6_tnl * t , const struct ipv6hdr * hdr )
{
return ipv6_addr_equal ( & t - > parms . raddr , & hdr - > saddr ) ;
}
2014-03-14 10:28:09 +04:00
static bool vti6_state_check ( const struct xfrm_state * x ,
const struct in6_addr * dst ,
const struct in6_addr * src )
{
xfrm_address_t * daddr = ( xfrm_address_t * ) dst ;
xfrm_address_t * saddr = ( xfrm_address_t * ) src ;
/* if there is no transform then this tunnel is not functional.
* Or if the xfrm is not mode tunnel .
*/
if ( ! x | | x - > props . mode ! = XFRM_MODE_TUNNEL | |
x - > props . family ! = AF_INET6 )
return false ;
if ( ipv6_addr_any ( dst ) )
return xfrm_addr_equal ( saddr , & x - > props . saddr , AF_INET6 ) ;
if ( ! xfrm_state_addr_check ( x , daddr , saddr , AF_INET6 ) )
return false ;
return true ;
}
2013-08-19 10:07:34 +04:00
/**
* vti6_xmit - send a packet
* @ skb : the outgoing socket buffer
* @ dev : the outgoing tunnel device
2014-03-14 10:28:08 +04:00
* @ fl : the flow informations for the xfrm_lookup
2013-08-19 10:07:34 +04:00
* */
2014-03-14 10:28:08 +04:00
static int
vti6_xmit ( struct sk_buff * skb , struct net_device * dev , struct flowi * fl )
2013-08-19 10:07:34 +04:00
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct net_device_stats * stats = & t - > dev - > stats ;
2014-03-14 10:28:08 +04:00
struct dst_entry * dst = skb_dst ( skb ) ;
2013-08-19 10:07:34 +04:00
struct net_device * tdev ;
2014-11-05 10:02:48 +03:00
struct xfrm_state * x ;
2013-08-19 10:07:34 +04:00
int err = - 1 ;
2015-05-29 21:28:26 +03:00
int mtu ;
2013-08-19 10:07:34 +04:00
2014-03-14 10:28:08 +04:00
if ( ! dst )
goto tx_err_link_failure ;
2013-08-19 10:07:34 +04:00
2014-03-14 10:28:08 +04:00
dst_hold ( dst ) ;
2014-03-14 10:28:08 +04:00
dst = xfrm_lookup ( t - > net , dst , fl , NULL , 0 ) ;
2014-03-14 10:28:08 +04:00
if ( IS_ERR ( dst ) ) {
err = PTR_ERR ( dst ) ;
dst = NULL ;
goto tx_err_link_failure ;
2013-08-19 10:07:34 +04:00
}
2014-11-05 10:02:48 +03:00
x = dst - > xfrm ;
if ( ! vti6_state_check ( x , & t - > parms . raddr , & t - > parms . laddr ) )
goto tx_err_link_failure ;
if ( ! ip6_tnl_xmit_ctl ( t , ( const struct in6_addr * ) & x - > props . saddr ,
( const struct in6_addr * ) & x - > id . daddr ) )
2013-08-19 10:07:34 +04:00
goto tx_err_link_failure ;
tdev = dst - > dev ;
if ( tdev = = dev ) {
stats - > collisions + + ;
net_warn_ratelimited ( " %s: Local routing loop detected! \n " ,
t - > parms . name ) ;
goto tx_err_dst_release ;
}
2014-03-14 10:28:08 +04:00
skb_scrub_packet ( skb , ! net_eq ( t - > net , dev_net ( dev ) ) ) ;
skb_dst_set ( skb , dst ) ;
skb - > dev = skb_dst ( skb ) - > dev ;
2013-08-19 10:07:34 +04:00
2015-05-29 21:28:26 +03:00
mtu = dst_mtu ( dst ) ;
if ( ! skb - > ignore_df & & skb - > len > mtu ) {
skb_dst ( skb ) - > ops - > update_pmtu ( dst , NULL , skb , mtu ) ;
2017-02-15 13:38:58 +03:00
if ( skb - > protocol = = htons ( ETH_P_IPV6 ) ) {
if ( mtu < IPV6_MIN_MTU )
mtu = IPV6_MIN_MTU ;
2015-05-29 21:28:26 +03:00
icmpv6_send ( skb , ICMPV6_PKT_TOOBIG , 0 , mtu ) ;
2017-02-15 13:38:58 +03:00
} else {
2015-05-29 21:28:26 +03:00
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_FRAG_NEEDED ,
htonl ( mtu ) ) ;
2017-02-15 13:38:58 +03:00
}
2015-05-29 21:28:26 +03:00
return - EMSGSIZE ;
}
2015-10-08 00:48:35 +03:00
err = dst_output ( t - > net , skb - > sk , skb ) ;
2014-03-14 10:28:08 +04:00
if ( net_xmit_eval ( err ) = = 0 ) {
struct pcpu_sw_netstats * tstats = this_cpu_ptr ( dev - > tstats ) ;
u64_stats_update_begin ( & tstats - > syncp ) ;
tstats - > tx_bytes + = skb - > len ;
tstats - > tx_packets + + ;
u64_stats_update_end ( & tstats - > syncp ) ;
} else {
stats - > tx_errors + + ;
stats - > tx_aborted_errors + + ;
}
2013-08-19 10:07:34 +04:00
return 0 ;
tx_err_link_failure :
stats - > tx_carrier_errors + + ;
dst_link_failure ( skb ) ;
tx_err_dst_release :
2014-03-14 10:28:08 +04:00
dst_release ( dst ) ;
2013-08-19 10:07:34 +04:00
return err ;
}
static netdev_tx_t
vti6_tnl_xmit ( struct sk_buff * skb , struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
struct net_device_stats * stats = & t - > dev - > stats ;
2014-03-14 10:28:08 +04:00
struct ipv6hdr * ipv6h ;
struct flowi fl ;
2013-08-19 10:07:34 +04:00
int ret ;
2014-03-14 10:28:08 +04:00
memset ( & fl , 0 , sizeof ( fl ) ) ;
2013-08-19 10:07:34 +04:00
switch ( skb - > protocol ) {
case htons ( ETH_P_IPV6 ) :
2014-03-14 10:28:08 +04:00
ipv6h = ipv6_hdr ( skb ) ;
if ( ( t - > parms . proto ! = IPPROTO_IPV6 & & t - > parms . proto ! = 0 ) | |
2014-11-05 10:02:48 +03:00
vti6_addr_conflict ( t , ipv6h ) )
2014-03-14 10:28:08 +04:00
goto tx_err ;
xfrm_decode_session ( skb , & fl , AF_INET6 ) ;
memset ( IP6CB ( skb ) , 0 , sizeof ( * IP6CB ( skb ) ) ) ;
break ;
case htons ( ETH_P_IP ) :
xfrm_decode_session ( skb , & fl , AF_INET ) ;
memset ( IPCB ( skb ) , 0 , sizeof ( * IPCB ( skb ) ) ) ;
2013-08-19 10:07:34 +04:00
break ;
default :
goto tx_err ;
}
2015-05-27 17:16:43 +03:00
/* override mark with tunnel output key */
fl . flowi_mark = be32_to_cpu ( t - > parms . o_key ) ;
2014-03-14 10:28:08 +04:00
ret = vti6_xmit ( skb , dev , & fl ) ;
2013-08-19 10:07:34 +04:00
if ( ret < 0 )
goto tx_err ;
return NETDEV_TX_OK ;
tx_err :
stats - > tx_errors + + ;
stats - > tx_dropped + + ;
kfree_skb ( skb ) ;
return NETDEV_TX_OK ;
}
2014-03-14 10:28:08 +04:00
/*
 * ICMPv6 error handler.
 *
 * skb->data is interpreted as the IPv6 header the error refers to and
 * @offset as the position of the ESP/AH/IPcomp header following it.
 *
 * Return: -1 when no tunnel matches the addresses; 0 in every other
 * case.  Only ICMPV6_PKT_TOOBIG and NDISC_REDIRECT for which an xfrm
 * state is found (keyed by the tunnel's output key, destination
 * address, SPI and protocol) lead to a PMTU update or a redirect.
 */
static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		    u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	int protocol = iph->nexthdr;
	struct xfrm_state *x;
	struct ip6_tnl *t;
	__be32 spi;
	__u32 mark;

	t = vti6_tnl_lookup(net, &iph->daddr, &iph->saddr);
	if (!t)
		return -1;

	mark = be32_to_cpu(t->parms.o_key);

	switch (protocol) {
	case IPPROTO_ESP:
		spi = ((struct ip_esp_hdr *)(skb->data + offset))->spi;
		break;
	case IPPROTO_AH:
		spi = ((struct ip_auth_hdr *)(skb->data + offset))->spi;
		break;
	case IPPROTO_COMP:
		/* the 16-bit CPI is widened to a 32-bit network-order value */
		spi = htonl(ntohs(((struct ip_comp_hdr *)
				   (skb->data + offset))->cpi));
		break;
	default:
		return 0;
	}

	if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT)
		return 0;

	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
			      spi, protocol, AF_INET6);
	if (!x)
		return 0;

	if (type == NDISC_REDIRECT) {
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
	} else {
		ip6_update_pmtu(skb, net, info, 0, 0,
				sock_net_uid(net, NULL));
	}
	xfrm_state_put(x);

	return 0;
}
2013-08-19 10:07:34 +04:00
static void vti6_link_config ( struct ip6_tnl * t )
{
struct net_device * dev = t - > dev ;
struct __ip6_tnl_parm * p = & t - > parms ;
memcpy ( dev - > dev_addr , & p - > laddr , sizeof ( struct in6_addr ) ) ;
memcpy ( dev - > broadcast , & p - > raddr , sizeof ( struct in6_addr ) ) ;
p - > flags & = ~ ( IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET ) ;
p - > flags | = ip6_tnl_get_cap ( t , & p - > laddr , & p - > raddr ) ;
if ( p - > flags & IP6_TNL_F_CAP_XMIT & & p - > flags & IP6_TNL_F_CAP_RCV )
dev - > flags | = IFF_POINTOPOINT ;
else
dev - > flags & = ~ IFF_POINTOPOINT ;
}
/**
* vti6_tnl_change - update the tunnel parameters
* @ t : tunnel to be changed
* @ p : tunnel configuration parameters
*
* Description :
* vti6_tnl_change ( ) updates the tunnel parameters
* */
static int
vti6_tnl_change ( struct ip6_tnl * t , const struct __ip6_tnl_parm * p )
{
t - > parms . laddr = p - > laddr ;
t - > parms . raddr = p - > raddr ;
t - > parms . link = p - > link ;
t - > parms . i_key = p - > i_key ;
t - > parms . o_key = p - > o_key ;
t - > parms . proto = p - > proto ;
2016-02-12 17:43:54 +03:00
dst_cache_reset ( & t - > dst_cache ) ;
2013-08-19 10:07:34 +04:00
vti6_link_config ( t ) ;
return 0 ;
}
static int vti6_update ( struct ip6_tnl * t , struct __ip6_tnl_parm * p )
{
struct net * net = dev_net ( t - > dev ) ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
int err ;
vti6_tnl_unlink ( ip6n , t ) ;
synchronize_net ( ) ;
err = vti6_tnl_change ( t , p ) ;
vti6_tnl_link ( ip6n , t ) ;
netdev_state_change ( t - > dev ) ;
return err ;
}
static void
vti6_parm_from_user ( struct __ip6_tnl_parm * p , const struct ip6_tnl_parm2 * u )
{
p - > laddr = u - > laddr ;
p - > raddr = u - > raddr ;
p - > link = u - > link ;
p - > i_key = u - > i_key ;
p - > o_key = u - > o_key ;
p - > proto = u - > proto ;
memcpy ( p - > name , u - > name , sizeof ( u - > name ) ) ;
}
static void
vti6_parm_to_user ( struct ip6_tnl_parm2 * u , const struct __ip6_tnl_parm * p )
{
u - > laddr = p - > laddr ;
u - > raddr = p - > raddr ;
u - > link = p - > link ;
u - > i_key = p - > i_key ;
u - > o_key = p - > o_key ;
2017-02-24 17:20:32 +03:00
if ( u - > i_key )
u - > i_flags | = GRE_KEY ;
if ( u - > o_key )
u - > o_flags | = GRE_KEY ;
2013-08-19 10:07:34 +04:00
u - > proto = p - > proto ;
memcpy ( u - > name , p - > name , sizeof ( u - > name ) ) ;
}
/**
 * vti6_ioctl - configure vti6 tunnels from userspace
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
 *   vti6_ioctl() is used for managing vti6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   When issued on the fallback device, the get and delete commands
 *   operate on the tunnel matching the addresses passed in from
 *   userspace instead of on @dev itself.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set, or if the
 *     tunnel selected for deletion is the fallback device,
 *   %-EINVAL if the passed protocol is neither IPPROTO_IPV6 nor 0, or
 *     if @cmd is not one of the commands above,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict,
 *   %-ENOBUFS if %SIOCADDTUNNEL did not yield a tunnel,
 *   %-ENOENT if no tunnel matches for a change on the fallback device
 *     or for a delete through the fallback device
 **/
static int
vti6_ioctl ( struct net_device * dev , struct ifreq * ifr , int cmd )
{
int err = 0 ;
struct ip6_tnl_parm2 p ;
struct __ip6_tnl_parm p1 ;
struct ip6_tnl * t = NULL ;
struct net * net = dev_net ( dev ) ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
switch ( cmd ) {
case SIOCGETTUNNEL :
/* on the fallback device the tunnel is selected by the user's addresses */
if ( dev = = ip6n - > fb_tnl_dev ) {
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) ) {
err = - EFAULT ;
break ;
}
vti6_parm_from_user ( & p1 , & p ) ;
t = vti6_locate ( net , & p1 , 0 ) ;
} else {
memset ( & p , 0 , sizeof ( p ) ) ;
}
2015-03-29 16:00:04 +03:00
/* no match (or not the fallback device): report @dev's own tunnel */
if ( ! t )
2013-08-19 10:07:34 +04:00
t = netdev_priv ( dev ) ;
vti6_parm_to_user ( & p , & t - > parms ) ;
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
err = - EFAULT ;
break ;
case SIOCADDTUNNEL :
case SIOCCHGTUNNEL :
/* err is preloaded before each check so a plain break reports it */
err = - EPERM ;
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
break ;
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
break ;
err = - EINVAL ;
if ( p . proto ! = IPPROTO_IPV6 & & p . proto ! = 0 )
break ;
vti6_parm_from_user ( & p1 , & p ) ;
/* for SIOCADDTUNNEL this creates the tunnel when none matches */
t = vti6_locate ( net , & p1 , cmd = = SIOCADDTUNNEL ) ;
if ( dev ! = ip6n - > fb_tnl_dev & & cmd = = SIOCCHGTUNNEL ) {
2015-03-29 16:00:05 +03:00
if ( t ) {
2013-08-19 10:07:34 +04:00
/* the new addresses already belong to a different tunnel */
if ( t - > dev ! = dev ) {
err = - EEXIST ;
break ;
}
} else
t = netdev_priv ( dev ) ;
err = vti6_update ( t , & p1 ) ;
}
if ( t ) {
/* hand the resulting parameters back to the caller */
err = 0 ;
vti6_parm_to_user ( & p , & t - > parms ) ;
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
err = - EFAULT ;
} else
err = ( cmd = = SIOCADDTUNNEL ? - ENOBUFS : - ENOENT ) ;
break ;
case SIOCDELTUNNEL :
err = - EPERM ;
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
break ;
if ( dev = = ip6n - > fb_tnl_dev ) {
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
break ;
err = - ENOENT ;
vti6_parm_from_user ( & p1 , & p ) ;
t = vti6_locate ( net , & p1 , 0 ) ;
2015-03-29 16:00:04 +03:00
if ( ! t )
2013-08-19 10:07:34 +04:00
break ;
/* the fallback device itself must not be deleted this way */
err = - EPERM ;
if ( t - > dev = = ip6n - > fb_tnl_dev )
break ;
dev = t - > dev ;
}
err = 0 ;
unregister_netdevice ( dev ) ;
break ;
default :
err = - EINVAL ;
}
return err ;
}
/*
 * Device operations for vti6 devices; installed on each device by
 * vti6_dev_setup().
 */
static const struct net_device_ops vti6_netdev_ops = {
2014-11-03 11:19:28 +03:00
. ndo_init = vti6_dev_init ,
2013-08-19 10:07:34 +04:00
. ndo_uninit = vti6_dev_uninit ,
. ndo_start_xmit = vti6_tnl_xmit ,
. ndo_do_ioctl = vti6_ioctl ,
2014-01-02 10:24:36 +04:00
. ndo_get_stats64 = ip_tunnel_get_stats64 ,
2015-04-02 18:07:01 +03:00
. ndo_get_iflink = ip6_tnl_get_iflink ,
2013-08-19 10:07:34 +04:00
} ;
/**
* vti6_dev_setup - setup virtual tunnel device
* @ dev : virtual device associated with tunnel
*
* Description :
* Initialize function pointers and device parameters
* */
static void vti6_dev_setup ( struct net_device * dev )
{
dev - > netdev_ops = & vti6_netdev_ops ;
dev - > destructor = vti6_dev_free ;
dev - > type = ARPHRD_TUNNEL6 ;
dev - > hard_header_len = LL_MAX_HEADER + sizeof ( struct ipv6hdr ) ;
dev - > mtu = ETH_DATA_LEN ;
2016-10-20 20:55:24 +03:00
dev - > min_mtu = IPV6_MIN_MTU ;
dev - > max_mtu = IP_MAX_MTU ;
2013-08-19 10:07:34 +04:00
dev - > flags | = IFF_NOARP ;
dev - > addr_len = sizeof ( struct in6_addr ) ;
2014-10-06 05:38:35 +04:00
netif_keep_dst ( dev ) ;
2017-01-26 06:59:18 +03:00
/* This perm addr will be used as interface identifier by IPv6 */
dev - > addr_assign_type = NET_ADDR_RANDOM ;
eth_random_addr ( dev - > perm_addr ) ;
2013-08-19 10:07:34 +04:00
}
/**
* vti6_dev_init_gen - general initializer for all tunnel devices
* @ dev : virtual device associated with tunnel
* */
static inline int vti6_dev_init_gen ( struct net_device * dev )
{
struct ip6_tnl * t = netdev_priv ( dev ) ;
t - > dev = dev ;
t - > net = dev_net ( dev ) ;
2014-02-13 23:46:28 +04:00
dev - > tstats = netdev_alloc_pcpu_stats ( struct pcpu_sw_netstats ) ;
2013-08-19 10:07:34 +04:00
if ( ! dev - > tstats )
return - ENOMEM ;
return 0 ;
}
/**
 * vti6_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Runs the generic initializer and, if that succeeds, applies the
 *   tunnel parameters to the device via vti6_link_config().
 *
 * Return: 0 on success, otherwise the error from vti6_dev_init_gen()
 **/
static int vti6_dev_init(struct net_device *dev)
{
	int rc = vti6_dev_init_gen(dev);

	if (!rc)
		vti6_link_config(netdev_priv(dev));

	return rc;
}
/**
 * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
 *   @dev: fallback device
 *
 * Description:
 *   Sets the fallback tunnel's protocol to IPPROTO_IPV6, takes a
 *   reference on @dev and publishes the tunnel in the wildcard slot of
 *   the namespace's tunnel tables.
 *
 * Return: 0
 **/
static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
{
	struct vti6_net *ip6n = net_generic(dev_net(dev), vti6_net_id);
	struct ip6_tnl *fb = netdev_priv(dev);

	fb->parms.proto = IPPROTO_IPV6;

	dev_hold(dev);
	rcu_assign_pointer(ip6n->tnls_wc[0], fb);

	return 0;
}
/*
 * Netlink attribute validation hook.  Neither @tb nor @data is
 * inspected; every request is accepted and 0 is returned.
 */
static int vti6_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] )
{
return 0 ;
}
static void vti6_netlink_parms ( struct nlattr * data [ ] ,
struct __ip6_tnl_parm * parms )
{
memset ( parms , 0 , sizeof ( * parms ) ) ;
if ( ! data )
return ;
if ( data [ IFLA_VTI_LINK ] )
parms - > link = nla_get_u32 ( data [ IFLA_VTI_LINK ] ) ;
if ( data [ IFLA_VTI_LOCAL ] )
2015-03-29 17:59:26 +03:00
parms - > laddr = nla_get_in6_addr ( data [ IFLA_VTI_LOCAL ] ) ;
2013-08-19 10:07:34 +04:00
if ( data [ IFLA_VTI_REMOTE ] )
2015-03-29 17:59:26 +03:00
parms - > raddr = nla_get_in6_addr ( data [ IFLA_VTI_REMOTE ] ) ;
2013-08-19 10:07:34 +04:00
if ( data [ IFLA_VTI_IKEY ] )
parms - > i_key = nla_get_be32 ( data [ IFLA_VTI_IKEY ] ) ;
if ( data [ IFLA_VTI_OKEY ] )
parms - > o_key = nla_get_be32 ( data [ IFLA_VTI_OKEY ] ) ;
}
static int vti6_newlink ( struct net * src_net , struct net_device * dev ,
struct nlattr * tb [ ] , struct nlattr * data [ ] )
{
struct net * net = dev_net ( dev ) ;
struct ip6_tnl * nt ;
nt = netdev_priv ( dev ) ;
vti6_netlink_parms ( data , & nt - > parms ) ;
nt - > parms . proto = IPPROTO_IPV6 ;
if ( vti6_locate ( net , & nt - > parms , 0 ) )
return - EEXIST ;
return vti6_tnl_create2 ( dev ) ;
}
ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic
Currently vti_link_ops does not set .dellink. For the fb tunnel device
(ip_vti0), the net_device is therefore removed, because the default .dellink
is unregister_netdevice_queue, but the tunnel remains in the tunnel list.
If we then add a new vti tunnel, in ip_tunnel_find():
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == t->parms.link &&
==> type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
}
the panic will happen, cause dev of ip_tunnel *t is null:
[ 3835.072977] IP: [<ffffffffa04103fd>] ip_tunnel_find+0x9d/0xc0 [ip_tunnel]
[ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0
[ 3835.073008] Oops: 0000 [#1] SMP
.....
[ 3835.073008] Stack:
[ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0
[ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858
[ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000
[ 3835.073008] Call Trace:
[ 3835.073008] [<ffffffffa0411924>] ip_tunnel_newlink+0x64/0x160 [ip_tunnel]
[ 3835.073008] [<ffffffffa040b2e3>] vti_newlink+0x43/0x70 [ip_vti]
[ 3835.073008] [<ffffffff8150d4da>] rtnl_newlink+0x4fa/0x5f0
[ 3835.073008] [<ffffffff812f68bb>] ? nla_strlcpy+0x5b/0x70
[ 3835.073008] [<ffffffff81508fb0>] ? rtnl_link_ops_get+0x40/0x60
[ 3835.073008] [<ffffffff8150d11f>] ? rtnl_newlink+0x13f/0x5f0
[ 3835.073008] [<ffffffff81509cf4>] rtnetlink_rcv_msg+0xa4/0x270
[ 3835.073008] [<ffffffff8126adf5>] ? sock_has_perm+0x75/0x90
[ 3835.073008] [<ffffffff81509c50>] ? rtnetlink_rcv+0x30/0x30
[ 3835.073008] [<ffffffff81529e39>] netlink_rcv_skb+0xa9/0xc0
[ 3835.073008] [<ffffffff81509c48>] rtnetlink_rcv+0x28/0x30
....
modprobe ip_vti
ip link del ip_vti0 type vti
ip link add ip_vti0 type vti
rmmod ip_vti
do that one or more times, kernel will panic.
fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in
which we skip the unregister of fb tunnel device. do the same on ip6_vti.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-11-23 10:04:11 +03:00
static void vti6_dellink ( struct net_device * dev , struct list_head * head )
{
struct net * net = dev_net ( dev ) ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
if ( dev ! = ip6n - > fb_tnl_dev )
unregister_netdevice_queue ( dev , head ) ;
}
2013-08-19 10:07:34 +04:00
static int vti6_changelink ( struct net_device * dev , struct nlattr * tb [ ] ,
struct nlattr * data [ ] )
{
struct ip6_tnl * t ;
struct __ip6_tnl_parm p ;
struct net * net = dev_net ( dev ) ;
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
if ( dev = = ip6n - > fb_tnl_dev )
return - EINVAL ;
vti6_netlink_parms ( data , & p ) ;
t = vti6_locate ( net , & p , 0 ) ;
if ( t ) {
if ( t - > dev ! = dev )
return - EEXIST ;
} else
t = netdev_priv ( dev ) ;
return vti6_update ( t , & p ) ;
}
/**
 * vti6_get_size - netlink space needed for the vti6 attributes of a device
 * @dev: device being dumped (not inspected; the size is constant)
 *
 * Return: the summed nla_total_size() of the five attributes emitted by
 * vti6_fill_info().
 **/
static size_t vti6_get_size(const struct net_device *dev)
{
	/* IFLA_VTI_LINK, IFLA_VTI_IKEY and IFLA_VTI_OKEY: 4-byte payload each */
	const int word_attr = nla_total_size(4);
	/* IFLA_VTI_LOCAL and IFLA_VTI_REMOTE: one struct in6_addr each */
	const int addr_attr = nla_total_size(sizeof(struct in6_addr));

	return 3 * word_attr + 2 * addr_attr;
}
static int vti6_fill_info ( struct sk_buff * skb , const struct net_device * dev )
{
struct ip6_tnl * tunnel = netdev_priv ( dev ) ;
struct __ip6_tnl_parm * parm = & tunnel - > parms ;
if ( nla_put_u32 ( skb , IFLA_VTI_LINK , parm - > link ) | |
2015-03-29 17:59:25 +03:00
nla_put_in6_addr ( skb , IFLA_VTI_LOCAL , & parm - > laddr ) | |
nla_put_in6_addr ( skb , IFLA_VTI_REMOTE , & parm - > raddr ) | |
2013-08-19 10:07:34 +04:00
nla_put_be32 ( skb , IFLA_VTI_IKEY , parm - > i_key ) | |
nla_put_be32 ( skb , IFLA_VTI_OKEY , parm - > o_key ) )
goto nla_put_failure ;
return 0 ;
nla_put_failure :
return - EMSGSIZE ;
}
/*
 * Netlink attribute policy for vti6 links: link and the two keys are
 * declared as 32-bit values, the two addresses by the size of a
 * struct in6_addr.
 */
static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
	[IFLA_VTI_LOCAL]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VTI_REMOTE]	= { .len = sizeof(struct in6_addr) },
	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
};
/*
 * rtnetlink link operations for vti6 devices: attribute policy, private
 * area size (one struct ip6_tnl per device) and the setup, validate,
 * newlink, dellink, changelink, get_size, fill_info and get_link_net
 * callbacks.
 */
static struct rtnl_link_ops vti6_link_ops __read_mostly = {
. kind = " vti6 " ,
. maxtype = IFLA_VTI_MAX ,
. policy = vti6_policy ,
. priv_size = sizeof ( struct ip6_tnl ) ,
. setup = vti6_dev_setup ,
. validate = vti6_validate ,
. newlink = vti6_newlink ,
ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic
Now the vti_link_ops do not point the .dellink, for fb tunnel device
(ip_vti0), the net_device will be removed as the default .dellink is
unregister_netdevice_queue,but the tunnel still in the tunnel list,
then if we add a new vti tunnel, in ip_tunnel_find():
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == t->parms.link &&
==> type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
}
the panic will happen, cause dev of ip_tunnel *t is null:
[ 3835.072977] IP: [<ffffffffa04103fd>] ip_tunnel_find+0x9d/0xc0 [ip_tunnel]
[ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0
[ 3835.073008] Oops: 0000 [#1] SMP
.....
[ 3835.073008] Stack:
[ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0
[ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858
[ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000
[ 3835.073008] Call Trace:
[ 3835.073008] [<ffffffffa0411924>] ip_tunnel_newlink+0x64/0x160 [ip_tunnel]
[ 3835.073008] [<ffffffffa040b2e3>] vti_newlink+0x43/0x70 [ip_vti]
[ 3835.073008] [<ffffffff8150d4da>] rtnl_newlink+0x4fa/0x5f0
[ 3835.073008] [<ffffffff812f68bb>] ? nla_strlcpy+0x5b/0x70
[ 3835.073008] [<ffffffff81508fb0>] ? rtnl_link_ops_get+0x40/0x60
[ 3835.073008] [<ffffffff8150d11f>] ? rtnl_newlink+0x13f/0x5f0
[ 3835.073008] [<ffffffff81509cf4>] rtnetlink_rcv_msg+0xa4/0x270
[ 3835.073008] [<ffffffff8126adf5>] ? sock_has_perm+0x75/0x90
[ 3835.073008] [<ffffffff81509c50>] ? rtnetlink_rcv+0x30/0x30
[ 3835.073008] [<ffffffff81529e39>] netlink_rcv_skb+0xa9/0xc0
[ 3835.073008] [<ffffffff81509c48>] rtnetlink_rcv+0x28/0x30
....
modprobe ip_vti
ip link del ip_vti0 type vti
ip link add ip_vti0 type vti
rmmod ip_vti
do that one or more times, kernel will panic.
fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in
which we skip the unregister of fb tunnel device. do the same on ip6_vti.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-11-23 10:04:11 +03:00
/* Custom dellink: vti6_dellink() leaves the fallback device registered. */
. dellink = vti6_dellink ,
2013-08-19 10:07:34 +04:00
. changelink = vti6_changelink ,
. get_size = vti6_get_size ,
. fill_info = vti6_fill_info ,
2015-01-15 17:11:17 +03:00
. get_link_net = ip6_tnl_get_link_net ,
2013-08-19 10:07:34 +04:00
} ;
/**
 * vti6_destroy_tunnels - unregister every vti6 device tracked by a netns
 * @ip6n: per-netns vti6 state whose tunnel tables are walked
 *
 * Queues the device of every tunnel found in the tnls_r_l hash chains and
 * of the tunnel in tnls_wc[0], then unregisters the whole batch at once.
 * The tables are read with rtnl_dereference(); the visible caller,
 * vti6_exit_net(), takes the RTNL lock around this call.
 **/
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
	struct ip6_tnl *t;
	LIST_HEAD(list);
	int h;

	for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
		/*
		 * Queueing a device does not modify the chain, so t->next
		 * can still be followed after each call.
		 */
		for (t = rtnl_dereference(ip6n->tnls_r_l[h]); t;
		     t = rtnl_dereference(t->next))
			unregister_netdevice_queue(t->dev, &list);
	}

	/*
	 * Do not dereference an empty slot: skip the entry if tnls_wc[0]
	 * holds no tunnel instead of oopsing on t->dev.
	 */
	t = rtnl_dereference(ip6n->tnls_wc[0]);
	if (t)
		unregister_netdevice_queue(t->dev, &list);

	unregister_netdevice_many(&list);
}
/**
 * vti6_init_net - per-netns setup: create and register the fallback device
 * @net: network namespace being initialised
 *
 * Points tnls[0] and tnls[1] at the tnls_wc and tnls_r_l tables, allocates
 * the fallback device, binds it to @net and to vti6_link_ops, runs
 * vti6_fb_tnl_dev_init() on it and registers it. After registration the
 * device name is copied into the tunnel parameters.
 *
 * Return: 0 on success; -ENOMEM if the device cannot be allocated;
 * otherwise the negative error returned by vti6_fb_tnl_dev_init() or
 * register_netdev(), after the device has been released through
 * vti6_dev_free().
 **/
static int __net_init vti6_init_net ( struct net * net )
{
struct vti6_net * ip6n = net_generic ( net , vti6_net_id ) ;
struct ip6_tnl * t = NULL ;
int err ;
ip6n - > tnls [ 0 ] = ip6n - > tnls_wc ;
ip6n - > tnls [ 1 ] = ip6n - > tnls_r_l ;
/* Preset for the allocation failure path below. */
err = - ENOMEM ;
ip6n - > fb_tnl_dev = alloc_netdev ( sizeof ( struct ip6_tnl ) , " ip6_vti0 " ,
net: set name_assign_type in alloc_netdev()
Extend alloc_netdev{,_mq{,s}}() to take name_assign_type as argument, and convert
all users to pass NET_NAME_UNKNOWN.
Coccinelle patch:
@@
expression sizeof_priv, name, setup, txqs, rxqs, count;
@@
(
-alloc_netdev_mqs(sizeof_priv, name, setup, txqs, rxqs)
+alloc_netdev_mqs(sizeof_priv, name, NET_NAME_UNKNOWN, setup, txqs, rxqs)
|
-alloc_netdev_mq(sizeof_priv, name, setup, count)
+alloc_netdev_mq(sizeof_priv, name, NET_NAME_UNKNOWN, setup, count)
|
-alloc_netdev(sizeof_priv, name, setup)
+alloc_netdev(sizeof_priv, name, NET_NAME_UNKNOWN, setup)
)
v9: move comments here from the wrong commit
Signed-off-by: Tom Gundersen <teg@jklm.no>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-14 18:37:24 +04:00
NET_NAME_UNKNOWN , vti6_dev_setup ) ;
2013-08-19 10:07:34 +04:00
if ( ! ip6n - > fb_tnl_dev )
goto err_alloc_dev ;
dev_net_set ( ip6n - > fb_tnl_dev , net ) ;
ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic
Now the vti_link_ops do not point the .dellink, for fb tunnel device
(ip_vti0), the net_device will be removed as the default .dellink is
unregister_netdevice_queue,but the tunnel still in the tunnel list,
then if we add a new vti tunnel, in ip_tunnel_find():
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == t->parms.link &&
==> type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
}
the panic will happen, cause dev of ip_tunnel *t is null:
[ 3835.072977] IP: [<ffffffffa04103fd>] ip_tunnel_find+0x9d/0xc0 [ip_tunnel]
[ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0
[ 3835.073008] Oops: 0000 [#1] SMP
.....
[ 3835.073008] Stack:
[ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0
[ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858
[ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000
[ 3835.073008] Call Trace:
[ 3835.073008] [<ffffffffa0411924>] ip_tunnel_newlink+0x64/0x160 [ip_tunnel]
[ 3835.073008] [<ffffffffa040b2e3>] vti_newlink+0x43/0x70 [ip_vti]
[ 3835.073008] [<ffffffff8150d4da>] rtnl_newlink+0x4fa/0x5f0
[ 3835.073008] [<ffffffff812f68bb>] ? nla_strlcpy+0x5b/0x70
[ 3835.073008] [<ffffffff81508fb0>] ? rtnl_link_ops_get+0x40/0x60
[ 3835.073008] [<ffffffff8150d11f>] ? rtnl_newlink+0x13f/0x5f0
[ 3835.073008] [<ffffffff81509cf4>] rtnetlink_rcv_msg+0xa4/0x270
[ 3835.073008] [<ffffffff8126adf5>] ? sock_has_perm+0x75/0x90
[ 3835.073008] [<ffffffff81509c50>] ? rtnetlink_rcv+0x30/0x30
[ 3835.073008] [<ffffffff81529e39>] netlink_rcv_skb+0xa9/0xc0
[ 3835.073008] [<ffffffff81509c48>] rtnetlink_rcv+0x28/0x30
....
modprobe ip_vti
ip link del ip_vti0 type vti
ip link add ip_vti0 type vti
rmmod ip_vti
do that one or more times, kernel will panic.
fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in
which we skip the unregister of fb tunnel device. do the same on ip6_vti.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-11-23 10:04:11 +03:00
/* Attach the link ops so the fallback device is handled by vti6_dellink(). */
ip6n - > fb_tnl_dev - > rtnl_link_ops = & vti6_link_ops ;
2013-08-19 10:07:34 +04:00
err = vti6_fb_tnl_dev_init ( ip6n - > fb_tnl_dev ) ;
if ( err < 0 )
goto err_register ;
err = register_netdev ( ip6n - > fb_tnl_dev ) ;
if ( err < 0 )
goto err_register ;
t = netdev_priv ( ip6n - > fb_tnl_dev ) ;
/* Record the name the device ended up with in its tunnel parameters. */
strcpy ( t - > parms . name , ip6n - > fb_tnl_dev - > name ) ;
return 0 ;
/* Shared by both the init and the registration failure. */
err_register :
vti6_dev_free ( ip6n - > fb_tnl_dev ) ;
err_alloc_dev :
return err ;
}
/**
 * vti6_exit_net - per-netns teardown
 * @net: network namespace going away
 *
 * Destroys all vti6 tunnels of @net while holding the RTNL lock.
 **/
static void __net_exit vti6_exit_net(struct net *net)
{
	rtnl_lock();
	vti6_destroy_tunnels(net_generic(net, vti6_net_id));
	rtnl_unlock();
}
/*
 * Per-netns hooks: vti6_init_net() / vti6_exit_net() run on namespace
 * creation and teardown, with a struct vti6_net of private state
 * identified by vti6_net_id.
 */
static struct pernet_operations vti6_net_ops = {
	.init	= vti6_init_net,
	.exit	= vti6_exit_net,
	.id	= &vti6_net_id,
	.size	= sizeof(struct vti6_net),
};
2014-03-14 10:28:08 +04:00
/* xfrm6 handlers registered for IPPROTO_ESP by vti6_tunnel_init(). */
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
	.handler	= vti6_rcv,
	.cb_handler	= vti6_rcv_cb,
	.err_handler	= vti6_err,
	.priority	= 100,
};
/* xfrm6 handlers registered for IPPROTO_AH by vti6_tunnel_init(). */
static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
	.handler	= vti6_rcv,
	.cb_handler	= vti6_rcv_cb,
	.err_handler	= vti6_err,
	.priority	= 100,
};
/* xfrm6 handlers registered for IPPROTO_COMP by vti6_tunnel_init(). */
static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
	.handler	= vti6_rcv,
	.cb_handler	= vti6_rcv_cb,
	.err_handler	= vti6_err,
	.priority	= 100,
};
2016-09-30 12:11:07 +03:00
static bool is_vti6_tunnel ( const struct net_device * dev )
{
return dev - > netdev_ops = = & vti6_netdev_ops ;
}
static int vti6_device_event ( struct notifier_block * unused ,
unsigned long event , void * ptr )
{
struct net_device * dev = netdev_notifier_info_to_dev ( ptr ) ;
struct ip6_tnl * t = netdev_priv ( dev ) ;
if ( ! is_vti6_tunnel ( dev ) )
return NOTIFY_DONE ;
switch ( event ) {
case NETDEV_DOWN :
if ( ! net_eq ( t - > net , dev_net ( dev ) ) )
xfrm_garbage_collect ( t - > net ) ;
break ;
}
return NOTIFY_DONE ;
}
/* Netdevice notifier that routes device events to vti6_device_event(). */
static struct notifier_block vti6_notifier_block __read_mostly = {
	.notifier_call	= vti6_device_event,
};
2013-08-19 10:07:34 +04:00
/**
 * vti6_tunnel_init - register protocol and reserve needed resources
 *
 * Registers, in this order: the netdevice notifier, the per-netns
 * operations, the xfrm6 handlers for ESP, AH and IPCOMP, and the
 * rtnetlink link operations. When a step fails, the steps already done
 * are undone in reverse order and an error naming the failing stage
 * (taken from @msg) is logged.
 *
 * Return: 0 on success, otherwise the negative error of the failing
 * registration.
 **/
static int __init vti6_tunnel_init ( void )
{
2014-05-10 01:43:41 +04:00
/* msg names the stage being registered, for the error message below. */
const char * msg ;
int err ;
2013-08-19 10:07:34 +04:00
2016-09-30 12:11:07 +03:00
/* NOTE(review): the return value of this registration is not checked. */
register_netdevice_notifier ( & vti6_notifier_block ) ;
2014-05-10 01:43:41 +04:00
msg = " tunnel device " ;
2013-08-19 10:07:34 +04:00
err = register_pernet_device ( & vti6_net_ops ) ;
if ( err < 0 )
2014-05-10 01:43:41 +04:00
goto pernet_dev_failed ;
2013-08-19 10:07:34 +04:00
2014-05-10 01:43:41 +04:00
msg = " tunnel protocols " ;
2014-03-14 10:28:08 +04:00
err = xfrm6_protocol_register ( & vti_esp6_protocol , IPPROTO_ESP ) ;
2014-05-10 01:43:41 +04:00
if ( err < 0 )
goto xfrm_proto_esp_failed ;
2014-03-14 10:28:08 +04:00
err = xfrm6_protocol_register ( & vti_ah6_protocol , IPPROTO_AH ) ;
2014-05-10 01:43:41 +04:00
if ( err < 0 )
goto xfrm_proto_ah_failed ;
2014-03-14 10:28:08 +04:00
err = xfrm6_protocol_register ( & vti_ipcomp6_protocol , IPPROTO_COMP ) ;
2014-05-10 01:43:41 +04:00
if ( err < 0 )
goto xfrm_proto_comp_failed ;
2014-03-14 10:28:08 +04:00
2014-05-10 01:43:41 +04:00
msg = " netlink interface " ;
2013-08-19 10:07:34 +04:00
err = rtnl_link_register ( & vti6_link_ops ) ;
if ( err < 0 )
goto rtnl_link_failed ;
return 0 ;
/*
 * Error unwinding: each label undoes the registrations that succeeded
 * before the failing one, falling through in reverse order.
 */
rtnl_link_failed :
2014-03-14 10:28:08 +04:00
xfrm6_protocol_deregister ( & vti_ipcomp6_protocol , IPPROTO_COMP ) ;
2014-05-10 01:43:41 +04:00
xfrm_proto_comp_failed :
2014-03-14 10:28:08 +04:00
xfrm6_protocol_deregister ( & vti_ah6_protocol , IPPROTO_AH ) ;
2014-05-10 01:43:41 +04:00
xfrm_proto_ah_failed :
2014-03-14 10:28:08 +04:00
xfrm6_protocol_deregister ( & vti_esp6_protocol , IPPROTO_ESP ) ;
2014-05-10 01:43:41 +04:00
xfrm_proto_esp_failed :
2013-08-19 10:07:34 +04:00
unregister_pernet_device ( & vti6_net_ops ) ;
2014-05-10 01:43:41 +04:00
pernet_dev_failed :
2016-09-30 12:11:07 +03:00
unregister_netdevice_notifier ( & vti6_notifier_block ) ;
2014-05-10 01:43:41 +04:00
pr_err ( " vti6 init: failed to register %s \n " , msg ) ;
2013-08-19 10:07:34 +04:00
return err ;
}
/**
 * vti6_tunnel_cleanup - free resources and unregister protocol
 *
 * Undoes vti6_tunnel_init() in reverse order of registration: rtnetlink
 * link operations, the IPCOMP, AH and ESP xfrm6 handlers, the per-netns
 * operations and finally the netdevice notifier.
 **/
static void __exit vti6_tunnel_cleanup(void)
{
	rtnl_link_unregister(&vti6_link_ops);

	xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);

	unregister_pernet_device(&vti6_net_ops);
	unregister_netdevice_notifier(&vti6_notifier_block);
}
/* Module entry and exit points. */
module_init ( vti6_tunnel_init ) ;
module_exit ( vti6_tunnel_cleanup ) ;
/*
 * Module metadata: licence, aliases for the rtnetlink link kind and for
 * the fallback device name, author and description.
 */
MODULE_LICENSE ( " GPL " ) ;
MODULE_ALIAS_RTNL_LINK ( " vti6 " ) ;
MODULE_ALIAS_NETDEV ( " ip6_vti0 " ) ;
MODULE_AUTHOR ( " Steffen Klassert " ) ;
MODULE_DESCRIPTION ( " IPv6 virtual tunnel interface " ) ;