2019-05-27 09:55:01 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2012-07-17 13:44:54 +04:00
/*
* Linux NET3 : IP / IP protocol decoder modified to support
* virtual tunnel interface
*
* Authors :
* Saurabh Mohan ( saurabh . mohan @ vyatta . com ) 05 / 07 / 2012
*/
/*
This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c.
For comments, look at net/ipv4/ip_gre.c --ANK
*/
# include <linux/capability.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/uaccess.h>
# include <linux/skbuff.h>
# include <linux/netdevice.h>
# include <linux/in.h>
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/if_arp.h>
# include <linux/init.h>
# include <linux/netfilter_ipv4.h>
# include <linux/if_ether.h>
2014-02-21 11:41:10 +04:00
# include <linux/icmpv6.h>
2012-07-17 13:44:54 +04:00
# include <net/sock.h>
# include <net/ip.h>
# include <net/icmp.h>
2013-03-25 18:49:35 +04:00
# include <net/ip_tunnels.h>
2012-07-17 13:44:54 +04:00
# include <net/inet_ecn.h>
# include <net/xfrm.h>
# include <net/net_namespace.h>
# include <net/netns/generic.h>
/* Forward declaration; the initializer appears further down in this file. */
static struct rtnl_link_ops vti_link_ops __read_mostly ;
netns: make struct pernet_operations::id unsigned int
Make struct pernet_operations::id unsigned.
There are 2 reasons to do so:
1)
This field is really an index into a zero-based array and
thus is an unsigned entity. Using a negative value is an out-of-bounds
access by definition.
2)
On x86_64 unsigned 32-bit data which are mixed with pointers
via array indexing or offsets added or subtracted to pointers
are preferred to signed 32-bit data.
"int" being used as an array index needs to be sign-extended
to 64-bit before being used.
void f(long *p, int i)
{
g(p[i]);
}
roughly translates to
movsx rsi, esi
mov rdi, [rsi+...]
call g
MOVSX is 3 byte instruction which isn't necessary if the variable is
unsigned because x86_64 is zero extending by default.
Now, there is net_generic() function which, you guessed it right, uses
"int" as an array index:
static inline void *net_generic(const struct net *net, int id)
{
...
ptr = ng->ptr[id - 1];
...
}
And this function is used a lot, so those sign extensions add up.
Patch snipes ~1730 bytes on allyesconfig kernel (without all junk
messing with code generation):
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
Unfortunately some functions actually grow bigger.
This is a seemingly random artefact of code generation with the register
allocator being used differently. gcc decides that some variable
needs to live in the new r8+ registers and every access now requires a REX
prefix. Or it is shifted into r12, so the [r12+0] addressing mode has to be
used, which is longer than [r8].
However, overall balance is in negative direction:
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
function old new delta
nfsd4_lock 3886 3959 +73
tipc_link_build_proto_msg 1096 1140 +44
mac80211_hwsim_new_radio 2776 2808 +32
tipc_mon_rcv 1032 1058 +26
svcauth_gss_legacy_init 1413 1429 +16
tipc_bcbase_select_primary 379 392 +13
nfsd4_exchange_id 1247 1260 +13
nfsd4_setclientid_confirm 782 793 +11
...
put_client_renew_locked 494 480 -14
ip_set_sockfn_get 730 716 -14
geneve_sock_add 829 813 -16
nfsd4_sequence_done 721 703 -18
nlmclnt_lookup_host 708 686 -22
nfsd4_lockt 1085 1063 -22
nfs_get_client 1077 1050 -27
tcf_bpf_init 1106 1076 -30
nfsd4_encode_fattr 5997 5930 -67
Total: Before=154856051, After=154854321, chg -0.00%
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 04:58:21 +03:00
/* Per-netns generic id: published through vti_net_ops.id and used as the
 * key for net_generic() lookups in this file.
 */
static unsigned int vti_net_id __read_mostly ;
2012-07-17 13:44:54 +04:00
/* Forward declaration: vti_netdev_ops refers to this before its definition. */
static int vti_tunnel_init ( struct net_device * dev ) ;
2014-02-21 11:41:10 +04:00
/*
 * Find the VTI tunnel that matches the outer IPv4 header of @skb and, if
 * there is one, pass the packet on to xfrm_input().
 *
 * Returns -EINVAL when no tunnel matches, 0 after freeing a packet that
 * fails the inbound policy check, otherwise whatever xfrm_input() returns.
 */
static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
		     int encap_type, bool update_skb_dev)
{
	const struct iphdr *outer = ip_hdr(skb);
	struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id);
	struct ip_tunnel *t;

	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
			     outer->saddr, outer->daddr, 0);
	if (!t)
		return -EINVAL;

	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		kfree_skb(skb);
		return 0;
	}

	/* Stash the tunnel in the skb control block; vti_rcv_cb() reads it. */
	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = t;

	if (update_skb_dev)
		skb->dev = t->dev;

	return xfrm_input(skb, nexthdr, spi, encap_type);
}
2019-03-23 17:43:02 +03:00
/* xfrm4 input_handler hook: vti_input() without rewriting skb->dev. */
static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
			   int encap_type)
{
	const bool update_skb_dev = false;

	return vti_input(skb, nexthdr, spi, encap_type, update_skb_dev);
}
2019-03-23 17:43:02 +03:00
/*
 * Common receive path: record the address family and the offset of the
 * destination address in the skb control block, then run vti_input() with
 * the protocol number taken from the IPv4 header and encap_type 0.
 */
static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev)
{
	int nexthdr;

	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
	XFRM_SPI_SKB_CB(skb)->family = AF_INET;

	nexthdr = ip_hdr(skb)->protocol;

	return vti_input(skb, nexthdr, spi, 0, update_skb_dev);
}
2019-03-23 17:43:02 +03:00
static int vti_rcv_proto ( struct sk_buff * skb )
2019-01-07 05:31:20 +03:00
{
2019-03-23 17:43:02 +03:00
return vti_rcv ( skb , 0 , false ) ;
}
2019-01-07 05:31:20 +03:00
2019-03-23 17:43:02 +03:00
static int vti_rcv_tunnel ( struct sk_buff * skb )
{
return vti_rcv ( skb , ip_hdr ( skb ) - > saddr , true ) ;
2019-01-07 05:31:20 +03:00
}
2014-02-21 11:41:10 +04:00
static int vti_rcv_cb ( struct sk_buff * skb , int err )
{
unsigned short family ;
struct net_device * dev ;
struct pcpu_sw_netstats * tstats ;
struct xfrm_state * x ;
2019-03-29 23:16:31 +03:00
const struct xfrm_mode * inner_mode ;
2014-02-21 11:41:10 +04:00
struct ip_tunnel * tunnel = XFRM_TUNNEL_SKB_CB ( skb ) - > tunnel . ip4 ;
2015-05-27 17:16:54 +03:00
u32 orig_mark = skb - > mark ;
int ret ;
2014-02-21 11:41:10 +04:00
if ( ! tunnel )
2012-07-17 13:44:54 +04:00
return 1 ;
2014-02-21 11:41:10 +04:00
dev = tunnel - > dev ;
if ( err ) {
dev - > stats . rx_errors + + ;
dev - > stats . rx_dropped + + ;
return 0 ;
2012-07-17 13:44:54 +04:00
}
2014-02-21 11:41:10 +04:00
x = xfrm_input_state ( skb ) ;
2016-09-07 21:40:38 +03:00
2019-03-29 23:16:32 +03:00
inner_mode = & x - > inner_mode ;
2016-09-07 21:40:38 +03:00
if ( x - > sel . family = = AF_UNSPEC ) {
inner_mode = xfrm_ip2inner_mode ( x , XFRM_MODE_SKB_CB ( skb ) - > protocol ) ;
if ( inner_mode = = NULL ) {
XFRM_INC_STATS ( dev_net ( skb - > dev ) ,
LINUX_MIB_XFRMINSTATEMODEERROR ) ;
return - EINVAL ;
}
}
2019-03-29 23:16:23 +03:00
family = inner_mode - > family ;
2014-02-21 11:41:10 +04:00
2015-05-27 17:16:54 +03:00
skb - > mark = be32_to_cpu ( tunnel - > parms . i_key ) ;
ret = xfrm_policy_check ( NULL , XFRM_POLICY_IN , skb , family ) ;
skb - > mark = orig_mark ;
if ( ! ret )
2014-02-21 11:41:10 +04:00
return - EPERM ;
skb_scrub_packet ( skb , ! net_eq ( tunnel - > net , dev_net ( skb - > dev ) ) ) ;
skb - > dev = dev ;
tstats = this_cpu_ptr ( dev - > tstats ) ;
u64_stats_update_begin ( & tstats - > syncp ) ;
tstats - > rx_packets + + ;
tstats - > rx_bytes + = skb - > len ;
u64_stats_update_end ( & tstats - > syncp ) ;
return 0 ;
2012-07-17 13:44:54 +04:00
}
2014-02-21 11:41:11 +04:00
/*
 * Decide whether xfrm state @x is usable for a tunnel with endpoints
 * @dst / @src.  The tunnel is not functional without a transform, and the
 * transform must be an IPv4 tunnel-mode one.  With a zero @dst only the
 * source address is compared; otherwise both addresses are checked.
 */
static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
{
	xfrm_address_t *remote = (xfrm_address_t *)&dst;
	xfrm_address_t *local = (xfrm_address_t *)&src;

	if (!x)
		return false;
	if (x->props.mode != XFRM_MODE_TUNNEL)
		return false;
	if (x->props.family != AF_INET)
		return false;

	if (!dst)
		return xfrm_addr_equal(local, &x->props.saddr, AF_INET);

	return xfrm_state_addr_check(x, remote, local, AF_INET);
}
2014-02-21 11:41:10 +04:00
static netdev_tx_t vti_xmit ( struct sk_buff * skb , struct net_device * dev ,
struct flowi * fl )
2012-07-17 13:44:54 +04:00
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2014-02-21 11:41:11 +04:00
struct ip_tunnel_parm * parms = & tunnel - > parms ;
2014-02-21 11:41:10 +04:00
struct dst_entry * dst = skb_dst ( skb ) ;
2012-07-17 13:44:54 +04:00
struct net_device * tdev ; /* Device to other host */
2017-09-26 15:14:29 +03:00
int pkt_len = skb - > len ;
2013-07-21 06:46:25 +04:00
int err ;
2016-03-16 12:17:37 +03:00
int mtu ;
2012-07-17 13:44:54 +04:00
2014-02-21 11:41:10 +04:00
if ( ! dst ) {
2014-02-21 11:41:10 +04:00
dev - > stats . tx_carrier_errors + + ;
goto tx_error_icmp ;
}
2012-07-17 13:44:54 +04:00
2014-02-21 11:41:10 +04:00
dst_hold ( dst ) ;
2014-02-21 11:41:10 +04:00
dst = xfrm_lookup ( tunnel - > net , dst , fl , NULL , 0 ) ;
2014-02-21 11:41:10 +04:00
if ( IS_ERR ( dst ) ) {
2012-07-17 13:44:54 +04:00
dev - > stats . tx_carrier_errors + + ;
goto tx_error_icmp ;
}
2014-02-21 11:41:10 +04:00
2014-02-21 11:41:11 +04:00
if ( ! vti_state_check ( dst - > xfrm , parms - > iph . daddr , parms - > iph . saddr ) ) {
2012-07-17 13:44:54 +04:00
dev - > stats . tx_carrier_errors + + ;
2014-02-21 11:41:10 +04:00
dst_release ( dst ) ;
2012-07-17 13:44:54 +04:00
goto tx_error_icmp ;
}
2014-02-21 11:41:11 +04:00
2014-02-21 11:41:10 +04:00
tdev = dst - > dev ;
2012-07-17 13:44:54 +04:00
if ( tdev = = dev ) {
2014-02-21 11:41:10 +04:00
dst_release ( dst ) ;
2012-07-17 13:44:54 +04:00
dev - > stats . collisions + + ;
goto tx_error ;
}
2016-03-16 12:17:37 +03:00
mtu = dst_mtu ( dst ) ;
if ( skb - > len > mtu ) {
2018-01-25 21:03:03 +03:00
skb_dst_update_pmtu ( skb , mtu ) ;
2016-03-16 12:17:37 +03:00
if ( skb - > protocol = = htons ( ETH_P_IP ) ) {
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_FRAG_NEEDED ,
htonl ( mtu ) ) ;
} else {
if ( mtu < IPV6_MIN_MTU )
mtu = IPV6_MIN_MTU ;
icmpv6_send ( skb , ICMPV6_PKT_TOOBIG , 0 , mtu ) ;
}
dst_release ( dst ) ;
goto tx_error ;
}
2014-02-21 11:41:10 +04:00
skb_scrub_packet ( skb , ! net_eq ( tunnel - > net , dev_net ( dev ) ) ) ;
2014-02-21 11:41:10 +04:00
skb_dst_set ( skb , dst ) ;
2012-07-17 13:44:54 +04:00
skb - > dev = skb_dst ( skb ) - > dev ;
2015-10-08 00:48:35 +03:00
err = dst_output ( tunnel - > net , skb - > sk , skb ) ;
2013-07-21 06:46:25 +04:00
if ( net_xmit_eval ( err ) = = 0 )
2017-09-26 15:14:29 +03:00
err = pkt_len ;
2015-12-25 01:34:54 +03:00
iptunnel_xmit_stats ( dev , err ) ;
2012-07-17 13:44:54 +04:00
return NETDEV_TX_OK ;
tx_error_icmp :
dst_link_failure ( skb ) ;
tx_error :
dev - > stats . tx_errors + + ;
2014-01-19 06:27:49 +04:00
kfree_skb ( skb ) ;
2012-07-17 13:44:54 +04:00
return NETDEV_TX_OK ;
}
2014-02-21 11:41:10 +04:00
/* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function .
*/
static netdev_tx_t vti_tunnel_xmit ( struct sk_buff * skb , struct net_device * dev )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
struct flowi fl ;
2018-12-31 01:24:36 +03:00
if ( ! pskb_inet_may_pull ( skb ) )
goto tx_err ;
2014-02-21 11:41:10 +04:00
memset ( & fl , 0 , sizeof ( fl ) ) ;
switch ( skb - > protocol ) {
case htons ( ETH_P_IP ) :
xfrm_decode_session ( skb , & fl , AF_INET ) ;
memset ( IPCB ( skb ) , 0 , sizeof ( * IPCB ( skb ) ) ) ;
break ;
case htons ( ETH_P_IPV6 ) :
xfrm_decode_session ( skb , & fl , AF_INET6 ) ;
memset ( IP6CB ( skb ) , 0 , sizeof ( * IP6CB ( skb ) ) ) ;
break ;
default :
2018-12-31 01:24:36 +03:00
goto tx_err ;
2014-02-21 11:41:10 +04:00
}
2015-05-27 17:16:43 +03:00
/* override mark with tunnel output key */
fl . flowi_mark = be32_to_cpu ( tunnel - > parms . o_key ) ;
2014-02-21 11:41:10 +04:00
return vti_xmit ( skb , dev , & fl ) ;
2018-12-31 01:24:36 +03:00
tx_err :
dev - > stats . tx_errors + + ;
kfree_skb ( skb ) ;
return NETDEV_TX_OK ;
2014-02-21 11:41:10 +04:00
}
2014-02-21 11:41:10 +04:00
/*
 * ICMP error handler shared by the xfrm4 protocol tables and ipip_handler.
 *
 * Only ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED and ICMP_REDIRECT are acted on,
 * and only when the embedded packet is ESP, AH or IPComp and a matching
 * xfrm state exists.  Returns -1 when no tunnel matches the embedded
 * header, 0 in every other case.
 */
static int vti4_err(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
	int protocol = iph->protocol;
	const struct icmphdr *icmph;
	struct ip_tunnel *tunnel;
	struct xfrm_state *x;
	const void *payload;
	__be32 spi;
	__u32 mark;

	/* Addresses are swapped relative to the receive-path lookup. */
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
				  iph->daddr, iph->saddr, 0);
	if (!tunnel)
		return -1;

	mark = be32_to_cpu(tunnel->parms.o_key);

	/* The IPsec header follows the embedded IPv4 header. */
	payload = skb->data + (iph->ihl << 2);

	if (protocol == IPPROTO_ESP)
		spi = ((const struct ip_esp_hdr *)payload)->spi;
	else if (protocol == IPPROTO_AH)
		spi = ((const struct ip_auth_hdr *)payload)->spi;
	else if (protocol == IPPROTO_COMP)
		spi = htonl(ntohs(((const struct ip_comp_hdr *)payload)->cpi));
	else
		return 0;

	icmph = icmp_hdr(skb);
	if (icmph->type == ICMP_DEST_UNREACH) {
		if (icmph->code != ICMP_FRAG_NEEDED)
			return 0;
	} else if (icmph->type != ICMP_REDIRECT) {
		return 0;
	}

	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
			      spi, protocol, AF_INET);
	if (!x)
		return 0;

	if (icmph->type == ICMP_DEST_UNREACH)
		ipv4_update_pmtu(skb, net, info, 0, protocol);
	else
		ipv4_redirect(skb, net, 0, protocol);

	xfrm_state_put(x);

	return 0;
}
2012-07-17 13:44:54 +04:00
static int
vti_tunnel_ioctl ( struct net_device * dev , struct ifreq * ifr , int cmd )
{
int err = 0 ;
struct ip_tunnel_parm p ;
2013-07-21 06:46:25 +04:00
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
return - EFAULT ;
2012-07-17 13:44:54 +04:00
2013-07-21 06:46:25 +04:00
if ( cmd = = SIOCADDTUNNEL | | cmd = = SIOCCHGTUNNEL ) {
2012-07-17 13:44:54 +04:00
if ( p . iph . version ! = 4 | | p . iph . protocol ! = IPPROTO_IPIP | |
p . iph . ihl ! = 5 )
2013-07-21 06:46:25 +04:00
return - EINVAL ;
}
2012-07-17 13:44:54 +04:00
2014-06-08 02:06:25 +04:00
if ( ! ( p . i_flags & GRE_KEY ) )
p . i_key = 0 ;
if ( ! ( p . o_flags & GRE_KEY ) )
p . o_key = 0 ;
p . i_flags = VTI_ISVTI ;
2013-07-21 06:46:25 +04:00
err = ip_tunnel_ioctl ( dev , & p , cmd ) ;
if ( err )
return err ;
2012-07-17 13:44:54 +04:00
2013-07-21 06:46:25 +04:00
if ( cmd ! = SIOCDELTUNNEL ) {
2014-02-21 11:41:10 +04:00
p . i_flags | = GRE_KEY ;
2013-07-21 06:46:25 +04:00
p . o_flags | = GRE_KEY ;
2012-07-17 13:44:54 +04:00
}
2013-07-21 06:46:25 +04:00
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
return - EFAULT ;
2012-07-17 13:44:54 +04:00
return 0 ;
}
static const struct net_device_ops vti_netdev_ops = {
. ndo_init = vti_tunnel_init ,
2013-07-21 06:46:25 +04:00
. ndo_uninit = ip_tunnel_uninit ,
2012-07-17 13:44:54 +04:00
. ndo_start_xmit = vti_tunnel_xmit ,
. ndo_do_ioctl = vti_tunnel_ioctl ,
2013-07-21 06:46:25 +04:00
. ndo_change_mtu = ip_tunnel_change_mtu ,
2013-03-25 18:50:00 +04:00
. ndo_get_stats64 = ip_tunnel_get_stats64 ,
2015-04-02 18:07:02 +03:00
. ndo_get_iflink = ip_tunnel_get_iflink ,
2012-07-17 13:44:54 +04:00
} ;
2013-07-21 06:46:25 +04:00
static void vti_tunnel_setup ( struct net_device * dev )
2012-07-17 13:44:54 +04:00
{
2013-07-21 06:46:25 +04:00
dev - > netdev_ops = & vti_netdev_ops ;
2014-04-11 17:51:19 +04:00
dev - > type = ARPHRD_TUNNEL ;
2013-07-21 06:46:25 +04:00
ip_tunnel_setup ( dev , vti_net_id ) ;
2012-07-17 13:44:54 +04:00
}
2013-07-21 06:46:25 +04:00
static int vti_tunnel_init ( struct net_device * dev )
2012-07-17 13:44:54 +04:00
{
2013-07-21 06:46:25 +04:00
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
struct iphdr * iph = & tunnel - > parms . iph ;
memcpy ( dev - > dev_addr , & iph - > saddr , 4 ) ;
memcpy ( dev - > broadcast , & iph - > daddr , 4 ) ;
2012-07-17 13:44:54 +04:00
dev - > flags = IFF_NOARP ;
dev - > addr_len = 4 ;
dev - > features | = NETIF_F_LLTX ;
2014-10-06 05:38:35 +04:00
netif_keep_dst ( dev ) ;
2012-07-17 13:44:54 +04:00
2013-07-21 06:46:25 +04:00
return ip_tunnel_init ( dev ) ;
2012-07-17 13:44:54 +04:00
}
2013-07-21 06:46:25 +04:00
/* Give the fallback tunnel device a minimal IPv4/IPIP header template. */
static void __net_init vti_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *fb = netdev_priv(dev);
	struct iphdr *tmpl = &fb->parms.iph;

	tmpl->version = 4;
	tmpl->ihl = 5;
	tmpl->protocol = IPPROTO_IPIP;
}
2014-02-21 11:41:10 +04:00
/* xfrm4 handler table registered for IPPROTO_ESP in vti_init(). */
static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
	.handler	= vti_rcv_proto,
	.input_handler	= vti_input_proto,
	.cb_handler	= vti_rcv_cb,
	.err_handler	= vti4_err,
	.priority	= 100,
};
/* xfrm4 handler table registered for IPPROTO_AH in vti_init(). */
static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
	.handler	= vti_rcv_proto,
	.input_handler	= vti_input_proto,
	.cb_handler	= vti_rcv_cb,
	.err_handler	= vti4_err,
	.priority	= 100,
};
/* xfrm4 handler table registered for IPPROTO_COMP in vti_init(). */
static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
	.handler	= vti_rcv_proto,
	.input_handler	= vti_input_proto,
	.cb_handler	= vti_rcv_cb,
	.err_handler	= vti4_err,
	.priority	= 100,
};
2019-01-07 05:31:20 +03:00
/* xfrm tunnel handler registered for AF_INET in vti_init(). */
static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	= vti_rcv_tunnel,
	.err_handler	= vti4_err,
	.priority	= 0,
};
2012-07-17 13:44:54 +04:00
/*
 * Per-netns init: set up the generic tunnel state and, when a fallback
 * device was created, give it its header template.
 * Returns 0 on success or the error from ip_tunnel_init_net().
 */
static int __net_init vti_init_net(struct net *net)
{
	struct ip_tunnel_net *itn;
	int err;

	/* NOTE(review): the device-name literal is reproduced exactly as found
	 * in this copy of the file, padding included -- confirm.
	 */
	err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, " ip_vti0 ");
	if (err)
		return err;

	itn = net_generic(net, vti_net_id);
	if (itn->fb_tunnel_dev)
		vti_fb_tunnel_init(itn->fb_tunnel_dev);

	return 0;
}
2017-09-20 02:27:09 +03:00
/* Per-netns batched exit: delegate teardown to ip_tunnel_delete_nets(). */
static void __net_exit vti_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
}
static struct pernet_operations vti_net_ops = {
. init = vti_init_net ,
2017-09-20 02:27:09 +03:00
. exit_batch = vti_exit_batch_net ,
2012-07-17 13:44:54 +04:00
. id = & vti_net_id ,
2013-07-21 06:46:25 +04:00
. size = sizeof ( struct ip_tunnel_net ) ,
2012-07-17 13:44:54 +04:00
} ;
2017-06-26 00:56:01 +03:00
/* rtnl_link_ops .validate hook: accepts every request and returns 0. */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
			       struct netlink_ext_ack *extack)
{
	return 0;
}
/*
 * Translate IFLA_VTI_* netlink attributes into tunnel parameters.
 *
 * @parms is zeroed first and its protocol set to IPPROTO_IPIP.  When @data
 * is NULL nothing else is filled in.  @fwmark is written only when
 * IFLA_VTI_FWMARK is present, so the caller's initial value survives
 * otherwise.
 */
static void vti_netlink_parms(struct nlattr *data[],
			      struct ip_tunnel_parm *parms,
			      __u32 *fwmark)
{
	const struct nlattr *attr;

	memset(parms, 0, sizeof(*parms));
	parms->iph.protocol = IPPROTO_IPIP;

	if (!data)
		return;

	parms->i_flags = VTI_ISVTI;

	attr = data[IFLA_VTI_LINK];
	if (attr)
		parms->link = nla_get_u32(attr);

	attr = data[IFLA_VTI_IKEY];
	if (attr)
		parms->i_key = nla_get_be32(attr);

	attr = data[IFLA_VTI_OKEY];
	if (attr)
		parms->o_key = nla_get_be32(attr);

	attr = data[IFLA_VTI_LOCAL];
	if (attr)
		parms->iph.saddr = nla_get_in_addr(attr);

	attr = data[IFLA_VTI_REMOTE];
	if (attr)
		parms->iph.daddr = nla_get_in_addr(attr);

	attr = data[IFLA_VTI_FWMARK];
	if (attr)
		*fwmark = nla_get_u32(attr);
}
static int vti_newlink ( struct net * src_net , struct net_device * dev ,
2017-06-26 00:55:59 +03:00
struct nlattr * tb [ ] , struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
2012-07-17 13:44:54 +04:00
{
2013-07-21 06:46:25 +04:00
struct ip_tunnel_parm parms ;
2017-04-19 19:30:54 +03:00
__u32 fwmark = 0 ;
2012-07-17 13:44:54 +04:00
2017-04-19 19:30:54 +03:00
vti_netlink_parms ( data , & parms , & fwmark ) ;
return ip_tunnel_newlink ( dev , tb , & parms , fwmark ) ;
2012-07-17 13:44:54 +04:00
}
static int vti_changelink ( struct net_device * dev , struct nlattr * tb [ ] ,
2017-06-26 00:56:00 +03:00
struct nlattr * data [ ] ,
struct netlink_ext_ack * extack )
2012-07-17 13:44:54 +04:00
{
2017-04-19 19:30:54 +03:00
struct ip_tunnel * t = netdev_priv ( dev ) ;
__u32 fwmark = t - > fwmark ;
2012-07-17 13:44:54 +04:00
struct ip_tunnel_parm p ;
2017-04-19 19:30:54 +03:00
vti_netlink_parms ( data , & p , & fwmark ) ;
return ip_tunnel_changelink ( dev , tb , & p , fwmark ) ;
2012-07-17 13:44:54 +04:00
}
/*
 * rtnl_link_ops .get_size hook: room for the six attributes emitted by
 * vti_fill_info(), each carrying a 4-byte payload:
 * IFLA_VTI_LINK, IFLA_VTI_IKEY, IFLA_VTI_OKEY, IFLA_VTI_LOCAL,
 * IFLA_VTI_REMOTE and IFLA_VTI_FWMARK.
 */
static size_t vti_get_size(const struct net_device *dev)
{
	return 6 * nla_total_size(4);
}
static int vti_fill_info ( struct sk_buff * skb , const struct net_device * dev )
{
struct ip_tunnel * t = netdev_priv ( dev ) ;
struct ip_tunnel_parm * p = & t - > parms ;
2017-05-08 12:57:13 +03:00
if ( nla_put_u32 ( skb , IFLA_VTI_LINK , p - > link ) | |
nla_put_be32 ( skb , IFLA_VTI_IKEY , p - > i_key ) | |
nla_put_be32 ( skb , IFLA_VTI_OKEY , p - > o_key ) | |
nla_put_in_addr ( skb , IFLA_VTI_LOCAL , p - > iph . saddr ) | |
nla_put_in_addr ( skb , IFLA_VTI_REMOTE , p - > iph . daddr ) | |
nla_put_u32 ( skb , IFLA_VTI_FWMARK , t - > fwmark ) )
return - EMSGSIZE ;
2012-07-17 13:44:54 +04:00
return 0 ;
}
static const struct nla_policy vti_policy [ IFLA_VTI_MAX + 1 ] = {
[ IFLA_VTI_LINK ] = { . type = NLA_U32 } ,
[ IFLA_VTI_IKEY ] = { . type = NLA_U32 } ,
[ IFLA_VTI_OKEY ] = { . type = NLA_U32 } ,
[ IFLA_VTI_LOCAL ] = { . len = FIELD_SIZEOF ( struct iphdr , saddr ) } ,
[ IFLA_VTI_REMOTE ] = { . len = FIELD_SIZEOF ( struct iphdr , daddr ) } ,
2017-04-19 19:30:54 +03:00
[ IFLA_VTI_FWMARK ] = { . type = NLA_U32 } ,
2012-07-17 13:44:54 +04:00
} ;
/*
 * rtnetlink link operations for the " vti " link kind, wiring the
 * setup/validate/newlink/changelink/get_size/fill_info hooks defined in
 * this file together with the generic ip_tunnel dellink and get_link_net
 * helpers.
 * NOTE(review): the text between .changelink and .dellink is history
 * annotation carried along in this copy of the file, not C -- it is kept
 * untouched here.
 */
static struct rtnl_link_ops vti_link_ops __read_mostly = {
. kind = " vti " ,
. maxtype = IFLA_VTI_MAX ,
. policy = vti_policy ,
. priv_size = sizeof ( struct ip_tunnel ) ,
. setup = vti_tunnel_setup ,
. validate = vti_tunnel_validate ,
. newlink = vti_newlink ,
. changelink = vti_changelink ,
ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic
Now the vti_link_ops do not point the .dellink, for fb tunnel device
(ip_vti0), the net_device will be removed as the default .dellink is
unregister_netdevice_queue,but the tunnel still in the tunnel list,
then if we add a new vti tunnel, in ip_tunnel_find():
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == t->parms.link &&
==> type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
}
the panic will happen, cause dev of ip_tunnel *t is null:
[ 3835.072977] IP: [<ffffffffa04103fd>] ip_tunnel_find+0x9d/0xc0 [ip_tunnel]
[ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0
[ 3835.073008] Oops: 0000 [#1] SMP
.....
[ 3835.073008] Stack:
[ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0
[ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858
[ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000
[ 3835.073008] Call Trace:
[ 3835.073008] [<ffffffffa0411924>] ip_tunnel_newlink+0x64/0x160 [ip_tunnel]
[ 3835.073008] [<ffffffffa040b2e3>] vti_newlink+0x43/0x70 [ip_vti]
[ 3835.073008] [<ffffffff8150d4da>] rtnl_newlink+0x4fa/0x5f0
[ 3835.073008] [<ffffffff812f68bb>] ? nla_strlcpy+0x5b/0x70
[ 3835.073008] [<ffffffff81508fb0>] ? rtnl_link_ops_get+0x40/0x60
[ 3835.073008] [<ffffffff8150d11f>] ? rtnl_newlink+0x13f/0x5f0
[ 3835.073008] [<ffffffff81509cf4>] rtnetlink_rcv_msg+0xa4/0x270
[ 3835.073008] [<ffffffff8126adf5>] ? sock_has_perm+0x75/0x90
[ 3835.073008] [<ffffffff81509c50>] ? rtnetlink_rcv+0x30/0x30
[ 3835.073008] [<ffffffff81529e39>] netlink_rcv_skb+0xa9/0xc0
[ 3835.073008] [<ffffffff81509c48>] rtnetlink_rcv+0x28/0x30
....
modprobe ip_vti
ip link del ip_vti0 type vti
ip link add ip_vti0 type vti
rmmod ip_vti
do that one or more times, kernel will panic.
fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in
which we skip the unregister of fb tunnel device. do the same on ip6_vti.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-11-23 10:04:11 +03:00
. dellink = ip_tunnel_dellink ,
2012-07-17 13:44:54 +04:00
. get_size = vti_get_size ,
. fill_info = vti_fill_info ,
2015-01-15 17:11:17 +03:00
. get_link_net = ip_tunnel_get_link_net ,
2012-07-17 13:44:54 +04:00
} ;
/*
 * Module init: register, in order, the per-netns operations, the ESP, AH
 * and IPComp xfrm4 protocol handlers, the ipip xfrm tunnel handler and the
 * rtnetlink link operations.  If a step fails, everything registered before
 * it is undone in reverse order, the failing stage is logged and the error
 * is returned.
 *
 * NOTE(review): string literals are reproduced exactly as found in this
 * copy of the file, padding included -- confirm.
 */
static int __init vti_init(void)
{
	const char *stage;
	int err;

	pr_info(" IPv4 over IPsec tunneling driver \n ");

	stage = " tunnel device ";
	err = register_pernet_device(&vti_net_ops);
	if (err < 0)
		goto fail_pernet;

	stage = " tunnel protocols ";
	err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
	if (err < 0)
		goto undo_pernet;

	err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
	if (err < 0)
		goto undo_esp;

	err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
	if (err < 0)
		goto undo_ah;

	stage = " ipip tunnel ";
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0)
		goto undo_comp;

	stage = " netlink interface ";
	err = rtnl_link_register(&vti_link_ops);
	if (err < 0)
		goto undo_tunnel;

	return err;

undo_tunnel:
	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
undo_comp:
	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
undo_ah:
	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
undo_esp:
	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
undo_pernet:
	unregister_pernet_device(&vti_net_ops);
fail_pernet:
	pr_err(" vti init: failed to register %s \n ", stage);

	return err;
}
/* Module exit: undo the registrations of vti_init() in reverse order. */
static void __exit vti_fini(void)
{
	rtnl_link_unregister(&vti_link_ops);

	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);

	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);

	unregister_pernet_device(&vti_net_ops);
}
/* Module entry/exit points, license and the aliases declared for the
 * rtnl link kind and the fallback device name.
 */
module_init ( vti_init ) ;
module_exit ( vti_fini ) ;
MODULE_LICENSE ( " GPL " ) ;
MODULE_ALIAS_RTNL_LINK ( " vti " ) ;
MODULE_ALIAS_NETDEV ( " ip_vti0 " ) ;