2013-03-25 18:49:35 +04:00
/*
* Copyright ( c ) 2013 Nicira , Inc .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful , but
* WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA
* 02110 - 1301 , USA
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
# include <linux/capability.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/slab.h>
# include <linux/uaccess.h>
# include <linux/skbuff.h>
# include <linux/netdevice.h>
# include <linux/in.h>
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/if_arp.h>
# include <linux/mroute.h>
# include <linux/init.h>
# include <linux/in6.h>
# include <linux/inetdevice.h>
# include <linux/igmp.h>
# include <linux/netfilter_ipv4.h>
# include <linux/etherdevice.h>
# include <linux/if_ether.h>
# include <linux/if_vlan.h>
# include <linux/rculist.h>
# include <net/sock.h>
# include <net/ip.h>
# include <net/icmp.h>
# include <net/protocol.h>
# include <net/ip_tunnels.h>
# include <net/arp.h>
# include <net/checksum.h>
# include <net/dsfield.h>
# include <net/inet_ecn.h>
# include <net/xfrm.h>
# include <net/net_namespace.h>
# include <net/netns/generic.h>
# include <net/rtnetlink.h>
# if IS_ENABLED(CONFIG_IPV6)
# include <net/ipv6.h>
# include <net/ip6_fib.h>
# include <net/ip6_route.h>
# endif
/*
 * Compute the hash-table bucket index for a (key, remote address) pair.
 * The itn argument is accepted but not consulted.
 */
static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
				   __be32 key, __be32 remote)
{
	u32 mixed = (__force u32)key ^ (__force u32)remote;

	return hash_32(mixed, IP_TNL_HASH_BITS);
}
2014-01-02 23:48:33 +04:00
static inline void __tunnel_dst_set ( struct ip_tunnel_dst * idst ,
struct dst_entry * dst )
2014-01-02 23:48:26 +04:00
{
struct dst_entry * old_dst ;
if ( dst & & ( dst - > flags & DST_NOCACHE ) )
dst = NULL ;
2014-01-02 23:48:33 +04:00
spin_lock_bh ( & idst - > lock ) ;
old_dst = rcu_dereference ( idst - > dst ) ;
rcu_assign_pointer ( idst - > dst , dst ) ;
2014-01-02 23:48:26 +04:00
dst_release ( old_dst ) ;
2014-01-02 23:48:33 +04:00
spin_unlock_bh ( & idst - > lock ) ;
2014-01-02 23:48:26 +04:00
}
static inline void tunnel_dst_set ( struct ip_tunnel * t , struct dst_entry * dst )
{
2014-01-02 23:48:33 +04:00
__tunnel_dst_set ( this_cpu_ptr ( t - > dst_cache ) , dst ) ;
2014-01-02 23:48:26 +04:00
}
static inline void tunnel_dst_reset ( struct ip_tunnel * t )
{
tunnel_dst_set ( t , NULL ) ;
}
2014-01-02 23:48:33 +04:00
static void tunnel_dst_reset_all ( struct ip_tunnel * t )
{
int i ;
for_each_possible_cpu ( i )
__tunnel_dst_set ( per_cpu_ptr ( t - > dst_cache , i ) , NULL ) ;
}
2014-01-02 23:48:26 +04:00
static inline struct dst_entry * tunnel_dst_get ( struct ip_tunnel * t )
{
struct dst_entry * dst ;
rcu_read_lock ( ) ;
2014-01-02 23:48:33 +04:00
dst = rcu_dereference ( this_cpu_ptr ( t - > dst_cache ) - > dst ) ;
2014-01-02 23:48:26 +04:00
if ( dst )
dst_hold ( dst ) ;
rcu_read_unlock ( ) ;
return dst ;
}
2014-01-08 17:59:30 +04:00
/*
 * Return the current CPU's cached route if it is still usable.
 *
 * @t:      tunnel whose cache is consulted
 * @cookie: value forwarded to the route's ->check() callback
 *
 * On success the returned route carries the reference taken by
 * tunnel_dst_get(). If the route is marked obsolete and ->check() rejects
 * it, the cache slot is cleared and NULL is returned.
 */
static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst = tunnel_dst_get(t);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		tunnel_dst_reset(t);
		/*
		 * tunnel_dst_reset() only drops the cache's own reference;
		 * the one taken by tunnel_dst_get() above must be released
		 * here or it is leaked.
		 */
		dst_release(dst);
		return NULL;
	}

	return dst;
}
2013-03-25 18:49:35 +04:00
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 * ip_tunnel_get_stats64 ( struct net_device * dev ,
struct rtnl_link_stats64 * tot )
{
int i ;
for_each_possible_cpu ( i ) {
2014-01-04 09:57:59 +04:00
const struct pcpu_sw_netstats * tstats =
per_cpu_ptr ( dev - > tstats , i ) ;
2013-03-25 18:49:35 +04:00
u64 rx_packets , rx_bytes , tx_packets , tx_bytes ;
unsigned int start ;
do {
start = u64_stats_fetch_begin_bh ( & tstats - > syncp ) ;
rx_packets = tstats - > rx_packets ;
tx_packets = tstats - > tx_packets ;
rx_bytes = tstats - > rx_bytes ;
tx_bytes = tstats - > tx_bytes ;
} while ( u64_stats_fetch_retry_bh ( & tstats - > syncp , start ) ) ;
tot - > rx_packets + = rx_packets ;
tot - > tx_packets + = tx_packets ;
tot - > rx_bytes + = rx_bytes ;
tot - > tx_bytes + = tx_bytes ;
}
tot - > multicast = dev - > stats . multicast ;
tot - > rx_crc_errors = dev - > stats . rx_crc_errors ;
tot - > rx_fifo_errors = dev - > stats . rx_fifo_errors ;
tot - > rx_length_errors = dev - > stats . rx_length_errors ;
tot - > rx_frame_errors = dev - > stats . rx_frame_errors ;
tot - > rx_errors = dev - > stats . rx_errors ;
tot - > tx_fifo_errors = dev - > stats . tx_fifo_errors ;
tot - > tx_carrier_errors = dev - > stats . tx_carrier_errors ;
tot - > tx_dropped = dev - > stats . tx_dropped ;
tot - > tx_aborted_errors = dev - > stats . tx_aborted_errors ;
tot - > tx_errors = dev - > stats . tx_errors ;
tot - > collisions = dev - > stats . collisions ;
return tot ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_get_stats64 ) ;
/*
 * Decide whether a packet's key information is acceptable for a tunnel.
 *
 * @p:     tunnel parameters (i_flags, i_key)
 * @flags: flags seen on the packet
 * @key:   key seen on the packet
 *
 * A keyed tunnel matches only a keyed packet carrying the same key; a
 * keyless tunnel matches only a keyless packet.
 */
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	bool tunnel_keyed = (p->i_flags & TUNNEL_KEY) != 0;
	bool packet_keyed = (flags & TUNNEL_KEY) != 0;

	if (!tunnel_keyed)
		return !packet_keyed;

	/* key expected, none present */
	if (!packet_keyed)
		return false;

	return key == p->i_key;
}
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/*
 * Find the tunnel that should receive a packet.
 *
 * Candidates must belong to a device that is IFF_UP. A candidate whose
 * parms.link equals @link is returned immediately; otherwise a candidate
 * from the earliest pass is remembered and returned once all passes are
 * done. When nothing matched, the fallback tunnel is returned if its
 * device exists and is up, else NULL.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *fallback = NULL;
	struct hlist_head *bucket;

	bucket = &itn->tunnels[ip_tunnel_hash(itn, key, remote)];

	/* Pass 1: local and remote addresses both match. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr)
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		/* In this pass the most recent match replaces earlier ones. */
		fallback = t;
	}

	/* Pass 2: same bucket, only the remote address has to match. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (remote != t->parms.iph.daddr)
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!fallback)
			fallback = t;
	}

	bucket = &itn->tunnels[ip_tunnel_hash(itn, key, 0)];

	/*
	 * Pass 3: bucket hashed without a remote address; local must equal
	 * the tunnel's source, or equal its destination and be multicast.
	 */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		bool addr_ok = local == t->parms.iph.saddr ||
			       (local == t->parms.iph.daddr &&
				ipv4_is_multicast(local));

		if (!addr_ok || !(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!fallback)
			fallback = t;
	}

	/* Pass 4: key-only match, skipped when TUNNEL_NO_KEY is set. */
	if (!(flags & TUNNEL_NO_KEY)) {
		hlist_for_each_entry_rcu(t, bucket, hash_node) {
			if (t->parms.i_key != key ||
			    !(t->dev->flags & IFF_UP))
				continue;

			if (t->parms.link == link)
				return t;
			if (!fallback)
				fallback = t;
		}
	}

	if (fallback)
		return fallback;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head * ip_bucket ( struct ip_tunnel_net * itn ,
struct ip_tunnel_parm * parms )
{
unsigned int h ;
__be32 remote ;
if ( parms - > iph . daddr & & ! ipv4_is_multicast ( parms - > iph . daddr ) )
remote = parms - > iph . daddr ;
else
remote = 0 ;
h = ip_tunnel_hash ( itn , parms - > i_key , remote ) ;
return & itn - > tunnels [ h ] ;
}
static void ip_tunnel_add ( struct ip_tunnel_net * itn , struct ip_tunnel * t )
{
struct hlist_head * head = ip_bucket ( itn , & t - > parms ) ;
hlist_add_head_rcu ( & t - > hash_node , head ) ;
}
static void ip_tunnel_del ( struct ip_tunnel * t )
{
hlist_del_init_rcu ( & t - > hash_node ) ;
}
/*
 * Look for an already configured tunnel whose source, destination, input
 * key, link and device type all equal the requested ones.
 *
 * Returns the matching tunnel, or NULL when the bucket holds none.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	struct hlist_head *bucket = ip_bucket(itn, parms);
	struct ip_tunnel *t;

	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (parms->iph.saddr != t->parms.iph.saddr)
			continue;
		if (parms->iph.daddr != t->parms.iph.daddr)
			continue;
		if (parms->i_key != t->parms.i_key)
			continue;
		if (parms->link != t->parms.link)
			continue;
		if (type != t->dev->type)
			continue;

		return t;
	}

	return NULL;
}
static struct net_device * __ip_tunnel_create ( struct net * net ,
const struct rtnl_link_ops * ops ,
struct ip_tunnel_parm * parms )
{
int err ;
struct ip_tunnel * tunnel ;
struct net_device * dev ;
char name [ IFNAMSIZ ] ;
if ( parms - > name [ 0 ] )
strlcpy ( name , parms - > name , IFNAMSIZ ) ;
else {
2013-03-28 12:21:46 +04:00
if ( strlen ( ops - > kind ) > ( IFNAMSIZ - 3 ) ) {
2013-03-25 18:49:35 +04:00
err = - E2BIG ;
goto failed ;
}
strlcpy ( name , ops - > kind , IFNAMSIZ ) ;
strncat ( name , " %d " , 2 ) ;
}
ASSERT_RTNL ( ) ;
dev = alloc_netdev ( ops - > priv_size , name , ops - > setup ) ;
if ( ! dev ) {
err = - ENOMEM ;
goto failed ;
}
dev_net_set ( dev , net ) ;
dev - > rtnl_link_ops = ops ;
tunnel = netdev_priv ( dev ) ;
tunnel - > parms = * parms ;
2013-06-26 18:11:28 +04:00
tunnel - > net = net ;
2013-03-25 18:49:35 +04:00
err = register_netdevice ( dev ) ;
if ( err )
goto failed_free ;
return dev ;
failed_free :
free_netdev ( dev ) ;
failed :
return ERR_PTR ( err ) ;
}
2014-01-02 23:48:26 +04:00
/*
 * Zero *fl4 and fill in the fields used for the tunnel's outer route
 * lookup: protocol, addresses, GRE key, TOS and output interface.
 */
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));

	fl4->flowi4_proto = proto;
	fl4->flowi4_tos = tos;
	fl4->flowi4_oif = oif;

	fl4->saddr = saddr;
	fl4->daddr = daddr;
	fl4->fl4_gre_key = key;
}
static int ip_tunnel_bind_dev ( struct net_device * dev )
{
struct net_device * tdev = NULL ;
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
const struct iphdr * iph ;
int hlen = LL_MAX_HEADER ;
int mtu = ETH_DATA_LEN ;
int t_hlen = tunnel - > hlen + sizeof ( struct iphdr ) ;
iph = & tunnel - > parms . iph ;
/* Guess output device to choose reasonable mtu and needed_headroom */
if ( iph - > daddr ) {
struct flowi4 fl4 ;
struct rtable * rt ;
2014-01-02 23:48:26 +04:00
init_tunnel_flow ( & fl4 , iph - > protocol , iph - > daddr ,
iph - > saddr , tunnel - > parms . o_key ,
RT_TOS ( iph - > tos ) , tunnel - > parms . link ) ;
rt = ip_route_output_key ( tunnel - > net , & fl4 ) ;
2013-03-25 18:49:35 +04:00
if ( ! IS_ERR ( rt ) ) {
tdev = rt - > dst . dev ;
2014-01-02 23:48:26 +04:00
tunnel_dst_set ( tunnel , dst_clone ( & rt - > dst ) ) ;
2013-03-25 18:49:35 +04:00
ip_rt_put ( rt ) ;
}
if ( dev - > type ! = ARPHRD_ETHER )
dev - > flags | = IFF_POINTOPOINT ;
}
if ( ! tdev & & tunnel - > parms . link )
2013-08-13 19:51:11 +04:00
tdev = __dev_get_by_index ( tunnel - > net , tunnel - > parms . link ) ;
2013-03-25 18:49:35 +04:00
if ( tdev ) {
hlen = tdev - > hard_header_len + tdev - > needed_headroom ;
mtu = tdev - > mtu ;
}
dev - > iflink = tunnel - > parms . link ;
dev - > needed_headroom = t_hlen + hlen ;
mtu - = ( dev - > hard_header_len + t_hlen ) ;
if ( mtu < 68 )
mtu = 68 ;
return mtu ;
}
static struct ip_tunnel * ip_tunnel_create ( struct net * net ,
struct ip_tunnel_net * itn ,
struct ip_tunnel_parm * parms )
{
struct ip_tunnel * nt , * fbt ;
struct net_device * dev ;
BUG_ON ( ! itn - > fb_tunnel_dev ) ;
fbt = netdev_priv ( itn - > fb_tunnel_dev ) ;
dev = __ip_tunnel_create ( net , itn - > fb_tunnel_dev - > rtnl_link_ops , parms ) ;
if ( IS_ERR ( dev ) )
return NULL ;
dev - > mtu = ip_tunnel_bind_dev ( dev ) ;
nt = netdev_priv ( dev ) ;
ip_tunnel_add ( itn , nt ) ;
return nt ;
}
/*
 * Receive a decapsulated packet on a tunnel.
 *
 * @tunnel:        tunnel the packet was matched to
 * @skb:           the packet; consumed on every path
 * @tpi:           flags and sequence number seen on the packet
 * @log_ecn_error: whether to log ECN decapsulation problems
 *
 * The packet is dropped (and the matching error counters bumped) when its
 * checksum flag disagrees with the tunnel configuration, when a required
 * sequence number is missing or behind the expected one, or when ECN
 * decapsulation reports an error greater than 1. Otherwise the per-cpu rx
 * counters are updated and the packet is handed to the tunnel's GRO cells.
 *
 * Always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence on the packet must agree with the tunnel. */
	if ((!(tpi->flags & TUNNEL_CSUM) &&  (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	     ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		/* Signed difference so that sequence wrap-around is handled. */
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
2013-07-02 21:57:33 +04:00
/*
 * Propagate the tunnel path MTU to the inner packet's route and reject
 * packets that are too big to be sent.
 *
 * @dev: tunnel device
 * @skb: packet about to be encapsulated
 * @rt:  outer route chosen for the packet
 * @df:  frag_off value configured for the tunnel
 *
 * Returns 0 when the packet may be sent, or -E2BIG after an ICMP / ICMPv6
 * "too big" error has been generated for it.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct dst_entry *inner_dst = skb_dst(skb);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else if (inner_dst)
		mtu = dst_mtu(inner_dst);
	else
		mtu = dev->mtu;

	if (inner_dst)
		inner_dst->ops->update_pmtu(inner_dst, NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		bool too_big = !skb_is_gso(skb) &&
			       (df & htons(IP_DF)) && mtu < pkt_size;

		if (too_big) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)inner_dst;

		if (rt6 && mtu < dst_mtu(inner_dst) &&
			   mtu >= IPV6_MIN_MTU) {
			bool unicast_peer = tunnel->parms.iph.daddr &&
				!ipv4_is_multicast(tunnel->parms.iph.daddr);

			if (unicast_peer || rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(inner_dst, RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
2013-03-25 18:49:35 +04:00
void ip_tunnel_xmit ( struct sk_buff * skb , struct net_device * dev ,
2013-05-28 03:48:15 +04:00
const struct iphdr * tnl_params , const u8 protocol )
2013-03-25 18:49:35 +04:00
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
const struct iphdr * inner_iph ;
struct flowi4 fl4 ;
u8 tos , ttl ;
__be16 df ;
2014-01-02 23:48:26 +04:00
struct rtable * rt = NULL ; /* Route to the other host */
2013-03-25 18:49:35 +04:00
unsigned int max_headroom ; /* The extra header space needed */
__be32 dst ;
2013-06-18 04:49:56 +04:00
int err ;
2014-01-02 23:48:26 +04:00
bool connected = true ;
2013-03-25 18:49:35 +04:00
inner_iph = ( const struct iphdr * ) skb_inner_network_header ( skb ) ;
dst = tnl_params - > daddr ;
if ( dst = = 0 ) {
/* NBMA tunnel */
if ( skb_dst ( skb ) = = NULL ) {
dev - > stats . tx_fifo_errors + + ;
goto tx_error ;
}
if ( skb - > protocol = = htons ( ETH_P_IP ) ) {
rt = skb_rtable ( skb ) ;
dst = rt_nexthop ( rt , inner_iph - > daddr ) ;
}
# if IS_ENABLED(CONFIG_IPV6)
else if ( skb - > protocol = = htons ( ETH_P_IPV6 ) ) {
const struct in6_addr * addr6 ;
struct neighbour * neigh ;
bool do_tx_error_icmp ;
int addr_type ;
neigh = dst_neigh_lookup ( skb_dst ( skb ) ,
& ipv6_hdr ( skb ) - > daddr ) ;
if ( neigh = = NULL )
goto tx_error ;
addr6 = ( const struct in6_addr * ) & neigh - > primary_key ;
addr_type = ipv6_addr_type ( addr6 ) ;
if ( addr_type = = IPV6_ADDR_ANY ) {
addr6 = & ipv6_hdr ( skb ) - > daddr ;
addr_type = ipv6_addr_type ( addr6 ) ;
}
if ( ( addr_type & IPV6_ADDR_COMPATv4 ) = = 0 )
do_tx_error_icmp = true ;
else {
do_tx_error_icmp = false ;
dst = addr6 - > s6_addr32 [ 3 ] ;
}
neigh_release ( neigh ) ;
if ( do_tx_error_icmp )
goto tx_error_icmp ;
}
# endif
else
goto tx_error ;
2014-01-02 23:48:26 +04:00
connected = false ;
2013-03-25 18:49:35 +04:00
}
tos = tnl_params - > tos ;
if ( tos & 0x1 ) {
tos & = ~ 0x1 ;
2014-01-02 23:48:26 +04:00
if ( skb - > protocol = = htons ( ETH_P_IP ) ) {
2013-03-25 18:49:35 +04:00
tos = inner_iph - > tos ;
2014-01-02 23:48:26 +04:00
connected = false ;
} else if ( skb - > protocol = = htons ( ETH_P_IPV6 ) ) {
2013-03-25 18:49:35 +04:00
tos = ipv6_get_dsfield ( ( const struct ipv6hdr * ) inner_iph ) ;
2014-01-02 23:48:26 +04:00
connected = false ;
}
2013-03-25 18:49:35 +04:00
}
2014-01-02 23:48:26 +04:00
init_tunnel_flow ( & fl4 , protocol , dst , tnl_params - > saddr ,
tunnel - > parms . o_key , RT_TOS ( tos ) , tunnel - > parms . link ) ;
if ( connected )
rt = ( struct rtable * ) tunnel_dst_check ( tunnel , 0 ) ;
if ( ! rt ) {
rt = ip_route_output_key ( tunnel - > net , & fl4 ) ;
if ( IS_ERR ( rt ) ) {
dev - > stats . tx_carrier_errors + + ;
goto tx_error ;
}
if ( connected )
tunnel_dst_set ( tunnel , dst_clone ( & rt - > dst ) ) ;
2013-03-25 18:49:35 +04:00
}
2014-01-02 23:48:26 +04:00
2013-06-18 04:49:56 +04:00
if ( rt - > dst . dev = = dev ) {
2013-03-25 18:49:35 +04:00
ip_rt_put ( rt ) ;
dev - > stats . collisions + + ;
goto tx_error ;
}
2013-07-02 21:57:33 +04:00
if ( tnl_update_pmtu ( dev , skb , rt , tnl_params - > frag_off ) ) {
ip_rt_put ( rt ) ;
goto tx_error ;
2013-03-25 18:49:35 +04:00
}
if ( tunnel - > err_count > 0 ) {
if ( time_before ( jiffies ,
tunnel - > err_time + IPTUNNEL_ERR_TIMEO ) ) {
tunnel - > err_count - - ;
dst_link_failure ( skb ) ;
} else
tunnel - > err_count = 0 ;
}
2013-09-25 20:57:47 +04:00
tos = ip_tunnel_ecn_encap ( tos , inner_iph , skb ) ;
2013-03-25 18:49:35 +04:00
ttl = tnl_params - > ttl ;
if ( ttl = = 0 ) {
if ( skb - > protocol = = htons ( ETH_P_IP ) )
ttl = inner_iph - > ttl ;
# if IS_ENABLED(CONFIG_IPV6)
else if ( skb - > protocol = = htons ( ETH_P_IPV6 ) )
ttl = ( ( const struct ipv6hdr * ) inner_iph ) - > hop_limit ;
# endif
else
ttl = ip4_dst_hoplimit ( & rt - > dst ) ;
}
2013-07-02 21:57:33 +04:00
df = tnl_params - > frag_off ;
if ( skb - > protocol = = htons ( ETH_P_IP ) )
df | = ( inner_iph - > frag_off & htons ( IP_DF ) ) ;
2013-06-18 04:49:56 +04:00
max_headroom = LL_RESERVED_SPACE ( rt - > dst . dev ) + sizeof ( struct iphdr )
+ rt - > dst . header_len ;
2013-10-01 13:33:59 +04:00
if ( max_headroom > dev - > needed_headroom )
2013-03-25 18:49:35 +04:00
dev - > needed_headroom = max_headroom ;
2013-10-01 13:33:59 +04:00
if ( skb_cow_head ( skb , dev - > needed_headroom ) ) {
dev - > stats . tx_dropped + + ;
dev_kfree_skb ( skb ) ;
return ;
2013-03-25 18:49:35 +04:00
}
2013-09-02 17:34:54 +04:00
err = iptunnel_xmit ( rt , skb , fl4 . saddr , fl4 . daddr , protocol ,
2013-09-25 20:57:47 +04:00
tos , ttl , df , ! net_eq ( tunnel - > net , dev_net ( dev ) ) ) ;
2013-06-18 04:49:56 +04:00
iptunnel_xmit_stats ( err , & dev - > stats , dev - > tstats ) ;
2013-03-25 18:49:35 +04:00
return ;
# if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp :
dst_link_failure ( skb ) ;
# endif
tx_error :
dev - > stats . tx_errors + + ;
dev_kfree_skb ( skb ) ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_xmit ) ;
/*
 * Apply new parameters to an existing tunnel.
 *
 * @itn:     per-namespace tunnel table
 * @t:       tunnel being changed
 * @dev:     device whose addresses, MTU and state notification are updated
 * @p:       new parameters
 * @set_mtu: when the link changes, whether to store the recomputed MTU
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	/*
	 * The hash bucket depends on i_key and daddr, so the tunnel is
	 * unhashed before they change and rehashed afterwards.
	 */
	ip_tunnel_del(t);

	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;

	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}

	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int new_mtu;

		t->parms.link = p->link;
		new_mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = new_mtu;
	}

	/* Cached routes were computed from the old parameters. */
	tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
int ip_tunnel_ioctl ( struct net_device * dev , struct ip_tunnel_parm * p , int cmd )
{
int err = 0 ;
struct ip_tunnel * t ;
struct net * net = dev_net ( dev ) ;
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
struct ip_tunnel_net * itn = net_generic ( net , tunnel - > ip_tnl_net_id ) ;
BUG_ON ( ! itn - > fb_tunnel_dev ) ;
switch ( cmd ) {
case SIOCGETTUNNEL :
t = NULL ;
if ( dev = = itn - > fb_tunnel_dev )
t = ip_tunnel_find ( itn , p , itn - > fb_tunnel_dev - > type ) ;
if ( t = = NULL )
t = netdev_priv ( dev ) ;
memcpy ( p , & t - > parms , sizeof ( * p ) ) ;
break ;
case SIOCADDTUNNEL :
case SIOCCHGTUNNEL :
err = - EPERM ;
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
goto done ;
if ( p - > iph . ttl )
p - > iph . frag_off | = htons ( IP_DF ) ;
if ( ! ( p - > i_flags & TUNNEL_KEY ) )
p - > i_key = 0 ;
if ( ! ( p - > o_flags & TUNNEL_KEY ) )
p - > o_key = 0 ;
t = ip_tunnel_find ( itn , p , itn - > fb_tunnel_dev - > type ) ;
if ( ! t & & ( cmd = = SIOCADDTUNNEL ) )
t = ip_tunnel_create ( net , itn , p ) ;
if ( dev ! = itn - > fb_tunnel_dev & & cmd = = SIOCCHGTUNNEL ) {
if ( t ! = NULL ) {
if ( t - > dev ! = dev ) {
err = - EEXIST ;
break ;
}
} else {
unsigned int nflags = 0 ;
if ( ipv4_is_multicast ( p - > iph . daddr ) )
nflags = IFF_BROADCAST ;
else if ( p - > iph . daddr )
nflags = IFF_POINTOPOINT ;
if ( ( dev - > flags ^ nflags ) & ( IFF_POINTOPOINT | IFF_BROADCAST ) ) {
err = - EINVAL ;
break ;
}
t = netdev_priv ( dev ) ;
}
}
if ( t ) {
err = 0 ;
ip_tunnel_update ( itn , t , dev , p , true ) ;
} else
err = ( cmd = = SIOCADDTUNNEL ? - ENOBUFS : - ENOENT ) ;
break ;
case SIOCDELTUNNEL :
err = - EPERM ;
if ( ! ns_capable ( net - > user_ns , CAP_NET_ADMIN ) )
goto done ;
if ( dev = = itn - > fb_tunnel_dev ) {
err = - ENOENT ;
t = ip_tunnel_find ( itn , p , itn - > fb_tunnel_dev - > type ) ;
if ( t = = NULL )
goto done ;
err = - EPERM ;
if ( t = = netdev_priv ( itn - > fb_tunnel_dev ) )
goto done ;
dev = t - > dev ;
}
unregister_netdevice ( dev ) ;
err = 0 ;
break ;
default :
err = - EINVAL ;
}
done :
return err ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_ioctl ) ;
int ip_tunnel_change_mtu ( struct net_device * dev , int new_mtu )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
int t_hlen = tunnel - > hlen + sizeof ( struct iphdr ) ;
if ( new_mtu < 68 | |
new_mtu > 0xFFF8 - dev - > hard_header_len - t_hlen )
return - EINVAL ;
dev - > mtu = new_mtu ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_change_mtu ) ;
static void ip_tunnel_dev_free ( struct net_device * dev )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
gro_cells_destroy ( & tunnel - > gro_cells ) ;
2014-01-02 23:48:33 +04:00
free_percpu ( tunnel - > dst_cache ) ;
2013-03-25 18:49:35 +04:00
free_percpu ( dev - > tstats ) ;
free_netdev ( dev ) ;
}
void ip_tunnel_dellink ( struct net_device * dev , struct list_head * head )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
struct ip_tunnel_net * itn ;
2013-08-13 19:51:11 +04:00
itn = net_generic ( tunnel - > net , tunnel - > ip_tnl_net_id ) ;
2013-03-25 18:49:35 +04:00
if ( itn - > fb_tunnel_dev ! = dev ) {
ip_tunnel_del ( netdev_priv ( dev ) ) ;
unregister_netdevice_queue ( dev , head ) ;
}
}
EXPORT_SYMBOL_GPL ( ip_tunnel_dellink ) ;
2013-06-08 00:26:05 +04:00
int ip_tunnel_init_net ( struct net * net , int ip_tnl_net_id ,
2013-03-25 18:49:35 +04:00
struct rtnl_link_ops * ops , char * devname )
{
struct ip_tunnel_net * itn = net_generic ( net , ip_tnl_net_id ) ;
struct ip_tunnel_parm parms ;
2013-08-06 09:51:37 +04:00
unsigned int i ;
2013-03-25 18:49:35 +04:00
2013-08-06 09:51:37 +04:00
for ( i = 0 ; i < IP_TNL_HASH_SIZE ; i + + )
INIT_HLIST_HEAD ( & itn - > tunnels [ i ] ) ;
2013-03-25 18:49:35 +04:00
if ( ! ops ) {
itn - > fb_tunnel_dev = NULL ;
return 0 ;
}
2013-08-06 09:51:37 +04:00
2013-03-25 18:49:35 +04:00
memset ( & parms , 0 , sizeof ( parms ) ) ;
if ( devname )
strlcpy ( parms . name , devname , IFNAMSIZ ) ;
rtnl_lock ( ) ;
itn - > fb_tunnel_dev = __ip_tunnel_create ( net , ops , & parms ) ;
2013-08-19 11:05:10 +04:00
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe .
*/
2013-10-01 13:34:48 +04:00
if ( ! IS_ERR ( itn - > fb_tunnel_dev ) ) {
2013-08-23 12:15:37 +04:00
itn - > fb_tunnel_dev - > features | = NETIF_F_NETNS_LOCAL ;
2013-10-01 13:34:48 +04:00
ip_tunnel_add ( itn , netdev_priv ( itn - > fb_tunnel_dev ) ) ;
}
2013-08-23 12:15:37 +04:00
rtnl_unlock ( ) ;
2013-03-25 18:49:35 +04:00
2013-08-23 12:15:37 +04:00
return PTR_RET ( itn - > fb_tunnel_dev ) ;
2013-03-25 18:49:35 +04:00
}
EXPORT_SYMBOL_GPL ( ip_tunnel_init_net ) ;
2013-08-13 19:51:11 +04:00
static void ip_tunnel_destroy ( struct ip_tunnel_net * itn , struct list_head * head ,
struct rtnl_link_ops * ops )
2013-03-25 18:49:35 +04:00
{
2013-08-13 19:51:11 +04:00
struct net * net = dev_net ( itn - > fb_tunnel_dev ) ;
struct net_device * dev , * aux ;
2013-03-25 18:49:35 +04:00
int h ;
2013-08-13 19:51:11 +04:00
for_each_netdev_safe ( net , dev , aux )
if ( dev - > rtnl_link_ops = = ops )
unregister_netdevice_queue ( dev , head ) ;
2013-03-25 18:49:35 +04:00
for ( h = 0 ; h < IP_TNL_HASH_SIZE ; h + + ) {
struct ip_tunnel * t ;
struct hlist_node * n ;
struct hlist_head * thead = & itn - > tunnels [ h ] ;
hlist_for_each_entry_safe ( t , n , thead , hash_node )
2013-08-13 19:51:11 +04:00
/* If dev is in the same netns, it has already
* been added to the list by the previous loop .
*/
if ( ! net_eq ( dev_net ( t - > dev ) , net ) )
unregister_netdevice_queue ( t - > dev , head ) ;
2013-03-25 18:49:35 +04:00
}
}
2013-08-13 19:51:11 +04:00
void ip_tunnel_delete_net ( struct ip_tunnel_net * itn , struct rtnl_link_ops * ops )
2013-03-25 18:49:35 +04:00
{
LIST_HEAD ( list ) ;
rtnl_lock ( ) ;
2013-08-13 19:51:11 +04:00
ip_tunnel_destroy ( itn , & list , ops ) ;
2013-03-25 18:49:35 +04:00
unregister_netdevice_many ( & list ) ;
rtnl_unlock ( ) ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_delete_net ) ;
int ip_tunnel_newlink ( struct net_device * dev , struct nlattr * tb [ ] ,
struct ip_tunnel_parm * p )
{
struct ip_tunnel * nt ;
struct net * net = dev_net ( dev ) ;
struct ip_tunnel_net * itn ;
int mtu ;
int err ;
nt = netdev_priv ( dev ) ;
itn = net_generic ( net , nt - > ip_tnl_net_id ) ;
if ( ip_tunnel_find ( itn , p , dev - > type ) )
return - EEXIST ;
2013-06-26 18:11:28 +04:00
nt - > net = net ;
2013-03-25 18:49:35 +04:00
nt - > parms = * p ;
err = register_netdevice ( dev ) ;
if ( err )
goto out ;
if ( dev - > type = = ARPHRD_ETHER & & ! tb [ IFLA_ADDRESS ] )
eth_hw_addr_random ( dev ) ;
mtu = ip_tunnel_bind_dev ( dev ) ;
if ( ! tb [ IFLA_MTU ] )
dev - > mtu = mtu ;
ip_tunnel_add ( itn , nt ) ;
out :
return err ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_newlink ) ;
int ip_tunnel_changelink ( struct net_device * dev , struct nlattr * tb [ ] ,
struct ip_tunnel_parm * p )
{
2013-08-13 19:51:11 +04:00
struct ip_tunnel * t ;
2013-03-25 18:49:35 +04:00
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2013-08-13 19:51:11 +04:00
struct net * net = tunnel - > net ;
2013-03-25 18:49:35 +04:00
struct ip_tunnel_net * itn = net_generic ( net , tunnel - > ip_tnl_net_id ) ;
if ( dev = = itn - > fb_tunnel_dev )
return - EINVAL ;
t = ip_tunnel_find ( itn , p , dev - > type ) ;
if ( t ) {
if ( t - > dev ! = dev )
return - EEXIST ;
} else {
2013-08-13 19:51:11 +04:00
t = tunnel ;
2013-03-25 18:49:35 +04:00
if ( dev - > type ! = ARPHRD_ETHER ) {
unsigned int nflags = 0 ;
if ( ipv4_is_multicast ( p - > iph . daddr ) )
nflags = IFF_BROADCAST ;
else if ( p - > iph . daddr )
nflags = IFF_POINTOPOINT ;
if ( ( dev - > flags ^ nflags ) &
( IFF_POINTOPOINT | IFF_BROADCAST ) )
return - EINVAL ;
}
}
ip_tunnel_update ( itn , t , dev , p , ! tb [ IFLA_MTU ] ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_changelink ) ;
int ip_tunnel_init ( struct net_device * dev )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
struct iphdr * iph = & tunnel - > parms . iph ;
2013-10-08 02:51:58 +04:00
int i , err ;
2013-03-25 18:49:35 +04:00
dev - > destructor = ip_tunnel_dev_free ;
2014-01-04 09:57:59 +04:00
dev - > tstats = alloc_percpu ( struct pcpu_sw_netstats ) ;
2013-03-25 18:49:35 +04:00
if ( ! dev - > tstats )
return - ENOMEM ;
2013-10-08 02:51:58 +04:00
for_each_possible_cpu ( i ) {
2014-01-04 09:57:59 +04:00
struct pcpu_sw_netstats * ipt_stats ;
2013-10-08 02:51:58 +04:00
ipt_stats = per_cpu_ptr ( dev - > tstats , i ) ;
u64_stats_init ( & ipt_stats - > syncp ) ;
}
2014-01-02 23:48:33 +04:00
tunnel - > dst_cache = alloc_percpu ( struct ip_tunnel_dst ) ;
if ( ! tunnel - > dst_cache ) {
free_percpu ( dev - > tstats ) ;
return - ENOMEM ;
}
for_each_possible_cpu ( i ) {
struct ip_tunnel_dst * idst = per_cpu_ptr ( tunnel - > dst_cache , i ) ;
idst - > dst = NULL ;
spin_lock_init ( & idst - > lock ) ;
}
2013-03-25 18:49:35 +04:00
err = gro_cells_init ( & tunnel - > gro_cells , dev ) ;
if ( err ) {
2014-01-02 23:48:33 +04:00
free_percpu ( tunnel - > dst_cache ) ;
2013-03-25 18:49:35 +04:00
free_percpu ( dev - > tstats ) ;
return err ;
}
tunnel - > dev = dev ;
2013-08-13 19:51:11 +04:00
tunnel - > net = dev_net ( dev ) ;
2013-03-25 18:49:35 +04:00
strcpy ( tunnel - > parms . name , dev - > name ) ;
iph - > version = 4 ;
iph - > ihl = 5 ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_init ) ;
void ip_tunnel_uninit ( struct net_device * dev )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2013-08-13 19:51:11 +04:00
struct net * net = tunnel - > net ;
2013-03-25 18:49:35 +04:00
struct ip_tunnel_net * itn ;
itn = net_generic ( net , tunnel - > ip_tnl_net_id ) ;
/* fb_tunnel_dev will be unregisted in net-exit call. */
if ( itn - > fb_tunnel_dev ! = dev )
ip_tunnel_del ( netdev_priv ( dev ) ) ;
2014-01-02 23:48:26 +04:00
2014-01-02 23:48:33 +04:00
tunnel_dst_reset_all ( tunnel ) ;
2013-03-25 18:49:35 +04:00
}
EXPORT_SYMBOL_GPL ( ip_tunnel_uninit ) ;
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup ( struct net_device * dev , int net_id )
{
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
tunnel - > ip_tnl_net_id = net_id ;
}
EXPORT_SYMBOL_GPL ( ip_tunnel_setup ) ;
/* The licence ident must be exactly "GPL", with no padding spaces. */
MODULE_LICENSE("GPL");