/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/static_key.h>

#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
EXPORT_SYMBOL(iptun_encaps);
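/* Build the outer IPv4 header on an already-encapsulated skb, hand the
 * packet to ip_local_out() and account the transmitted bytes on the tunnel
 * device's per-CPU stats. The caller supplies the route plus the outer
 * addresses, protocol, TOS, TTL and DF bit.
 */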
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 proto,
		   __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
	int pkt_len = skb->len - skb_inner_network_offset(skb);
	struct net *net = dev_net(rt->dst.dev);
	struct net_device *dev = skb->dev;
	struct iphdr *iph;
	int err;

	skb_scrub_packet(skb, xnet);

	skb_clear_hash(skb);
	skb_dst_set(skb, &rt->dst);
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = proto;
	iph->tos = tos;
	iph->daddr = dst;
	iph->saddr = src;
	iph->ttl = ttl;
	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);

	err = ip_local_out(net, sk, skb);
	if (unlikely(net_xmit_eval(err)))
		pkt_len = 0;
	iptunnel_xmit_stats(dev, pkt_len);
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
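/* Strip hdr_len bytes of tunnel header (updating the checksum as we go),
 * set skb->protocol from the inner frame and scrub cross-namespace state.
 * When raw_proto is false and the payload is ETH_P_TEB, the inner Ethernet
 * header is inspected to recover the real protocol.
 */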
int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
			   __be16 inner_proto, bool raw_proto, bool xnet)
{
	if (unlikely(!pskb_may_pull(skb, hdr_len)))
		return -ENOMEM;

	skb_pull_rcsum(skb, hdr_len);

	if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
		struct ethhdr *eh;

		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
			return -ENOMEM;

		eh = (struct ethhdr *)skb->data;
		if (likely(eth_proto_is_802_3(eh->h_proto)))
			skb->protocol = eh->h_proto;
		else
			skb->protocol = htons(ETH_P_802_2);
	} else {
		skb->protocol = inner_proto;
	}

	skb_clear_hash_if_not_l4(skb);
	skb->vlan_tci = 0;
	skb_set_queue_mapping(skb, 0);
	skb_scrub_packet(skb, xnet);

	return iptunnel_pull_offloads(skb);
}
EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
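/* Allocate a TX metadata_dst mirroring a received tunnel's metadata with
 * source and destination swapped, so a reply can be sent back through the
 * same metadata-based tunnel.
 */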
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
					     gfp_t flags)
{
	struct metadata_dst *res;
	struct ip_tunnel_info *dst, *src;

	if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
		return NULL;

	res = metadata_dst_alloc(0, flags);
	if (!res)
		return NULL;

	dst = &res->u.tun_info;
	src = &md->u.tun_info;
	dst->key.tun_id = src->key.tun_id;
	if (src->mode & IP_TUNNEL_INFO_IPV6)
		memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
		       sizeof(struct in6_addr));
	else
		dst->key.u.ipv4.dst = src->key.u.ipv4.src;
	dst->mode = src->mode | IP_TUNNEL_INFO_TX;

	return res;
}
EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
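/* Prepare an skb for tunnel transmission: mark it as encapsulated, record
 * the requested GSO type for later segmentation, and drop checksum offload
 * state that cannot be honoured for the inner packet.
 */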
int iptunnel_handle_offloads(struct sk_buff *skb,
			     int gso_type_mask)
{
	int err;

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	if (skb_is_gso(skb)) {
		err = skb_header_unclone(skb, GFP_ATOMIC);
		if (unlikely(err))
			return err;
		skb_shinfo(skb)->gso_type |= gso_type_mask;
		return 0;
	}

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		skb->ip_summed = CHECKSUM_NONE;
		/* We clear encapsulation here to prevent badly-written
		 * drivers potentially deciding to offload an inner checksum
		 * if we set CHECKSUM_PARTIAL on the outer header.
		 * This should go away when the drivers are all fixed.
		 */
		skb->encapsulation = 0;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
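/* Aggregate the per-CPU pcpu_sw_netstats counters into @tot on top of the
 * shared netdev counters; each CPU's values are read under its u64_stats
 * seqcount so 64-bit counters remain consistent on 32-bit hosts.
 */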
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	netdev_stats_to_stats64(tot, &dev->stats);

	for_each_possible_cpu(i) {
		const struct pcpu_sw_netstats *tstats =
						   per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
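/* Lightweight tunnel (lwtunnel) support for LWTUNNEL_ENCAP_IP: parse the
 * netlink-configured per-route tunnel key into an ip_tunnel_info and dump
 * it back to user space.
 */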
static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
	[LWTUNNEL_IP_ID]	= { .type = NLA_U64 },
	[LWTUNNEL_IP_DST]	= { .type = NLA_U32 },
	[LWTUNNEL_IP_SRC]	= { .type = NLA_U32 },
	[LWTUNNEL_IP_TTL]	= { .type = NLA_U8 },
	[LWTUNNEL_IP_TOS]	= { .type = NLA_U8 },
	[LWTUNNEL_IP_FLAGS]	= { .type = NLA_U16 },
};

static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
			      unsigned int family, const void *cfg,
			      struct lwtunnel_state **ts)
{
	struct ip_tunnel_info *tun_info;
	struct lwtunnel_state *new_state;
	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
	int err;

	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy);
	if (err < 0)
		return err;

	new_state = lwtunnel_state_alloc(sizeof(*tun_info));
	if (!new_state)
		return -ENOMEM;

	new_state->type = LWTUNNEL_ENCAP_IP;

	tun_info = lwt_tun_info(new_state);

	if (tb[LWTUNNEL_IP_ID])
		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);

	if (tb[LWTUNNEL_IP_DST])
		tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]);

	if (tb[LWTUNNEL_IP_SRC])
		tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]);

	if (tb[LWTUNNEL_IP_TTL])
		tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]);

	if (tb[LWTUNNEL_IP_TOS])
		tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);

	if (tb[LWTUNNEL_IP_FLAGS])
		tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);

	tun_info->mode = IP_TUNNEL_INFO_TX;
	tun_info->options_len = 0;

	*ts = new_state;

	return 0;
}

static int ip_tun_fill_encap_info(struct sk_buff *skb,
				  struct lwtunnel_state *lwtstate)
{
	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);

	if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id,
			 LWTUNNEL_IP_PAD) ||
	    nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
	    nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
	    nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
	    nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
	    nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
		return -ENOMEM;

	return 0;
}

static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	return nla_total_size_64bit(8)	/* LWTUNNEL_IP_ID */
		+ nla_total_size(4)	/* LWTUNNEL_IP_DST */
		+ nla_total_size(4)	/* LWTUNNEL_IP_SRC */
		+ nla_total_size(1)	/* LWTUNNEL_IP_TOS */
		+ nla_total_size(1)	/* LWTUNNEL_IP_TTL */
		+ nla_total_size(2);	/* LWTUNNEL_IP_FLAGS */
}

static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	return memcmp(lwt_tun_info(a), lwt_tun_info(b),
		      sizeof(struct ip_tunnel_info));
}

static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
	.build_state = ip_tun_build_state,
	.fill_encap = ip_tun_fill_encap_info,
	.get_encap_size = ip_tun_encap_nlsize,
	.cmp_encap = ip_tun_cmp_encap,
};
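/* IPv6 counterpart for LWTUNNEL_ENCAP_IP6 routes; same shape as the IPv4
 * handlers above apart from the address attributes and the
 * IP_TUNNEL_INFO_IPV6 mode flag.
 */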
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
	[LWTUNNEL_IP6_ID]		= { .type = NLA_U64 },
	[LWTUNNEL_IP6_DST]		= { .len = sizeof(struct in6_addr) },
	[LWTUNNEL_IP6_SRC]		= { .len = sizeof(struct in6_addr) },
	[LWTUNNEL_IP6_HOPLIMIT]		= { .type = NLA_U8 },
	[LWTUNNEL_IP6_TC]		= { .type = NLA_U8 },
	[LWTUNNEL_IP6_FLAGS]		= { .type = NLA_U16 },
};

static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
			       unsigned int family, const void *cfg,
			       struct lwtunnel_state **ts)
{
	struct ip_tunnel_info *tun_info;
	struct lwtunnel_state *new_state;
	struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
	int err;

	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
	if (err < 0)
		return err;

	new_state = lwtunnel_state_alloc(sizeof(*tun_info));
	if (!new_state)
		return -ENOMEM;

	new_state->type = LWTUNNEL_ENCAP_IP6;

	tun_info = lwt_tun_info(new_state);

	if (tb[LWTUNNEL_IP6_ID])
		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);

	if (tb[LWTUNNEL_IP6_DST])
		tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]);

	if (tb[LWTUNNEL_IP6_SRC])
		tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]);

	if (tb[LWTUNNEL_IP6_HOPLIMIT])
		tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]);

	if (tb[LWTUNNEL_IP6_TC])
		tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);

	if (tb[LWTUNNEL_IP6_FLAGS])
		tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);

	tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
	tun_info->options_len = 0;

	*ts = new_state;

	return 0;
}

static int ip6_tun_fill_encap_info(struct sk_buff *skb,
				   struct lwtunnel_state *lwtstate)
{
	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);

	if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id,
			 LWTUNNEL_IP6_PAD) ||
	    nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
	    nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
	    nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
		return -ENOMEM;

	return 0;
}

static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	return nla_total_size_64bit(8)	/* LWTUNNEL_IP6_ID */
		+ nla_total_size(16)	/* LWTUNNEL_IP6_DST */
		+ nla_total_size(16)	/* LWTUNNEL_IP6_SRC */
		+ nla_total_size(1)	/* LWTUNNEL_IP6_HOPLIMIT */
		+ nla_total_size(1)	/* LWTUNNEL_IP6_TC */
		+ nla_total_size(2);	/* LWTUNNEL_IP6_FLAGS */
}

static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
	.build_state = ip6_tun_build_state,
	.fill_encap = ip6_tun_fill_encap_info,
	.get_encap_size = ip6_tun_encap_nlsize,
	.cmp_encap = ip_tun_cmp_encap,
};
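/* Register both lwtunnel encap families at boot. */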
void __init ip_tunnel_core_init(void)
{
	/* If you land here, make sure whether increasing ip_tunnel_info's
	 * options_len is a reasonable choice with its usage in front ends
	 * (f.e., it's part of flow keys, etc).
	 */
	BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);

	lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
	lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
}
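/* Reference-counted static key that lets callers check cheaply whether any
 * metadata-based tunnel user is currently active; users take and drop
 * references via ip_tunnel_need_metadata()/ip_tunnel_unneed_metadata().
 */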
struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL(ip_tunnel_metadata_cnt);

void ip_tunnel_need_metadata(void)
{
	static_key_slow_inc(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);

void ip_tunnel_unneed_metadata(void)
{
	static_key_slow_dec(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);