2005-04-17 02:20:36 +04:00
/*
2007-02-09 17:24:47 +03:00
* Linux NET3 : IP / IP protocol decoder .
2005-04-17 02:20:36 +04:00
*
* Authors :
* Sam Lantinga ( slouken @ cs . ucdavis . edu ) 02 / 01 / 95
*
* Fixes :
* Alan Cox : Merged and made usable non-modular (it's so tiny it's silly as
* a module taking up 2 pages).
* Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
* to keep ip_forward happy.
* Alan Cox : More fixes for 1.3 .21 , and firewall fix . Maybe this will work soon 8 ) .
* Kai Schulte : Fixed # defines for IP_FIREWALL - > FIREWALL
* David Woodhouse : Perform some basic ICMP handling .
* IPIP Routing without decapsulation .
* Carlos Picoto : GRE over IP support
* Alexey Kuznetsov : Reworked. Really, it is now a truncated version of ipv4/ip_gre.c.
* I do not want to merge them together.
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*
*/
/* tunnel.c: an IP tunnel driver
The purpose of this driver is to provide an IP tunnel through
which you can tunnel network traffic transparently across subnets .
This was written by looking at Nick Holloway ' s dummy driver
Thanks for the great code !
- Sam Lantinga ( slouken @ cs . ucdavis . edu ) 02 / 01 / 95
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
Minor tweaks :
Cleaned up the code a little and added some pre - 1.3 .0 tweaks .
dev - > hard_header / hard_header_len changed to use no headers .
Comments / bracketing tweaked .
Made the tunnels use dev - > name not tunnel : when error reporting .
Added tx_dropped stat
2007-02-09 17:24:47 +03:00
2008-10-14 06:01:08 +04:00
- Alan Cox ( alan @ lxorguk . ukuu . org . uk ) 21 March 95
2005-04-17 02:20:36 +04:00
Reworked :
Changed to tunnel to destination gateway in addition to the
tunnel ' s pointopoint address
Almost completely rewritten
Note : There is currently no firewall or ICMP handling done .
- Sam Lantinga ( slouken @ cs . ucdavis . edu ) 02 / 13 / 96
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
*/
/* Things I wish I had known when writing the tunnel driver:
When the tunnel_xmit ( ) function is called , the skb contains the
packet to be sent ( plus a great deal of extra info ) , and dev
contains the tunnel device that _we_ are .
When we are passed a packet , we are expected to fill in the
source address with our source IP address .
What is the proper way to allocate , copy and free a buffer ?
After you allocate it , it is a " 0 length " chunk of memory
starting at zero . If you want to add headers to the buffer
later , you ' ll have to call " skb_reserve(skb, amount) " with
the amount of memory you want reserved . Then , you call
" skb_put(skb, amount) " with the amount of space you want in
the buffer . skb_put ( ) returns a pointer to the top ( # 0 ) of
that buffer . skb - > len is set to the amount of space you have
" allocated " with skb_put ( ) . You can then write up to skb - > len
bytes to that buffer . If you need more , you can call skb_put ( )
again with the additional amount of space you need . You can
2007-02-09 17:24:47 +03:00
find out how much more space you can allocate by calling
2005-04-17 02:20:36 +04:00
" skb_tailroom(skb) " .
Now , to add header space , call " skb_push(skb, header_len) " .
This creates space at the beginning of the buffer and returns
a pointer to this new space . If later you need to strip a
header from a buffer , call " skb_pull(skb, header_len) " .
skb_headroom ( ) will return how much space is left at the top
of the buffer ( before the main data ) . Remember , this headroom
space must be reserved before the skb_put ( ) function is called .
*/
/*
This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c.
For comments look at net/ipv4/ip_gre.c --ANK
*/
2007-02-09 17:24:47 +03:00
2006-01-11 23:17:47 +03:00
# include <linux/capability.h>
2005-04-17 02:20:36 +04:00
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <asm/uaccess.h>
# include <linux/skbuff.h>
# include <linux/netdevice.h>
# include <linux/in.h>
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/if_arp.h>
# include <linux/mroute.h>
# include <linux/init.h>
# include <linux/netfilter_ipv4.h>
2006-01-06 03:35:42 +03:00
# include <linux/if_ether.h>
2005-04-17 02:20:36 +04:00
# include <net/sock.h>
# include <net/ip.h>
# include <net/icmp.h>
# include <net/ipip.h>
# include <net/inet_ecn.h>
# include <net/xfrm.h>
2008-04-16 12:03:13 +04:00
# include <net/net_namespace.h>
# include <net/netns/generic.h>
2005-04-17 02:20:36 +04:00
/* Number of buckets in each per-namespace tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address into a bucket index in the range 0..15 by
 * XOR-ing it with itself shifted right by four bits and keeping the
 * low nibble.
 *
 * The argument is parenthesized so that a compound expression hashes
 * the same way a plain variable holding the same value would.  Note
 * that the argument is still evaluated twice, so it must not have
 * side effects.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)
2005-04-17 02:20:36 +04:00
2008-04-16 12:03:13 +04:00
/* Key used with net_generic() to fetch this driver's per-namespace state. */
static int ipip_net_id ;
/*
 * Per network namespace IPIP state: four tunnel hash tables plus the
 * fallback device.
 */
struct ipip_net {
2008-04-16 12:05:32 +04:00
/* Tunnels with both remote and local address set. */
struct ip_tunnel * tunnels_r_l [ HASH_SIZE ] ;
/* Tunnels with only the remote address set. */
struct ip_tunnel * tunnels_r [ HASH_SIZE ] ;
/* Tunnels with only the local address set. */
struct ip_tunnel * tunnels_l [ HASH_SIZE ] ;
/* Wildcard slot: tunnel with neither address set. */
struct ip_tunnel * tunnels_wc [ 1 ] ;
/*
 * Table selector indexed by the "prio" computed in __ipip_bucket():
 * bit 1 = remote present, bit 0 = local present.  Filled in by
 * ipip_init_net() as 0 = wc, 1 = l, 2 = r, 3 = r_l.
 */
struct ip_tunnel * * tunnels [ 4 ] ;
2008-04-16 12:04:13 +04:00
/* The namespace's fallback tunnel device, allocated in ipip_init_net(). */
struct net_device * fb_tunnel_dev ;
2008-04-16 12:03:13 +04:00
} ;
2008-11-21 07:33:21 +03:00
/* Forward declarations for helpers defined further down in this file. */
static void ipip_fb_tunnel_init ( struct net_device * dev ) ;
static void ipip_tunnel_init ( struct net_device * dev ) ;
2005-04-17 02:20:36 +04:00
static void ipip_tunnel_setup ( struct net_device * dev ) ;
/*
 * Guards the tunnel hash chains: taken for writing (with bottom halves
 * disabled) around chain pointer updates, and for reading in ipip_rcv()
 * and ipip_err() around the lookup.
 */
static DEFINE_RWLOCK ( ipip_lock ) ;
2008-04-16 12:04:35 +04:00
/*
 * Find the tunnel in @net that matches the given outer addresses.
 *
 * Tables are consulted from most to least specific: (remote, local),
 * remote only, local only, and finally the single wildcard slot.  Only
 * tunnels whose device has IFF_UP set are eligible.
 *
 * Returns the first matching tunnel, or NULL when nothing matches.
 * This function takes no lock itself.
 */
static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
					    __be32 remote, __be32 local)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	const unsigned rhash = HASH(remote);
	const unsigned lhash = HASH(local);
	struct ip_tunnel *cand;

	/* Fully specified tunnels. */
	for (cand = ipn->tunnels_r_l[rhash ^ lhash]; cand != NULL; cand = cand->next) {
		if (cand->parms.iph.saddr != local)
			continue;
		if (cand->parms.iph.daddr != remote)
			continue;
		if (cand->dev->flags & IFF_UP)
			return cand;
	}

	/* Tunnels keyed by remote address only. */
	for (cand = ipn->tunnels_r[rhash]; cand != NULL; cand = cand->next) {
		if (cand->parms.iph.daddr != remote)
			continue;
		if (cand->dev->flags & IFF_UP)
			return cand;
	}

	/* Tunnels keyed by local address only. */
	for (cand = ipn->tunnels_l[lhash]; cand != NULL; cand = cand->next) {
		if (cand->parms.iph.saddr != local)
			continue;
		if (cand->dev->flags & IFF_UP)
			return cand;
	}

	/* Last resort: the wildcard tunnel, if present and up. */
	cand = ipn->tunnels_wc[0];
	if (cand != NULL && (cand->dev->flags & IFF_UP))
		return cand;

	return NULL;
}
2008-04-16 12:04:35 +04:00
static struct ip_tunnel * * __ipip_bucket ( struct ipip_net * ipn ,
struct ip_tunnel_parm * parms )
2005-04-17 02:20:36 +04:00
{
2007-04-24 15:44:47 +04:00
__be32 remote = parms - > iph . daddr ;
__be32 local = parms - > iph . saddr ;
2005-04-17 02:20:36 +04:00
unsigned h = 0 ;
int prio = 0 ;
if ( remote ) {
prio | = 2 ;
h ^ = HASH ( remote ) ;
}
if ( local ) {
prio | = 1 ;
h ^ = HASH ( local ) ;
}
2008-04-16 12:05:32 +04:00
return & ipn - > tunnels [ prio ] [ h ] ;
2005-04-17 02:20:36 +04:00
}
2008-04-16 12:04:35 +04:00
static inline struct ip_tunnel * * ipip_bucket ( struct ipip_net * ipn ,
struct ip_tunnel * t )
2007-04-24 15:44:47 +04:00
{
2008-04-16 12:04:35 +04:00
return __ipip_bucket ( ipn , & t - > parms ) ;
2007-04-24 15:44:47 +04:00
}
2005-04-17 02:20:36 +04:00
2008-04-16 12:04:35 +04:00
static void ipip_tunnel_unlink ( struct ipip_net * ipn , struct ip_tunnel * t )
2005-04-17 02:20:36 +04:00
{
struct ip_tunnel * * tp ;
2008-04-16 12:04:35 +04:00
for ( tp = ipip_bucket ( ipn , t ) ; * tp ; tp = & ( * tp ) - > next ) {
2005-04-17 02:20:36 +04:00
if ( t = = * tp ) {
write_lock_bh ( & ipip_lock ) ;
* tp = t - > next ;
write_unlock_bh ( & ipip_lock ) ;
break ;
}
}
}
2008-04-16 12:04:35 +04:00
static void ipip_tunnel_link ( struct ipip_net * ipn , struct ip_tunnel * t )
2005-04-17 02:20:36 +04:00
{
2008-04-16 12:04:35 +04:00
struct ip_tunnel * * tp = ipip_bucket ( ipn , t ) ;
2005-04-17 02:20:36 +04:00
t - > next = * tp ;
write_lock_bh ( & ipip_lock ) ;
* tp = t ;
write_unlock_bh ( & ipip_lock ) ;
}
2008-04-16 12:04:35 +04:00
/*
 * Find the tunnel whose stored source/destination addresses equal those
 * in @parms, searching only the hash chain those addresses select.
 *
 * If no tunnel matches and @create is non-zero, a new net_device is
 * allocated, named, initialised from @parms, registered and linked into
 * the hash table.
 *
 * Returns the existing or newly created tunnel, or NULL when nothing
 * matched and @create is zero, or when allocation, naming or
 * registration failed.
 */
static struct ip_tunnel * ipip_tunnel_locate ( struct net * net ,
struct ip_tunnel_parm * parms , int create )
2005-04-17 02:20:36 +04:00
{
2006-11-08 11:23:14 +03:00
__be32 remote = parms - > iph . daddr ;
__be32 local = parms - > iph . saddr ;
2005-04-17 02:20:36 +04:00
struct ip_tunnel * t , * * tp , * nt ;
struct net_device * dev ;
char name [ IFNAMSIZ ] ;
2008-04-16 12:04:35 +04:00
struct ipip_net * ipn = net_generic ( net , ipip_net_id ) ;
2005-04-17 02:20:36 +04:00
2008-04-16 12:04:35 +04:00
/* Exact match on both addresses within the selected chain. */
for ( tp = __ipip_bucket ( ipn , parms ) ; ( t = * tp ) ! = NULL ; tp = & t - > next ) {
2005-04-17 02:20:36 +04:00
if ( local = = t - > parms . iph . saddr & & remote = = t - > parms . iph . daddr )
return t ;
}
if ( ! create )
return NULL ;
/* Use the caller-supplied name if there is one ... */
if ( parms - > name [ 0 ] )
strlcpy ( name , parms - > name , IFNAMSIZ ) ;
2008-02-24 07:19:20 +03:00
else
/* ... otherwise a name template; the doubled percent sign in the
   format produces a single literal one in name. */
sprintf ( name , " tunl%%d " ) ;
2005-04-17 02:20:36 +04:00
/* Private area is sized for one struct ip_tunnel (t is only used for sizeof here). */
dev = alloc_netdev ( sizeof ( * t ) , name , ipip_tunnel_setup ) ;
if ( dev = = NULL )
return NULL ;
2008-04-16 12:06:18 +04:00
dev_net_set ( dev , net ) ;
[INET]: Don't create tunnels with '%' in name.
Four tunnel drivers (ip_gre, ipip, ip6_tunnel and sit) can receive a
pre-defined name for a device from the userspace. Since these drivers
call the register_netdevice() (rtnl_lock, is held), which does _not_
generate the device's name, this name may contain a '%' character.
Not sure how bad is this to have a device with a '%' in its name, but
all the other places either use the register_netdev(), which call the
dev_alloc_name(), or explicitly call the dev_alloc_name() before
registering, i.e. do not allow for such names.
This had to be prior to the commit 34cc7b, but I forgot to number the
patches and this one got lost, sorry.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-27 10:51:04 +03:00
/* Any name containing a percent sign (default template or user-supplied)
   is passed through dev_alloc_name(); failure there aborts creation. */
if ( strchr ( name , ' % ' ) ) {
if ( dev_alloc_name ( dev , name ) < 0 )
goto failed_free ;
}
2006-01-09 09:05:26 +03:00
nt = netdev_priv ( dev ) ;
2005-04-17 02:20:36 +04:00
/* Copy the requested parameters into the new tunnel before initialising it. */
nt - > parms = * parms ;
2008-11-21 07:33:21 +03:00
ipip_tunnel_init ( dev ) ;
[INET]: Don't create tunnels with '%' in name.
Four tunnel drivers (ip_gre, ipip, ip6_tunnel and sit) can receive a
pre-defined name for a device from the userspace. Since these drivers
call the register_netdevice() (rtnl_lock, is held), which does _not_
generate the device's name, this name may contain a '%' character.
Not sure how bad is this to have a device with a '%' in its name, but
all the other places either use the register_netdev(), which call the
dev_alloc_name(), or explicitly call the dev_alloc_name() before
registering, i.e. do not allow for such names.
This had to be prior to the commit 34cc7b, but I forgot to number the
patches and this one got lost, sorry.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-27 10:51:04 +03:00
if ( register_netdevice ( dev ) < 0 )
goto failed_free ;
2005-04-17 02:20:36 +04:00
/* Reference taken here is dropped by dev_put() in ipip_tunnel_uninit(). */
dev_hold ( dev ) ;
2008-04-16 12:04:35 +04:00
ipip_tunnel_link ( ipn , nt ) ;
2005-04-17 02:20:36 +04:00
return nt ;
[INET]: Don't create tunnels with '%' in name.
Four tunnel drivers (ip_gre, ipip, ip6_tunnel and sit) can receive a
pre-defined name for a device from the userspace. Since these drivers
call the register_netdevice() (rtnl_lock, is held), which does _not_
generate the device's name, this name may contain a '%' character.
Not sure how bad is this to have a device with a '%' in its name, but
all the other places either use the register_netdev(), which call the
dev_alloc_name(), or explicitly call the dev_alloc_name() before
registering, i.e. do not allow for such names.
This had to be prior to the commit 34cc7b, but I forgot to number the
patches and this one got lost, sorry.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-27 10:51:04 +03:00
/* Shared failure exit: the device was never registered, so just free it. */
failed_free :
free_netdev ( dev ) ;
2005-04-17 02:20:36 +04:00
return NULL ;
}
static void ipip_tunnel_uninit ( struct net_device * dev )
{
2008-04-16 12:04:13 +04:00
struct net * net = dev_net ( dev ) ;
struct ipip_net * ipn = net_generic ( net , ipip_net_id ) ;
if ( dev = = ipn - > fb_tunnel_dev ) {
2005-04-17 02:20:36 +04:00
write_lock_bh ( & ipip_lock ) ;
2008-04-16 12:05:32 +04:00
ipn - > tunnels_wc [ 0 ] = NULL ;
2005-04-17 02:20:36 +04:00
write_unlock_bh ( & ipip_lock ) ;
} else
2008-04-16 12:04:35 +04:00
ipip_tunnel_unlink ( ipn , netdev_priv ( dev ) ) ;
2005-04-17 02:20:36 +04:00
dev_put ( dev ) ;
}
2006-03-28 13:12:13 +04:00
/*
 * ICMP error handler for IPIP (installed as ipip_handler.err_handler).
 *
 * skb->data is read as the IP header quoted by the ICMP error.  Only
 * ICMP_DEST_UNREACH (other than source-route-failed, port-unreachable
 * and frag-needed) and ICMP_TIME_EXCEEDED with code ICMP_EXC_TTL are
 * acted upon; every other type/code returns 0 immediately.
 *
 * For an accepted error the matching tunnel's error counter and
 * timestamp are updated; ipip_tunnel_xmit() reads them later.
 *
 * Returns -ENOENT when no tunnel with a configured remote address
 * matches, otherwise 0.  @info is not used.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *quoted = (struct iphdr *)skb->data;
	const int icmp_type = icmp_hdr(skb)->type;
	const int icmp_code = icmp_hdr(skb)->code;
	struct ip_tunnel *tun;
	int ret = -ENOENT;

	if (icmp_type == ICMP_DEST_UNREACH) {
		/*
		 * SR_FAILED / PORT_UNREACH are "impossible" here, and the
		 * soft PMTU state for FRAG_NEEDED is maintained by the IP
		 * core.  All remaining codes are treated alike.
		 */
		if (icmp_code == ICMP_SR_FAILED ||
		    icmp_code == ICMP_PORT_UNREACH ||
		    icmp_code == ICMP_FRAG_NEEDED)
			return 0;
	} else if (icmp_type == ICMP_TIME_EXCEEDED) {
		if (icmp_code != ICMP_EXC_TTL)
			return 0;
	} else {
		/* ICMP_PARAMETERPROB and anything else: ignored. */
		return 0;
	}

	read_lock(&ipip_lock);

	/* The quoted packet is one we sent, so its daddr is our remote. */
	tun = ipip_tunnel_lookup(dev_net(skb->dev), quoted->daddr, quoted->saddr);
	if (tun != NULL && tun->parms.iph.daddr != 0) {
		ret = 0;

		/* Tunnels with ttl 0 do not record TIME_EXCEEDED errors. */
		if (!(tun->parms.iph.ttl == 0 && icmp_type == ICMP_TIME_EXCEEDED)) {
			if (time_before(jiffies, tun->err_time + IPTUNNEL_ERR_TIMEO))
				tun->err_count++;
			else
				tun->err_count = 1;
			tun->err_time = jiffies;
		}
	}

	read_unlock(&ipip_lock);
	return ret;
}
2007-04-21 09:47:35 +04:00
static inline void ipip_ecn_decapsulate ( const struct iphdr * outer_iph ,
struct sk_buff * skb )
2005-04-17 02:20:36 +04:00
{
2007-04-21 09:47:35 +04:00
struct iphdr * inner_iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
if ( INET_ECN_is_ce ( outer_iph - > tos ) )
IP_ECN_set_ce ( inner_iph ) ;
}
static int ipip_rcv ( struct sk_buff * skb )
{
struct ip_tunnel * tunnel ;
2007-04-21 09:47:35 +04:00
const struct iphdr * iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
read_lock ( & ipip_lock ) ;
2008-04-16 12:05:03 +04:00
if ( ( tunnel = ipip_tunnel_lookup ( dev_net ( skb - > dev ) ,
2008-04-16 12:04:35 +04:00
iph - > saddr , iph - > daddr ) ) ! = NULL ) {
2005-04-17 02:20:36 +04:00
if ( ! xfrm4_policy_check ( NULL , XFRM_POLICY_IN , skb ) ) {
read_unlock ( & ipip_lock ) ;
kfree_skb ( skb ) ;
return 0 ;
}
secpath_reset ( skb ) ;
2007-04-11 08:21:55 +04:00
skb - > mac_header = skb - > network_header ;
2007-04-11 07:45:18 +04:00
skb_reset_network_header ( skb ) ;
2005-04-17 02:20:36 +04:00
skb - > protocol = htons ( ETH_P_IP ) ;
skb - > pkt_type = PACKET_HOST ;
2008-05-22 01:15:16 +04:00
tunnel - > dev - > stats . rx_packets + + ;
tunnel - > dev - > stats . rx_bytes + = skb - > len ;
2005-04-17 02:20:36 +04:00
skb - > dev = tunnel - > dev ;
2009-06-02 09:19:30 +04:00
skb_dst_drop ( skb ) ;
2005-04-17 02:20:36 +04:00
nf_reset ( skb ) ;
ipip_ecn_decapsulate ( iph , skb ) ;
netif_rx ( skb ) ;
read_unlock ( & ipip_lock ) ;
return 0 ;
}
read_unlock ( & ipip_lock ) ;
return - 1 ;
}
/*
* This function assumes it is being called from dev_queue_xmit ( )
* and that skb is filled properly by that function .
*/
2009-08-31 23:50:41 +04:00
/*
 * ndo_start_xmit hook (see ipip_netdev_ops): wrap the IPv4 packet in
 * @skb in a new outer IPv4 header built from the tunnel's configured
 * template and pass it on via IPTUNNEL_XMIT().
 *
 * Every path returns NETDEV_TX_OK.  On the error paths the skb is
 * freed here and tx_errors (or tx_dropped) is bumped.
 */
static netdev_tx_t ipip_tunnel_xmit ( struct sk_buff * skb , struct net_device * dev )
2005-04-17 02:20:36 +04:00
{
2006-01-09 09:05:26 +03:00
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2008-05-22 01:15:16 +04:00
struct net_device_stats * stats = & tunnel - > dev - > stats ;
2005-04-17 02:20:36 +04:00
/* tiph is the configured outer-header template for this tunnel. */
struct iphdr * tiph = & tunnel - > parms . iph ;
u8 tos = tunnel - > parms . iph . tos ;
2006-11-08 11:23:14 +03:00
__be16 df = tiph - > frag_off ;
2005-04-17 02:20:36 +04:00
struct rtable * rt ; /* Route to the other host */
struct net_device * tdev ; /* Device to other host */
2007-04-21 09:47:35 +04:00
struct iphdr * old_iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
struct iphdr * iph ; /* Our new IP header */
2007-10-24 08:07:32 +04:00
unsigned int max_headroom ; /* The extra header space needed */
2006-11-08 11:23:14 +03:00
__be32 dst = tiph - > daddr ;
2005-04-17 02:20:36 +04:00
int mtu ;
/* Only IPv4 payloads are tunnelled. */
if ( skb - > protocol ! = htons ( ETH_P_IP ) )
goto tx_error ;
/* Low bit set in the configured TOS: take the TOS from the inner packet instead. */
if ( tos & 1 )
tos = old_iph - > tos ;
if ( ! dst ) {
/* NBMA tunnel */
/* No configured remote: use the gateway of the route already attached to the skb. */
2009-06-02 09:14:27 +04:00
if ( ( rt = skb_rtable ( skb ) ) = = NULL ) {
2008-05-22 01:15:16 +04:00
stats - > tx_fifo_errors + + ;
2005-04-17 02:20:36 +04:00
goto tx_error ;
}
if ( ( dst = rt - > rt_gateway ) = = 0 )
goto tx_error_icmp ;
}
{
/* Route lookup for the outer packet; rt is overwritten with the result. */
struct flowi fl = { . oif = tunnel - > parms . link ,
. nl_u = { . ip4_u =
{ . daddr = dst ,
. saddr = tiph - > saddr ,
. tos = RT_TOS ( tos ) } } ,
. proto = IPPROTO_IPIP } ;
2008-04-16 12:05:57 +04:00
if ( ip_route_output_key ( dev_net ( dev ) , & rt , & fl ) ) {
2008-05-22 01:15:16 +04:00
stats - > tx_carrier_errors + + ;
2005-04-17 02:20:36 +04:00
goto tx_error_icmp ;
}
}
tdev = rt - > u . dst . dev ;
/* The outer route points back at this tunnel device: refuse to loop. */
if ( tdev = = dev ) {
ip_rt_put ( rt ) ;
2008-05-22 01:15:16 +04:00
stats - > collisions + + ;
2005-04-17 02:20:36 +04:00
goto tx_error ;
}
/*
 * With a non-zero configured frag_off the MTU is the outer route's MTU
 * minus one IP header; otherwise it is the MTU of the skb's current
 * dst, or the tunnel device MTU when the skb has no dst.
 */
if ( tiph - > frag_off )
mtu = dst_mtu ( & rt - > u . dst ) - sizeof ( struct iphdr ) ;
else
2009-06-02 09:19:30 +04:00
mtu = skb_dst ( skb ) ? dst_mtu ( skb_dst ( skb ) ) : dev - > mtu ;
2005-04-17 02:20:36 +04:00
if ( mtu < 68 ) {
2008-05-22 01:15:16 +04:00
stats - > collisions + + ;
2005-04-17 02:20:36 +04:00
ip_rt_put ( rt ) ;
goto tx_error ;
}
2009-06-02 09:19:30 +04:00
if ( skb_dst ( skb ) )
skb_dst ( skb ) - > ops - > update_pmtu ( skb_dst ( skb ) , mtu ) ;
2005-04-17 02:20:36 +04:00
/* The outer header inherits DF from the inner packet on top of the configured value. */
df | = ( old_iph - > frag_off & htons ( IP_DF ) ) ;
/* Inner packet has DF set and does not fit: report FRAG_NEEDED and drop. */
if ( ( old_iph - > frag_off & htons ( IP_DF ) ) & & mtu < ntohs ( old_iph - > tot_len ) ) {
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_FRAG_NEEDED , htonl ( mtu ) ) ;
ip_rt_put ( rt ) ;
goto tx_error ;
}
/*
 * err_count / err_time are written by ipip_err().  While the last
 * recorded error is younger than IPTUNNEL_ERR_TIMEO, one pending error
 * is consumed per transmitted packet and signalled with
 * dst_link_failure(); afterwards the counter is reset.
 */
if ( tunnel - > err_count > 0 ) {
2009-02-25 10:36:47 +03:00
if ( time_before ( jiffies ,
tunnel - > err_time + IPTUNNEL_ERR_TIMEO ) ) {
2005-04-17 02:20:36 +04:00
tunnel - > err_count - - ;
dst_link_failure ( skb ) ;
} else
tunnel - > err_count = 0 ;
}
/*
* Okay , now see if we can stuff it in the buffer as - is .
*/
max_headroom = ( LL_RESERVED_SPACE ( tdev ) + sizeof ( struct iphdr ) ) ;
2007-07-10 02:33:40 +04:00
/* Reallocate when headroom is short or the buffer is shared / cloned and not writable. */
if ( skb_headroom ( skb ) < max_headroom | | skb_shared ( skb ) | |
( skb_cloned ( skb ) & & ! skb_clone_writable ( skb , 0 ) ) ) {
2005-04-17 02:20:36 +04:00
struct sk_buff * new_skb = skb_realloc_headroom ( skb , max_headroom ) ;
if ( ! new_skb ) {
ip_rt_put ( rt ) ;
2007-02-09 17:24:47 +03:00
stats - > tx_dropped + + ;
2005-04-17 02:20:36 +04:00
dev_kfree_skb ( skb ) ;
2009-06-23 10:03:08 +04:00
return NETDEV_TX_OK ;
2005-04-17 02:20:36 +04:00
}
if ( skb - > sk )
skb_set_owner_w ( new_skb , skb - > sk ) ;
dev_kfree_skb ( skb ) ;
skb = new_skb ;
2007-04-21 09:47:35 +04:00
/* Re-read the inner header pointer from the new buffer. */
old_iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
}
2007-04-11 08:21:55 +04:00
/* The inner IP header becomes the transport header of the outer packet. */
skb - > transport_header = skb - > network_header ;
2007-04-11 07:46:21 +04:00
skb_push ( skb , sizeof ( struct iphdr ) ) ;
skb_reset_network_header ( skb ) ;
2005-04-17 02:20:36 +04:00
memset ( & ( IPCB ( skb ) - > opt ) , 0 , sizeof ( IPCB ( skb ) - > opt ) ) ;
2006-02-16 02:10:22 +03:00
IPCB ( skb ) - > flags & = ~ ( IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED ) ;
2009-06-02 09:19:30 +04:00
/* Replace the skb's dst with the outer route obtained above. */
skb_dst_drop ( skb ) ;
skb_dst_set ( skb , & rt - > u . dst ) ;
2005-04-17 02:20:36 +04:00
/*
* Push down and install the IPIP header .
*/
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
iph - > version = 4 ;
iph - > ihl = sizeof ( struct iphdr ) > > 2 ;
iph - > frag_off = df ;
iph - > protocol = IPPROTO_IPIP ;
iph - > tos = INET_ECN_encapsulate ( tos , old_iph - > tos ) ;
iph - > daddr = rt - > rt_dst ;
iph - > saddr = rt - > rt_src ;
/* A configured TTL of 0 means: copy the TTL of the inner packet. */
if ( ( iph - > ttl = tiph - > ttl ) = = 0 )
iph - > ttl = old_iph - > ttl ;
nf_reset ( skb ) ;
/*
 * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file;
 * it presumably refers to locals of this function by name (skb, iph,
 * rt, stats) -- confirm before renaming any of them.
 */
IPTUNNEL_XMIT ( ) ;
2009-06-23 10:03:08 +04:00
return NETDEV_TX_OK ;
2005-04-17 02:20:36 +04:00
/* Error exits: the ICMP variant additionally reports a link failure for the skb. */
tx_error_icmp :
dst_link_failure ( skb ) ;
tx_error :
stats - > tx_errors + + ;
dev_kfree_skb ( skb ) ;
2009-06-23 10:03:08 +04:00
return NETDEV_TX_OK ;
2005-04-17 02:20:36 +04:00
}
2007-12-12 22:01:43 +03:00
static void ipip_tunnel_bind_dev ( struct net_device * dev )
{
struct net_device * tdev = NULL ;
struct ip_tunnel * tunnel ;
struct iphdr * iph ;
tunnel = netdev_priv ( dev ) ;
iph = & tunnel - > parms . iph ;
if ( iph - > daddr ) {
struct flowi fl = { . oif = tunnel - > parms . link ,
. nl_u = { . ip4_u =
{ . daddr = iph - > daddr ,
. saddr = iph - > saddr ,
. tos = RT_TOS ( iph - > tos ) } } ,
. proto = IPPROTO_IPIP } ;
struct rtable * rt ;
2008-04-16 12:05:57 +04:00
if ( ! ip_route_output_key ( dev_net ( dev ) , & rt , & fl ) ) {
2007-12-12 22:01:43 +03:00
tdev = rt - > u . dst . dev ;
ip_rt_put ( rt ) ;
}
dev - > flags | = IFF_POINTOPOINT ;
}
if ( ! tdev & & tunnel - > parms . link )
2008-04-16 12:05:57 +04:00
tdev = __dev_get_by_index ( dev_net ( dev ) , tunnel - > parms . link ) ;
2007-12-12 22:01:43 +03:00
if ( tdev ) {
dev - > hard_header_len = tdev - > hard_header_len + sizeof ( struct iphdr ) ;
dev - > mtu = tdev - > mtu - sizeof ( struct iphdr ) ;
}
dev - > iflink = tunnel - > parms . link ;
}
2005-04-17 02:20:36 +04:00
/*
 * ndo_do_ioctl hook (see ipip_netdev_ops): get, add, change or delete a
 * tunnel.  The tunnel description is a struct ip_tunnel_parm exchanged
 * with user space through ifr->ifr_ifru.ifru_data.
 *
 * Returns 0 on success or a negative errno: -EFAULT for a failed user
 * copy, -EPERM without CAP_NET_ADMIN (or when asked to delete the
 * fallback tunnel), -EINVAL for bad parameters or an unknown command,
 * -EEXIST, -ENOENT or -ENOBUFS as noted below.
 */
static int
ipip_tunnel_ioctl ( struct net_device * dev , struct ifreq * ifr , int cmd )
{
int err = 0 ;
struct ip_tunnel_parm p ;
struct ip_tunnel * t ;
2008-04-16 12:04:13 +04:00
struct net * net = dev_net ( dev ) ;
struct ipip_net * ipn = net_generic ( net , ipip_net_id ) ;
2005-04-17 02:20:36 +04:00
switch ( cmd ) {
case SIOCGETTUNNEL :
t = NULL ;
2008-04-16 12:04:13 +04:00
/* On the fallback device the caller's parameters select which tunnel to report. */
if ( dev = = ipn - > fb_tunnel_dev ) {
2005-04-17 02:20:36 +04:00
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) ) {
err = - EFAULT ;
break ;
}
2008-04-16 12:04:35 +04:00
t = ipip_tunnel_locate ( net , & p , 0 ) ;
2005-04-17 02:20:36 +04:00
}
/* Otherwise, or when nothing matched, report the device's own tunnel. */
if ( t = = NULL )
2006-01-09 09:05:26 +03:00
t = netdev_priv ( dev ) ;
2005-04-17 02:20:36 +04:00
memcpy ( & p , & t - > parms , sizeof ( p ) ) ;
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & p , sizeof ( p ) ) )
err = - EFAULT ;
break ;
case SIOCADDTUNNEL :
case SIOCCHGTUNNEL :
/* err is preset before each check so a bare goto done reports the right code. */
err = - EPERM ;
if ( ! capable ( CAP_NET_ADMIN ) )
goto done ;
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
goto done ;
err = - EINVAL ;
/* Header template must be IPv4, protocol IPIP, ihl 5, and may carry no frag_off bit other than DF. */
if ( p . iph . version ! = 4 | | p . iph . protocol ! = IPPROTO_IPIP | |
p . iph . ihl ! = 5 | | ( p . iph . frag_off & htons ( ~ IP_DF ) ) )
goto done ;
/* A fixed TTL forces DF on the outer header. */
if ( p . iph . ttl )
p . iph . frag_off | = htons ( IP_DF ) ;
2008-04-16 12:04:35 +04:00
/* Only SIOCADDTUNNEL may create a tunnel that does not exist yet. */
t = ipip_tunnel_locate ( net , & p , cmd = = SIOCADDTUNNEL ) ;
2005-04-17 02:20:36 +04:00
2008-04-16 12:04:13 +04:00
if ( dev ! = ipn - > fb_tunnel_dev & & cmd = = SIOCCHGTUNNEL ) {
2005-04-17 02:20:36 +04:00
if ( t ! = NULL ) {
/* The requested addresses already belong to a different tunnel device. */
if ( t - > dev ! = dev ) {
err = - EEXIST ;
break ;
}
} else {
/* A change may not add or remove the remote address relative to the device's IFF_POINTOPOINT flag. */
if ( ( ( dev - > flags & IFF_POINTOPOINT ) & & ! p . iph . daddr ) | |
( ! ( dev - > flags & IFF_POINTOPOINT ) & & p . iph . daddr ) ) {
err = - EINVAL ;
break ;
}
2006-01-09 09:05:26 +03:00
t = netdev_priv ( dev ) ;
2008-04-16 12:04:35 +04:00
/* Re-hash: unlink under the old addresses, store the new ones, link again. */
ipip_tunnel_unlink ( ipn , t ) ;
2005-04-17 02:20:36 +04:00
t - > parms . iph . saddr = p . iph . saddr ;
t - > parms . iph . daddr = p . iph . daddr ;
memcpy ( dev - > dev_addr , & p . iph . saddr , 4 ) ;
memcpy ( dev - > broadcast , & p . iph . daddr , 4 ) ;
2008-04-16 12:04:35 +04:00
ipip_tunnel_link ( ipn , t ) ;
2005-04-17 02:20:36 +04:00
netdev_state_change ( dev ) ;
}
}
if ( t ) {
err = 0 ;
if ( cmd = = SIOCCHGTUNNEL ) {
t - > parms . iph . ttl = p . iph . ttl ;
t - > parms . iph . tos = p . iph . tos ;
t - > parms . iph . frag_off = p . iph . frag_off ;
2007-12-12 22:01:43 +03:00
/* A changed link index requires rebinding to the underlying device. */
if ( t - > parms . link ! = p . link ) {
t - > parms . link = p . link ;
ipip_tunnel_bind_dev ( dev ) ;
netdev_state_change ( dev ) ;
}
2005-04-17 02:20:36 +04:00
}
/* Hand the resulting parameters back to the caller. */
if ( copy_to_user ( ifr - > ifr_ifru . ifru_data , & t - > parms , sizeof ( p ) ) )
err = - EFAULT ;
} else
/* No tunnel: creation failed (ADD) or nothing to change (CHG). */
err = ( cmd = = SIOCADDTUNNEL ? - ENOBUFS : - ENOENT ) ;
break ;
case SIOCDELTUNNEL :
err = - EPERM ;
if ( ! capable ( CAP_NET_ADMIN ) )
goto done ;
2008-04-16 12:04:13 +04:00
/* Through the fallback device, the caller's parameters name the tunnel to delete. */
if ( dev = = ipn - > fb_tunnel_dev ) {
2005-04-17 02:20:36 +04:00
err = - EFAULT ;
if ( copy_from_user ( & p , ifr - > ifr_ifru . ifru_data , sizeof ( p ) ) )
goto done ;
err = - ENOENT ;
2008-04-16 12:04:35 +04:00
if ( ( t = ipip_tunnel_locate ( net , & p , 0 ) ) = = NULL )
2005-04-17 02:20:36 +04:00
goto done ;
err = - EPERM ;
2008-04-16 12:04:13 +04:00
/* The fallback tunnel itself cannot be deleted this way. */
if ( t - > dev = = ipn - > fb_tunnel_dev )
2005-04-17 02:20:36 +04:00
goto done ;
dev = t - > dev ;
}
2007-02-07 11:09:58 +03:00
unregister_netdevice ( dev ) ;
err = 0 ;
2005-04-17 02:20:36 +04:00
break ;
default :
err = - EINVAL ;
}
done :
return err ;
}
static int ipip_tunnel_change_mtu ( struct net_device * dev , int new_mtu )
{
if ( new_mtu < 68 | | new_mtu > 0xFFF8 - sizeof ( struct iphdr ) )
return - EINVAL ;
dev - > mtu = new_mtu ;
return 0 ;
}
2008-11-21 07:33:21 +03:00
/* Device operations shared by every IPIP tunnel device (see ipip_tunnel_setup). */
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_uninit	= ipip_tunnel_uninit,
};
2005-04-17 02:20:36 +04:00
static void ipip_tunnel_setup ( struct net_device * dev )
{
2008-11-21 07:33:21 +03:00
dev - > netdev_ops = & ipip_netdev_ops ;
2005-04-17 02:20:36 +04:00
dev - > destructor = free_netdev ;
dev - > type = ARPHRD_TUNNEL ;
dev - > hard_header_len = LL_MAX_HEADER + sizeof ( struct iphdr ) ;
2006-01-06 03:35:42 +03:00
dev - > mtu = ETH_DATA_LEN - sizeof ( struct iphdr ) ;
2005-04-17 02:20:36 +04:00
dev - > flags = IFF_NOARP ;
dev - > iflink = 0 ;
dev - > addr_len = 4 ;
2008-04-16 12:06:18 +04:00
dev - > features | = NETIF_F_NETNS_LOCAL ;
2009-05-28 14:44:30 +04:00
dev - > priv_flags & = ~ IFF_XMIT_DST_RELEASE ;
2005-04-17 02:20:36 +04:00
}
2008-11-21 07:33:21 +03:00
static void ipip_tunnel_init ( struct net_device * dev )
2005-04-17 02:20:36 +04:00
{
2008-11-21 07:33:21 +03:00
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2005-04-17 02:20:36 +04:00
tunnel - > dev = dev ;
strcpy ( tunnel - > parms . name , dev - > name ) ;
memcpy ( dev - > dev_addr , & tunnel - > parms . iph . saddr , 4 ) ;
memcpy ( dev - > broadcast , & tunnel - > parms . iph . daddr , 4 ) ;
2007-12-12 22:01:43 +03:00
ipip_tunnel_bind_dev ( dev ) ;
2005-04-17 02:20:36 +04:00
}
2008-11-21 07:33:21 +03:00
static void ipip_fb_tunnel_init ( struct net_device * dev )
2005-04-17 02:20:36 +04:00
{
2006-01-09 09:05:26 +03:00
struct ip_tunnel * tunnel = netdev_priv ( dev ) ;
2005-04-17 02:20:36 +04:00
struct iphdr * iph = & tunnel - > parms . iph ;
2008-04-16 12:05:32 +04:00
struct ipip_net * ipn = net_generic ( dev_net ( dev ) , ipip_net_id ) ;
2005-04-17 02:20:36 +04:00
tunnel - > dev = dev ;
strcpy ( tunnel - > parms . name , dev - > name ) ;
iph - > version = 4 ;
iph - > protocol = IPPROTO_IPIP ;
iph - > ihl = 5 ;
dev_hold ( dev ) ;
2008-04-16 12:05:32 +04:00
ipn - > tunnels_wc [ 0 ] = tunnel ;
2005-04-17 02:20:36 +04:00
}
static struct xfrm_tunnel ipip_handler = {
. handler = ipip_rcv ,
. err_handler = ipip_err ,
2006-03-28 13:12:13 +04:00
. priority = 1 ,
2005-04-17 02:20:36 +04:00
} ;
2009-02-22 11:02:08 +03:00
/* Message printed once by ipip_init() when the driver is initialised. */
static const char banner [ ] __initconst =
2005-04-17 02:20:36 +04:00
KERN_INFO " IPv4 over IPv4 tunneling driver \n " ;
2008-04-16 12:05:32 +04:00
static void ipip_destroy_tunnels ( struct ipip_net * ipn )
{
int prio ;
for ( prio = 1 ; prio < 4 ; prio + + ) {
int h ;
for ( h = 0 ; h < HASH_SIZE ; h + + ) {
struct ip_tunnel * t ;
while ( ( t = ipn - > tunnels [ prio ] [ h ] ) ! = NULL )
unregister_netdevice ( t - > dev ) ;
}
}
}
2008-04-16 12:03:13 +04:00
/*
 * Per-namespace constructor (ipip_net_ops.init): allocate the
 * namespace's struct ipip_net, publish it under ipip_net_id, wire up
 * the table selector array, then create, initialise and register the
 * fallback tunnel device.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * allocated here is freed again.
 */
static int ipip_init_net ( struct net * net )
{
int err ;
struct ipip_net * ipn ;
err = - ENOMEM ;
2008-04-16 12:05:32 +04:00
/* Zeroed allocation, so all hash chains start out empty. */
ipn = kzalloc ( sizeof ( struct ipip_net ) , GFP_KERNEL ) ;
2008-04-16 12:03:13 +04:00
if ( ipn = = NULL )
goto err_alloc ;
err = net_assign_generic ( net , ipip_net_id , ipn ) ;
if ( err < 0 )
goto err_assign ;
2008-04-16 12:05:32 +04:00
/* Index matches the prio computed in __ipip_bucket(): bit 1 = remote set, bit 0 = local set. */
ipn - > tunnels [ 0 ] = ipn - > tunnels_wc ;
ipn - > tunnels [ 1 ] = ipn - > tunnels_l ;
ipn - > tunnels [ 2 ] = ipn - > tunnels_r ;
ipn - > tunnels [ 3 ] = ipn - > tunnels_r_l ;
2008-04-16 12:04:13 +04:00
ipn - > fb_tunnel_dev = alloc_netdev ( sizeof ( struct ip_tunnel ) ,
" tunl0 " ,
ipip_tunnel_setup ) ;
if ( ! ipn - > fb_tunnel_dev ) {
err = - ENOMEM ;
goto err_alloc_dev ;
}
2008-11-24 04:26:26 +03:00
dev_net_set ( ipn - > fb_tunnel_dev , net ) ;
2008-04-16 12:04:13 +04:00
2008-11-21 07:33:21 +03:00
/* Installs the fallback tunnel in the wildcard slot and takes a device reference. */
ipip_fb_tunnel_init ( ipn - > fb_tunnel_dev ) ;
2008-04-16 12:04:13 +04:00
if ( ( err = register_netdev ( ipn - > fb_tunnel_dev ) ) )
goto err_reg_dev ;
2008-04-16 12:03:13 +04:00
return 0 ;
2008-04-16 12:04:13 +04:00
/* Unwind in reverse order of setup; the labels fall through. */
err_reg_dev :
free_netdev ( ipn - > fb_tunnel_dev ) ;
err_alloc_dev :
/* nothing */
2008-04-16 12:03:13 +04:00
/*
 * NOTE(review): when reached from err_reg_dev / err_alloc_dev,
 * net_assign_generic() has already succeeded, so ipn is freed while it
 * is presumably still reachable through net_generic() -- confirm
 * whether the generic slot needs clearing.
 */
err_assign :
kfree ( ipn ) ;
err_alloc :
return err ;
}
static void ipip_exit_net ( struct net * net )
{
struct ipip_net * ipn ;
ipn = net_generic ( net , ipip_net_id ) ;
2008-04-16 12:04:13 +04:00
rtnl_lock ( ) ;
2008-04-16 12:05:32 +04:00
ipip_destroy_tunnels ( ipn ) ;
2008-04-16 12:04:13 +04:00
unregister_netdevice ( ipn - > fb_tunnel_dev ) ;
rtnl_unlock ( ) ;
2008-04-16 12:03:13 +04:00
kfree ( ipn ) ;
}
/* Per-network-namespace setup/teardown hooks, registered in ipip_init(). */
static struct pernet_operations ipip_net_ops = {
	.exit	= ipip_exit_net,
	.init	= ipip_init_net,
};
2005-04-17 02:20:36 +04:00
/*
 * Module entry point: print the banner, then register the per-namespace
 * operations and the IPIP tunnel protocol handler.
 *
 * The per-namespace operations are registered FIRST.  Both ipip_rcv()
 * and ipip_err() reach net_generic(net, ipip_net_id) through
 * ipip_tunnel_lookup(), so the handler must not become callable before
 * ipip_net_id has been assigned and the per-namespace state exists.
 * Registering the handler first leaves a window in which an incoming
 * packet could reach the handler with that state still missing.
 *
 * Returns 0 on success, the error from register_pernet_gen_device() if
 * that fails, or -EAGAIN if the tunnel handler cannot be registered (in
 * which case the per-namespace registration is rolled back).
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
	if (err)
		return err;

	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		/* Undo the per-namespace setup so a failed load leaves nothing behind. */
		unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
		return -EAGAIN;
	}

	return 0;
}
/*
 * Module exit point: deregister the tunnel protocol handler first (a
 * failure is only logged), then unregister the per-namespace
 * operations.
 */
static void __exit ipip_fini ( void )
{
2007-02-13 23:54:47 +03:00
if ( xfrm4_tunnel_deregister ( & ipip_handler , AF_INET ) )
2005-04-17 02:20:36 +04:00
printk ( KERN_INFO " ipip close: can't deregister tunnel \n " ) ;
2008-04-16 12:03:13 +04:00
unregister_pernet_gen_device ( ipip_net_id , & ipip_net_ops ) ;
2005-04-17 02:20:36 +04:00
}
/* Module entry/exit hooks and license declaration. */
module_init ( ipip_init ) ;
module_exit ( ipip_fini ) ;
MODULE_LICENSE ( " GPL " ) ;