/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
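
/*
 * Pick the identification value for a fragment header.  A single global
 * counter protected by a spinlock is used; zero is skipped so that an
 * identification of 0 can be treated as "not yet selected" by callers.
 */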
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}
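
/*
 * Hand a fully built packet to the neighbour layer: use the cached
 * hardware header if one exists, otherwise fall back to the neighbour's
 * output routine.  Packets without a usable neighbour are dropped and
 * counted as OUTNOROUTES.
 */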
static inline int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}
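
/*
 * Post-routing output: for multicast destinations, loop a copy of the
 * packet back to local listeners when required and update the multicast
 * counters, then pass the packet through NF_IP6_POST_ROUTING on its way
 * to ip6_output_finish().
 */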
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}
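
/*
 * Top-level output routine: fragment when the packet exceeds the dst MTU
 * (and is not GSO) or when the route demands fragmentation of every
 * packet, otherwise go straight to ip6_output2().
 */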
int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
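
/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered on ip6_ra_chain whose selector and bound device match.
 * Returns 1 if at least one socket consumed the packet.
 */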
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
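
/*
 * Decide what to do with a packet addressed to a proxied neighbour:
 * return 1 to hand unicast neighbour discovery messages to the local
 * input path, -1 to drop (link-local destinations cannot be proxied),
 * and 0 to let normal forwarding continue.
 */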
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
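
/*
 * Forward a packet that is not addressed to this host: check forwarding
 * policy, honour Router Alert options and NDP proxying, decrement the
 * hop limit, emit redirects and Packet Too Big errors where appropriate,
 * and finally pass the packet through the NF_IP6_FORWARD hook.
 */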
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
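
/*
 * Propagate per-packet metadata (type, priority, dst, mark, traffic
 * control index, netfilter and security state) from the original skb
 * to a newly created fragment.
 */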
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
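
/*
 * Walk the extension header chain and return the offset at which a
 * Fragment header must be inserted; *nexthdr is left pointing at the
 * nexthdr field that will precede it.
 */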
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#ifdef CONFIG_IPV6_MIP6
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
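
/*
 * Split an oversized packet into fragments and feed each one to the
 * given output callback.  The fast path reuses an existing frag_list
 * when its geometry already matches the MTU; otherwise the slow path
 * allocates and copies each fragment.
 */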
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
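
/*
 * Return true when a cached route can no longer be trusted for this
 * flow: a host route whose destination differs from the flow's, or a
 * network route whose last-used address cache does not match.
 */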
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
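
/*
 * Common tail of the dst lookup helpers below: resolve the route if the
 * caller did not supply one, pick a source address when the flow has
 * none, and (when optimistic DAD is configured) detour via the default
 * router while the chosen source address is still optimistic.
 */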
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
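
/*
 * Build one large skb for UDP fragmentation offload: the payload is
 * appended as page fragments, the GSO parameters record the on-wire
 * fragment size, and the device performs the actual fragmentation.
 */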
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
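
/*
 * Queue data on the socket's write queue as a chain of packets no larger
 * than the corked MTU.  The first call sets up the cork state; later
 * calls append to it, and large UDP sends may instead be handed to
 * ip6_ufo_append_data() when the device supports UFO.
 * ip6_push_pending_frames() turns the queue into actual IPv6 packets.
 */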
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
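
/*
 * Collapse the socket's pending write queue into one packet (extra skbs
 * become the frag_list), prepend the extension headers and IPv6 header
 * from the cork state, and send the result through NF_IP6_LOCAL_OUT.
 */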
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}
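
/*
 * Drop everything still queued on the socket's write queue and reset the
 * cork state; typically used when corked output is aborted instead of
 * being pushed.
 */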
void ip6_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}