2005-04-17 02:20:36 +04:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* The Internet Protocol ( IP ) output module .
*
2005-05-06 03:16:16 +04:00
* Authors : Ross Biro
2005-04-17 02:20:36 +04:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
* Donald Becker , < becker @ super . org >
* Alan Cox , < Alan . Cox @ linux . org >
* Richard Underwood
* Stefan Becker , < stefanb @ yello . ping . de >
* Jorge Cwik , < jorge @ laser . satlink . net >
* Arnt Gulbrandsen , < agulbra @ nvg . unit . no >
* Hirokazu Takahashi , < taka @ valinux . co . jp >
*
* See ip_input . c for original log
*
* Fixes :
* Alan Cox : Missing nonblock feature in ip_build_xmit .
* Mike Kilburn : htons ( ) missing in ip_build_xmit .
2007-02-09 17:24:47 +03:00
* Bradford Johnson : Fix faulty handling of some frames when
2005-04-17 02:20:36 +04:00
* no route is found .
* Alexander Demenshin : Missing sk / skb free in ip_queue_xmit
* ( in case if packet not accepted by
* output firewall rules )
* Mike McLagan : Routing by source
* Alexey Kuznetsov : use new route cache
* Andi Kleen : Fix broken PMTU recovery and remove
* some redundant tests .
* Vitaly E . Lavrov : Transparent proxy revived after year coma .
* Andi Kleen : Replace ip_reply with ip_send_reply .
2007-02-09 17:24:47 +03:00
* Andi Kleen : Split fast and slow ip_build_xmit path
* for decreased register pressure on x86
* and more readability .
2005-04-17 02:20:36 +04:00
* Marc Boucher : When call_out_firewall returns FW_QUEUE ,
* silently drop skb instead of failing with - EPERM .
* Detlev Wengorz : Copy protocol for fragments .
* Hirokazu Takahashi : HW checksumming for outgoing UDP
* datagrams .
* Hirokazu Takahashi : sendfile ( ) on UDP works now .
*/
# include <asm/uaccess.h>
# include <asm/system.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/string.h>
# include <linux/errno.h>
2006-10-20 00:08:53 +04:00
# include <linux/highmem.h>
2005-04-17 02:20:36 +04:00
# include <linux/socket.h>
# include <linux/sockios.h>
# include <linux/in.h>
# include <linux/inet.h>
# include <linux/netdevice.h>
# include <linux/etherdevice.h>
# include <linux/proc_fs.h>
# include <linux/stat.h>
# include <linux/init.h>
# include <net/snmp.h>
# include <net/ip.h>
# include <net/protocol.h>
# include <net/route.h>
2006-01-09 09:36:54 +03:00
# include <net/xfrm.h>
2005-04-17 02:20:36 +04:00
# include <linux/skbuff.h>
# include <net/sock.h>
# include <net/arp.h>
# include <net/icmp.h>
# include <net/checksum.h>
# include <net/inetpeer.h>
# include <linux/igmp.h>
# include <linux/netfilter_ipv4.h>
# include <linux/netfilter_bridge.h>
# include <linux/mroute.h>
# include <linux/netlink.h>
2005-08-10 06:49:02 +04:00
# include <linux/tcp.h>
2005-04-17 02:20:36 +04:00
2006-09-23 01:15:41 +04:00
/*
 * Default IP time-to-live value; starts out as IPDEFTTL.
 * NOTE(review): the sysctl_ prefix suggests this is tunable at run time via
 * sysctl, but neither the registration nor any reader is visible in this
 * part of the file - confirm against the sysctl tables.
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL ;
2005-04-17 02:20:36 +04:00
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check ( struct iphdr * iph )
{
iph - > check = 0 ;
iph - > check = ip_fast_csum ( ( unsigned char * ) iph , iph - > ihl ) ;
}
2008-01-12 06:14:00 +03:00
int __ip_local_out ( struct sk_buff * skb )
{
struct iphdr * iph = ip_hdr ( skb ) ;
iph - > tot_len = htons ( skb - > len ) ;
ip_send_check ( iph ) ;
2007-11-20 05:53:30 +03:00
return nf_hook ( PF_INET , NF_INET_LOCAL_OUT , skb , NULL , skb - > dst - > dev ,
2008-01-12 06:14:00 +03:00
dst_output ) ;
}
/*
 * Send a locally generated packet: run __ip_local_out() and, when it
 * returns 1, continue with dst_output(). Any other value from
 * __ip_local_out() is returned unchanged.
 */
int ip_local_out(struct sk_buff *skb)
{
	int rc = __ip_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip_local_out);
2005-04-17 02:20:36 +04:00
/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit ( struct sk_buff * newskb )
{
2007-03-20 01:30:44 +03:00
skb_reset_mac_header ( newskb ) ;
2007-03-11 04:16:10 +03:00
__skb_pull ( newskb , skb_network_offset ( newskb ) ) ;
2005-04-17 02:20:36 +04:00
newskb - > pkt_type = PACKET_LOOPBACK ;
newskb - > ip_summed = CHECKSUM_UNNECESSARY ;
2008-07-26 08:43:18 +04:00
WARN_ON ( ! newskb - > dst ) ;
2005-04-17 02:20:36 +04:00
netif_rx ( newskb ) ;
return 0 ;
}
static inline int ip_select_ttl ( struct inet_sock * inet , struct dst_entry * dst )
{
int ttl = inet - > uc_ttl ;
if ( ttl < 0 )
ttl = dst_metric ( dst , RTAX_HOPLIMIT ) ;
return ttl ;
}
2007-02-09 17:24:47 +03:00
/*
 * Prepend an IPv4 header (plus the options in @opt, if any) to @skb and
 * send it through ip_local_out().
 *
 * Source and destination addresses are taken from the route already
 * attached to the skb (skb->rtable). @daddr is only handed on to
 * ip_options_build(); @saddr is not used by this function.
 *
 * Returns the result of ip_local_out().
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb->rtable;
	int optlen = opt ? opt->optlen : 0;
	struct iphdr *iph;

	/* Make room for the header and point the network header at it. */
	skb_push(skb, sizeof(struct iphdr) + optlen);
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->ihl = 5;
	iph->tos = inet->tos;
	iph->frag_off = ip_dont_fragment(sk, &rt->u.dst) ? htons(IP_DF) : 0;
	iph->ttl = ip_select_ttl(inet, &rt->u.dst);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(iph, &rt->u.dst, sk);

	if (optlen) {
		/* Options extend the header; ihl counts 32-bit words. */
		iph->ihl += optlen >> 2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* Send it out. */
	return ip_local_out(skb);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
2005-04-17 02:20:36 +04:00
static inline int ip_finish_output2 ( struct sk_buff * skb )
{
struct dst_entry * dst = skb - > dst ;
2007-04-30 11:48:20 +04:00
struct rtable * rt = ( struct rtable * ) dst ;
2005-04-17 02:20:36 +04:00
struct net_device * dev = dst - > dev ;
2007-10-24 08:07:32 +04:00
unsigned int hh_len = LL_RESERVED_SPACE ( dev ) ;
2005-04-17 02:20:36 +04:00
2007-04-30 11:48:20 +04:00
if ( rt - > rt_type = = RTN_MULTICAST )
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_OUTMCASTPKTS ) ;
2007-04-30 11:48:20 +04:00
else if ( rt - > rt_type = = RTN_BROADCAST )
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_OUTBCASTPKTS ) ;
2007-04-30 11:48:20 +04:00
2005-04-17 02:20:36 +04:00
/* Be paranoid, rather than too clever. */
2007-10-09 12:40:57 +04:00
if ( unlikely ( skb_headroom ( skb ) < hh_len & & dev - > header_ops ) ) {
2005-04-17 02:20:36 +04:00
struct sk_buff * skb2 ;
skb2 = skb_realloc_headroom ( skb , LL_RESERVED_SPACE ( dev ) ) ;
if ( skb2 = = NULL ) {
kfree_skb ( skb ) ;
return - ENOMEM ;
}
if ( skb - > sk )
skb_set_owner_w ( skb2 , skb - > sk ) ;
kfree_skb ( skb ) ;
skb = skb2 ;
}
2006-12-08 02:08:17 +03:00
if ( dst - > hh )
return neigh_hh_output ( dst - > hh , skb ) ;
else if ( dst - > neighbour )
2005-04-17 02:20:36 +04:00
return dst - > neighbour - > output ( skb ) ;
if ( net_ratelimit ( ) )
printk ( KERN_DEBUG " ip_finish_output2: No header cache and no neighbour! \n " ) ;
kfree_skb ( skb ) ;
return - EINVAL ;
}
2007-04-21 02:53:27 +04:00
static inline int ip_skb_dst_mtu ( struct sk_buff * skb )
{
struct inet_sock * inet = skb - > sk ? inet_sk ( skb - > sk ) : NULL ;
return ( inet & & inet - > pmtudisc = = IP_PMTUDISC_PROBE ) ?
skb - > dst - > dev - > mtu : dst_mtu ( skb - > dst ) ;
}
2007-10-15 12:48:39 +04:00
static int ip_finish_output ( struct sk_buff * skb )
2005-04-17 02:20:36 +04:00
{
2006-01-07 10:05:36 +03:00
# if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
2006-02-16 02:10:22 +03:00
if ( skb - > dst - > xfrm ! = NULL ) {
IPCB ( skb ) - > flags | = IPSKB_REROUTED ;
return dst_output ( skb ) ;
}
2006-01-07 10:05:36 +03:00
# endif
2007-04-21 02:53:27 +04:00
if ( skb - > len > ip_skb_dst_mtu ( skb ) & & ! skb_is_gso ( skb ) )
2006-01-05 23:20:59 +03:00
return ip_fragment ( skb , ip_finish_output2 ) ;
else
return ip_finish_output2 ( skb ) ;
2005-04-17 02:20:36 +04:00
}
int ip_mc_output ( struct sk_buff * skb )
{
struct sock * sk = skb - > sk ;
2008-03-06 05:30:47 +03:00
struct rtable * rt = skb - > rtable ;
2005-04-17 02:20:36 +04:00
struct net_device * dev = rt - > u . dst . dev ;
/*
* If the indicated interface is up and running , send the packet .
*/
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_OUTREQUESTS ) ;
2005-04-17 02:20:36 +04:00
skb - > dev = dev ;
skb - > protocol = htons ( ETH_P_IP ) ;
/*
* Multicasts are looped back for other local users
*/
if ( rt - > rt_flags & RTCF_MULTICAST ) {
if ( ( ! sk | | inet_sk ( sk ) - > mc_loop )
# ifdef CONFIG_IP_MROUTE
/* Small optimization: do not loopback not local frames,
which returned after forwarding ; they will be dropped
by ip_mr_input in any case .
Note , that local frames are looped back to be delivered
to local recipients .
This check is duplicated in ip_mr_input at the moment .
*/
& & ( ( rt - > rt_flags & RTCF_LOCAL ) | | ! ( IPCB ( skb ) - > flags & IPSKB_FORWARDED ) )
# endif
) {
struct sk_buff * newskb = skb_clone ( skb , GFP_ATOMIC ) ;
if ( newskb )
2007-11-20 05:53:30 +03:00
NF_HOOK ( PF_INET , NF_INET_POST_ROUTING , newskb ,
NULL , newskb - > dev ,
2005-04-17 02:20:36 +04:00
ip_dev_loopback_xmit ) ;
}
/* Multicasts with ttl 0 must not go beyond the host */
2007-04-21 09:47:35 +04:00
if ( ip_hdr ( skb ) - > ttl = = 0 ) {
2005-04-17 02:20:36 +04:00
kfree_skb ( skb ) ;
return 0 ;
}
}
if ( rt - > rt_flags & RTCF_BROADCAST ) {
struct sk_buff * newskb = skb_clone ( skb , GFP_ATOMIC ) ;
if ( newskb )
2007-11-20 05:53:30 +03:00
NF_HOOK ( PF_INET , NF_INET_POST_ROUTING , newskb , NULL ,
2005-04-17 02:20:36 +04:00
newskb - > dev , ip_dev_loopback_xmit ) ;
}
2007-11-20 05:53:30 +03:00
return NF_HOOK_COND ( PF_INET , NF_INET_POST_ROUTING , skb , NULL , skb - > dev ,
2006-02-16 02:10:22 +03:00
ip_finish_output ,
! ( IPCB ( skb ) - > flags & IPSKB_REROUTED ) ) ;
2005-04-17 02:20:36 +04:00
}
int ip_output ( struct sk_buff * skb )
{
2006-01-05 23:20:59 +03:00
struct net_device * dev = skb - > dst - > dev ;
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_OUTREQUESTS ) ;
2005-04-17 02:20:36 +04:00
2006-01-05 23:20:59 +03:00
skb - > dev = dev ;
skb - > protocol = htons ( ETH_P_IP ) ;
2007-11-20 05:53:30 +03:00
return NF_HOOK_COND ( PF_INET , NF_INET_POST_ROUTING , skb , NULL , dev ,
2007-02-09 17:24:47 +03:00
ip_finish_output ,
2006-02-16 02:10:22 +03:00
! ( IPCB ( skb ) - > flags & IPSKB_REROUTED ) ) ;
2005-04-17 02:20:36 +04:00
}
2007-01-26 12:04:55 +03:00
int ip_queue_xmit ( struct sk_buff * skb , int ipfragok )
2005-04-17 02:20:36 +04:00
{
2007-01-26 12:04:55 +03:00
struct sock * sk = skb - > sk ;
2005-04-17 02:20:36 +04:00
struct inet_sock * inet = inet_sk ( sk ) ;
struct ip_options * opt = inet - > opt ;
struct rtable * rt ;
struct iphdr * iph ;
/* Skip all of this if the packet is already routed,
* f . e . by something like SCTP .
*/
2008-03-06 05:30:47 +03:00
rt = skb - > rtable ;
2005-04-17 02:20:36 +04:00
if ( rt ! = NULL )
goto packet_routed ;
/* Make sure we can route this packet. */
rt = ( struct rtable * ) __sk_dst_check ( sk , 0 ) ;
if ( rt = = NULL ) {
2006-09-28 05:28:07 +04:00
__be32 daddr ;
2005-04-17 02:20:36 +04:00
/* Use correct destination address if we have options. */
daddr = inet - > daddr ;
if ( opt & & opt - > srr )
daddr = opt - > faddr ;
{
struct flowi fl = { . oif = sk - > sk_bound_dev_if ,
. nl_u = { . ip4_u =
{ . daddr = daddr ,
. saddr = inet - > saddr ,
. tos = RT_CONN_FLAGS ( sk ) } } ,
. proto = sk - > sk_protocol ,
. uli_u = { . ports =
{ . sport = inet - > sport ,
. dport = inet - > dport } } } ;
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
* itself out .
*/
2006-08-05 10:12:42 +04:00
security_sk_classify_flow ( sk , & fl ) ;
2008-03-25 20:26:21 +03:00
if ( ip_route_output_flow ( sock_net ( sk ) , & rt , & fl , sk , 0 ) )
2005-04-17 02:20:36 +04:00
goto no_route ;
}
2005-08-10 06:49:02 +04:00
sk_setup_caps ( sk , & rt - > u . dst ) ;
2005-04-17 02:20:36 +04:00
}
skb - > dst = dst_clone ( & rt - > u . dst ) ;
packet_routed :
if ( opt & & opt - > is_strictroute & & rt - > rt_dst ! = rt - > rt_gateway )
goto no_route ;
/* OK, we know where to send it, allocate and build IP header. */
2007-03-11 01:40:39 +03:00
skb_push ( skb , sizeof ( struct iphdr ) + ( opt ? opt - > optlen : 0 ) ) ;
skb_reset_network_header ( skb ) ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2006-11-15 07:51:49 +03:00
* ( ( __be16 * ) iph ) = htons ( ( 4 < < 12 ) | ( 5 < < 8 ) | ( inet - > tos & 0xff ) ) ;
2005-04-17 02:20:36 +04:00
if ( ip_dont_fragment ( sk , & rt - > u . dst ) & & ! ipfragok )
iph - > frag_off = htons ( IP_DF ) ;
else
iph - > frag_off = 0 ;
iph - > ttl = ip_select_ttl ( inet , & rt - > u . dst ) ;
iph - > protocol = sk - > sk_protocol ;
iph - > saddr = rt - > rt_src ;
iph - > daddr = rt - > rt_dst ;
/* Transport layer set skb->h.foo itself. */
if ( opt & & opt - > optlen ) {
iph - > ihl + = opt - > optlen > > 2 ;
ip_options_build ( skb , opt , inet - > daddr , rt , 0 ) ;
}
2005-11-08 20:41:56 +03:00
ip_select_ident_more ( iph , & rt - > u . dst , sk ,
2006-06-22 13:40:14 +04:00
( skb_shinfo ( skb ) - > gso_segs ? : 1 ) - 1 ) ;
2005-04-17 02:20:36 +04:00
skb - > priority = sk - > sk_priority ;
2008-01-31 06:08:16 +03:00
skb - > mark = sk - > sk_mark ;
2005-04-17 02:20:36 +04:00
2008-01-12 06:14:00 +03:00
return ip_local_out ( skb ) ;
2005-04-17 02:20:36 +04:00
no_route :
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( sock_net ( sk ) , IPSTATS_MIB_OUTNOROUTES ) ;
2005-04-17 02:20:36 +04:00
kfree_skb ( skb ) ;
return - EHOSTUNREACH ;
}
static void ip_copy_metadata ( struct sk_buff * to , struct sk_buff * from )
{
to - > pkt_type = from - > pkt_type ;
to - > priority = from - > priority ;
to - > protocol = from - > protocol ;
dst_release ( to - > dst ) ;
to - > dst = dst_clone ( from - > dst ) ;
to - > dev = from - > dev ;
2006-11-10 02:19:14 +03:00
to - > mark = from - > mark ;
2005-04-17 02:20:36 +04:00
/* Copy the flags to each fragment. */
IPCB ( to ) - > flags = IPCB ( from ) - > flags ;
# ifdef CONFIG_NET_SCHED
to - > tc_index = from - > tc_index ;
# endif
2007-03-15 02:44:01 +03:00
nf_copy ( to , from ) ;
2007-07-08 09:21:23 +04:00
# if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined ( CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE )
to - > nf_trace = from - > nf_trace ;
# endif
2005-10-22 14:39:21 +04:00
# if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to - > ipvs_property = from - > ipvs_property ;
2005-04-17 02:20:36 +04:00
# endif
2006-06-09 11:29:17 +04:00
skb_copy_secmark ( to , from ) ;
2005-04-17 02:20:36 +04:00
}
/*
* This IP datagram is too large to be sent in one piece . Break it up into
* smaller pieces ( each of size equal to IP header plus
* a block of the data of the original IP data part ) that will yet fit in a
* single device frame , and queue such a frame for sending .
*/
2006-04-05 00:42:35 +04:00
int ip_fragment ( struct sk_buff * skb , int ( * output ) ( struct sk_buff * ) )
2005-04-17 02:20:36 +04:00
{
struct iphdr * iph ;
int raw = 0 ;
int ptr ;
struct net_device * dev ;
struct sk_buff * skb2 ;
2006-08-30 04:48:57 +04:00
unsigned int mtu , hlen , left , len , ll_rs , pad ;
2005-04-17 02:20:36 +04:00
int offset ;
2006-01-07 00:24:29 +03:00
__be16 not_last_frag ;
2008-03-06 05:30:47 +03:00
struct rtable * rt = skb - > rtable ;
2005-04-17 02:20:36 +04:00
int err = 0 ;
dev = rt - > u . dst . dev ;
/*
* Point into the IP datagram header .
*/
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( ( iph - > frag_off & htons ( IP_DF ) ) & & ! skb - > local_df ) ) {
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGFAILS ) ;
2005-04-17 02:20:36 +04:00
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_FRAG_NEEDED ,
2007-04-21 02:53:27 +04:00
htonl ( ip_skb_dst_mtu ( skb ) ) ) ;
2005-04-17 02:20:36 +04:00
kfree_skb ( skb ) ;
return - EMSGSIZE ;
}
/*
* Setup starting values .
*/
hlen = iph - > ihl * 4 ;
mtu = dst_mtu ( & rt - > u . dst ) - hlen ; /* Size of data space */
2005-12-14 10:14:27 +03:00
IPCB ( skb ) - > flags | = IPSKB_FRAG_COMPLETE ;
2005-04-17 02:20:36 +04:00
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
* one , it is not prohibited . In this case fall back to copying .
*
* LATER : this step can be merged to real generation of fragments ,
* we can switch to copy when see the first bad fragment .
*/
if ( skb_shinfo ( skb ) - > frag_list ) {
struct sk_buff * frag ;
int first_len = skb_pagelen ( skb ) ;
2008-01-29 07:45:20 +03:00
int truesizes = 0 ;
2005-04-17 02:20:36 +04:00
if ( first_len - hlen > mtu | |
( ( first_len - hlen ) & 7 ) | |
( iph - > frag_off & htons ( IP_MF | IP_OFFSET ) ) | |
skb_cloned ( skb ) )
goto slow_path ;
for ( frag = skb_shinfo ( skb ) - > frag_list ; frag ; frag = frag - > next ) {
/* Correct geometry. */
if ( frag - > len > mtu | |
( ( frag - > len & 7 ) & & frag - > next ) | |
skb_headroom ( frag ) < hlen )
goto slow_path ;
/* Partially cloned skb? */
if ( skb_shared ( frag ) )
goto slow_path ;
2005-05-19 09:52:33 +04:00
BUG_ON ( frag - > sk ) ;
if ( skb - > sk ) {
sock_hold ( skb - > sk ) ;
frag - > sk = skb - > sk ;
frag - > destructor = sock_wfree ;
2008-01-29 07:45:20 +03:00
truesizes + = frag - > truesize ;
2005-05-19 09:52:33 +04:00
}
2005-04-17 02:20:36 +04:00
}
/* Everything is OK. Generate! */
err = 0 ;
offset = 0 ;
frag = skb_shinfo ( skb ) - > frag_list ;
skb_shinfo ( skb ) - > frag_list = NULL ;
skb - > data_len = first_len - skb_headlen ( skb ) ;
2008-01-29 07:45:20 +03:00
skb - > truesize - = truesizes ;
2005-04-17 02:20:36 +04:00
skb - > len = first_len ;
iph - > tot_len = htons ( first_len ) ;
iph - > frag_off = htons ( IP_MF ) ;
ip_send_check ( iph ) ;
for ( ; ; ) {
/* Prepare header of the next frame,
* before previous one went down . */
if ( frag ) {
frag - > ip_summed = CHECKSUM_NONE ;
2007-03-13 19:06:52 +03:00
skb_reset_transport_header ( frag ) ;
2007-04-11 07:46:21 +04:00
__skb_push ( frag , hlen ) ;
skb_reset_network_header ( frag ) ;
2007-04-11 07:50:43 +04:00
memcpy ( skb_network_header ( frag ) , iph , hlen ) ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( frag ) ;
2005-04-17 02:20:36 +04:00
iph - > tot_len = htons ( frag - > len ) ;
ip_copy_metadata ( frag , skb ) ;
if ( offset = = 0 )
ip_options_fragment ( frag ) ;
offset + = skb - > len - hlen ;
iph - > frag_off = htons ( offset > > 3 ) ;
if ( frag - > next ! = NULL )
iph - > frag_off | = htons ( IP_MF ) ;
/* Ready, complete checksum */
ip_send_check ( iph ) ;
}
err = output ( skb ) ;
2006-08-03 00:41:21 +04:00
if ( ! err )
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGCREATES ) ;
2005-04-17 02:20:36 +04:00
if ( err | | ! frag )
break ;
skb = frag ;
frag = skb - > next ;
skb - > next = NULL ;
}
if ( err = = 0 ) {
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGOKS ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
while ( frag ) {
skb = frag - > next ;
kfree_skb ( frag ) ;
frag = skb ;
}
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGFAILS ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
slow_path :
left = skb - > len - hlen ; /* Space per frame */
ptr = raw + hlen ; /* Where to start from */
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
2006-08-30 04:48:57 +04:00
* we need to make room for the encapsulating header
*/
pad = nf_bridge_pad ( skb ) ;
ll_rs = LL_RESERVED_SPACE_EXTRA ( rt - > u . dst . dev , pad ) ;
mtu - = pad ;
2005-04-17 02:20:36 +04:00
/*
* Fragment the datagram .
*/
offset = ( ntohs ( iph - > frag_off ) & IP_OFFSET ) < < 3 ;
not_last_frag = iph - > frag_off & htons ( IP_MF ) ;
/*
* Keep copying data until we run out .
*/
2007-03-09 07:44:43 +03:00
while ( left > 0 ) {
2005-04-17 02:20:36 +04:00
len = left ;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if ( len > mtu )
len = mtu ;
/* IF: we are not sending upto and including the packet end
then align the next start on an eight byte boundary */
if ( len < left ) {
len & = ~ 7 ;
}
/*
* Allocate buffer .
*/
if ( ( skb2 = alloc_skb ( len + hlen + ll_rs , GFP_ATOMIC ) ) = = NULL ) {
2005-08-10 07:50:53 +04:00
NETDEBUG ( KERN_INFO " IP: frag: no memory for new fragment! \n " ) ;
2005-04-17 02:20:36 +04:00
err = - ENOMEM ;
goto fail ;
}
/*
* Set up data on packet
*/
ip_copy_metadata ( skb2 , skb ) ;
skb_reserve ( skb2 , ll_rs ) ;
skb_put ( skb2 , len + hlen ) ;
2007-04-11 07:45:18 +04:00
skb_reset_network_header ( skb2 ) ;
2007-04-11 08:21:55 +04:00
skb2 - > transport_header = skb2 - > network_header + hlen ;
2005-04-17 02:20:36 +04:00
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if ( skb - > sk )
skb_set_owner_w ( skb2 , skb - > sk ) ;
/*
* Copy the packet header into the new buffer .
*/
2007-03-28 01:55:52 +04:00
skb_copy_from_linear_data ( skb , skb_network_header ( skb2 ) , hlen ) ;
2005-04-17 02:20:36 +04:00
/*
* Copy a block of the IP datagram .
*/
2007-03-16 23:19:57 +03:00
if ( skb_copy_bits ( skb , ptr , skb_transport_header ( skb2 ) , len ) )
2005-04-17 02:20:36 +04:00
BUG ( ) ;
left - = len ;
/*
* Fill in the new header fields .
*/
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb2 ) ;
2005-04-17 02:20:36 +04:00
iph - > frag_off = htons ( ( offset > > 3 ) ) ;
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST ( otherwise ,
* options are already fixed ) and make it ONCE
* on the initial skb , so that all the following fragments
* will inherit fixed options .
*/
if ( offset = = 0 )
ip_options_fragment ( skb ) ;
/*
* Added AC : If we are fragmenting a fragment that ' s not the
* last fragment then keep MF on each bit
*/
if ( left > 0 | | not_last_frag )
iph - > frag_off | = htons ( IP_MF ) ;
ptr + = len ;
offset + = len ;
/*
* Put this fragment into the sending queue .
*/
iph - > tot_len = htons ( len + hlen ) ;
ip_send_check ( iph ) ;
err = output ( skb2 ) ;
if ( err )
goto fail ;
2006-08-03 00:41:21 +04:00
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGCREATES ) ;
2005-04-17 02:20:36 +04:00
}
kfree_skb ( skb ) ;
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGOKS ) ;
2005-04-17 02:20:36 +04:00
return err ;
fail :
2007-02-09 17:24:47 +03:00
kfree_skb ( skb ) ;
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( dev_net ( dev ) , IPSTATS_MIB_FRAGFAILS ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
2006-04-05 00:42:35 +04:00
EXPORT_SYMBOL ( ip_fragment ) ;
2005-04-17 02:20:36 +04:00
int
ip_generic_getfrag ( void * from , char * to , int offset , int len , int odd , struct sk_buff * skb )
{
struct iovec * iov = from ;
2006-08-30 03:44:56 +04:00
if ( skb - > ip_summed = = CHECKSUM_PARTIAL ) {
2005-04-17 02:20:36 +04:00
if ( memcpy_fromiovecend ( to , iov , offset , len ) < 0 )
return - EFAULT ;
} else {
2006-11-15 08:36:14 +03:00
__wsum csum = 0 ;
2005-04-17 02:20:36 +04:00
if ( csum_partial_copy_fromiovecend ( to , iov , offset , len , & csum ) < 0 )
return - EFAULT ;
skb - > csum = csum_block_add ( skb - > csum , csum , odd ) ;
}
return 0 ;
}
2006-11-15 08:36:14 +03:00
static inline __wsum
2005-04-17 02:20:36 +04:00
csum_page ( struct page * page , int offset , int copy )
{
char * kaddr ;
2006-11-15 08:36:14 +03:00
__wsum csum ;
2005-04-17 02:20:36 +04:00
kaddr = kmap ( page ) ;
csum = csum_partial ( kaddr + offset , copy , 0 ) ;
kunmap ( page ) ;
return csum ;
}
2005-11-30 03:27:20 +03:00
static inline int ip_ufo_append_data ( struct sock * sk ,
2005-10-19 02:46:41 +04:00
int getfrag ( void * from , char * to , int offset , int len ,
int odd , struct sk_buff * skb ) ,
void * from , int length , int hh_len , int fragheaderlen ,
int transhdrlen , int mtu , unsigned int flags )
{
struct sk_buff * skb ;
int err ;
/* There is support for UDP fragmentation offload by network
* device , so create one single skb packet containing complete
* udp datagram
*/
if ( ( skb = skb_peek_tail ( & sk - > sk_write_queue ) ) = = NULL ) {
skb = sock_alloc_send_skb ( sk ,
hh_len + fragheaderlen + transhdrlen + 20 ,
( flags & MSG_DONTWAIT ) , & err ) ;
if ( skb = = NULL )
return err ;
/* reserve space for Hardware header */
skb_reserve ( skb , hh_len ) ;
/* create space for UDP/IP header */
skb_put ( skb , fragheaderlen + transhdrlen ) ;
/* initialize network header pointer */
2007-04-11 07:45:18 +04:00
skb_reset_network_header ( skb ) ;
2005-10-19 02:46:41 +04:00
/* initialize protocol header pointer */
2007-04-11 08:21:55 +04:00
skb - > transport_header = skb - > network_header + fragheaderlen ;
2005-10-19 02:46:41 +04:00
2006-08-30 03:44:56 +04:00
skb - > ip_summed = CHECKSUM_PARTIAL ;
2005-10-19 02:46:41 +04:00
skb - > csum = 0 ;
sk - > sk_sndmsg_off = 0 ;
2008-04-30 09:36:30 +04:00
/* specify the length of each IP datagram fragment */
2006-06-22 13:40:14 +04:00
skb_shinfo ( skb ) - > gso_size = mtu - fragheaderlen ;
2006-07-01 00:37:03 +04:00
skb_shinfo ( skb ) - > gso_type = SKB_GSO_UDP ;
2005-10-19 02:46:41 +04:00
__skb_queue_tail ( & sk - > sk_write_queue , skb ) ;
}
2008-04-30 09:36:30 +04:00
return skb_append_datato_frags ( sk , skb , getfrag , from ,
( length - transhdrlen ) ) ;
2005-10-19 02:46:41 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* ip_append_data ( ) and ip_append_page ( ) can make one large IP datagram
* from many pieces of data . Each pieces will be holded on the socket
* until ip_push_pending_frames ( ) is called . Each piece can be a page
* or non - page data .
2007-02-09 17:24:47 +03:00
*
2005-04-17 02:20:36 +04:00
* Not only UDP , other transport protocols - e . g . raw sockets - can use
* this interface potentially .
*
* LATER : length must be adjusted by pad at tail , when it is required .
*/
int ip_append_data ( struct sock * sk ,
int getfrag ( void * from , char * to , int offset , int len ,
int odd , struct sk_buff * skb ) ,
void * from , int length , int transhdrlen ,
struct ipcm_cookie * ipc , struct rtable * rt ,
unsigned int flags )
{
struct inet_sock * inet = inet_sk ( sk ) ;
struct sk_buff * skb ;
struct ip_options * opt = NULL ;
int hh_len ;
int exthdrlen ;
int mtu ;
int copy ;
int err ;
int offset = 0 ;
unsigned int maxfraglen , fragheaderlen ;
int csummode = CHECKSUM_NONE ;
if ( flags & MSG_PROBE )
return 0 ;
if ( skb_queue_empty ( & sk - > sk_write_queue ) ) {
/*
* setup for corking .
*/
opt = ipc - > opt ;
if ( opt ) {
if ( inet - > cork . opt = = NULL ) {
inet - > cork . opt = kmalloc ( sizeof ( struct ip_options ) + 40 , sk - > sk_allocation ) ;
if ( unlikely ( inet - > cork . opt = = NULL ) )
return - ENOBUFS ;
}
memcpy ( inet - > cork . opt , opt , sizeof ( struct ip_options ) + opt - > optlen ) ;
inet - > cork . flags | = IPCORK_OPT ;
inet - > cork . addr = ipc - > addr ;
}
dst_hold ( & rt - > u . dst ) ;
2007-04-21 02:53:27 +04:00
inet - > cork . fragsize = mtu = inet - > pmtudisc = = IP_PMTUDISC_PROBE ?
rt - > u . dst . dev - > mtu :
dst_mtu ( rt - > u . dst . path ) ;
2008-03-10 11:30:37 +03:00
inet - > cork . dst = & rt - > u . dst ;
2005-04-17 02:20:36 +04:00
inet - > cork . length = 0 ;
sk - > sk_sndmsg_page = NULL ;
sk - > sk_sndmsg_off = 0 ;
if ( ( exthdrlen = rt - > u . dst . header_len ) ! = 0 ) {
length + = exthdrlen ;
transhdrlen + = exthdrlen ;
}
} else {
2008-03-10 11:30:37 +03:00
rt = ( struct rtable * ) inet - > cork . dst ;
2005-04-17 02:20:36 +04:00
if ( inet - > cork . flags & IPCORK_OPT )
opt = inet - > cork . opt ;
transhdrlen = 0 ;
exthdrlen = 0 ;
mtu = inet - > cork . fragsize ;
}
hh_len = LL_RESERVED_SPACE ( rt - > u . dst . dev ) ;
fragheaderlen = sizeof ( struct iphdr ) + ( opt ? opt - > optlen : 0 ) ;
maxfraglen = ( ( mtu - fragheaderlen ) & ~ 7 ) + fragheaderlen ;
if ( inet - > cork . length + length > 0xFFFF - fragheaderlen ) {
ip_local_error ( sk , EMSGSIZE , rt - > rt_dst , inet - > dport , mtu - exthdrlen ) ;
return - EMSGSIZE ;
}
/*
* transhdrlen > 0 means that this is the first fragment and we wish
* it won ' t be fragmented in the future .
*/
if ( transhdrlen & &
length + fragheaderlen < = mtu & &
2007-06-27 11:47:37 +04:00
rt - > u . dst . dev - > features & NETIF_F_V4_CSUM & &
2005-04-17 02:20:36 +04:00
! exthdrlen )
2006-08-30 03:44:56 +04:00
csummode = CHECKSUM_PARTIAL ;
2005-04-17 02:20:36 +04:00
inet - > cork . length + = length ;
2008-04-30 09:36:30 +04:00
if ( ( ( length > mtu ) | | ! skb_queue_empty ( & sk - > sk_write_queue ) ) & &
( sk - > sk_protocol = = IPPROTO_UDP ) & &
( rt - > u . dst . dev - > features & NETIF_F_UFO ) ) {
2006-03-13 07:35:12 +03:00
err = ip_ufo_append_data ( sk , getfrag , from , length , hh_len ,
fragheaderlen , transhdrlen , mtu ,
flags ) ;
if ( err )
2005-10-19 02:46:41 +04:00
goto error ;
return 0 ;
}
2005-04-17 02:20:36 +04:00
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb ,
* each of segments is IP fragment ready for sending to network after
* adding appropriate IP header .
*/
if ( ( skb = skb_peek_tail ( & sk - > sk_write_queue ) ) = = NULL )
goto alloc_new_skb ;
while ( length > 0 ) {
/* Check if the remaining data fits into current packet. */
copy = mtu - skb - > len ;
if ( copy < length )
copy = maxfraglen - skb - > len ;
if ( copy < = 0 ) {
char * data ;
unsigned int datalen ;
unsigned int fraglen ;
unsigned int fraggap ;
unsigned int alloclen ;
struct sk_buff * skb_prev ;
alloc_new_skb :
skb_prev = skb ;
if ( skb_prev )
fraggap = skb_prev - > len - maxfraglen ;
else
fraggap = 0 ;
/*
* If remaining data exceeds the mtu ,
* we know we need more fragment ( s ) .
*/
datalen = length + fraggap ;
if ( datalen > mtu - fragheaderlen )
datalen = maxfraglen - fragheaderlen ;
fraglen = datalen + fragheaderlen ;
2007-02-09 17:24:47 +03:00
if ( ( flags & MSG_MORE ) & &
2005-04-17 02:20:36 +04:00
! ( rt - > u . dst . dev - > features & NETIF_F_SG ) )
alloclen = mtu ;
else
alloclen = datalen + fragheaderlen ;
/* The last fragment gets additional space at tail.
* Note , with MSG_MORE we overallocate on fragments ,
* because we have no idea what fragment will be
* the last .
*/
2006-04-15 03:04:18 +04:00
if ( datalen = = length + fraggap )
2005-04-17 02:20:36 +04:00
alloclen + = rt - > u . dst . trailer_len ;
if ( transhdrlen ) {
2007-02-09 17:24:47 +03:00
skb = sock_alloc_send_skb ( sk ,
2005-04-17 02:20:36 +04:00
alloclen + hh_len + 15 ,
( flags & MSG_DONTWAIT ) , & err ) ;
} else {
skb = NULL ;
if ( atomic_read ( & sk - > sk_wmem_alloc ) < =
2 * sk - > sk_sndbuf )
2007-02-09 17:24:47 +03:00
skb = sock_wmalloc ( sk ,
2005-04-17 02:20:36 +04:00
alloclen + hh_len + 15 , 1 ,
sk - > sk_allocation ) ;
if ( unlikely ( skb = = NULL ) )
err = - ENOBUFS ;
}
if ( skb = = NULL )
goto error ;
/*
* Fill in the control structures
*/
skb - > ip_summed = csummode ;
skb - > csum = 0 ;
skb_reserve ( skb , hh_len ) ;
/*
* Find where to start putting bytes .
*/
data = skb_put ( skb , fraglen ) ;
2007-03-12 04:39:41 +03:00
skb_set_network_header ( skb , exthdrlen ) ;
2007-04-11 08:21:55 +04:00
skb - > transport_header = ( skb - > network_header +
fragheaderlen ) ;
2005-04-17 02:20:36 +04:00
data + = fragheaderlen ;
if ( fraggap ) {
skb - > csum = skb_copy_and_csum_bits (
skb_prev , maxfraglen ,
data + transhdrlen , fraggap , 0 ) ;
skb_prev - > csum = csum_sub ( skb_prev - > csum ,
skb - > csum ) ;
data + = fraggap ;
2006-08-14 07:12:58 +04:00
pskb_trim_unique ( skb_prev , maxfraglen ) ;
2005-04-17 02:20:36 +04:00
}
copy = datalen - transhdrlen - fraggap ;
if ( copy > 0 & & getfrag ( from , data + transhdrlen , offset , copy , fraggap , skb ) < 0 ) {
err = - EFAULT ;
kfree_skb ( skb ) ;
goto error ;
}
offset + = copy ;
length - = datalen - fraggap ;
transhdrlen = 0 ;
exthdrlen = 0 ;
csummode = CHECKSUM_NONE ;
/*
* Put the packet on the pending queue .
*/
__skb_queue_tail ( & sk - > sk_write_queue , skb ) ;
continue ;
}
if ( copy > length )
copy = length ;
if ( ! ( rt - > u . dst . dev - > features & NETIF_F_SG ) ) {
unsigned int off ;
off = skb - > len ;
2007-02-09 17:24:47 +03:00
if ( getfrag ( from , skb_put ( skb , copy ) ,
2005-04-17 02:20:36 +04:00
offset , copy , off , skb ) < 0 ) {
__skb_trim ( skb , off ) ;
err = - EFAULT ;
goto error ;
}
} else {
int i = skb_shinfo ( skb ) - > nr_frags ;
skb_frag_t * frag = & skb_shinfo ( skb ) - > frags [ i - 1 ] ;
struct page * page = sk - > sk_sndmsg_page ;
int off = sk - > sk_sndmsg_off ;
unsigned int left ;
if ( page & & ( left = PAGE_SIZE - off ) > 0 ) {
if ( copy > = left )
copy = left ;
if ( page ! = frag - > page ) {
if ( i = = MAX_SKB_FRAGS ) {
err = - EMSGSIZE ;
goto error ;
}
get_page ( page ) ;
2007-02-09 17:24:47 +03:00
skb_fill_page_desc ( skb , i , page , sk - > sk_sndmsg_off , 0 ) ;
2005-04-17 02:20:36 +04:00
frag = & skb_shinfo ( skb ) - > frags [ i ] ;
}
} else if ( i < MAX_SKB_FRAGS ) {
if ( copy > PAGE_SIZE )
copy = PAGE_SIZE ;
page = alloc_pages ( sk - > sk_allocation , 0 ) ;
if ( page = = NULL ) {
err = - ENOMEM ;
goto error ;
}
sk - > sk_sndmsg_page = page ;
sk - > sk_sndmsg_off = 0 ;
skb_fill_page_desc ( skb , i , page , 0 , 0 ) ;
frag = & skb_shinfo ( skb ) - > frags [ i ] ;
} else {
err = - EMSGSIZE ;
goto error ;
}
if ( getfrag ( from , page_address ( frag - > page ) + frag - > page_offset + frag - > size , offset , copy , skb - > len , skb ) < 0 ) {
err = - EFAULT ;
goto error ;
}
sk - > sk_sndmsg_off + = copy ;
frag - > size + = copy ;
skb - > len + = copy ;
skb - > data_len + = copy ;
2008-01-23 09:39:26 +03:00
skb - > truesize + = copy ;
atomic_add ( copy , & sk - > sk_wmem_alloc ) ;
2005-04-17 02:20:36 +04:00
}
offset + = copy ;
length - = copy ;
}
return 0 ;
error :
inet - > cork . length - = length ;
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( sock_net ( sk ) , IPSTATS_MIB_OUTDISCARDS ) ;
2007-02-09 17:24:47 +03:00
return err ;
2005-04-17 02:20:36 +04:00
}
/*
 * Append @size bytes of @page, starting at @offset, to the datagram that
 * is already pending on sk->sk_write_queue.
 *
 * The page is attached to the tail skb as a paged fragment (a reference
 * is taken with get_page(), or the last fragment is extended when
 * skb_can_coalesce() allows it). When the tail skb has no room left, a
 * new header-only skb is allocated and queued, and the bytes of the
 * previous skb beyond maxfraglen are moved into it.
 *
 * Returns 0 on success (and immediately for MSG_PROBE), or:
 *   -EPERM      the socket has inet->hdrincl set
 *   -EINVAL     the write queue is empty
 *   -EOPNOTSUPP the output device lacks NETIF_F_SG
 *   -EMSGSIZE   cork.length + size exceeds 0xFFFF - fragheaderlen, or
 *               the skb already holds MAX_SKB_FRAGS fragments
 *   -ENOBUFS    sock_wmalloc() failed
 *
 * On the error path taken from inside the loop, the part of @size that
 * was not appended is subtracted from inet->cork.length again and
 * IPSTATS_MIB_OUTDISCARDS is incremented.
 */
ssize_t ip_append_page ( struct sock * sk , struct page * page ,
int offset , size_t size , int flags )
{
struct inet_sock * inet = inet_sk ( sk ) ;
struct sk_buff * skb ;
struct rtable * rt ;
struct ip_options * opt = NULL ;
int hh_len ;
int mtu ;
int len ;
int err ;
unsigned int maxfraglen , fragheaderlen , fraggap ;
/* Sockets with hdrincl set are refused. */
if ( inet - > hdrincl )
return - EPERM ;
if ( flags & MSG_PROBE )
return 0 ;
/* There must already be a pending datagram to extend. */
if ( skb_queue_empty ( & sk - > sk_write_queue ) )
return - EINVAL ;
/* Route and IP options come from the cork state. */
2008-03-10 11:30:37 +03:00
rt = ( struct rtable * ) inet - > cork . dst ;
2005-04-17 02:20:36 +04:00
if ( inet - > cork . flags & IPCORK_OPT )
opt = inet - > cork . opt ;
/* Paged fragments are only used when the device advertises NETIF_F_SG. */
if ( ! ( rt - > u . dst . dev - > features & NETIF_F_SG ) )
return - EOPNOTSUPP ;
hh_len = LL_RESERVED_SPACE ( rt - > u . dst . dev ) ;
mtu = inet - > cork . fragsize ;
fragheaderlen = sizeof ( struct iphdr ) + ( opt ? opt - > optlen : 0 ) ;
/* Header plus the largest payload that is a multiple of 8 bytes. */
maxfraglen = ( ( mtu - fragheaderlen ) & ~ 7 ) + fragheaderlen ;
/* Refuse (and report via ip_local_error) if the total would be too long. */
if ( inet - > cork . length + size > 0xFFFF - fragheaderlen ) {
ip_local_error ( sk , EMSGSIZE , rt - > rt_dst , inet - > dport , mtu ) ;
return - EMSGSIZE ;
}
if ( ( skb = skb_peek_tail ( & sk - > sk_write_queue ) ) = = NULL )
return - EINVAL ;
/* Account for the whole chunk up front; the error path undoes the rest. */
inet - > cork . length + = size ;
/* UDP on a device with NETIF_F_UFO: mark the tail skb as GSO. */
2005-10-19 02:46:41 +04:00
if ( ( sk - > sk_protocol = = IPPROTO_UDP ) & &
2006-06-22 13:40:14 +04:00
( rt - > u . dst . dev - > features & NETIF_F_UFO ) ) {
skb_shinfo ( skb ) - > gso_size = mtu - fragheaderlen ;
2006-07-01 00:37:03 +04:00
skb_shinfo ( skb ) - > gso_type = SKB_GSO_UDP ;
2006-06-22 13:40:14 +04:00
}
2005-10-19 02:46:41 +04:00
2005-04-17 02:20:36 +04:00
while ( size > 0 ) {
int i ;
/* A GSO skb takes everything that is left in one go. */
2006-07-09 00:34:32 +04:00
if ( skb_is_gso ( skb ) )
2005-10-19 02:46:41 +04:00
len = size ;
else {
/* Check if the remaining data fits into current packet. */
len = mtu - skb - > len ;
/* If it does not, fill this skb only up to maxfraglen. */
if ( len < size )
len = maxfraglen - skb - > len ;
}
2005-04-17 02:20:36 +04:00
/*
 * No room left in the current skb: allocate a new one that holds
 * only the headers plus the bytes of the previous skb that lie
 * beyond maxfraglen (fraggap), queue it and retry.
 */
if ( len < = 0 ) {
struct sk_buff * skb_prev ;
int alloclen ;
skb_prev = skb ;
2005-10-13 22:43:02 +04:00
fraggap = skb_prev - > len - maxfraglen ;
2005-04-17 02:20:36 +04:00
alloclen = fragheaderlen + hh_len + fraggap + 15 ;
skb = sock_wmalloc ( sk , alloclen , 1 , sk - > sk_allocation ) ;
if ( unlikely ( ! skb ) ) {
err = - ENOBUFS ;
goto error ;
}
/*
* Fill in the control structures
*/
skb - > ip_summed = CHECKSUM_NONE ;
skb - > csum = 0 ;
skb_reserve ( skb , hh_len ) ;
/*
* Find where to start putting bytes .
*/
2007-03-13 19:51:52 +03:00
skb_put ( skb , fragheaderlen + fraggap ) ;
2007-03-11 01:15:25 +03:00
skb_reset_network_header ( skb ) ;
2007-04-11 08:21:55 +04:00
skb - > transport_header = ( skb - > network_header +
fragheaderlen ) ;
2005-04-17 02:20:36 +04:00
/*
 * Move the fraggap bytes from the previous skb: copy them (with
 * checksum) behind the new header, subtract their checksum from
 * the previous skb and trim the previous skb to maxfraglen.
 */
if ( fraggap ) {
2007-03-13 19:51:52 +03:00
skb - > csum = skb_copy_and_csum_bits ( skb_prev ,
maxfraglen ,
2007-04-26 05:04:18 +04:00
skb_transport_header ( skb ) ,
2007-03-13 19:51:52 +03:00
fraggap , 0 ) ;
2005-04-17 02:20:36 +04:00
skb_prev - > csum = csum_sub ( skb_prev - > csum ,
skb - > csum ) ;
2006-08-14 07:12:58 +04:00
pskb_trim_unique ( skb_prev , maxfraglen ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Put the packet on the pending queue .
*/
__skb_queue_tail ( & sk - > sk_write_queue , skb ) ;
continue ;
}
i = skb_shinfo ( skb ) - > nr_frags ;
if ( len > size )
len = size ;
/*
 * Attach the page data: grow the last fragment when it can be
 * coalesced, otherwise take a page reference and add a new
 * fragment descriptor; fail if all fragment slots are in use.
 */
if ( skb_can_coalesce ( skb , i , page , offset ) ) {
skb_shinfo ( skb ) - > frags [ i - 1 ] . size + = len ;
} else if ( i < MAX_SKB_FRAGS ) {
get_page ( page ) ;
skb_fill_page_desc ( skb , i , page , offset , len ) ;
} else {
err = - EMSGSIZE ;
goto error ;
}
/* With CHECKSUM_NONE the page data is folded into skb->csum here. */
if ( skb - > ip_summed = = CHECKSUM_NONE ) {
2006-11-15 08:36:14 +03:00
__wsum csum ;
2005-04-17 02:20:36 +04:00
csum = csum_page ( page , offset , len ) ;
skb - > csum = csum_block_add ( skb - > csum , csum , skb - > len ) ;
}
/* Update the skb lengths and the socket's write memory accounting. */
skb - > len + = len ;
skb - > data_len + = len ;
2008-01-23 10:44:31 +03:00
skb - > truesize + = len ;
atomic_add ( len , & sk - > sk_wmem_alloc ) ;
2005-04-17 02:20:36 +04:00
offset + = len ;
size - = len ;
}
return 0 ;
error :
/* Take back the part of the chunk that was never appended. */
inet - > cork . length - = size ;
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( sock_net ( sk ) , IPSTATS_MIB_OUTDISCARDS ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
2007-11-06 08:03:24 +03:00
static void ip_cork_release ( struct inet_sock * inet )
{
inet - > cork . flags & = ~ IPCORK_OPT ;
kfree ( inet - > cork . opt ) ;
inet - > cork . opt = NULL ;
2008-03-10 11:30:37 +03:00
dst_release ( inet - > cork . dst ) ;
inet - > cork . dst = NULL ;
2007-11-06 08:03:24 +03:00
}
2005-04-17 02:20:36 +04:00
/*
 * Combine all pending IP fragments on the socket into one IP datagram
 * and push it out.
 *
 * The first skb on sk->sk_write_queue becomes the head; every further
 * skb is chained onto the head's frag_list after its network header has
 * been pulled. The IP header is then written at the head's data pointer
 * and the result is handed to ip_local_out().
 *
 * Returns 0 on success or when the queue was empty. A negative result
 * from ip_local_out() is returned unchanged; a positive one is mapped
 * through net_xmit_errno() when inet->recverr is set and otherwise
 * turned into 0. A non-zero final result also increments
 * IPSTATS_MIB_OUTDISCARDS. The cork state is released in every case.
 */
int ip_push_pending_frames ( struct sock * sk )
{
struct sk_buff * skb , * tmp_skb ;
struct sk_buff * * tail_skb ;
struct inet_sock * inet = inet_sk ( sk ) ;
2008-07-15 10:00:43 +04:00
struct net * net = sock_net ( sk ) ;
2005-04-17 02:20:36 +04:00
struct ip_options * opt = NULL ;
2008-03-10 11:30:37 +03:00
struct rtable * rt = ( struct rtable * ) inet - > cork . dst ;
2005-04-17 02:20:36 +04:00
struct iphdr * iph ;
2006-01-07 00:24:29 +03:00
__be16 df = 0 ;
2005-04-17 02:20:36 +04:00
__u8 ttl ;
int err = 0 ;
/* Nothing queued: only the cork state is released. */
if ( ( skb = __skb_dequeue ( & sk - > sk_write_queue ) ) = = NULL )
goto out ;
tail_skb = & ( skb_shinfo ( skb ) - > frag_list ) ;
/* Move skb->data from the extension header to the IP header. */
2007-04-11 07:50:43 +04:00
if ( skb - > data < skb_network_header ( skb ) )
2007-03-11 04:16:10 +03:00
__skb_pull ( skb , skb_network_offset ( skb ) ) ;
2005-04-17 02:20:36 +04:00
/*
 * Chain the remaining skbs onto the head's frag_list. Each one has
 * its header pulled (the length is taken from the head skb), its
 * sizes are added to the head, and its socket reference is dropped.
 */
while ( ( tmp_skb = __skb_dequeue ( & sk - > sk_write_queue ) ) ! = NULL ) {
2007-03-16 23:26:39 +03:00
__skb_pull ( tmp_skb , skb_network_header_len ( skb ) ) ;
2005-04-17 02:20:36 +04:00
* tail_skb = tmp_skb ;
tail_skb = & ( tmp_skb - > next ) ;
skb - > len + = tmp_skb - > len ;
skb - > data_len + = tmp_skb - > len ;
skb - > truesize + = tmp_skb - > truesize ;
__sock_put ( tmp_skb - > sk ) ;
tmp_skb - > destructor = NULL ;
tmp_skb - > sk = NULL ;
}
/* Unless the user demanded real PMTU discovery (IP_PMTUDISC_DO), allow
 * the frame generated here to be fragmented locally. No matter how
 * transforms change the size of the packet, it will come out.
 */
2007-04-21 02:53:27 +04:00
if ( inet - > pmtudisc < IP_PMTUDISC_DO )
2005-04-17 02:20:36 +04:00
skb - > local_df = 1 ;
/* The DF bit is set when we want to see DF on outgoing frames.
 * If local_df is set too, we still allow this frame to be fragmented
 * locally.
 */
2007-04-21 02:53:27 +04:00
if ( inet - > pmtudisc > = IP_PMTUDISC_DO | |
2005-04-17 02:20:36 +04:00
( skb - > len < = dst_mtu ( & rt - > u . dst ) & &
ip_dont_fragment ( sk , & rt - > u . dst ) ) )
df = htons ( IP_DF ) ;
if ( inet - > cork . flags & IPCORK_OPT )
opt = inet - > cork . opt ;
/* Multicast routes use the socket's multicast TTL. */
if ( rt - > rt_type = = RTN_MULTICAST )
ttl = inet - > mc_ttl ;
else
ttl = ip_select_ttl ( inet , & rt - > u . dst ) ;
/* Write the IP header at the start of the head skb's data. */
iph = ( struct iphdr * ) skb - > data ;
iph - > version = 4 ;
iph - > ihl = 5 ;
if ( opt ) {
/* optlen is in bytes, ihl counts 32-bit words. */
iph - > ihl + = opt - > optlen > > 2 ;
ip_options_build ( skb , opt , inet - > cork . addr , rt , 0 ) ;
}
iph - > tos = inet - > tos ;
iph - > frag_off = df ;
2006-03-23 01:27:59 +03:00
ip_select_ident ( iph , & rt - > u . dst , sk ) ;
2005-04-17 02:20:36 +04:00
iph - > ttl = ttl ;
iph - > protocol = sk - > sk_protocol ;
iph - > saddr = rt - > rt_src ;
iph - > daddr = rt - > rt_dst ;
skb - > priority = sk - > sk_priority ;
2008-01-31 06:08:16 +03:00
skb - > mark = sk - > sk_mark ;
2005-04-17 02:20:36 +04:00
/* The skb takes its own reference on the route. */
skb - > dst = dst_clone ( & rt - > u . dst ) ;
2007-09-17 20:57:33 +04:00
if ( iph - > protocol = = IPPROTO_ICMP )
2008-07-15 10:00:43 +04:00
icmp_out_count ( net , ( ( struct icmphdr * )
2007-09-17 20:57:33 +04:00
skb_transport_header ( skb ) ) - > type ) ;
2005-04-17 02:20:36 +04:00
/* Netfilter gets the whole, not yet fragmented skb. */
2008-01-12 06:14:00 +03:00
err = ip_local_out ( skb ) ;
2005-04-17 02:20:36 +04:00
if ( err ) {
/* Positive results are only reported when recverr is set. */
if ( err > 0 )
err = inet - > recverr ? net_xmit_errno ( err ) : 0 ;
if ( err )
goto error ;
}
out :
2007-11-06 08:03:24 +03:00
ip_cork_release ( inet ) ;
2005-04-17 02:20:36 +04:00
return err ;
error :
2008-07-17 07:19:49 +04:00
IP_INC_STATS ( net , IPSTATS_MIB_OUTDISCARDS ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
/*
* Throw away all pending data on the socket .
*/
void ip_flush_pending_frames ( struct sock * sk )
{
struct sk_buff * skb ;
while ( ( skb = __skb_dequeue_tail ( & sk - > sk_write_queue ) ) ! = NULL )
kfree_skb ( skb ) ;
2007-11-06 08:03:24 +03:00
ip_cork_release ( inet_sk ( sk ) ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Fetch data from kernel space and fill in checksum if needed .
*/
2007-02-09 17:24:47 +03:00
static int ip_reply_glue_bits ( void * dptr , char * to , int offset ,
2005-04-17 02:20:36 +04:00
int len , int odd , struct sk_buff * skb )
{
2006-11-15 08:36:34 +03:00
__wsum csum ;
2005-04-17 02:20:36 +04:00
csum = csum_partial_copy_nocheck ( dptr + offset , to , len , 0 ) ;
skb - > csum = csum_block_add ( skb - > csum , csum , odd ) ;
2007-02-09 17:24:47 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2007-02-09 17:24:47 +03:00
/*
2005-04-17 02:20:36 +04:00
* Generic function to send a packet as reply to another packet .
* Used to send TCP resets so far . ICMP should use this function too .
*
2007-02-09 17:24:47 +03:00
* Should run single threaded per socket because it uses the sock
2005-04-17 02:20:36 +04:00
* structure to pass arguments .
*/
void ip_send_reply ( struct sock * sk , struct sk_buff * skb , struct ip_reply_arg * arg ,
unsigned int len )
{
struct inet_sock * inet = inet_sk ( sk ) ;
struct {
struct ip_options opt ;
char data [ 40 ] ;
} replyopts ;
struct ipcm_cookie ipc ;
2006-09-28 05:28:07 +04:00
__be32 daddr ;
2008-03-06 05:30:47 +03:00
struct rtable * rt = skb - > rtable ;
2005-04-17 02:20:36 +04:00
if ( ip_options_echo ( & replyopts . opt , skb ) )
return ;
daddr = ipc . addr = rt - > rt_src ;
ipc . opt = NULL ;
if ( replyopts . opt . optlen ) {
ipc . opt = & replyopts . opt ;
if ( ipc . opt - > srr )
daddr = replyopts . opt . faddr ;
}
{
2007-06-05 08:32:46 +04:00
struct flowi fl = { . oif = arg - > bound_dev_if ,
. nl_u = { . ip4_u =
2005-04-17 02:20:36 +04:00
{ . daddr = daddr ,
. saddr = rt - > rt_spec_dst ,
2007-04-21 09:47:35 +04:00
. tos = RT_TOS ( ip_hdr ( skb ) - > tos ) } } ,
2005-04-17 02:20:36 +04:00
/* Not quite clean, but right. */
. uli_u = { . ports =
2007-04-11 08:04:22 +04:00
{ . sport = tcp_hdr ( skb ) - > dest ,
. dport = tcp_hdr ( skb ) - > source } } ,
2005-04-17 02:20:36 +04:00
. proto = sk - > sk_protocol } ;
2006-08-05 10:12:42 +04:00
security_skb_classify_flow ( skb , & fl ) ;
2008-03-25 20:26:21 +03:00
if ( ip_route_output_key ( sock_net ( sk ) , & rt , & fl ) )
2005-04-17 02:20:36 +04:00
return ;
}
/* And let IP do all the hard work.
This chunk is not reenterable , hence spinlock .
Note that it uses the fact , that this function is called
with locally disabled BH and that sk cannot be already spinlocked .
*/
bh_lock_sock ( sk ) ;
2007-04-21 09:47:35 +04:00
inet - > tos = ip_hdr ( skb ) - > tos ;
2005-04-17 02:20:36 +04:00
sk - > sk_priority = skb - > priority ;
2007-04-21 09:47:35 +04:00
sk - > sk_protocol = ip_hdr ( skb ) - > protocol ;
2007-06-05 08:32:46 +04:00
sk - > sk_bound_dev_if = arg - > bound_dev_if ;
2005-04-17 02:20:36 +04:00
ip_append_data ( sk , ip_reply_glue_bits , arg - > iov - > iov_base , len , 0 ,
& ipc , rt , MSG_DONTWAIT ) ;
if ( ( skb = skb_peek ( & sk - > sk_write_queue ) ) ! = NULL ) {
if ( arg - > csumoffset > = 0 )
2007-04-26 05:04:18 +04:00
* ( ( __sum16 * ) skb_transport_header ( skb ) +
arg - > csumoffset ) = csum_fold ( csum_add ( skb - > csum ,
arg - > csum ) ) ;
2005-04-17 02:20:36 +04:00
skb - > ip_summed = CHECKSUM_NONE ;
ip_push_pending_frames ( sk ) ;
}
bh_unlock_sock ( sk ) ;
ip_rt_put ( rt ) ;
}
/*
 *	Boot-time initialisation of the IP layer: calls ip_rt_init() and
 *	inet_initpeers(), and igmp_mc_proc_init() when both
 *	CONFIG_IP_MULTICAST and CONFIG_PROC_FS are defined.
 */
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#ifdef CONFIG_IP_MULTICAST
#ifdef CONFIG_PROC_FS
	igmp_mc_proc_init();
#endif /* CONFIG_PROC_FS */
#endif /* CONFIG_IP_MULTICAST */
}
/* IP output entry points made available through EXPORT_SYMBOL(). */
EXPORT_SYMBOL ( ip_generic_getfrag ) ;
EXPORT_SYMBOL ( ip_queue_xmit ) ;
EXPORT_SYMBOL ( ip_send_check ) ;