2005-04-16 15:20:36 -07:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* The IP fragmentation functionality .
2007-02-09 23:24:47 +09:00
*
2005-04-16 15:20:36 -07:00
* Authors : Fred N . van Kempen < waltje @ uWalt . NL . Mugnet . ORG >
2008-10-13 19:01:08 -07:00
* Alan Cox < alan @ lxorguk . ukuu . org . uk >
2005-04-16 15:20:36 -07:00
*
* Fixes :
* Alan Cox : Split from ip . c , see ip_input . c for history .
* David S . Miller : Begin massive cleanup . . .
* Andi Kleen : Add sysctls .
* xxxx : Overlapfrag bug .
* Ultima : ip_expire ( ) kernel panic .
* Bill Hawes : Frag accounting and evictor fixes .
* John McDonald : 0 length frag bug .
* Alexey Kuznetsov : SMP races , threading , cleanup .
* Patrick McHardy : LRU queue of frag heads for evictor .
*/
2005-12-13 23:14:27 -08:00
# include <linux/compiler.h>
2005-04-16 15:20:36 -07:00
# include <linux/module.h>
# include <linux/types.h>
# include <linux/mm.h>
# include <linux/jiffies.h>
# include <linux/skbuff.h>
# include <linux/list.h>
# include <linux/ip.h>
# include <linux/icmp.h>
# include <linux/netdevice.h>
# include <linux/jhash.h>
# include <linux/random.h>
# include <net/sock.h>
# include <net/ip.h>
# include <net/icmp.h>
# include <net/checksum.h>
2005-12-13 23:14:27 -08:00
# include <net/inetpeer.h>
2007-10-15 02:24:19 -07:00
# include <net/inet_frag.h>
2005-04-16 15:20:36 -07:00
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/inet.h>
# include <linux/netfilter_ipv4.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
 * as well. Or notify me, at least. --ANK
 */
2008-01-22 05:58:31 -08:00
/*
 * Largest jump of the per-peer rid counter tolerated between two fragments
 * of the same queue (see ip_frag_too_far()).  Zero switches the check off:
 * no inet_peer is attached to new queues in that case.
 */
static int sysctl_ipfrag_max_dist __read_mostly = 64;
2005-12-13 23:14:27 -08:00
2005-04-16 15:20:36 -07:00
/* Private per-fragment state overlaid on skb->cb (accessed via FRAG_CB()). */
struct ipfrag_skb_cb {
	/* NOTE(review): presumably first so IPCB(skb) keeps working - confirm */
	struct inet_skb_parm	h;
	/* Byte offset of this fragment's payload within the datagram. */
	int			offset;
};
2008-11-03 02:47:38 -08:00
/* View the control buffer of @skb as fragment bookkeeping data. */
#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))
2005-04-16 15:20:36 -07:00
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
2007-10-15 02:24:19 -07:00
struct inet_frag_queue q ;
2005-04-16 15:20:36 -07:00
u32 user ;
2006-09-26 22:19:02 -07:00
__be32 saddr ;
__be32 daddr ;
__be16 id ;
2005-04-16 15:20:36 -07:00
u8 protocol ;
2005-12-13 23:14:27 -08:00
int iif ;
unsigned int rid ;
struct inet_peer * peer ;
2005-04-16 15:20:36 -07:00
} ;
2007-10-15 02:31:52 -07:00
/*
 * IPv4 instance of the generic fragment machinery: callbacks, hash secret
 * (rnd) and lock.  Filled in by ipfrag_init().
 */
static struct inet_frags ip4_frags;
2005-04-16 15:20:36 -07:00
2008-01-22 06:06:23 -08:00
int ip_frag_nqueues ( struct net * net )
2007-10-15 02:31:52 -07:00
{
2008-01-22 06:06:23 -08:00
return net - > ipv4 . frags . nqueues ;
2007-10-15 02:31:52 -07:00
}
2005-04-16 15:20:36 -07:00
2008-01-22 06:07:25 -08:00
int ip_frag_mem ( struct net * net )
2007-10-15 02:31:52 -07:00
{
2008-01-22 06:07:25 -08:00
return atomic_read ( & net - > ipv4 . frags . mem ) ;
2007-10-15 02:31:52 -07:00
}
2005-04-16 15:20:36 -07:00
2007-10-14 00:38:15 -07:00
/* Forward declaration: ip_frag_queue() calls this before its definition. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev);
2007-10-17 19:46:47 -07:00
/* Lookup/creation key handed to the match and constructor callbacks. */
struct ip4_create_arg {
	struct iphdr	*iph;	/* header of the fragment being looked up */
	u32		user;	/* defrag user requesting reassembly */
};
2006-09-26 22:19:02 -07:00
/*
 * Hash the (id, protocol, saddr, daddr) tuple, salted with ip4_frags.rnd,
 * into a bucket index below INETFRAGS_HASHSZ.
 */
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
	u32 id_proto = (__force u32)id << 16 | prot;
	u32 src = (__force u32)saddr;
	u32 dst = (__force u32)daddr;

	return jhash_3words(id_proto, src, dst, ip4_frags.rnd) &
	       (INETFRAGS_HASHSZ - 1);
}
2007-10-15 02:38:08 -07:00
static unsigned int ip4_hashfn ( struct inet_frag_queue * q )
2005-04-16 15:20:36 -07:00
{
2007-10-15 02:38:08 -07:00
struct ipq * ipq ;
2005-04-16 15:20:36 -07:00
2007-10-15 02:38:08 -07:00
ipq = container_of ( q , struct ipq , q ) ;
return ipqhashfn ( ipq - > id , ipq - > saddr , ipq - > daddr , ipq - > protocol ) ;
2005-04-16 15:20:36 -07:00
}
2007-10-17 19:47:21 -07:00
static int ip4_frag_match ( struct inet_frag_queue * q , void * a )
{
struct ipq * qp ;
struct ip4_create_arg * arg = a ;
qp = container_of ( q , struct ipq , q ) ;
return ( qp - > id = = arg - > iph - > id & &
qp - > saddr = = arg - > iph - > saddr & &
qp - > daddr = = arg - > iph - > daddr & &
qp - > protocol = = arg - > iph - > protocol & &
qp - > user = = arg - > user ) ;
}
2005-04-16 15:20:36 -07:00
/* Memory Tracking Functions. */
2008-01-22 06:07:25 -08:00
static __inline__ void frag_kfree_skb ( struct netns_frags * nf ,
struct sk_buff * skb , int * work )
2005-04-16 15:20:36 -07:00
{
if ( work )
* work - = skb - > truesize ;
2008-01-22 06:07:25 -08:00
atomic_sub ( skb - > truesize , & nf - > mem ) ;
2005-04-16 15:20:36 -07:00
kfree_skb ( skb ) ;
}
2007-10-17 19:46:47 -07:00
static void ip4_frag_init ( struct inet_frag_queue * q , void * a )
{
struct ipq * qp = container_of ( q , struct ipq , q ) ;
struct ip4_create_arg * arg = a ;
qp - > protocol = arg - > iph - > protocol ;
qp - > id = arg - > iph - > id ;
qp - > saddr = arg - > iph - > saddr ;
qp - > daddr = arg - > iph - > daddr ;
qp - > user = arg - > user ;
qp - > peer = sysctl_ipfrag_max_dist ?
inet_getpeer ( arg - > iph - > saddr , 1 ) : NULL ;
}
2007-10-15 02:39:14 -07:00
static __inline__ void ip4_frag_free ( struct inet_frag_queue * q )
2005-04-16 15:20:36 -07:00
{
2007-10-15 02:39:14 -07:00
struct ipq * qp ;
qp = container_of ( q , struct ipq , q ) ;
if ( qp - > peer )
inet_putpeer ( qp - > peer ) ;
2005-04-16 15:20:36 -07:00
}
/* Destruction primitives. */
2007-10-15 02:41:09 -07:00
static __inline__ void ipq_put ( struct ipq * ipq )
2005-04-16 15:20:36 -07:00
{
2007-10-15 02:41:56 -07:00
inet_frag_put ( & ipq - > q , & ip4_frags ) ;
2005-04-16 15:20:36 -07:00
}
/* Kill ipq entry. It is not destroyed immediately,
* because caller ( and someone more ) holds reference count .
*/
static void ipq_kill ( struct ipq * ipq )
{
2007-10-15 02:37:18 -07:00
inet_frag_kill ( & ipq - > q , & ip4_frags ) ;
2005-04-16 15:20:36 -07:00
}
2007-02-09 23:24:47 +09:00
/* Memory limiting on fragments. Evictor trashes the oldest
2005-04-16 15:20:36 -07:00
* fragment queue until we are back under the threshold .
*/
2008-01-22 06:07:25 -08:00
static void ip_evictor ( struct net * net )
2005-04-16 15:20:36 -07:00
{
2007-10-15 02:40:06 -07:00
int evicted ;
2008-01-22 06:07:25 -08:00
evicted = inet_frag_evictor ( & net - > ipv4 . frags , & ip4_frags ) ;
2007-10-15 02:40:06 -07:00
if ( evicted )
2008-07-16 20:20:33 -07:00
IP_ADD_STATS_BH ( net , IPSTATS_MIB_REASMFAILS , evicted ) ;
2005-04-16 15:20:36 -07:00
}
/*
* Oops , a fragment queue timed out . Kill it and send an ICMP reply .
*/
static void ip_expire ( unsigned long arg )
{
2007-10-17 19:45:23 -07:00
struct ipq * qp ;
2008-07-16 20:19:08 -07:00
struct net * net ;
2007-10-17 19:45:23 -07:00
qp = container_of ( ( struct inet_frag_queue * ) arg , struct ipq , q ) ;
2008-07-16 20:19:08 -07:00
net = container_of ( qp - > q . net , struct net , ipv4 . frags ) ;
2005-04-16 15:20:36 -07:00
2007-10-15 02:24:19 -07:00
spin_lock ( & qp - > q . lock ) ;
2005-04-16 15:20:36 -07:00
2008-03-28 16:35:27 -07:00
if ( qp - > q . last_in & INET_FRAG_COMPLETE )
2005-04-16 15:20:36 -07:00
goto out ;
ipq_kill ( qp ) ;
2008-07-16 20:20:11 -07:00
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMTIMEOUT ) ;
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMFAILS ) ;
2005-04-16 15:20:36 -07:00
2008-03-28 16:35:27 -07:00
if ( ( qp - > q . last_in & INET_FRAG_FIRST_IN ) & & qp - > q . fragments ! = NULL ) {
2007-10-15 02:24:19 -07:00
struct sk_buff * head = qp - > q . fragments ;
2008-03-24 15:31:00 -07:00
2005-04-16 15:20:36 -07:00
/* Send an ICMP "Fragment Reassembly Timeout" message. */
2008-03-24 15:31:00 -07:00
if ( ( head - > dev = dev_get_by_index ( net , qp - > iif ) ) ! = NULL ) {
2005-04-16 15:20:36 -07:00
icmp_send ( head , ICMP_TIME_EXCEEDED , ICMP_EXC_FRAGTIME , 0 ) ;
dev_put ( head - > dev ) ;
}
}
out :
2007-10-15 02:24:19 -07:00
spin_unlock ( & qp - > q . lock ) ;
2007-10-15 02:41:09 -07:00
ipq_put ( qp ) ;
2005-04-16 15:20:36 -07:00
}
2007-10-17 19:47:21 -07:00
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram , and create new one , if nothing is found .
*/
2008-01-22 06:02:14 -08:00
static inline struct ipq * ip_find ( struct net * net , struct iphdr * iph , u32 user )
2005-04-16 15:20:36 -07:00
{
2007-10-17 19:46:47 -07:00
struct inet_frag_queue * q ;
struct ip4_create_arg arg ;
2007-10-17 19:47:21 -07:00
unsigned int hash ;
2005-04-16 15:20:36 -07:00
2007-10-17 19:46:47 -07:00
arg . iph = iph ;
arg . user = user ;
2008-06-27 20:06:08 -07:00
read_lock ( & ip4_frags . lock ) ;
2007-10-17 19:47:21 -07:00
hash = ipqhashfn ( iph - > id , iph - > saddr , iph - > daddr , iph - > protocol ) ;
2005-04-16 15:20:36 -07:00
2008-01-22 06:02:14 -08:00
q = inet_frag_find ( & net - > ipv4 . frags , & ip4_frags , & arg , hash ) ;
2007-10-17 19:46:47 -07:00
if ( q = = NULL )
goto out_nomem ;
2005-04-16 15:20:36 -07:00
2007-10-17 19:46:47 -07:00
return container_of ( q , struct ipq , q ) ;
2005-04-16 15:20:36 -07:00
out_nomem :
2005-08-09 20:50:53 -07:00
LIMIT_NETDEBUG ( KERN_ERR " ip_frag_create: no memory left ! \n " ) ;
2005-04-16 15:20:36 -07:00
return NULL ;
}
2005-12-13 23:14:27 -08:00
/* Is the fragment too far ahead to be part of ipq? */
static inline int ip_frag_too_far ( struct ipq * qp )
{
struct inet_peer * peer = qp - > peer ;
unsigned int max = sysctl_ipfrag_max_dist ;
unsigned int start , end ;
int rc ;
if ( ! peer | | ! max )
return 0 ;
start = qp - > rid ;
end = atomic_inc_return ( & peer - > rid ) ;
qp - > rid = end ;
2007-10-15 02:24:19 -07:00
rc = qp - > q . fragments & & ( end - start ) > max ;
2005-12-13 23:14:27 -08:00
if ( rc ) {
2008-07-16 20:20:11 -07:00
struct net * net ;
net = container_of ( qp - > q . net , struct net , ipv4 . frags ) ;
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMFAILS ) ;
2005-12-13 23:14:27 -08:00
}
return rc ;
}
static int ip_frag_reinit ( struct ipq * qp )
{
struct sk_buff * fp ;
2008-01-22 06:09:37 -08:00
if ( ! mod_timer ( & qp - > q . timer , jiffies + qp - > q . net - > timeout ) ) {
2007-10-15 02:24:19 -07:00
atomic_inc ( & qp - > q . refcnt ) ;
2005-12-13 23:14:27 -08:00
return - ETIMEDOUT ;
}
2007-10-15 02:24:19 -07:00
fp = qp - > q . fragments ;
2005-12-13 23:14:27 -08:00
do {
struct sk_buff * xp = fp - > next ;
2008-01-22 06:07:25 -08:00
frag_kfree_skb ( qp - > q . net , fp , NULL ) ;
2005-12-13 23:14:27 -08:00
fp = xp ;
} while ( fp ) ;
2007-10-15 02:24:19 -07:00
qp - > q . last_in = 0 ;
qp - > q . len = 0 ;
qp - > q . meat = 0 ;
qp - > q . fragments = NULL ;
2005-12-13 23:14:27 -08:00
qp - > iif = 0 ;
return 0 ;
}
2005-04-16 15:20:36 -07:00
/*
 * Add new segment to existing queue.
 *
 * qp  - reassembly queue the fragment belongs to
 * skb - the incoming fragment; on failure it is freed at the err label,
 *       otherwise it is linked into qp->q.fragments
 *
 * Returns the result of ip_frag_reasm() when this fragment completes the
 * datagram, -EINPROGRESS when it was queued and more fragments are needed,
 * or a negative errno (-ENOENT, -EINVAL, -ENOMEM, or whatever
 * ip_frag_reinit() / pskb_trim_rcsum() reported) after freeing skb.
 *
 * The caller visible in this file, ip_defrag(), holds qp->q.lock around
 * this call.
 */
2007-10-14 00:38:15 -07:00
static int ip_frag_queue ( struct ipq * qp , struct sk_buff * skb )
2005-04-16 15:20:36 -07:00
{
struct sk_buff * prev , * next ;
2007-10-14 00:38:15 -07:00
struct net_device * dev ;
2005-04-16 15:20:36 -07:00
int flags , offset ;
int ihl , end ;
2007-10-14 00:38:15 -07:00
int err = - ENOENT ;
2005-04-16 15:20:36 -07:00
/* Queue is already marked complete: drop the fragment (err is -ENOENT). */
2008-03-28 16:35:27 -07:00
if ( qp - > q . last_in & INET_FRAG_COMPLETE )
2005-04-16 15:20:36 -07:00
goto err ;
/* If the sender has run too far ahead, try to restart the queue from
 * scratch; when that fails too, kill the queue and drop the fragment
 * with the error ip_frag_reinit() returned.
 */
2005-12-13 23:14:27 -08:00
if ( ! ( IPCB ( skb ) - > flags & IPSKB_FRAG_COMPLETE ) & &
2007-10-14 00:38:15 -07:00
unlikely ( ip_frag_too_far ( qp ) ) & &
unlikely ( err = ip_frag_reinit ( qp ) ) ) {
2005-12-13 23:14:27 -08:00
ipq_kill ( qp ) ;
goto err ;
}
/* Split frag_off into the flag bits and the fragment offset. */
2007-04-20 22:47:35 -07:00
offset = ntohs ( ip_hdr ( skb ) - > frag_off ) ;
2005-04-16 15:20:36 -07:00
flags = offset & ~ IP_OFFSET ;
offset & = IP_OFFSET ;
offset < < = 3 ; /* offset is in 8-byte chunks */
2007-03-12 20:09:15 -03:00
ihl = ip_hdrlen ( skb ) ;
2005-04-16 15:20:36 -07:00
/* Determine the position of this fragment. */
2007-02-09 23:24:47 +09:00
end = offset + skb - > len - ihl ;
2007-10-14 00:38:15 -07:00
err = - EINVAL ;
2005-04-16 15:20:36 -07:00
/* Is this the final fragment? */
if ( ( flags & IP_MF ) = = 0 ) {
/* If we already have some bits beyond end
 * or have different end, the segment is corrupted.
 */
2007-10-15 02:24:19 -07:00
if ( end < qp - > q . len | |
2008-03-28 16:35:27 -07:00
( ( qp - > q . last_in & INET_FRAG_LAST_IN ) & & end ! = qp - > q . len ) )
2005-04-16 15:20:36 -07:00
goto err ;
2008-03-28 16:35:27 -07:00
qp - > q . last_in | = INET_FRAG_LAST_IN ;
2007-10-15 02:24:19 -07:00
qp - > q . len = end ;
2005-04-16 15:20:36 -07:00
} else {
/* A non-final fragment is cut back to a multiple of 8 bytes; a
 * checksum that covered the dropped tail can no longer be trusted.
 */
if ( end & 7 ) {
end & = ~ 7 ;
if ( skb - > ip_summed ! = CHECKSUM_UNNECESSARY )
skb - > ip_summed = CHECKSUM_NONE ;
}
2007-10-15 02:24:19 -07:00
if ( end > qp - > q . len ) {
2005-04-16 15:20:36 -07:00
/* Some bits beyond end -> corruption. */
2008-03-28 16:35:27 -07:00
if ( qp - > q . last_in & INET_FRAG_LAST_IN )
2005-04-16 15:20:36 -07:00
goto err ;
2007-10-15 02:24:19 -07:00
qp - > q . len = end ;
2005-04-16 15:20:36 -07:00
}
}
/* Nothing left of the payload: reject with -EINVAL. */
if ( end = = offset )
goto err ;
2007-10-14 00:38:15 -07:00
err = - ENOMEM ;
2005-04-16 15:20:36 -07:00
/* Strip the IP header, then trim the payload to end - offset bytes. */
if ( pskb_pull ( skb , ihl ) = = NULL )
goto err ;
2007-10-14 00:38:15 -07:00
err = pskb_trim_rcsum ( skb , end - offset ) ;
if ( err )
2005-04-16 15:20:36 -07:00
goto err ;
/* Find out which fragments are in front and at the back of us
 * in the chain of fragments so far. We must know where to put
 * this fragment, right?
 */
prev = NULL ;
2007-10-15 02:24:19 -07:00
for ( next = qp - > q . fragments ; next ! = NULL ; next = next - > next ) {
2005-04-16 15:20:36 -07:00
if ( FRAG_CB ( next ) - > offset > = offset )
break ; /* bingo! */
prev = next ;
}
/* We found where to put this one. Check for overlap with
 * preceding fragment, and, if needed, align things so that
 * any overlaps are eliminated.
 */
if ( prev ) {
int i = ( FRAG_CB ( prev ) - > offset + prev - > len ) - offset ;
if ( i > 0 ) {
offset + = i ;
2007-10-14 00:38:15 -07:00
err = - EINVAL ;
2005-04-16 15:20:36 -07:00
if ( end < = offset )
goto err ;
2007-10-14 00:38:15 -07:00
err = - ENOMEM ;
2005-04-16 15:20:36 -07:00
if ( ! pskb_pull ( skb , i ) )
goto err ;
if ( skb - > ip_summed ! = CHECKSUM_UNNECESSARY )
skb - > ip_summed = CHECKSUM_NONE ;
}
}
2007-10-14 00:38:15 -07:00
err = - ENOMEM ;
2005-04-16 15:20:36 -07:00
/* Now resolve overlap with the fragments that follow the new one. */
while ( next & & FRAG_CB ( next ) - > offset < end ) {
int i = end - FRAG_CB ( next ) - > offset ; /* overlap is 'i' bytes */
if ( i < next - > len ) {
/* Eat head of the next overlapped fragment
 * and leave the loop. The next ones cannot overlap.
 */
if ( ! pskb_pull ( next , i ) )
goto err ;
FRAG_CB ( next ) - > offset + = i ;
2007-10-15 02:24:19 -07:00
qp - > q . meat - = i ;
2005-04-16 15:20:36 -07:00
if ( next - > ip_summed ! = CHECKSUM_UNNECESSARY )
next - > ip_summed = CHECKSUM_NONE ;
break ;
} else {
struct sk_buff * free_it = next ;
2006-12-12 19:48:59 +01:00
/* Old fragment is completely overridden with
 * new one drop it.
 */
next = next - > next ;
if ( prev )
prev - > next = next ;
else
2007-10-15 02:24:19 -07:00
qp - > q . fragments = next ;
2005-04-16 15:20:36 -07:00
2007-10-15 02:24:19 -07:00
qp - > q . meat - = free_it - > len ;
2008-01-22 06:07:25 -08:00
frag_kfree_skb ( qp - > q . net , free_it , NULL ) ;
2005-04-16 15:20:36 -07:00
}
}
FRAG_CB ( skb ) - > offset = offset ;
/* Insert this fragment in the chain of fragments. */
skb - > next = next ;
if ( prev )
prev - > next = skb ;
else
2007-10-15 02:24:19 -07:00
qp - > q . fragments = skb ;
2005-04-16 15:20:36 -07:00
/* Remember the input interface by index and clear the device pointer
 * on the queued skb.
 */
2007-10-14 00:38:15 -07:00
dev = skb - > dev ;
if ( dev ) {
qp - > iif = dev - > ifindex ;
skb - > dev = NULL ;
}
/* Account the new fragment: timestamp, payload bytes and memory. */
2007-10-15 02:24:19 -07:00
qp - > q . stamp = skb - > tstamp ;
qp - > q . meat + = skb - > len ;
2008-01-22 06:07:25 -08:00
atomic_add ( skb - > truesize , & qp - > q . net - > mem ) ;
2005-04-16 15:20:36 -07:00
if ( offset = = 0 )
2008-03-28 16:35:27 -07:00
qp - > q . last_in | = INET_FRAG_FIRST_IN ;
2005-04-16 15:20:36 -07:00
/* First and last fragments seen and every byte present: reassemble. */
2008-03-28 16:35:27 -07:00
if ( qp - > q . last_in = = ( INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN ) & &
qp - > q . meat = = qp - > q . len )
2007-10-14 00:38:15 -07:00
return ip_frag_reasm ( qp , prev , dev ) ;
/* Still incomplete: move the queue to the tail of the namespace LRU list. */
2007-10-15 02:31:52 -07:00
write_lock ( & ip4_frags . lock ) ;
2008-01-22 06:11:48 -08:00
list_move_tail ( & qp - > q . lru_list , & qp - > q . net - > lru_list ) ;
2007-10-15 02:31:52 -07:00
write_unlock ( & ip4_frags . lock ) ;
2007-10-14 00:38:15 -07:00
return - EINPROGRESS ;
2005-04-16 15:20:36 -07:00
err :
kfree_skb ( skb ) ;
2007-10-14 00:38:15 -07:00
return err ;
2005-04-16 15:20:36 -07:00
}
/* Build a new IP datagram from all its fragments. */
2007-10-14 00:38:15 -07:00
static int ip_frag_reasm ( struct ipq * qp , struct sk_buff * prev ,
struct net_device * dev )
2005-04-16 15:20:36 -07:00
{
2009-03-18 23:26:11 -07:00
struct net * net = container_of ( qp - > q . net , struct net , ipv4 . frags ) ;
2005-04-16 15:20:36 -07:00
struct iphdr * iph ;
2007-10-15 02:24:19 -07:00
struct sk_buff * fp , * head = qp - > q . fragments ;
2005-04-16 15:20:36 -07:00
int len ;
int ihlen ;
2007-10-14 00:38:15 -07:00
int err ;
2005-04-16 15:20:36 -07:00
ipq_kill ( qp ) ;
2007-10-14 00:38:15 -07:00
/* Make the one we just received the head. */
if ( prev ) {
head = prev - > next ;
fp = skb_clone ( head , GFP_ATOMIC ) ;
if ( ! fp )
goto out_nomem ;
fp - > next = head - > next ;
prev - > next = fp ;
2007-10-15 02:24:19 -07:00
skb_morph ( head , qp - > q . fragments ) ;
head - > next = qp - > q . fragments - > next ;
2007-10-14 00:38:15 -07:00
2007-10-15 02:24:19 -07:00
kfree_skb ( qp - > q . fragments ) ;
qp - > q . fragments = head ;
2007-10-14 00:38:15 -07:00
}
2008-07-25 21:43:18 -07:00
WARN_ON ( head = = NULL ) ;
WARN_ON ( FRAG_CB ( head ) - > offset ! = 0 ) ;
2005-04-16 15:20:36 -07:00
/* Allocate a new buffer for the datagram. */
2007-03-12 20:09:15 -03:00
ihlen = ip_hdrlen ( head ) ;
2007-10-15 02:24:19 -07:00
len = ihlen + qp - > q . len ;
2005-04-16 15:20:36 -07:00
2007-10-14 00:38:15 -07:00
err = - E2BIG ;
2007-03-08 20:44:43 -08:00
if ( len > 65535 )
2005-04-16 15:20:36 -07:00
goto out_oversize ;
/* Head of list must not be cloned. */
if ( skb_cloned ( head ) & & pskb_expand_head ( head , 0 , 0 , GFP_ATOMIC ) )
goto out_nomem ;
/* If the first fragment is fragmented itself, we split
* it to two chunks : the first with data and paged part
* and the second , holding only fragments . */
2009-06-09 00:19:37 -07:00
if ( skb_has_frags ( head ) ) {
2005-04-16 15:20:36 -07:00
struct sk_buff * clone ;
int i , plen = 0 ;
if ( ( clone = alloc_skb ( 0 , GFP_ATOMIC ) ) = = NULL )
goto out_nomem ;
clone - > next = head - > next ;
head - > next = clone ;
skb_shinfo ( clone ) - > frag_list = skb_shinfo ( head ) - > frag_list ;
2009-06-09 00:19:37 -07:00
skb_frag_list_init ( head ) ;
2005-04-16 15:20:36 -07:00
for ( i = 0 ; i < skb_shinfo ( head ) - > nr_frags ; i + + )
plen + = skb_shinfo ( head ) - > frags [ i ] . size ;
clone - > len = clone - > data_len = head - > data_len - plen ;
head - > data_len - = clone - > len ;
head - > len - = clone - > len ;
clone - > csum = 0 ;
clone - > ip_summed = head - > ip_summed ;
2008-01-22 06:07:25 -08:00
atomic_add ( clone - > truesize , & qp - > q . net - > mem ) ;
2005-04-16 15:20:36 -07:00
}
skb_shinfo ( head ) - > frag_list = head - > next ;
2007-04-10 20:50:43 -07:00
skb_push ( head , head - > data - skb_network_header ( head ) ) ;
2008-01-22 06:07:25 -08:00
atomic_sub ( head - > truesize , & qp - > q . net - > mem ) ;
2005-04-16 15:20:36 -07:00
for ( fp = head - > next ; fp ; fp = fp - > next ) {
head - > data_len + = fp - > len ;
head - > len + = fp - > len ;
if ( head - > ip_summed ! = fp - > ip_summed )
head - > ip_summed = CHECKSUM_NONE ;
2006-08-29 16:44:56 -07:00
else if ( head - > ip_summed = = CHECKSUM_COMPLETE )
2005-04-16 15:20:36 -07:00
head - > csum = csum_add ( head - > csum , fp - > csum ) ;
head - > truesize + = fp - > truesize ;
2008-01-22 06:07:25 -08:00
atomic_sub ( fp - > truesize , & qp - > q . net - > mem ) ;
2005-04-16 15:20:36 -07:00
}
head - > next = NULL ;
head - > dev = dev ;
2007-10-15 02:24:19 -07:00
head - > tstamp = qp - > q . stamp ;
2005-04-16 15:20:36 -07:00
2007-04-20 22:47:35 -07:00
iph = ip_hdr ( head ) ;
2005-04-16 15:20:36 -07:00
iph - > frag_off = 0 ;
iph - > tot_len = htons ( len ) ;
2009-03-18 23:26:11 -07:00
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMOKS ) ;
2007-10-15 02:24:19 -07:00
qp - > q . fragments = NULL ;
2007-10-14 00:38:15 -07:00
return 0 ;
2005-04-16 15:20:36 -07:00
out_nomem :
2007-02-09 23:24:47 +09:00
LIMIT_NETDEBUG ( KERN_ERR " IP: queue_glue: no memory for gluing "
2005-08-09 20:50:53 -07:00
" queue %p \n " , qp ) ;
2007-10-17 21:37:22 -07:00
err = - ENOMEM ;
2005-04-16 15:20:36 -07:00
goto out_fail ;
out_oversize :
if ( net_ratelimit ( ) )
2008-10-31 00:53:57 -07:00
printk ( KERN_INFO " Oversized IP packet from %pI4. \n " ,
& qp - > saddr ) ;
2005-04-16 15:20:36 -07:00
out_fail :
2008-07-16 20:20:11 -07:00
IP_INC_STATS_BH ( dev_net ( dev ) , IPSTATS_MIB_REASMFAILS ) ;
2007-10-14 00:38:15 -07:00
return err ;
2005-04-16 15:20:36 -07:00
}
/* Process an incoming IP datagram fragment. */
2007-10-14 00:38:32 -07:00
int ip_defrag ( struct sk_buff * skb , u32 user )
2005-04-16 15:20:36 -07:00
{
struct ipq * qp ;
2008-01-22 06:02:14 -08:00
struct net * net ;
2007-02-09 23:24:47 +09:00
2009-06-02 05:19:30 +00:00
net = skb - > dev ? dev_net ( skb - > dev ) : dev_net ( skb_dst ( skb ) - > dev ) ;
2008-07-16 20:20:11 -07:00
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMREQDS ) ;
2005-04-16 15:20:36 -07:00
/* Start by cleaning up the memory. */
2008-01-22 06:10:13 -08:00
if ( atomic_read ( & net - > ipv4 . frags . mem ) > net - > ipv4 . frags . high_thresh )
2008-01-22 06:07:25 -08:00
ip_evictor ( net ) ;
2005-04-16 15:20:36 -07:00
/* Lookup (or create) queue header */
2008-01-22 06:02:14 -08:00
if ( ( qp = ip_find ( net , ip_hdr ( skb ) , user ) ) ! = NULL ) {
2007-10-14 00:38:15 -07:00
int ret ;
2005-04-16 15:20:36 -07:00
2007-10-15 02:24:19 -07:00
spin_lock ( & qp - > q . lock ) ;
2005-04-16 15:20:36 -07:00
2007-10-14 00:38:15 -07:00
ret = ip_frag_queue ( qp , skb ) ;
2005-04-16 15:20:36 -07:00
2007-10-15 02:24:19 -07:00
spin_unlock ( & qp - > q . lock ) ;
2007-10-15 02:41:09 -07:00
ipq_put ( qp ) ;
2007-10-14 00:38:32 -07:00
return ret ;
2005-04-16 15:20:36 -07:00
}
2008-07-16 20:20:11 -07:00
IP_INC_STATS_BH ( net , IPSTATS_MIB_REASMFAILS ) ;
2005-04-16 15:20:36 -07:00
kfree_skb ( skb ) ;
2007-10-14 00:38:32 -07:00
return - ENOMEM ;
2005-04-16 15:20:36 -07:00
}
2008-01-22 05:58:31 -08:00
# ifdef CONFIG_SYSCTL
/* Lower bound (extra1) for the ipfrag_max_dist sysctl. */
static int zero;
2008-05-19 13:51:29 -07:00
static struct ctl_table ip4_frags_ns_ctl_table [ ] = {
2008-01-22 05:58:31 -08:00
{
. ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH ,
. procname = " ipfrag_high_thresh " ,
2008-01-22 06:10:13 -08:00
. data = & init_net . ipv4 . frags . high_thresh ,
2008-01-22 05:58:31 -08:00
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
2008-11-03 18:21:05 -08:00
. proc_handler = proc_dointvec
2008-01-22 05:58:31 -08:00
} ,
{
. ctl_name = NET_IPV4_IPFRAG_LOW_THRESH ,
. procname = " ipfrag_low_thresh " ,
2008-01-22 06:10:13 -08:00
. data = & init_net . ipv4 . frags . low_thresh ,
2008-01-22 05:58:31 -08:00
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
2008-11-03 18:21:05 -08:00
. proc_handler = proc_dointvec
2008-01-22 05:58:31 -08:00
} ,
{
. ctl_name = NET_IPV4_IPFRAG_TIME ,
. procname = " ipfrag_time " ,
2008-01-22 06:09:37 -08:00
. data = & init_net . ipv4 . frags . timeout ,
2008-01-22 05:58:31 -08:00
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
2008-11-03 18:21:05 -08:00
. proc_handler = proc_dointvec_jiffies ,
. strategy = sysctl_jiffies
2008-01-22 05:58:31 -08:00
} ,
2008-05-19 13:53:02 -07:00
{ }
} ;
static struct ctl_table ip4_frags_ctl_table [ ] = {
2008-01-22 05:58:31 -08:00
{
. ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL ,
. procname = " ipfrag_secret_interval " ,
2008-01-22 06:11:04 -08:00
. data = & ip4_frags . secret_interval ,
2008-01-22 05:58:31 -08:00
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
2008-11-03 18:21:05 -08:00
. proc_handler = proc_dointvec_jiffies ,
. strategy = sysctl_jiffies
2008-01-22 05:58:31 -08:00
} ,
{
. procname = " ipfrag_max_dist " ,
. data = & sysctl_ipfrag_max_dist ,
. maxlen = sizeof ( int ) ,
. mode = 0644 ,
2008-11-03 18:21:05 -08:00
. proc_handler = proc_dointvec_minmax ,
2008-01-22 05:58:31 -08:00
. extra1 = & zero
} ,
{ }
} ;
2008-05-19 13:51:29 -07:00
static int ip4_frags_ns_ctl_register ( struct net * net )
2008-01-22 05:58:31 -08:00
{
2008-01-22 06:08:36 -08:00
struct ctl_table * table ;
2008-01-22 05:58:31 -08:00
struct ctl_table_header * hdr ;
2008-05-19 13:51:29 -07:00
table = ip4_frags_ns_ctl_table ;
2008-01-22 06:08:36 -08:00
if ( net ! = & init_net ) {
2008-05-19 13:51:29 -07:00
table = kmemdup ( table , sizeof ( ip4_frags_ns_ctl_table ) , GFP_KERNEL ) ;
2008-01-22 06:08:36 -08:00
if ( table = = NULL )
goto err_alloc ;
2008-01-22 06:10:13 -08:00
table [ 0 ] . data = & net - > ipv4 . frags . high_thresh ;
table [ 1 ] . data = & net - > ipv4 . frags . low_thresh ;
2008-01-22 06:09:37 -08:00
table [ 2 ] . data = & net - > ipv4 . frags . timeout ;
2008-01-22 06:08:36 -08:00
}
hdr = register_net_sysctl_table ( net , net_ipv4_ctl_path , table ) ;
if ( hdr = = NULL )
goto err_reg ;
net - > ipv4 . frags_hdr = hdr ;
return 0 ;
err_reg :
if ( net ! = & init_net )
kfree ( table ) ;
err_alloc :
return - ENOMEM ;
}
2008-05-19 13:51:29 -07:00
static void ip4_frags_ns_ctl_unregister ( struct net * net )
2008-01-22 06:08:36 -08:00
{
struct ctl_table * table ;
table = net - > ipv4 . frags_hdr - > ctl_table_arg ;
unregister_net_sysctl_table ( net - > ipv4 . frags_hdr ) ;
kfree ( table ) ;
2008-01-22 05:58:31 -08:00
}
2008-05-19 13:53:02 -07:00
/*
 * Register the fragment sysctls that are not per-namespace, via
 * register_net_sysctl_rotable().  The return value is not checked.
 */
static void ip4_frags_ctl_register(void)
{
	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
}
2008-01-22 05:58:31 -08:00
# else
2008-05-19 13:51:29 -07:00
/* !CONFIG_SYSCTL stub: nothing to register, always succeeds. */
static inline int ip4_frags_ns_ctl_register(struct net *net)
{
	return 0;
}
2008-01-22 06:08:36 -08:00
2008-05-19 13:51:29 -07:00
/* !CONFIG_SYSCTL stub: nothing was registered, so nothing to undo. */
static inline void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
2008-05-19 13:53:02 -07:00
/* !CONFIG_SYSCTL stub: no sysctls to register. */
static inline void ip4_frags_ctl_register(void)
{
}
2008-01-22 05:58:31 -08:00
# endif
static int ipv4_frags_init_net ( struct net * net )
{
2008-01-22 06:10:13 -08:00
/*
* Fragment cache limits . We will commit 256 K at one time . Should we
* cross that limit we will prune down to 192 K . This should cope with
* even the most extreme cases without allowing an attacker to
* measurably harm machine performance .
*/
net - > ipv4 . frags . high_thresh = 256 * 1024 ;
net - > ipv4 . frags . low_thresh = 192 * 1024 ;
2008-01-22 06:09:37 -08:00
/*
* Important NOTE ! Fragment queue must be destroyed before MSL expires .
* RFC791 is wrong proposing to prolongate timer each fragment arrival
* by TTL .
*/
net - > ipv4 . frags . timeout = IP_FRAG_TIME ;
2008-01-22 06:06:23 -08:00
inet_frags_init_net ( & net - > ipv4 . frags ) ;
2008-05-19 13:51:29 -07:00
return ip4_frags_ns_ctl_register ( net ) ;
2008-01-22 05:58:31 -08:00
}
2008-01-22 06:12:39 -08:00
static void ipv4_frags_exit_net ( struct net * net )
{
2008-05-19 13:51:29 -07:00
ip4_frags_ns_ctl_unregister ( net ) ;
2008-01-22 06:12:39 -08:00
inet_frags_exit_net ( & net - > ipv4 . frags , & ip4_frags ) ;
}
/* Namespace lifecycle hooks for the IPv4 fragment state. */
static struct pernet_operations ip4_frags_ops = {
	.exit	= ipv4_frags_exit_net,
	.init	= ipv4_frags_init_net,
};
2007-04-19 16:16:32 -07:00
/*
 * Boot-time setup of IPv4 defragmentation: register the sysctls that are
 * not per-namespace and the per-namespace operations, fill in the
 * ip4_frags callbacks and parameters, then initialise the generic
 * fragment machinery.
 */
void __init ipfrag_init(void)
{
	ip4_frags_ctl_register();
	register_pernet_subsys(&ip4_frags_ops);

	/* Queue geometry and hash rekeying interval. */
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.secret_interval = 10 * 60 * HZ;

	/* Callbacks used by the generic inet_frag code. */
	ip4_frags.hashfn = ip4_hashfn;
	ip4_frags.match = ip4_frag_match;
	ip4_frags.constructor = ip4_frag_init;
	ip4_frags.destructor = ip4_frag_free;
	ip4_frags.skb_free = NULL;
	ip4_frags.frag_expire = ip_expire;

	inet_frags_init(&ip4_frags);
}
/* ip_defrag() is exported for use by modules. */
EXPORT_SYMBOL(ip_defrag);