2007-08-09 09:38:05 +04:00
/*
 *  linux/net/ipv4/inet_lro.c
 *
 *  Large Receive Offload (ipv4 / tcp)
 *
 *  (C) Copyright IBM Corp. 2007
 *
 *  Authors:
 *       Jan-Bernd Themann <themann@de.ibm.com>
 *       Christoph Raisch <raisch@de.ibm.com>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
# include <linux/module.h>
# include <linux/if_vlan.h>
# include <linux/inet_lro.h>
/*
 * Module metadata.  The strings carry no leading or trailing blanks; the
 * license string in particular has to be the exact identifier "GPL".
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
/*
 * Header-length helpers.  The doff and ihl fields are multiplied by four
 * to obtain a length in bytes.  Every macro argument is parenthesized so
 * that any pointer expression (e.g. "hdr + 1") can be passed safely.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)

/* IP total length minus the IP and TCP header lengths, in bytes. */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Values of iph->ihl / tcph->doff accepted by lro_tcp_ip_check(). */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default header length used when a fragment's headers were not parsed. */
#define LRO_MAX_PG_HLEN 64

/*
 * Increment one counter in lro_mgr->stats.  Wrapped in do { } while (0) so
 * the macro behaves like a single statement, also in an unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) do { (lro_mgr)->stats.attr++; } while (0)
/*
* Basic tcp checks whether packet is suitable for LRO
*/
2011-04-22 08:53:02 +04:00
static int lro_tcp_ip_check ( const struct iphdr * iph , const struct tcphdr * tcph ,
int len , const struct net_lro_desc * lro_desc )
2007-08-09 09:38:05 +04:00
{
/* check ip header: don't aggregate padded frames */
if ( ntohs ( iph - > tot_len ) ! = len )
return - 1 ;
if ( TCP_PAYLOAD_LENGTH ( iph , tcph ) = = 0 )
return - 1 ;
if ( iph - > ihl ! = IPH_LEN_WO_OPTIONS )
return - 1 ;
2009-11-23 21:41:23 +03:00
if ( tcph - > cwr | | tcph - > ece | | tcph - > urg | | ! tcph - > ack | |
tcph - > rst | | tcph - > syn | | tcph - > fin )
2007-08-09 09:38:05 +04:00
return - 1 ;
if ( INET_ECN_is_ce ( ipv4_get_dsfield ( iph ) ) )
return - 1 ;
2009-11-23 21:41:23 +03:00
if ( tcph - > doff ! = TCPH_LEN_WO_OPTIONS & &
tcph - > doff ! = TCPH_LEN_W_TIMESTAMP )
2007-08-09 09:38:05 +04:00
return - 1 ;
/* check tcp options (only timestamp allowed) */
if ( tcph - > doff = = TCPH_LEN_W_TIMESTAMP ) {
2007-10-14 22:40:59 +04:00
__be32 * topt = ( __be32 * ) ( tcph + 1 ) ;
2007-08-09 09:38:05 +04:00
if ( * topt ! = htonl ( ( TCPOPT_NOP < < 24 ) | ( TCPOPT_NOP < < 16 )
| ( TCPOPT_TIMESTAMP < < 8 )
| TCPOLEN_TIMESTAMP ) )
return - 1 ;
/* timestamp should be in right order */
topt + + ;
if ( lro_desc & & after ( ntohl ( lro_desc - > tcp_rcv_tsval ) ,
ntohl ( * topt ) ) )
return - 1 ;
/* timestamp reply should not be zero */
topt + + ;
if ( * topt = = 0 )
return - 1 ;
}
return 0 ;
}
static void lro_update_tcp_ip_header ( struct net_lro_desc * lro_desc )
{
struct iphdr * iph = lro_desc - > iph ;
struct tcphdr * tcph = lro_desc - > tcph ;
2007-10-14 22:40:59 +04:00
__be32 * p ;
2007-08-09 09:38:05 +04:00
__wsum tcp_hdr_csum ;
tcph - > ack_seq = lro_desc - > tcp_ack ;
tcph - > window = lro_desc - > tcp_window ;
if ( lro_desc - > tcp_saw_tstamp ) {
2007-10-14 22:40:59 +04:00
p = ( __be32 * ) ( tcph + 1 ) ;
2007-08-09 09:38:05 +04:00
* ( p + 2 ) = lro_desc - > tcp_rcv_tsecr ;
}
iph - > tot_len = htons ( lro_desc - > ip_tot_len ) ;
iph - > check = 0 ;
iph - > check = ip_fast_csum ( ( u8 * ) lro_desc - > iph , iph - > ihl ) ;
tcph - > check = 0 ;
2008-11-20 02:44:53 +03:00
tcp_hdr_csum = csum_partial ( tcph , TCP_HDR_LEN ( tcph ) , 0 ) ;
2007-08-09 09:38:05 +04:00
lro_desc - > data_csum = csum_add ( lro_desc - > data_csum , tcp_hdr_csum ) ;
tcph - > check = csum_tcpudp_magic ( iph - > saddr , iph - > daddr ,
lro_desc - > ip_tot_len -
IP_HDR_LEN ( iph ) , IPPROTO_TCP ,
lro_desc - > data_csum ) ;
}
static __wsum lro_tcp_data_csum ( struct iphdr * iph , struct tcphdr * tcph , int len )
{
__wsum tcp_csum ;
__wsum tcp_hdr_csum ;
__wsum tcp_ps_hdr_csum ;
tcp_csum = ~ csum_unfold ( tcph - > check ) ;
2008-11-20 02:44:53 +03:00
tcp_hdr_csum = csum_partial ( tcph , TCP_HDR_LEN ( tcph ) , tcp_csum ) ;
2007-08-09 09:38:05 +04:00
tcp_ps_hdr_csum = csum_tcpudp_nofold ( iph - > saddr , iph - > daddr ,
len + TCP_HDR_LEN ( tcph ) ,
IPPROTO_TCP , 0 ) ;
return csum_sub ( csum_sub ( tcp_csum , tcp_hdr_csum ) ,
tcp_ps_hdr_csum ) ;
}
static void lro_init_desc ( struct net_lro_desc * lro_desc , struct sk_buff * skb ,
2011-07-20 08:54:10 +04:00
struct iphdr * iph , struct tcphdr * tcph )
2007-08-09 09:38:05 +04:00
{
int nr_frags ;
2007-10-14 22:40:59 +04:00
__be32 * ptr ;
2007-08-09 09:38:05 +04:00
u32 tcp_data_len = TCP_PAYLOAD_LENGTH ( iph , tcph ) ;
nr_frags = skb_shinfo ( skb ) - > nr_frags ;
lro_desc - > parent = skb ;
lro_desc - > next_frag = & ( skb_shinfo ( skb ) - > frags [ nr_frags ] ) ;
lro_desc - > iph = iph ;
lro_desc - > tcph = tcph ;
lro_desc - > tcp_next_seq = ntohl ( tcph - > seq ) + tcp_data_len ;
2007-10-14 22:41:09 +04:00
lro_desc - > tcp_ack = tcph - > ack_seq ;
2007-08-09 09:38:05 +04:00
lro_desc - > tcp_window = tcph - > window ;
lro_desc - > pkt_aggr_cnt = 1 ;
lro_desc - > ip_tot_len = ntohs ( iph - > tot_len ) ;
if ( tcph - > doff = = 8 ) {
2007-10-14 22:40:59 +04:00
ptr = ( __be32 * ) ( tcph + 1 ) ;
2007-08-09 09:38:05 +04:00
lro_desc - > tcp_saw_tstamp = 1 ;
lro_desc - > tcp_rcv_tsval = * ( ptr + 1 ) ;
lro_desc - > tcp_rcv_tsecr = * ( ptr + 2 ) ;
}
lro_desc - > mss = tcp_data_len ;
lro_desc - > active = 1 ;
lro_desc - > data_csum = lro_tcp_data_csum ( iph , tcph ,
tcp_data_len ) ;
}
/* Reset every field of @lro_desc to zero, which also marks it inactive. */
static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
{
	memset(lro_desc, 0, sizeof(*lro_desc));
}
static void lro_add_common ( struct net_lro_desc * lro_desc , struct iphdr * iph ,
struct tcphdr * tcph , int tcp_data_len )
{
struct sk_buff * parent = lro_desc - > parent ;
2007-10-14 22:40:59 +04:00
__be32 * topt ;
2007-08-09 09:38:05 +04:00
lro_desc - > pkt_aggr_cnt + + ;
lro_desc - > ip_tot_len + = tcp_data_len ;
lro_desc - > tcp_next_seq + = tcp_data_len ;
lro_desc - > tcp_window = tcph - > window ;
lro_desc - > tcp_ack = tcph - > ack_seq ;
/* don't update tcp_rcv_tsval, would not work with PAWS */
if ( lro_desc - > tcp_saw_tstamp ) {
2007-10-14 22:40:59 +04:00
topt = ( __be32 * ) ( tcph + 1 ) ;
2007-08-09 09:38:05 +04:00
lro_desc - > tcp_rcv_tsecr = * ( topt + 2 ) ;
}
lro_desc - > data_csum = csum_block_add ( lro_desc - > data_csum ,
lro_tcp_data_csum ( iph , tcph ,
tcp_data_len ) ,
parent - > len ) ;
parent - > len + = tcp_data_len ;
parent - > data_len + = tcp_data_len ;
if ( tcp_data_len > lro_desc - > mss )
lro_desc - > mss = tcp_data_len ;
}
static void lro_add_packet ( struct net_lro_desc * lro_desc , struct sk_buff * skb ,
struct iphdr * iph , struct tcphdr * tcph )
{
struct sk_buff * parent = lro_desc - > parent ;
int tcp_data_len = TCP_PAYLOAD_LENGTH ( iph , tcph ) ;
lro_add_common ( lro_desc , iph , tcph , tcp_data_len ) ;
skb_pull ( skb , ( skb - > len - tcp_data_len ) ) ;
parent - > truesize + = skb - > truesize ;
if ( lro_desc - > last_skb )
lro_desc - > last_skb - > next = skb ;
else
skb_shinfo ( parent ) - > frag_list = skb ;
lro_desc - > last_skb = skb ;
}
static void lro_add_frags ( struct net_lro_desc * lro_desc ,
int len , int hlen , int truesize ,
struct skb_frag_struct * skb_frags ,
struct iphdr * iph , struct tcphdr * tcph )
{
struct sk_buff * skb = lro_desc - > parent ;
int tcp_data_len = TCP_PAYLOAD_LENGTH ( iph , tcph ) ;
lro_add_common ( lro_desc , iph , tcph , tcp_data_len ) ;
skb - > truesize + = truesize ;
skb_frags [ 0 ] . page_offset + = hlen ;
2011-10-19 01:00:24 +04:00
skb_frag_size_sub ( & skb_frags [ 0 ] , hlen ) ;
2007-08-09 09:38:05 +04:00
while ( tcp_data_len > 0 ) {
* ( lro_desc - > next_frag ) = * skb_frags ;
2011-10-19 01:00:24 +04:00
tcp_data_len - = skb_frag_size ( skb_frags ) ;
2007-08-09 09:38:05 +04:00
lro_desc - > next_frag + + ;
skb_frags + + ;
skb_shinfo ( skb ) - > nr_frags + + ;
}
}
static int lro_check_tcp_conn ( struct net_lro_desc * lro_desc ,
struct iphdr * iph ,
struct tcphdr * tcph )
{
2009-11-23 21:41:23 +03:00
if ( ( lro_desc - > iph - > saddr ! = iph - > saddr ) | |
( lro_desc - > iph - > daddr ! = iph - > daddr ) | |
( lro_desc - > tcph - > source ! = tcph - > source ) | |
( lro_desc - > tcph - > dest ! = tcph - > dest ) )
2007-08-09 09:38:05 +04:00
return - 1 ;
return 0 ;
}
static struct net_lro_desc * lro_get_desc ( struct net_lro_mgr * lro_mgr ,
struct net_lro_desc * lro_arr ,
struct iphdr * iph ,
struct tcphdr * tcph )
{
struct net_lro_desc * lro_desc = NULL ;
struct net_lro_desc * tmp ;
int max_desc = lro_mgr - > max_desc ;
int i ;
for ( i = 0 ; i < max_desc ; i + + ) {
tmp = & lro_arr [ i ] ;
if ( tmp - > active )
if ( ! lro_check_tcp_conn ( tmp , iph , tcph ) ) {
lro_desc = tmp ;
goto out ;
}
}
for ( i = 0 ; i < max_desc ; i + + ) {
if ( ! lro_arr [ i ] . active ) {
lro_desc = & lro_arr [ i ] ;
goto out ;
}
}
LRO_INC_STATS ( lro_mgr , no_desc ) ;
out :
return lro_desc ;
}
static void lro_flush ( struct net_lro_mgr * lro_mgr ,
struct net_lro_desc * lro_desc )
{
if ( lro_desc - > pkt_aggr_cnt > 1 )
lro_update_tcp_ip_header ( lro_desc ) ;
skb_shinfo ( lro_desc - > parent ) - > gso_size = lro_desc - > mss ;
2011-07-20 08:54:10 +04:00
if ( lro_mgr - > features & LRO_F_NAPI )
netif_receive_skb ( lro_desc - > parent ) ;
else
netif_rx ( lro_desc - > parent ) ;
2007-08-09 09:38:05 +04:00
LRO_INC_STATS ( lro_mgr , flushed ) ;
lro_clear_desc ( lro_desc ) ;
}
static int __lro_proc_skb ( struct net_lro_mgr * lro_mgr , struct sk_buff * skb ,
2011-07-20 08:54:10 +04:00
void * priv )
2007-08-09 09:38:05 +04:00
{
struct net_lro_desc * lro_desc ;
struct iphdr * iph ;
struct tcphdr * tcph ;
u64 flags ;
int vlan_hdr_len = 0 ;
2009-11-23 21:41:23 +03:00
if ( ! lro_mgr - > get_skb_header | |
lro_mgr - > get_skb_header ( skb , ( void * ) & iph , ( void * ) & tcph ,
& flags , priv ) )
2007-08-09 09:38:05 +04:00
goto out ;
if ( ! ( flags & LRO_IPV4 ) | | ! ( flags & LRO_TCP ) )
goto out ;
lro_desc = lro_get_desc ( lro_mgr , lro_mgr - > lro_arr , iph , tcph ) ;
if ( ! lro_desc )
goto out ;
2009-11-23 21:41:23 +03:00
if ( ( skb - > protocol = = htons ( ETH_P_8021Q ) ) & &
! ( lro_mgr - > features & LRO_F_EXTRACT_VLAN_ID ) )
2007-08-09 09:38:05 +04:00
vlan_hdr_len = VLAN_HLEN ;
if ( ! lro_desc - > active ) { /* start new lro session */
if ( lro_tcp_ip_check ( iph , tcph , skb - > len - vlan_hdr_len , NULL ) )
goto out ;
skb - > ip_summed = lro_mgr - > ip_summed_aggr ;
2011-07-20 08:54:10 +04:00
lro_init_desc ( lro_desc , skb , iph , tcph ) ;
2007-08-09 09:38:05 +04:00
LRO_INC_STATS ( lro_mgr , aggregated ) ;
return 0 ;
}
if ( lro_desc - > tcp_next_seq ! = ntohl ( tcph - > seq ) )
goto out2 ;
if ( lro_tcp_ip_check ( iph , tcph , skb - > len , lro_desc ) )
goto out2 ;
lro_add_packet ( lro_desc , skb , iph , tcph ) ;
LRO_INC_STATS ( lro_mgr , aggregated ) ;
if ( ( lro_desc - > pkt_aggr_cnt > = lro_mgr - > max_aggr ) | |
lro_desc - > parent - > len > ( 0xFFFF - lro_mgr - > dev - > mtu ) )
lro_flush ( lro_mgr , lro_desc ) ;
return 0 ;
out2 : /* send aggregated SKBs to stack */
lro_flush ( lro_mgr , lro_desc ) ;
2008-06-28 07:09:00 +04:00
out :
2007-08-09 09:38:05 +04:00
return 1 ;
}
/*
 * Build an skb around the page fragments of one received packet.
 *
 * The first min(@len, @hlen) bytes starting at @mac_hdr are copied into the
 * linear area; the fragment descriptors are attached to the skb, with the
 * first one adjusted to skip the copied bytes.
 *
 * @len:       total packet length
 * @true_size: amount added to the skb's truesize
 * @sum:       value stored in skb->csum
 * @ip_summed: value stored in skb->ip_summed
 *
 * Returns the new skb, or NULL if the allocation failed.
 */
static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
				   struct skb_frag_struct *frags,
				   int len, int true_size,
				   void *mac_hdr,
				   int hlen, __wsum sum,
				   u32 ip_summed)
{
	struct skb_frag_struct *dst;
	struct sk_buff *skb;
	int hdr_len = min(len, hlen);
	int remaining;

	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
	if (!skb)
		return NULL;

	skb_reserve(skb, lro_mgr->frag_align_pad);

	/* linear part: the copied header bytes */
	memcpy(skb->data, mac_hdr, hdr_len);
	skb->tail += hdr_len;

	skb->len = len;
	skb->data_len = len - hdr_len;
	skb->truesize += true_size;

	/* attach fragment descriptors until @len bytes are covered */
	dst = skb_shinfo(skb)->frags;
	for (remaining = len; remaining > 0; frags++, dst++) {
		*dst = *frags;
		remaining -= skb_frag_size(frags);
		skb_shinfo(skb)->nr_frags++;
	}

	/* the copied header bytes are no longer part of the first fragment */
	skb_shinfo(skb)->frags[0].page_offset += hdr_len;
	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);

	skb->ip_summed = ip_summed;
	skb->csum = sum;
	skb->protocol = eth_type_trans(skb, lro_mgr->dev);

	return skb;
}
/*
 * Try to aggregate a packet that was received as page fragments.
 *
 * @frags:     fragments holding the packet
 * @len:       packet length
 * @true_size: truesize to account for the packet
 * @priv:      opaque pointer passed through to get_frag_header()
 * @sum:       checksum stored in the skb if the packet is not aggregated
 *
 * Returns NULL when the packet was absorbed by an LRO session (or when no
 * skb could be allocated for a new session); otherwise the result of
 * lro_gen_skb() for the packet, which the caller passes to the stack.
 */
static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
					  struct skb_frag_struct *frags,
					  int len, int true_size,
					  void *priv, __wsum sum)
{
	struct net_lro_desc *lro_desc;
	struct iphdr *iph;
	struct tcphdr *tcph;
	struct sk_buff *skb;
	u64 flags;
	void *mac_hdr;
	int mac_hdr_len;
	int hdr_len = LRO_MAX_PG_HLEN;
	int vlan_hdr_len = 0;

	if (!lro_mgr->get_frag_header ||
	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
				     (void *)&tcph, &flags, priv)) {
		/* headers unknown: fall back to the start of the fragment */
		mac_hdr = skb_frag_address(frags);
		goto pass_through;
	}

	/* only TCP over IPv4 is aggregated */
	if (!((flags & LRO_IPV4) && (flags & LRO_TCP)))
		goto pass_through;

	/* bytes from mac_hdr to the end of the TCP header / to the IP header */
	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
	mac_hdr_len = (int)((void *)(iph) - mac_hdr);

	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
	if (!lro_desc)
		goto pass_through;

	if (!lro_desc->active) {
		/* start new lro session */
		if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
			goto pass_through;

		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
				  hdr_len, 0, lro_mgr->ip_summed_aggr);
		if (!skb)
			return NULL;

		if ((skb->protocol == htons(ETH_P_8021Q)) &&
		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
			vlan_hdr_len = VLAN_HLEN;

		/* from here on use the header copies inside the new skb */
		iph = (void *)(skb->data + vlan_hdr_len);
		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
				+ IP_HDR_LEN(iph));

		lro_init_desc(lro_desc, skb, iph, tcph);
		LRO_INC_STATS(lro_mgr, aggregated);
		return NULL;
	}

	/* must be the next in-order segment and pass the basic checks */
	if (lro_desc->tcp_next_seq != ntohl(tcph->seq) ||
	    lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
		goto flush_and_pass;

	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
	LRO_INC_STATS(lro_mgr, aggregated);

	/* flush when the fragment count or the aggregated length hits its limit */
	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
		lro_flush(lro_mgr, lro_desc);

	return NULL;

flush_and_pass: /* send aggregated packets to the stack */
	lro_flush(lro_mgr, lro_desc);

pass_through: /* Original packet has to be posted to the stack */
	return lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
			   hdr_len, sum, lro_mgr->ip_summed);
}
void lro_receive_skb ( struct net_lro_mgr * lro_mgr ,
struct sk_buff * skb ,
void * priv )
{
2011-07-20 08:54:10 +04:00
if ( __lro_proc_skb ( lro_mgr , skb , priv ) ) {
2008-01-08 09:09:08 +03:00
if ( lro_mgr - > features & LRO_F_NAPI )
2007-08-09 09:38:05 +04:00
netif_receive_skb ( skb ) ;
else
netif_rx ( skb ) ;
}
}
EXPORT_SYMBOL ( lro_receive_skb ) ;
/*
 * Entry point for packets received as page fragments: offer the packet to
 * LRO and deliver the skb that comes back, if any, to the stack.
 */
void lro_receive_frags(struct net_lro_mgr *lro_mgr,
		       struct skb_frag_struct *frags,
		       int len, int true_size, void *priv, __wsum sum)
{
	struct sk_buff *pending;

	pending = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
	if (pending) {
		if (lro_mgr->features & LRO_F_NAPI)
			netif_receive_skb(pending);
		else
			netif_rx(pending);
	}
}
EXPORT_SYMBOL(lro_receive_frags);
void lro_flush_all ( struct net_lro_mgr * lro_mgr )
{
int i ;
struct net_lro_desc * lro_desc = lro_mgr - > lro_arr ;
for ( i = 0 ; i < lro_mgr - > max_desc ; i + + ) {
if ( lro_desc [ i ] . active )
lro_flush ( lro_mgr , & lro_desc [ i ] ) ;
}
}
EXPORT_SYMBOL ( lro_flush_all ) ;
void lro_flush_pkt ( struct net_lro_mgr * lro_mgr ,
struct iphdr * iph , struct tcphdr * tcph )
{
struct net_lro_desc * lro_desc ;
lro_desc = lro_get_desc ( lro_mgr , lro_mgr - > lro_arr , iph , tcph ) ;
if ( lro_desc - > active )
lro_flush ( lro_mgr , lro_desc ) ;
}
EXPORT_SYMBOL ( lro_flush_pkt ) ;