2019-05-27 09:55:01 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2012-11-15 12:49:16 +04:00
/*
* IPV6 GSO / GRO offload support
* Linux INET6 implementation
*/
# include <linux/kernel.h>
# include <linux/socket.h>
# include <linux/netdevice.h>
# include <linux/skbuff.h>
2012-11-15 12:49:22 +04:00
# include <linux/printk.h>
2012-11-15 12:49:16 +04:00
# include <net/protocol.h>
# include <net/ipv6.h>
2016-05-18 19:06:23 +03:00
# include <net/inet_common.h>
2020-06-24 01:31:14 +03:00
# include <net/tcp.h>
2020-06-24 01:31:15 +03:00
# include <net/udp.h>
2021-02-03 16:51:11 +03:00
# include <net/gro.h>
2012-11-15 12:49:16 +04:00
# include "ip6_offload.h"
2018-12-14 13:51:59 +03:00
/* All GRO functions are always built in, except UDP over IPv6, which lives in
 * the ipv6 module because it depends on the UDPv6 lookup function. Special
 * care is therefore needed when ipv6 is built as a module.
 *
 * INDIRECT_CALL_L4(f, f2, f1, ...) dispatches the call to f:
 *  - with ipv6 built in, both f2 and f1 are passed to INDIRECT_CALL_2 as
 *    candidate targets;
 *  - otherwise only f2 is passed to INDIRECT_CALL_1 and f1 is dropped
 *    (presumably f1 is the handler that lives in the module -- confirm
 *    against the call sites).
 */
# if IS_BUILTIN(CONFIG_IPV6)
# define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
# else
# define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
# endif
/* Invoke the L4 gro_receive callback cb(head, skb) through INDIRECT_CALL_L4.
 * If gro_recursion_inc_test(skb) reports true, the callback is not invoked:
 * the flush bit in NAPI_GRO_CB(skb) is set and the expression evaluates to
 * NULL instead.
 */
# define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \
( { \
unlikely ( gro_recursion_inc_test ( skb ) ) ? \
NAPI_GRO_CB ( skb ) - > flush | = 1 , NULL : \
INDIRECT_CALL_L4 ( cb , f2 , f1 , head , skb ) ; \
} )
2012-11-15 12:49:16 +04:00
static int ipv6_gso_pull_exthdrs ( struct sk_buff * skb , int proto )
{
const struct net_offload * ops = NULL ;
for ( ; ; ) {
struct ipv6_opt_hdr * opth ;
int len ;
if ( proto ! = NEXTHDR_HOP ) {
ops = rcu_dereference ( inet6_offloads [ proto ] ) ;
if ( unlikely ( ! ops ) )
break ;
if ( ! ( ops - > flags & INET6_PROTO_GSO_EXTHDR ) )
break ;
}
if ( unlikely ( ! pskb_may_pull ( skb , 8 ) ) )
break ;
opth = ( void * ) skb - > data ;
len = ipv6_optlen ( opth ) ;
if ( unlikely ( ! pskb_may_pull ( skb , len ) ) )
break ;
2014-10-18 13:27:42 +04:00
opth = ( void * ) skb - > data ;
2012-11-15 12:49:16 +04:00
proto = opth - > nexthdr ;
__skb_pull ( skb , len ) ;
}
return proto ;
}
static struct sk_buff * ipv6_gso_segment ( struct sk_buff * skb ,
netdev_features_t features )
{
struct sk_buff * segs = ERR_PTR ( - EINVAL ) ;
struct ipv6hdr * ipv6h ;
const struct net_offload * ops ;
2022-05-13 21:34:01 +03:00
int proto , nexthdr ;
2012-11-15 12:49:16 +04:00
struct frag_hdr * fptr ;
2016-04-11 04:45:03 +03:00
unsigned int payload_len ;
2012-11-15 12:49:16 +04:00
u8 * prevhdr ;
int offset = 0 ;
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
bool encap , udpfrag ;
2013-10-21 07:47:29 +04:00
int nhoff ;
2016-09-19 13:58:47 +03:00
bool gso_partial ;
2012-11-15 12:49:16 +04:00
2013-10-21 07:47:29 +04:00
skb_reset_network_header ( skb ) ;
2022-05-13 21:34:01 +03:00
nexthdr = ipv6_has_hopopt_jumbo ( skb ) ;
if ( nexthdr ) {
const int hophdr_len = sizeof ( struct hop_jumbo_hdr ) ;
int err ;
err = skb_cow_head ( skb , 0 ) ;
if ( err < 0 )
return ERR_PTR ( err ) ;
/* remove the HBH header.
* Layout : [ Ethernet header ] [ IPv6 header ] [ HBH ] [ TCP header ]
*/
memmove ( skb_mac_header ( skb ) + hophdr_len ,
skb_mac_header ( skb ) ,
ETH_HLEN + sizeof ( struct ipv6hdr ) ) ;
skb - > data + = hophdr_len ;
skb - > len - = hophdr_len ;
skb - > network_header + = hophdr_len ;
skb - > mac_header + = hophdr_len ;
ipv6h = ( struct ipv6hdr * ) skb - > data ;
ipv6h - > nexthdr = nexthdr ;
}
2013-10-21 07:47:29 +04:00
nhoff = skb_network_header ( skb ) - skb_mac_header ( skb ) ;
2012-11-15 12:49:16 +04:00
if ( unlikely ( ! pskb_may_pull ( skb , sizeof ( * ipv6h ) ) ) )
goto out ;
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
encap = SKB_GSO_CB ( skb ) - > encap_level > 0 ;
if ( encap )
2014-10-20 15:49:16 +04:00
features & = skb - > dev - > hw_enc_features ;
2013-10-21 07:47:29 +04:00
SKB_GSO_CB ( skb ) - > encap_level + = sizeof ( * ipv6h ) ;
2012-11-15 12:49:16 +04:00
ipv6h = ipv6_hdr ( skb ) ;
__skb_pull ( skb , sizeof ( * ipv6h ) ) ;
segs = ERR_PTR ( - EPROTONOSUPPORT ) ;
proto = ipv6_gso_pull_exthdrs ( skb , ipv6h - > nexthdr ) ;
2013-10-19 01:43:55 +04:00
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
if ( skb - > encapsulation & &
2016-05-18 19:06:10 +03:00
skb_shinfo ( skb ) - > gso_type & ( SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6 ) )
2018-04-26 20:42:16 +03:00
udpfrag = proto = = IPPROTO_UDP & & encap & &
( skb_shinfo ( skb ) - > gso_type & SKB_GSO_UDP ) ;
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
else
2018-04-26 20:42:16 +03:00
udpfrag = proto = = IPPROTO_UDP & & ! skb - > encapsulation & &
( skb_shinfo ( skb ) - > gso_type & SKB_GSO_UDP ) ;
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
2012-11-15 12:49:16 +04:00
ops = rcu_dereference ( inet6_offloads [ proto ] ) ;
2012-11-15 12:49:23 +04:00
if ( likely ( ops & & ops - > callbacks . gso_segment ) ) {
2012-11-15 12:49:16 +04:00
skb_reset_transport_header ( skb ) ;
2012-11-15 12:49:23 +04:00
segs = ops - > callbacks . gso_segment ( skb , features ) ;
2022-02-18 17:35:24 +03:00
if ( ! segs )
skb - > network_header = skb_mac_header ( skb ) + nhoff - skb - > head ;
2012-11-15 12:49:16 +04:00
}
2016-12-01 16:06:04 +03:00
if ( IS_ERR_OR_NULL ( segs ) )
2012-11-15 12:49:16 +04:00
goto out ;
2016-09-19 13:58:47 +03:00
gso_partial = ! ! ( skb_shinfo ( segs ) - > gso_type & SKB_GSO_PARTIAL ) ;
2012-11-15 12:49:16 +04:00
for ( skb = segs ; skb ; skb = skb - > next ) {
2013-10-21 07:47:29 +04:00
ipv6h = ( struct ipv6hdr * ) ( skb_mac_header ( skb ) + nhoff ) ;
2017-10-06 19:02:35 +03:00
if ( gso_partial & & skb_is_gso ( skb ) )
2016-04-11 04:45:03 +03:00
payload_len = skb_shinfo ( skb ) - > gso_size +
SKB_GSO_CB ( skb ) - > data_offset +
skb - > head - ( unsigned char * ) ( ipv6h + 1 ) ;
else
payload_len = skb - > len - nhoff - sizeof ( * ipv6h ) ;
ipv6h - > payload_len = htons ( payload_len ) ;
2013-10-21 07:47:29 +04:00
skb - > network_header = ( u8 * ) ipv6h - skb - > head ;
2018-09-13 17:43:07 +03:00
skb_reset_mac_len ( skb ) ;
2013-10-21 07:47:29 +04:00
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
if ( udpfrag ) {
2017-05-18 05:54:11 +03:00
int err = ip6_find_1stfragopt ( skb , & prevhdr ) ;
2017-06-05 04:41:10 +03:00
if ( err < 0 ) {
kfree_skb_list ( segs ) ;
2017-05-18 05:54:11 +03:00
return ERR_PTR ( err ) ;
2017-06-05 04:41:10 +03:00
}
2017-05-18 05:54:11 +03:00
fptr = ( struct frag_hdr * ) ( ( u8 * ) ipv6h + err ) ;
2012-11-15 12:49:16 +04:00
fptr - > frag_off = htons ( offset ) ;
2015-03-29 16:00:05 +03:00
if ( skb - > next )
2012-11-15 12:49:16 +04:00
fptr - > frag_off | = htons ( IP6_MF ) ;
offset + = ( ntohs ( ipv6h - > payload_len ) -
sizeof ( struct frag_hdr ) ) ;
}
ipv4: ipv6: better estimate tunnel header cut for correct ufo handling
Currently the UFO fragmentation process does not correctly handle inner
UDP frames.
(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):
IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471
We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).
IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109
In this case fragmentation id is incremented and offset is not updated.
First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
always ensure that the state of this flag is left untouched when
returning from upper gso segmenation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
fragmentation header data, we don't care for updating fragmentation
header data
* remove currently unneeded comment indicating skb->encapsulation might
get changed by upper gso_segment callback (gre and udp-tunnel reset
encapsulation after segmentation on each fragment)
If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.
The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.
Reported-by: Wolfgang Walter <linux@stwm.de>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-24 03:48:05 +04:00
if ( encap )
skb_reset_inner_headers ( skb ) ;
2012-11-15 12:49:16 +04:00
}
out :
return segs ;
}
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitrary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
/* Return the total length of all the extension hdrs, following the same
* logic in ipv6_gso_pull_exthdrs ( ) when parsing ext - hdrs .
*/
static int ipv6_exthdrs_len ( struct ipv6hdr * iph ,
const struct net_offload * * opps )
{
2013-12-16 06:48:07 +04:00
struct ipv6_opt_hdr * opth = ( void * ) iph ;
int len = 0 , proto , optlen = sizeof ( * iph ) ;
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
proto = iph - > nexthdr ;
for ( ; ; ) {
if ( proto ! = NEXTHDR_HOP ) {
* opps = rcu_dereference ( inet6_offloads [ proto ] ) ;
if ( unlikely ( ! ( * opps ) ) )
break ;
if ( ! ( ( * opps ) - > flags & INET6_PROTO_GSO_EXTHDR ) )
break ;
}
2013-12-16 06:48:07 +04:00
opth = ( void * ) opth + optlen ;
optlen = ipv6_optlen ( opth ) ;
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
len + = optlen ;
proto = opth - > nexthdr ;
}
return len ;
}
2018-12-14 13:51:58 +03:00
INDIRECT_CALLABLE_SCOPE struct sk_buff * ipv6_gro_receive ( struct list_head * head ,
struct sk_buff * skb )
2012-11-15 12:49:16 +04:00
{
const struct net_offload * ops ;
2018-06-24 08:13:49 +03:00
struct sk_buff * pp = NULL ;
2012-11-15 12:49:16 +04:00
struct sk_buff * p ;
struct ipv6hdr * iph ;
unsigned int nlen ;
unsigned int hlen ;
unsigned int off ;
net-gre-gro: Add GRE support to the GRO stack
This patch built on top of Commit 299603e8370a93dd5d8e8d800f0dff1ce2c53d36
("net-gro: Prepare GRO stack for the upcoming tunneling support") to add
the support of the standard GRE (RFC1701/RFC2784/RFC2890) to the GRO
stack. It also serves as an example for supporting other encapsulation
protocols in the GRO stack in the future.
The patch supports version 0 and all the flags (key, csum, seq#) but
will flush any pkt with the S (seq#) flag. This is because the S flag
is not support by GSO, and a GRO pkt may end up in the forwarding path,
thus requiring GSO support to break it up correctly.
Currently the "packet_offload" structure only contains L3 (ETH_P_IP/
ETH_P_IPV6) GRO offload support so the encapped pkts are limited to
IP pkts (i.e., w/o L2 hdr). But support for other protocol type can
be easily added, so is the support for GRE variations like NVGRE.
The patch also support csum offload. Specifically if the csum flag is on
and the h/w is capable of checksumming the payload (CHECKSUM_COMPLETE),
the code will take advantage of the csum computed by the h/w when
validating the GRE csum.
Note that commit 60769a5dcd8755715c7143b4571d5c44f01796f1 "ipv4: gre:
add GRO capability" already introduces GRO capability to IPv4 GRE
tunnels, using the gro_cells infrastructure. But GRO is done after
GRE hdr has been removed (i.e., decapped). The following patch applies
GRO when pkts first come in (before hitting the GRE tunnel code). There
is some performance advantage for applying GRO as early as possible.
Also this approach is transparent to other subsystem like Open vSwitch
where GRE decap is handled outside of the IP stack hence making it
harder for the gro_cells stuff to apply. On the other hand, some NICs
are still not capable of hashing on the inner hdr of a GRE pkt (RSS).
In that case the GRO processing of pkts from the same remote host will
all happen on the same CPU and the performance may be suboptimal.
I'm including some rough preliminary performance numbers below. Note
that the performance will be highly dependent on traffic load, mix as
usual. Moreover it also depends on NIC offload features hence the
following is by no means a comprehesive study. Local testing and tuning
will be needed to decide the best setting.
All tests spawned 50 copies of netperf TCP_STREAM and ran for 30 secs.
(super_netperf 50 -H 192.168.1.18 -l 30)
An IP GRE tunnel with only the key flag on (e.g., ip tunnel add gre1
mode gre local 10.246.17.18 remote 10.246.17.17 ttl 255 key 123)
is configured.
The GRO support for pkts AFTER decap are controlled through the device
feature of the GRE device (e.g., ethtool -K gre1 gro on/off).
1.1 ethtool -K gre1 gro off; ethtool -K eth0 gro off
thruput: 9.16Gbps
CPU utilization: 19%
1.2 ethtool -K gre1 gro on; ethtool -K eth0 gro off
thruput: 5.9Gbps
CPU utilization: 15%
1.3 ethtool -K gre1 gro off; ethtool -K eth0 gro on
thruput: 9.26Gbps
CPU utilization: 12-13%
1.4 ethtool -K gre1 gro on; ethtool -K eth0 gro on
thruput: 9.26Gbps
CPU utilization: 10%
The following tests were performed on a different NIC that is capable of
csum offload. I.e., the h/w is capable of computing IP payload csum
(CHECKSUM_COMPLETE).
2.1 ethtool -K gre1 gro on (hence will use gro_cells)
2.1.1 ethtool -K eth0 gro off; csum offload disabled
thruput: 8.53Gbps
CPU utilization: 9%
2.1.2 ethtool -K eth0 gro off; csum offload enabled
thruput: 8.97Gbps
CPU utilization: 7-8%
2.1.3 ethtool -K eth0 gro on; csum offload disabled
thruput: 8.83Gbps
CPU utilization: 5-6%
2.1.4 ethtool -K eth0 gro on; csum offload enabled
thruput: 8.98Gbps
CPU utilization: 5%
2.2 ethtool -K gre1 gro off
2.2.1 ethtool -K eth0 gro off; csum offload disabled
thruput: 5.93Gbps
CPU utilization: 9%
2.2.2 ethtool -K eth0 gro off; csum offload enabled
thruput: 5.62Gbps
CPU utilization: 8%
2.2.3 ethtool -K eth0 gro on; csum offload disabled
thruput: 7.69Gbps
CPU utilization: 8%
2.2.4 ethtool -K eth0 gro on; csum offload enabled
thruput: 8.96Gbps
CPU utilization: 5-6%
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-01-07 22:23:19 +04:00
u16 flush = 1 ;
2012-11-15 12:49:16 +04:00
int proto ;
off = skb_gro_offset ( skb ) ;
hlen = off + sizeof ( * iph ) ;
iph = skb_gro_header_fast ( skb , off ) ;
if ( skb_gro_header_hard ( skb , hlen ) ) {
iph = skb_gro_header_slow ( skb , hlen , off ) ;
if ( unlikely ( ! iph ) )
goto out ;
}
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
skb_set_network_header ( skb , off ) ;
2012-11-15 12:49:16 +04:00
skb_gro_pull ( skb , sizeof ( * iph ) ) ;
skb_set_transport_header ( skb , skb_gro_offset ( skb ) ) ;
flush + = ntohs ( iph - > payload_len ) ! = skb_gro_len ( skb ) ;
proto = iph - > nexthdr ;
ops = rcu_dereference ( inet6_offloads [ proto ] ) ;
2012-11-15 12:49:23 +04:00
if ( ! ops | | ! ops - > callbacks . gro_receive ) {
2012-11-15 12:49:16 +04:00
__pskb_pull ( skb , skb_gro_offset ( skb ) ) ;
2017-01-10 23:24:15 +03:00
skb_gro_frag0_invalidate ( skb ) ;
2012-11-15 12:49:16 +04:00
proto = ipv6_gso_pull_exthdrs ( skb , proto ) ;
skb_gro_pull ( skb , - skb_transport_offset ( skb ) ) ;
skb_reset_transport_header ( skb ) ;
__skb_push ( skb , skb_gro_offset ( skb ) ) ;
ops = rcu_dereference ( inet6_offloads [ proto ] ) ;
2012-11-15 12:49:23 +04:00
if ( ! ops | | ! ops - > callbacks . gro_receive )
2021-11-24 01:56:07 +03:00
goto out ;
2012-11-15 12:49:16 +04:00
iph = ipv6_hdr ( skb ) ;
}
NAPI_GRO_CB ( skb ) - > proto = proto ;
flush - - ;
nlen = skb_network_header_len ( skb ) ;
2018-06-24 08:13:49 +03:00
list_for_each_entry ( p , head , list ) {
2012-11-15 12:49:16 +04:00
const struct ipv6hdr * iph2 ;
__be32 first_word ; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
if ( ! NAPI_GRO_CB ( p ) - > same_flow )
continue ;
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
iph2 = ( struct ipv6hdr * ) ( p - > data + off ) ;
2014-08-25 00:53:10 +04:00
first_word = * ( __be32 * ) iph ^ * ( __be32 * ) iph2 ;
2012-11-15 12:49:16 +04:00
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
/* All fields must match except length and Traffic Class.
* XXX skbs on the gro_list have all been parsed and pulled
* already so we don ' t need to compare nlen
* ( nlen ! = ( sizeof ( * iph2 ) + ipv6_exthdrs_len ( iph2 , & ops ) ) )
2018-11-07 01:25:52 +03:00
* memcmp ( ) alone below is sufficient , right ?
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
*/
if ( ( first_word & htonl ( 0xF00FFFFF ) ) | |
2021-12-02 11:30:42 +03:00
! ipv6_addr_equal ( & iph - > saddr , & iph2 - > saddr ) | |
! ipv6_addr_equal ( & iph - > daddr , & iph2 - > daddr ) | |
2022-01-25 07:44:44 +03:00
iph - > nexthdr ! = iph2 - > nexthdr ) {
2018-11-07 01:25:52 +03:00
not_same_flow :
2012-11-15 12:49:16 +04:00
NAPI_GRO_CB ( p ) - > same_flow = 0 ;
continue ;
}
2018-11-07 01:25:52 +03:00
if ( unlikely ( nlen > sizeof ( struct ipv6hdr ) ) ) {
if ( memcmp ( iph + 1 , iph2 + 1 ,
nlen - sizeof ( struct ipv6hdr ) ) )
goto not_same_flow ;
}
2012-11-15 12:49:16 +04:00
/* flush if Traffic Class fields are different */
2022-01-25 07:44:44 +03:00
NAPI_GRO_CB ( p ) - > flush | = ! ! ( ( first_word & htonl ( 0x0FF00000 ) ) |
( __force __be32 ) ( iph - > hop_limit ^ iph2 - > hop_limit ) ) ;
2012-11-15 12:49:16 +04:00
NAPI_GRO_CB ( p ) - > flush | = flush ;
2014-09-09 22:23:14 +04:00
2016-04-11 04:44:57 +03:00
/* If the previous IP ID value was based on an atomic
* datagram we can overwrite the value and ignore it .
*/
if ( NAPI_GRO_CB ( skb ) - > is_atomic )
NAPI_GRO_CB ( p ) - > flush_id = 0 ;
2012-11-15 12:49:16 +04:00
}
2016-04-11 04:44:57 +03:00
NAPI_GRO_CB ( skb ) - > is_atomic = true ;
2012-11-15 12:49:16 +04:00
NAPI_GRO_CB ( skb ) - > flush | = flush ;
2014-05-20 08:56:34 +04:00
skb_gro_postpull_rcsum ( skb , iph , nlen ) ;
2012-11-15 12:49:16 +04:00
2018-12-14 13:51:59 +03:00
pp = indirect_call_gro_receive_l4 ( tcp6_gro_receive , udp6_gro_receive ,
ops - > callbacks . gro_receive , head , skb ) ;
2012-11-15 12:49:16 +04:00
out :
2017-02-15 11:39:39 +03:00
skb_gro_flush_final ( skb , pp , flush ) ;
2012-11-15 12:49:16 +04:00
return pp ;
}
2018-06-24 08:13:49 +03:00
static struct sk_buff * sit_ip6ip6_gro_receive ( struct list_head * head ,
struct sk_buff * skb )
2016-03-19 19:32:01 +03:00
{
2016-05-18 19:06:22 +03:00
/* Common GRO receive for SIT and IP6IP6 */
2016-03-19 19:32:01 +03:00
if ( NAPI_GRO_CB ( skb ) - > encap_mark ) {
NAPI_GRO_CB ( skb ) - > flush = 1 ;
return NULL ;
}
NAPI_GRO_CB ( skb ) - > encap_mark = 1 ;
return ipv6_gro_receive ( head , skb ) ;
}
2018-06-24 08:13:49 +03:00
static struct sk_buff * ip4ip6_gro_receive ( struct list_head * head ,
struct sk_buff * skb )
2016-05-18 19:06:23 +03:00
{
/* Common GRO receive for SIT and IP6IP6 */
if ( NAPI_GRO_CB ( skb ) - > encap_mark ) {
NAPI_GRO_CB ( skb ) - > flush = 1 ;
return NULL ;
}
NAPI_GRO_CB ( skb ) - > encap_mark = 1 ;
return inet_gro_receive ( head , skb ) ;
}
2018-12-14 13:51:58 +03:00
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete ( struct sk_buff * skb , int nhoff )
2012-11-15 12:49:16 +04:00
{
const struct net_offload * ops ;
2022-05-13 21:34:02 +03:00
struct ipv6hdr * iph ;
2012-11-15 12:49:16 +04:00
int err = - ENOSYS ;
2022-05-13 21:34:02 +03:00
u32 payload_len ;
2012-11-15 12:49:16 +04:00
2017-03-07 20:33:31 +03:00
if ( skb - > encapsulation ) {
skb_set_inner_protocol ( skb , cpu_to_be16 ( ETH_P_IPV6 ) ) ;
2015-10-20 06:40:17 +03:00
skb_set_inner_network_header ( skb , nhoff ) ;
2017-03-07 20:33:31 +03:00
}
2015-10-20 06:40:17 +03:00
2022-05-13 21:34:02 +03:00
payload_len = skb - > len - nhoff - sizeof ( * iph ) ;
if ( unlikely ( payload_len > IPV6_MAXPLEN ) ) {
struct hop_jumbo_hdr * hop_jumbo ;
int hoplen = sizeof ( * hop_jumbo ) ;
/* Move network header left */
memmove ( skb_mac_header ( skb ) - hoplen , skb_mac_header ( skb ) ,
skb - > transport_header - skb - > mac_header ) ;
skb - > data - = hoplen ;
skb - > len + = hoplen ;
skb - > mac_header - = hoplen ;
skb - > network_header - = hoplen ;
iph = ( struct ipv6hdr * ) ( skb - > data + nhoff ) ;
hop_jumbo = ( struct hop_jumbo_hdr * ) ( iph + 1 ) ;
/* Build hop-by-hop options */
hop_jumbo - > nexthdr = iph - > nexthdr ;
hop_jumbo - > hdrlen = 0 ;
hop_jumbo - > tlv_type = IPV6_TLV_JUMBO ;
hop_jumbo - > tlv_len = 4 ;
hop_jumbo - > jumbo_payload_len = htonl ( payload_len + hoplen ) ;
iph - > nexthdr = NEXTHDR_HOP ;
iph - > payload_len = 0 ;
} else {
iph = ( struct ipv6hdr * ) ( skb - > data + nhoff ) ;
iph - > payload_len = htons ( payload_len ) ;
}
2012-11-15 12:49:16 +04:00
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header"
and associated macros like ip_hdr() and ipv6_hdr() in order to allow
an arbitary number of IP hdrs (v4 or v6) to be used in the
encapsulation chain. This lays the foundation for various IP
tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later.
With this patch, the GRO stack traversing now is mostly based on
skb_gro_offset rather than special hdr offsets saved in skb (e.g.,
skb->network_header). As a result all but the top layer (i.e., the
the transport layer) must have hdrs of the same length in order for
a pkt to be considered for aggregation. Therefore when adding a new
encap layer (e.g., for tunneling), one must check and skip flows
(e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a
different hdr length.
Note that unlike the network header, the transport header can and
will continue to be set by the GRO code since there will be at
most one "transport layer" in the encap chain.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-12 08:53:45 +04:00
nhoff + = sizeof ( * iph ) + ipv6_exthdrs_len ( iph , & ops ) ;
2012-11-15 12:49:23 +04:00
if ( WARN_ON ( ! ops | | ! ops - > callbacks . gro_complete ) )
2021-11-24 01:56:08 +03:00
goto out ;
2012-11-15 12:49:16 +04:00
2018-12-14 13:51:59 +03:00
err = INDIRECT_CALL_L4 ( ops - > callbacks . gro_complete , tcp6_gro_complete ,
udp6_gro_complete , skb , nhoff ) ;
2012-11-15 12:49:16 +04:00
2021-11-24 01:56:08 +03:00
out :
2012-11-15 12:49:16 +04:00
return err ;
}
2015-10-20 06:40:17 +03:00
static int sit_gro_complete ( struct sk_buff * skb , int nhoff )
{
skb - > encapsulation = 1 ;
2016-05-18 19:06:10 +03:00
skb_shinfo ( skb ) - > gso_type | = SKB_GSO_IPXIP4 ;
2015-10-20 06:40:17 +03:00
return ipv6_gro_complete ( skb , nhoff ) ;
}
2016-05-18 19:06:22 +03:00
static int ip6ip6_gro_complete ( struct sk_buff * skb , int nhoff )
{
skb - > encapsulation = 1 ;
skb_shinfo ( skb ) - > gso_type | = SKB_GSO_IPXIP6 ;
return ipv6_gro_complete ( skb , nhoff ) ;
}
2016-05-18 19:06:23 +03:00
static int ip4ip6_gro_complete ( struct sk_buff * skb , int nhoff )
{
skb - > encapsulation = 1 ;
skb_shinfo ( skb ) - > gso_type | = SKB_GSO_IPXIP6 ;
return inet_gro_complete ( skb , nhoff ) ;
}
2012-11-15 12:49:16 +04:00
/*
 * Packet-type offload entry for ETH_P_IPV6; passed to dev_add_offload()
 * by ipv6_offload_init().
 */
static struct packet_offload ipv6_packet_offload __read_mostly = {
	.callbacks = {
		.gro_receive = ipv6_gro_receive,
		.gro_complete = ipv6_gro_complete,
		.gso_segment = ipv6_gso_segment,
	},
	.type = cpu_to_be16(ETH_P_IPV6),
};
2019-02-20 18:52:12 +03:00
/*
 * GSO segment callback of the sit offload.
 *
 * Only packets whose gso_type carries SKB_GSO_IPXIP4 are passed on to
 * ipv6_gso_segment(); anything else yields ERR_PTR(-EINVAL).
 */
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
				       netdev_features_t features)
{
	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)
		return ipv6_gso_segment(skb, features);

	return ERR_PTR(-EINVAL);
}
/*
 * GSO segment callback of the ip4ip6 offload.
 *
 * Only packets whose gso_type carries SKB_GSO_IPXIP6 are passed on to
 * the IPv4 segmenter, inet_gso_segment(); anything else yields
 * ERR_PTR(-EINVAL).
 */
static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb,
					  netdev_features_t features)
{
	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)
		return inet_gso_segment(skb, features);

	return ERR_PTR(-EINVAL);
}
/*
 * GSO segment callback of the ip6ip6 offload.
 *
 * Only packets whose gso_type carries SKB_GSO_IPXIP6 are passed on to
 * ipv6_gso_segment(); anything else yields ERR_PTR(-EINVAL).
 */
static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb,
					  netdev_features_t features)
{
	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)
		return ipv6_gso_segment(skb, features);

	return ERR_PTR(-EINVAL);
}
2013-10-21 07:47:30 +04:00
static const struct net_offload sit_offload = {
. callbacks = {
2019-02-20 18:52:12 +03:00
. gso_segment = sit_gso_segment ,
2016-05-18 19:06:22 +03:00
. gro_receive = sit_ip6ip6_gro_receive ,
2015-10-20 06:40:17 +03:00
. gro_complete = sit_gro_complete ,
2013-10-21 07:47:30 +04:00
} ,
} ;
2016-05-18 19:06:23 +03:00
static const struct net_offload ip4ip6_offload = {
. callbacks = {
2019-02-20 18:52:12 +03:00
. gso_segment = ip4ip6_gso_segment ,
2016-05-18 19:06:23 +03:00
. gro_receive = ip4ip6_gro_receive ,
. gro_complete = ip4ip6_gro_complete ,
} ,
} ;
2016-05-18 19:06:22 +03:00
static const struct net_offload ip6ip6_offload = {
. callbacks = {
2019-02-20 18:52:12 +03:00
. gso_segment = ip6ip6_gso_segment ,
2016-05-18 19:06:22 +03:00
. gro_receive = sit_ip6ip6_gro_receive ,
. gro_complete = ip6ip6_gro_complete ,
} ,
} ;
2012-11-15 12:49:22 +04:00
/*
 * Register the IPv6 GSO/GRO offload handlers.
 *
 * Registration is best effort: each step that reports a failure is logged
 * with pr_crit() and the remaining steps still run. dev_add_offload() has
 * no result to check here.
 *
 * Always returns 0.
 */
static int __init ipv6_offload_init(void)
{
	if (tcpv6_offload_init() < 0)
		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
	if (ipv6_exthdrs_offload_init() < 0)
		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);

	dev_add_offload(&ipv6_packet_offload);

	/* Tunnel offloads: report a failed registration the same way as the
	 * TCP and EXTHDRS steps above instead of dropping the result.
	 */
	if (inet_add_offload(&sit_offload, IPPROTO_IPV6) < 0)
		pr_crit("%s: Cannot add SIT protocol offload\n", __func__);
	if (inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6) < 0)
		pr_crit("%s: Cannot add IP6IP6 protocol offload\n", __func__);
	if (inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP) < 0)
		pr_crit("%s: Cannot add IP4IP6 protocol offload\n", __func__);

	return 0;
}
2012-11-15 12:49:22 +04:00
/* Register ipv6_offload_init() as an initcall through fs_initcall(). */
fs_initcall ( ipv6_offload_init ) ;