Merge branch 'gudp'
Tom Herbert says: ==================== net: Generic UDP Encapsulation Generic UDP Encapsulation (GUE) is a UDP encapsulation protocol which encapsulates packets of various IP protocols. The GUE protocol is described in http://tools.ietf.org/html/draft-herbert-gue-01. The receive path of GUE is implemented in the Foo-over-UDP module (FOU). This includes a UDP encap receive function for GUE as well as GUE-specific GRO functions. Management and configuration of GUE ports share most of the same code with FOU. For the transmit path, the previous FOU support for IPIP, sit, and GRE was simply extended for GUE (when GUE is enabled, the GUE header is inserted on transmit in addition to the UDP header inserted for FOU). Semantically GUE is the same as FOU in that the encapsulation headers (UDP and GUE) are inserted on transmission and removed on reception, so that the IP packet is processed with the inner header. This patch set includes: - Some fixes to FOU, removal of IPv4/IPv6-specific GRO functions - Support to configure a GUE receive port - Implementation of GUE receive path (normal and GRO) - Additions to ip_tunnel netlink to configure GUE - GUE header insertion in ip_tunnel transmit path v2: - Include net/gue.h in patch set Testing: I ran performance numbers using netperf TCP_RR with 200 streams, comparing encapsulation without GUE, encapsulation with GUE, and encapsulation with FOU. 
GRE TCP_STREAM IPv4, FOU, UDP checksum enabled 14.04% TX CPU utilization 13.17% RX CPU utilization 9211 Mbps IPv4, GUE, UDP checksum enabled 14.99% TX CPU utilization 13.79% RX CPU utilization 9185 Mbps IPv4, FOU, UDP checksum disabled 13.14% TX CPU utilization 23.18% RX CPU utilization 9277 Mbps IPv4, GUE, UDP checksum disabled 13.66% TX CPU utilization 23.57% RX CPU utilization 9184 Mbps TCP_RR IPv4, FOU, UDP checksum enabled 94.2% CPU utilization 155/249/460 90/95/99% latencies 1.17018e+06 tps IPv4, GUE, UDP checksum enabled 93.9% CPU utilization 158/253/472 90/95/99% latencies 1.15045e+06 tps IPIP TCP_STREAM FOU, UDP checksum enabled 15.28% TX CPU utilization 13.92% RX CPU utilization 9342 Mbps GUE, UDP checksum enabled 13.99% TX CPU utilization 13.34% RX CPU utilization 9210 Mbps FOU, UDP checksum disabled 15.08% TX CPU utilization 24.64% RX CPU utilization 9226 Mbps GUE, UDP checksum disabled 15.90% TX CPU utilization 24.77% RX CPU utilization 9197 Mbps TCP_RR FOU, UDP checksum enabled 94.23% CPU utilization 149/237/429 90/95/99% latencies 1.19553e+06 tps GUE, UDP checksum enabled 93.75% CPU utilization 152/243/442 90/95/99% latencies 1.17027e+06 tps SIT TCP_STREAM FOU, UDP checksum enabled 14.47% TX CPU utilization 14.58% RX CPU utilization 9106 Mbps GUE, UDP checksum enabled 15.09% TX CPU utilization 14.84% RX CPU utilization 9080 Mbps FOU, UDP checksum disabled 15.70% TX CPU utilization 27.93% RX CPU utilization 9097 Mbps GUE, UDP checksum disabled 15.04% TX CPU utilization 27.54% RX CPU utilization 9073 Mbps TCP_RR FOU, UDP checksum enabled 96.9% CPU utilization 170/281/581 90/95/99% latencies 1.03372e+06 tps GUE, UDP checksum enabled 97.16% CPU utilization 172/286/576 90/95/99% latencies 1.00469e+06 tps ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
6106253e69
@ -1886,6 +1886,9 @@ struct napi_gro_cb {
|
||||
/* Number of checksums via CHECKSUM_UNNECESSARY */
|
||||
u8 csum_cnt:3;
|
||||
|
||||
/* Used in foo-over-udp, set in udp[46]_gro_receive */
|
||||
u8 is_ipv6:1;
|
||||
|
||||
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
|
||||
__wsum csum;
|
||||
|
||||
|
23
include/net/gue.h
Normal file
23
include/net/gue.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef __NET_GUE_H
#define __NET_GUE_H

/*
 * Pull in the definitions this header relies on so that it can be included
 * on its own: the fixed-width __u8/__u16/__u32 types and the
 * __LITTLE_ENDIAN_BITFIELD / __BIG_ENDIAN_BITFIELD selectors.
 */
#include <linux/types.h>
#include <asm/byteorder.h>

/*
 * Base header of Generic UDP Encapsulation (GUE); it directly follows the
 * UDP header of an encapsulated packet.
 *
 * The four bytes can be accessed either field by field or as a single
 * 32-bit value through @word (useful for comparing two base headers in one
 * operation).
 *
 * @version:  GUE version, upper nibble of the first byte.
 * @hlen:     lower nibble of the first byte; length of the optional data
 *            that follows this base header, counted in 32-bit words
 *            (i.e. hlen << 2 bytes).
 * @next_hdr: protocol number of the encapsulated payload.
 * @flags:    GUE flags.
 * @word:     the whole base header as one 32-bit quantity.
 *
 * The bitfield order is selected per endianness so that the in-memory layout
 * of the first byte is the same on every architecture.
 */
struct guehdr {
	union {
		struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
			__u8	hlen:4,
				version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
			__u8	version:4,
				hlen:4;
#else
#error  "Please fix <asm/byteorder.h>"
#endif
			__u8    next_hdr;
			__u16   flags;
		};
		__u32 word;
	};
};

#endif
|
@ -13,6 +13,7 @@ enum {
|
||||
FOU_ATTR_PORT, /* u16 */
|
||||
FOU_ATTR_AF, /* u8 */
|
||||
FOU_ATTR_IPPROTO, /* u8 */
|
||||
FOU_ATTR_TYPE, /* u8 */
|
||||
|
||||
__FOU_ATTR_MAX,
|
||||
};
|
||||
@ -27,6 +28,12 @@ enum {
|
||||
__FOU_CMD_MAX,
|
||||
};
|
||||
|
||||
/*
 * Encapsulation flavours of a FOU port. The value is what the kernel side
 * compares the configured port type against when a port is created.
 */
enum {
	FOU_ENCAP_UNSPEC = 0,	/* no type given */
	FOU_ENCAP_DIRECT = 1,	/* plain FOU: payload directly follows UDP */
	FOU_ENCAP_GUE = 2,	/* GUE: a GUE header follows the UDP header */
};
|
||||
|
||||
#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
|
||||
|
||||
#endif /* _UAPI_LINUX_FOU_H */
|
||||
|
@ -64,6 +64,7 @@ enum {
|
||||
enum tunnel_encap_types {
|
||||
TUNNEL_ENCAP_NONE,
|
||||
TUNNEL_ENCAP_FOU,
|
||||
TUNNEL_ENCAP_GUE,
|
||||
};
|
||||
|
||||
#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
|
||||
|
224
net/ipv4/fou.c
224
net/ipv4/fou.c
@ -7,6 +7,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <net/genetlink.h>
|
||||
#include <net/gue.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/protocol.h>
|
||||
#include <net/udp.h>
|
||||
@ -27,6 +28,7 @@ struct fou {
|
||||
};
|
||||
|
||||
struct fou_cfg {
|
||||
u16 type;
|
||||
u8 protocol;
|
||||
struct udp_port_cfg udp_config;
|
||||
};
|
||||
@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
|
||||
sizeof(struct udphdr));
|
||||
}
|
||||
|
||||
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct fou *fou = fou_from_sock(sk);
|
||||
size_t len;
|
||||
struct guehdr *guehdr;
|
||||
struct udphdr *uh;
|
||||
|
||||
if (!fou)
|
||||
return 1;
|
||||
|
||||
len = sizeof(struct udphdr) + sizeof(struct guehdr);
|
||||
if (!pskb_may_pull(skb, len))
|
||||
goto drop;
|
||||
|
||||
uh = udp_hdr(skb);
|
||||
guehdr = (struct guehdr *)&uh[1];
|
||||
|
||||
len += guehdr->hlen << 2;
|
||||
if (!pskb_may_pull(skb, len))
|
||||
goto drop;
|
||||
|
||||
if (guehdr->version != 0)
|
||||
goto drop;
|
||||
|
||||
if (guehdr->flags) {
|
||||
/* No support yet */
|
||||
goto drop;
|
||||
}
|
||||
|
||||
return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
|
||||
drop:
|
||||
kfree_skb(skb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
|
||||
struct sk_buff *skb,
|
||||
const struct net_offload **offloads)
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
const struct net_offload *ops;
|
||||
struct sk_buff **pp = NULL;
|
||||
u8 proto = NAPI_GRO_CB(skb)->proto;
|
||||
const struct net_offload **offloads;
|
||||
|
||||
rcu_read_lock();
|
||||
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
|
||||
ops = rcu_dereference(offloads[proto]);
|
||||
if (!ops || !ops->callbacks.gro_receive)
|
||||
goto out_unlock;
|
||||
@ -85,14 +123,15 @@ out_unlock:
|
||||
return pp;
|
||||
}
|
||||
|
||||
static int fou_gro_complete(struct sk_buff *skb, int nhoff,
|
||||
const struct net_offload **offloads)
|
||||
static int fou_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
{
|
||||
const struct net_offload *ops;
|
||||
u8 proto = NAPI_GRO_CB(skb)->proto;
|
||||
int err = -ENOSYS;
|
||||
const struct net_offload **offloads;
|
||||
|
||||
rcu_read_lock();
|
||||
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
|
||||
ops = rcu_dereference(offloads[proto]);
|
||||
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
|
||||
goto out_unlock;
|
||||
@ -105,26 +144,110 @@ out_unlock:
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
|
||||
struct sk_buff *skb)
|
||||
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
return fou_gro_receive(head, skb, inet_offloads);
|
||||
const struct net_offload **offloads;
|
||||
const struct net_offload *ops;
|
||||
struct sk_buff **pp = NULL;
|
||||
struct sk_buff *p;
|
||||
u8 proto;
|
||||
struct guehdr *guehdr;
|
||||
unsigned int hlen, guehlen;
|
||||
unsigned int off;
|
||||
int flush = 1;
|
||||
|
||||
off = skb_gro_offset(skb);
|
||||
hlen = off + sizeof(*guehdr);
|
||||
guehdr = skb_gro_header_fast(skb, off);
|
||||
if (skb_gro_header_hard(skb, hlen)) {
|
||||
guehdr = skb_gro_header_slow(skb, hlen, off);
|
||||
if (unlikely(!guehdr))
|
||||
goto out;
|
||||
}
|
||||
|
||||
proto = guehdr->next_hdr;
|
||||
|
||||
rcu_read_lock();
|
||||
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
|
||||
ops = rcu_dereference(offloads[proto]);
|
||||
if (WARN_ON(!ops || !ops->callbacks.gro_receive))
|
||||
goto out_unlock;
|
||||
|
||||
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
|
||||
|
||||
hlen = off + guehlen;
|
||||
if (skb_gro_header_hard(skb, hlen)) {
|
||||
guehdr = skb_gro_header_slow(skb, hlen, off);
|
||||
if (unlikely(!guehdr))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
flush = 0;
|
||||
|
||||
for (p = *head; p; p = p->next) {
|
||||
const struct guehdr *guehdr2;
|
||||
|
||||
if (!NAPI_GRO_CB(p)->same_flow)
|
||||
continue;
|
||||
|
||||
guehdr2 = (struct guehdr *)(p->data + off);
|
||||
|
||||
/* Compare base GUE header to be equal (covers
|
||||
* hlen, version, next_hdr, and flags.
|
||||
*/
|
||||
if (guehdr->word != guehdr2->word) {
|
||||
NAPI_GRO_CB(p)->same_flow = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Compare optional fields are the same. */
|
||||
if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
|
||||
guehdr->hlen << 2)) {
|
||||
NAPI_GRO_CB(p)->same_flow = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
skb_gro_pull(skb, guehlen);
|
||||
|
||||
/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
|
||||
skb_gro_postpull_rcsum(skb, guehdr, guehlen);
|
||||
|
||||
pp = ops->callbacks.gro_receive(head, skb);
|
||||
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
NAPI_GRO_CB(skb)->flush |= flush;
|
||||
|
||||
return pp;
|
||||
}
|
||||
|
||||
static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
static int gue_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
{
|
||||
return fou_gro_complete(skb, nhoff, inet_offloads);
|
||||
}
|
||||
const struct net_offload **offloads;
|
||||
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
|
||||
const struct net_offload *ops;
|
||||
unsigned int guehlen;
|
||||
u8 proto;
|
||||
int err = -ENOENT;
|
||||
|
||||
static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
return fou_gro_receive(head, skb, inet6_offloads);
|
||||
}
|
||||
proto = guehdr->next_hdr;
|
||||
|
||||
static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
{
|
||||
return fou_gro_complete(skb, nhoff, inet6_offloads);
|
||||
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
|
||||
|
||||
rcu_read_lock();
|
||||
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
|
||||
ops = rcu_dereference(offloads[proto]);
|
||||
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
|
||||
goto out_unlock;
|
||||
|
||||
err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
|
||||
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int fou_add_to_port_list(struct fou *fou)
|
||||
@ -162,6 +285,28 @@ static void fou_release(struct fou *fou)
|
||||
kfree(fou);
|
||||
}
|
||||
|
||||
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
|
||||
{
|
||||
udp_sk(sk)->encap_rcv = fou_udp_recv;
|
||||
fou->protocol = cfg->protocol;
|
||||
fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
|
||||
fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
|
||||
fou->udp_offloads.port = cfg->udp_config.local_udp_port;
|
||||
fou->udp_offloads.ipproto = cfg->protocol;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
|
||||
{
|
||||
udp_sk(sk)->encap_rcv = gue_udp_recv;
|
||||
fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
|
||||
fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
|
||||
fou->udp_offloads.port = cfg->udp_config.local_udp_port;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fou_create(struct net *net, struct fou_cfg *cfg,
|
||||
struct socket **sockp)
|
||||
{
|
||||
@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
|
||||
|
||||
sk = sock->sk;
|
||||
|
||||
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
|
||||
fou->protocol = cfg->protocol;
|
||||
fou->port = cfg->udp_config.local_udp_port;
|
||||
udp_sk(sk)->encap_rcv = fou_udp_recv;
|
||||
fou->port = cfg->udp_config.local_udp_port;
|
||||
|
||||
/* Initial for fou type */
|
||||
switch (cfg->type) {
|
||||
case FOU_ENCAP_DIRECT:
|
||||
err = fou_encap_init(sk, fou, cfg);
|
||||
if (err)
|
||||
goto error;
|
||||
break;
|
||||
case FOU_ENCAP_GUE:
|
||||
err = gue_encap_init(sk, fou, cfg);
|
||||
if (err)
|
||||
goto error;
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
udp_sk(sk)->encap_type = 1;
|
||||
udp_encap_enable();
|
||||
@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
|
||||
|
||||
sk->sk_allocation = GFP_ATOMIC;
|
||||
|
||||
switch (cfg->udp_config.family) {
|
||||
case AF_INET:
|
||||
fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
|
||||
fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
|
||||
break;
|
||||
case AF_INET6:
|
||||
fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
|
||||
fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
|
||||
break;
|
||||
default:
|
||||
err = -EPFNOSUPPORT;
|
||||
goto error;
|
||||
}
|
||||
|
||||
fou->udp_offloads.port = cfg->udp_config.local_udp_port;
|
||||
fou->udp_offloads.ipproto = cfg->protocol;
|
||||
|
||||
if (cfg->udp_config.family == AF_INET) {
|
||||
err = udp_add_offload(&fou->udp_offloads);
|
||||
if (err)
|
||||
@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
|
||||
[FOU_ATTR_PORT] = { .type = NLA_U16, },
|
||||
[FOU_ATTR_AF] = { .type = NLA_U8, },
|
||||
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
|
||||
[FOU_ATTR_TYPE] = { .type = NLA_U8, },
|
||||
};
|
||||
|
||||
static int parse_nl_config(struct genl_info *info,
|
||||
@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info,
|
||||
if (info->attrs[FOU_ATTR_IPPROTO])
|
||||
cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
|
||||
|
||||
if (info->attrs[FOU_ATTR_TYPE])
|
||||
cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -56,6 +56,7 @@
|
||||
#include <net/netns/generic.h>
|
||||
#include <net/rtnetlink.h>
|
||||
#include <net/udp.h>
|
||||
#include <net/gue.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
#include <net/ipv6.h>
|
||||
@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
|
||||
return 0;
|
||||
case TUNNEL_ENCAP_FOU:
|
||||
return sizeof(struct udphdr);
|
||||
case TUNNEL_ENCAP_GUE:
|
||||
return sizeof(struct udphdr) + sizeof(struct guehdr);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
|
||||
skb_reset_transport_header(skb);
|
||||
uh = udp_hdr(skb);
|
||||
|
||||
if (e->type == TUNNEL_ENCAP_GUE) {
|
||||
struct guehdr *guehdr = (struct guehdr *)&uh[1];
|
||||
|
||||
guehdr->version = 0;
|
||||
guehdr->hlen = 0;
|
||||
guehdr->flags = 0;
|
||||
guehdr->next_hdr = *protocol;
|
||||
}
|
||||
|
||||
uh->dest = e->dport;
|
||||
uh->source = sport;
|
||||
uh->len = htons(skb->len);
|
||||
@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
|
||||
case TUNNEL_ENCAP_NONE:
|
||||
return 0;
|
||||
case TUNNEL_ENCAP_FOU:
|
||||
case TUNNEL_ENCAP_GUE:
|
||||
return fou_build_header(skb, &t->encap, t->encap_hlen,
|
||||
protocol, fl4);
|
||||
default:
|
||||
@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
|
||||
df |= (inner_iph->frag_off&htons(IP_DF));
|
||||
|
||||
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
|
||||
+ rt->dst.header_len;
|
||||
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
|
||||
if (max_headroom > dev->needed_headroom)
|
||||
dev->needed_headroom = max_headroom;
|
||||
|
||||
|
@ -334,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
|
||||
skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
|
||||
inet_gro_compute_pseudo);
|
||||
skip:
|
||||
NAPI_GRO_CB(skb)->is_ipv6 = 0;
|
||||
return udp_gro_receive(head, skb, uh);
|
||||
|
||||
flush:
|
||||
|
@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
|
||||
ip6_gro_compute_pseudo);
|
||||
|
||||
skip:
|
||||
NAPI_GRO_CB(skb)->is_ipv6 = 1;
|
||||
return udp_gro_receive(head, skb, uh);
|
||||
|
||||
flush:
|
||||
|
Loading…
x
Reference in New Issue
Block a user