net: Save TX flow hash in sock and set in skbuf on xmit
For a connected socket we can precompute the flow hash for setting in skb->hash on output. This is a performance advantage over calculating the skb->hash for every packet on the connection. The computation is done using the common hash algorithm to be consistent with computations done for packets of the connection in other states where there is no socket (e.g. time-wait, syn-recv, syn-cookies). This patch adds sk_txhash to the sock structure. The inet_set_txhash and ip6_set_txhash functions are added, which are called from points in TCP and UDP where the socket moves to established state. skb_set_hash_from_sk is a function which sets skb->hash from the sock txhash value. This is called in the UDP and TCP transmit paths when transmitting within the context of a socket. Tested: ran super_netperf with 200 TCP_RR streams over a vxlan interface (in this case skb_get_hash is called on every TX packet to create a UDP source port). Before fix: 95.02% CPU utilization 154/256/505 90/95/99% latencies 1.13042e+06 tps Time in functions: 0.28% skb_flow_dissect 0.21% __skb_get_hash After fix: 94.95% CPU utilization 156/254/485 90/95/99% latencies 1.15447e+06 tps Neither __skb_get_hash nor skb_flow_dissect appear in perf. Signed-off-by: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
5ed20a68cd
commit
b73c3d0e4f
@ -31,6 +31,7 @@
|
||||
#include <net/route.h>
|
||||
#include <net/snmp.h>
|
||||
#include <net/flow.h>
|
||||
#include <net/flow_keys.h>
|
||||
|
||||
struct sock;
|
||||
|
||||
@ -353,6 +354,19 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
|
||||
skb->len, proto, 0);
|
||||
}
|
||||
|
||||
static inline void inet_set_txhash(struct sock *sk)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct flow_keys keys;
|
||||
|
||||
keys.src = inet->inet_saddr;
|
||||
keys.dst = inet->inet_daddr;
|
||||
keys.port16[0] = inet->inet_sport;
|
||||
keys.port16[1] = inet->inet_dport;
|
||||
|
||||
sk->sk_txhash = flow_hash_from_keys(&keys);
|
||||
}
|
||||
|
||||
/*
|
||||
* Map a multicast IP onto multicast MAC for type ethernet.
|
||||
*/
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <net/if_inet6.h>
|
||||
#include <net/ndisc.h>
|
||||
#include <net/flow.h>
|
||||
#include <net/flow_keys.h>
|
||||
#include <net/snmp.h>
|
||||
|
||||
#define SIN6_LEN_RFC2133 24
|
||||
@ -684,6 +685,20 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
|
||||
return hlimit;
|
||||
}
|
||||
|
||||
static inline void ip6_set_txhash(struct sock *sk)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct flow_keys keys;
|
||||
|
||||
keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
|
||||
keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
|
||||
keys.port16[0] = inet->inet_sport;
|
||||
keys.port16[1] = inet->inet_dport;
|
||||
|
||||
sk->sk_txhash = flow_hash_from_keys(&keys);
|
||||
}
|
||||
|
||||
/*
|
||||
* Header manipulation
|
||||
*/
|
||||
|
@ -273,6 +273,7 @@ struct cg_proto;
|
||||
* @sk_rcvtimeo: %SO_RCVTIMEO setting
|
||||
* @sk_sndtimeo: %SO_SNDTIMEO setting
|
||||
* @sk_rxhash: flow hash received from netif layer
|
||||
* @sk_txhash: computed flow hash for use on transmit
|
||||
* @sk_filter: socket filtering instructions
|
||||
* @sk_protinfo: private area, net family specific, when not using slab
|
||||
* @sk_timer: sock cleanup timer
|
||||
@ -347,6 +348,7 @@ struct sock {
|
||||
#ifdef CONFIG_RPS
|
||||
__u32 sk_rxhash;
|
||||
#endif
|
||||
__u32 sk_txhash;
|
||||
#ifdef CONFIG_NET_RX_BUSY_POLL
|
||||
unsigned int sk_napi_id;
|
||||
unsigned int sk_ll_usec;
|
||||
@ -1980,6 +1982,14 @@ static inline void sock_poll_wait(struct file *filp,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
|
||||
{
|
||||
if (sk->sk_txhash) {
|
||||
skb->l4_hash = 1;
|
||||
skb->hash = sk->sk_txhash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Queue a received datagram if it will fit. Stream and sequenced
|
||||
* protocols can't normally use this as they need to fit buffers in
|
||||
@ -1994,6 +2004,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
|
||||
skb_orphan(skb);
|
||||
skb->sk = sk;
|
||||
skb->destructor = sock_wfree;
|
||||
skb_set_hash_from_sk(skb, sk);
|
||||
/*
|
||||
* We used to take a refcount on sk, but following operation
|
||||
* is enough to guarantee sk_free() wont free this sock until
|
||||
|
@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
inet->inet_daddr = fl4->daddr;
|
||||
inet->inet_dport = usin->sin_port;
|
||||
sk->sk_state = TCP_ESTABLISHED;
|
||||
inet_set_txhash(sk);
|
||||
inet->inet_id = jiffies;
|
||||
|
||||
sk_dst_set(sk, &rt->dst);
|
||||
|
@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
inet->inet_dport = usin->sin_port;
|
||||
inet->inet_daddr = daddr;
|
||||
|
||||
inet_set_txhash(sk);
|
||||
|
||||
inet_csk(sk)->icsk_ext_hdr_len = 0;
|
||||
if (inet_opt)
|
||||
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
|
||||
@ -1334,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
|
||||
newinet->mc_ttl = ip_hdr(skb)->ttl;
|
||||
newinet->rcv_tos = ip_hdr(skb)->tos;
|
||||
inet_csk(newsk)->icsk_ext_hdr_len = 0;
|
||||
inet_set_txhash(newsk);
|
||||
if (inet_opt)
|
||||
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
|
||||
newinet->inet_id = newtp->write_seq ^ jiffies;
|
||||
|
@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
||||
skb_orphan(skb);
|
||||
skb->sk = sk;
|
||||
skb->destructor = tcp_wfree;
|
||||
skb_set_hash_from_sk(skb, sk);
|
||||
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
|
||||
|
||||
/* Build TCP header and checksum it. */
|
||||
|
@ -199,6 +199,7 @@ ipv4_connected:
|
||||
NULL);
|
||||
|
||||
sk->sk_state = TCP_ESTABLISHED;
|
||||
ip6_set_txhash(sk);
|
||||
out:
|
||||
fl6_sock_release(flowlabel);
|
||||
return err;
|
||||
|
@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
sk->sk_v6_daddr = usin->sin6_addr;
|
||||
np->flow_label = fl6.flowlabel;
|
||||
|
||||
ip6_set_txhash(sk);
|
||||
|
||||
/*
|
||||
* TCP over IPv4
|
||||
*/
|
||||
@ -1132,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
|
||||
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
|
||||
newsk->sk_bound_dev_if = ireq->ir_iif;
|
||||
|
||||
ip6_set_txhash(newsk);
|
||||
|
||||
/* Now IPv6 options...
|
||||
|
||||
First: no IPv4 options.
|
||||
|
Loading…
Reference in New Issue
Block a user