2005-08-12 16:19:38 +04:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* Authors : Lotsa people , from code originally in tcp
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# ifndef _INET6_HASHTABLES_H
# define _INET6_HASHTABLES_H
2005-08-12 16:26:18 +04:00
# if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
# include <linux/in6.h>
# include <linux/ipv6.h>
2005-08-12 16:19:38 +04:00
# include <linux/types.h>
2007-03-23 21:40:27 +03:00
# include <linux/jhash.h>
# include <net/inet_sock.h>
2005-08-12 16:19:38 +04:00
2005-08-12 16:26:18 +04:00
# include <net/ipv6.h>
2008-06-17 04:14:11 +04:00
# include <net/netns/hash.h>
2005-08-12 16:26:18 +04:00
2005-08-12 16:19:38 +04:00
/* Forward declaration: this header only passes struct inet_hashinfo by pointer. */
struct inet_hashinfo ;
2005-08-12 16:26:18 +04:00
/* I have no idea if this is a good hash for v6 or not. -DaveM */
2008-06-17 04:13:48 +04:00
static inline unsigned int inet6_ehashfn ( struct net * net ,
const struct in6_addr * laddr , const u16 lport ,
2006-11-08 11:20:00 +03:00
const struct in6_addr * faddr , const __be16 fport )
2005-08-12 16:26:18 +04:00
{
2007-03-23 21:40:27 +03:00
u32 ports = ( lport ^ ( __force u16 ) fport ) ;
2005-08-12 16:26:18 +04:00
2007-03-23 21:40:27 +03:00
return jhash_3words ( ( __force u32 ) laddr - > s6_addr32 [ 3 ] ,
( __force u32 ) faddr - > s6_addr32 [ 3 ] ,
2008-06-17 04:14:11 +04:00
ports , inet_ehash_secret + net_hash_mix ( net ) ) ;
2005-08-12 16:26:18 +04:00
}
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt into a separate cache line,
so that SMP/NUMA performance doesn't suffer from cache line ping-pong)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints, but only the beginning of
"struct sock" is prefetched.
As the first comparison in INET_MATCH uses inet_sk(__sk)->daddr, which is
far away from the beginning of "struct sock", it has to bring a cold cache
line into the CPU cache. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store the full hash in this 32-bit field, not masked by (ehash_size -
1). Using this full hash as the first comparison done in INET_MATCH
lets us immediately skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-04 01:13:38 +04:00
static inline int inet6_sk_ehashfn ( const struct sock * sk )
2005-08-12 16:26:18 +04:00
{
const struct inet_sock * inet = inet_sk ( sk ) ;
const struct ipv6_pinfo * np = inet6_sk ( sk ) ;
const struct in6_addr * laddr = & np - > rcv_saddr ;
const struct in6_addr * faddr = & np - > daddr ;
2009-10-15 10:30:45 +04:00
const __u16 lport = inet - > inet_num ;
const __be16 fport = inet - > inet_dport ;
2008-06-17 04:13:48 +04:00
struct net * net = sock_net ( sk ) ;
return inet6_ehashfn ( net , laddr , lport , faddr , fport ) ;
2005-08-12 16:26:18 +04:00
}
2009-12-04 06:46:54 +03:00
/* Prototype only; the definition is not part of this header. */
int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp);
2005-12-14 10:15:01 +03:00
2005-08-12 16:26:18 +04:00
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash , so
* we need not check it for TCP lookups anymore , thanks Alexey . - DaveM
*
* The sockhash lock must be held as a reader here .
*/
2008-01-31 16:07:21 +03:00
extern struct sock * __inet6_lookup_established ( struct net * net ,
struct inet_hashinfo * hashinfo ,
2005-08-12 16:26:18 +04:00
const struct in6_addr * saddr ,
2006-11-08 11:20:00 +03:00
const __be16 sport ,
2005-08-12 16:26:18 +04:00
const struct in6_addr * daddr ,
const u16 hnum ,
2006-04-10 09:48:59 +04:00
const int dif ) ;
2005-08-12 16:26:18 +04:00
2008-01-31 16:07:21 +03:00
/*
 * Prototype only.  Takes just the destination side of the tuple (daddr and
 * hnum) plus dif; __inet6_lookup() calls it when the established lookup
 * returns NULL.
 */
struct sock *inet6_lookup_listener(struct net *net,
				   struct inet_hashinfo *hashinfo,
				   const struct in6_addr *daddr,
				   const unsigned short hnum,
				   const int dif);
2008-01-31 16:07:21 +03:00
static inline struct sock * __inet6_lookup ( struct net * net ,
struct inet_hashinfo * hashinfo ,
2005-08-12 16:26:18 +04:00
const struct in6_addr * saddr ,
2006-11-08 11:20:00 +03:00
const __be16 sport ,
2005-08-12 16:26:18 +04:00
const struct in6_addr * daddr ,
const u16 hnum ,
const int dif )
{
2008-01-31 16:07:21 +03:00
struct sock * sk = __inet6_lookup_established ( net , hashinfo , saddr ,
sport , daddr , hnum , dif ) ;
2005-08-12 16:26:18 +04:00
if ( sk )
return sk ;
2008-01-31 16:07:21 +03:00
return inet6_lookup_listener ( net , hashinfo , daddr , hnum , dif ) ;
2005-08-12 16:26:18 +04:00
}
2008-10-07 22:41:57 +04:00
static inline struct sock * __inet6_lookup_skb ( struct inet_hashinfo * hashinfo ,
struct sk_buff * skb ,
const __be16 sport ,
const __be16 dport )
{
2008-10-07 23:41:01 +04:00
struct sock * sk ;
if ( unlikely ( sk = skb_steal_sock ( skb ) ) )
return sk ;
2009-06-02 09:19:30 +04:00
else return __inet6_lookup ( dev_net ( skb_dst ( skb ) - > dev ) , hashinfo ,
2008-10-07 23:41:01 +04:00
& ipv6_hdr ( skb ) - > saddr , sport ,
& ipv6_hdr ( skb ) - > daddr , ntohs ( dport ) ,
inet6_iif ( skb ) ) ;
2008-10-07 22:41:57 +04:00
}
2008-01-31 16:07:21 +03:00
/*
 * Prototype only.  Unlike the __inet6_lookup*() helpers in this header,
 * both ports are taken here as __be16 values.
 */
struct sock *inet6_lookup(struct net *net,
			  struct inet_hashinfo *hashinfo,
			  const struct in6_addr *saddr,
			  const __be16 sport,
			  const struct in6_addr *daddr,
			  const __be16 dport,
			  const int dif);
2005-08-12 16:26:18 +04:00
# endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
2005-08-12 16:19:38 +04:00
# endif /* _INET6_HASHTABLES_H */