/*
* INETPEER - A storage for permanent information about peers
*
 *  Authors:	Andrey V. Savochkin <saw@msu.ru>
*/
#ifndef _NET_INETPEER_H
#define _NET_INETPEER_H

#include <linux/types.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/rtnetlink.h>
#include <net/ipv6.h>
#include <linux/atomic.h>

struct inetpeer_addr_base {
	union {
		__be32			a4;
		__be32			a6[4];
	};
};

struct inetpeer_addr {
	struct inetpeer_addr_base	addr;
	__u16				family;
};

struct inet_peer {
	/* group together avl_left, avl_right, daddr to speed up lookups */
	struct inet_peer __rcu	*avl_left, *avl_right;
	struct inetpeer_addr	daddr;
	__u32			avl_height;

	u32			metrics[RTAX_MAX];
	u32			rate_tokens;	/* rate limiting for ICMP */
	unsigned long		rate_last;

	union {
		struct list_head	gc_list;
		struct rcu_head		gc_rcu;
	};

	/*
	 * Once inet_peer is queued for deletion (refcnt == -1), the following
	 * field is not available: rid.
	 * We can share memory with rcu_head to help keep inet_peer small;
	 * an illustrative deletion-path sketch follows this struct.
	 */
	union {
		struct {
			atomic_t	rid;	/* Frag reception counter */
		};
		struct rcu_head		rcu;
		struct inet_peer	*gc_next;
	};

	/* following fields might be frequently dirtied */
	__u32			dtime;	/* time of last use of unreferenced entries */
	atomic_t		refcnt;
};
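
/*
 * Illustrative deletion-path sketch (hypothetical callback name, not
 * declared in this header): once a peer is unlinked from the tree and its
 * last reference is dropped, the rcu_head shared inside the union above
 * lets it be freed without a separate allocation:
 *
 *	call_rcu(&p->rcu, inetpeer_free_rcu);
 *
 * where inetpeer_free_rcu() would container_of() back to the inet_peer
 * and kmem_cache_free() it.
 */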

struct inet_peer_base {
	struct inet_peer __rcu	*root;
	seqlock_t		lock;
	int			total;
};

#define INETPEER_BASE_BIT	0x1UL

static inline struct inet_peer *inetpeer_ptr(unsigned long val)
{
	BUG_ON(val & INETPEER_BASE_BIT);
	return (struct inet_peer *) val;
}

static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val)
{
	if (!(val & INETPEER_BASE_BIT))
		return NULL;
	val &= ~INETPEER_BASE_BIT;
	return (struct inet_peer_base *) val;
}

static inline bool inetpeer_ptr_is_peer(unsigned long val)
{
	return !(val & INETPEER_BASE_BIT);
}

static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer)
{
	/* This implicitly clears INETPEER_BASE_BIT */
	*val = (unsigned long) peer;
}

static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer)
{
	unsigned long val = (unsigned long) peer;
	unsigned long orig = *ptr;

	if (!(orig & INETPEER_BASE_BIT) ||
	    cmpxchg(ptr, orig, val) != orig)
		return false;
	return true;
}

static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base)
{
	*ptr = (unsigned long) base | INETPEER_BASE_BIT;
}

static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from)
{
	unsigned long val = *from;

	*to = val;
	if (inetpeer_ptr_is_peer(val)) {
		struct inet_peer *peer = inetpeer_ptr(val);

		atomic_inc(&peer->refcnt);
	}
}
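
/*
 * Illustrative usage sketch: a word-sized slot initialized with
 * inetpeer_init_ptr() carries either a tagged base pointer (bit 0 set) or
 * a real peer (bit 0 clear), and can be lazily upgraded on first lookup.
 * 'pword' and 'daddr' are assumed to come from the caller's context;
 * inet_getpeer_v4() and inet_putpeer() are declared further down:
 *
 *	struct inet_peer_base *base = inetpeer_base_ptr(*pword);
 *
 *	if (base) {
 *		struct inet_peer *p = inet_getpeer_v4(base, daddr, 1);
 *
 *		if (p && !inetpeer_ptr_set_peer(pword, p))
 *			inet_putpeer(p);
 *	}
 *
 * If inetpeer_ptr_set_peer() loses its cmpxchg race, the fresh reference
 * is dropped again with inet_putpeer().
 */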

void inet_peer_base_init(struct inet_peer_base *);

void inet_initpeers(void) __init;

#define INETPEER_METRICS_NEW	(~(u32) 0)

static inline bool inet_metrics_new(const struct inet_peer *p)
{
	return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
}
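
/*
 * Hypothetical seeding sketch: the metrics of a freshly allocated peer
 * carry the INETPEER_METRICS_NEW sentinel, so a caller can copy route
 * metrics in exactly once ('fib_metrics' is an assumed source array, not
 * part of this header):
 *
 *	if (inet_metrics_new(peer))
 *		memcpy(peer->metrics, fib_metrics,
 *		       sizeof(u32) * RTAX_MAX);
 */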

/* can be called with or without local BH being disabled */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
			       const struct inetpeer_addr *daddr,
			       int create);

static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
						__be32 v4daddr,
						int create)
{
	struct inetpeer_addr daddr;

	daddr.addr.a4 = v4daddr;
	daddr.family = AF_INET;
	return inet_getpeer(base, &daddr, create);
}

static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
						const struct in6_addr *v6daddr,
						int create)
{
	struct inetpeer_addr daddr;

	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
	daddr.family = AF_INET6;
	return inet_getpeer(base, &daddr, create);
}

/* can be called from BH context or outside */
void inet_putpeer(struct inet_peer *p);
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
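
/*
 * Hypothetical caller sketch, ICMP-style rate limiting: the peer is looked
 * up on demand and every successful inet_getpeer*() is balanced by
 * inet_putpeer().  'base', 'iph' and 'timeout' are assumed to come from
 * the caller's context:
 *
 *	struct inet_peer *peer = inet_getpeer_v4(base, iph->daddr, 1);
 *	bool send = !peer || inet_peer_xrlim_allow(peer, timeout);
 *
 *	if (peer)
 *		inet_putpeer(peer);
 */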

void inetpeer_invalidate_tree(struct inet_peer_base *);

/*
 * temporary check to make sure we don't access rid, tcp_ts,
 * tcp_ts_stamp if no refcount is taken on inet_peer
 */
static inline void inet_peer_refcheck(const struct inet_peer *p)
{
	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
}

#endif /* _NET_INETPEER_H */