/*
 *		INETPEER - A storage for permanent information about peers
 *
 *  This source is covered by the GNU GPL, the same as all kernel sources.
 *
 *  Authors:	Andrey V. Savochkin <saw@msu.ru>
 */

#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/timer.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/workqueue.h>
#include <net/ip.h>
#include <net/inetpeer.h>
#include <net/secure_seq.h>

/*
 *  Theory of operations.
 *  We keep one entry for each peer IP address.  The nodes contain long-living
 *  information about the peer which doesn't depend on routes.
 *
 *  Nodes are removed only when their reference counter goes to 0.
 *  Once that happens, the node may be removed when a sufficient amount of
 *  time has passed since its last use.  The less-recently-used entry can
 *  also be removed if the pool is overloaded, i.e. if the total amount of
 *  entries is greater than or equal to the threshold.
 *
 *  The node pool is organised as an RB tree.
 *  Such an implementation has been chosen not just for fun.  It's a way to
 *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
 *  amount of long living nodes in a single hash slot would significantly delay
 *  lookups performed with disabled BHs.
 *
 *  Serialisation issues.
 *  1.  Nodes may appear in the tree only with the pool lock held.
 *  2.  Nodes may disappear from the tree only with the pool lock held
 *      AND reference count being 0.
 *  3.  Global variable peer_total is modified under the pool lock.
 *  4.  struct inet_peer fields modification:
 *		rb_node: pool lock
 *		refcnt: atomically against modifications on other CPU;
 *		   usually under some other lock to prevent node disappearing
 *		daddr: unchangeable
 */
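
/* Illustrative sketch (not part of this file, and not compiled): the
 * lifecycle callers are expected to follow against this pool.  The field
 * names come from struct inetpeer_addr in net/inetpeer.h; treat the
 * snippet as an example of the refcounting rules described above, not as
 * a definitive recipe, and "net"/"skb" as assumed caller context.
 *
 *	struct inetpeer_addr daddr;
 *	struct inet_peer *peer;
 *
 *	daddr.a4.addr = ip_hdr(skb)->saddr;
 *	daddr.a4.vif = 0;
 *	daddr.family = AF_INET;
 *	peer = inet_getpeer(net->ipv4.peers, &daddr, 1);  // may create
 *	if (peer) {
 *		// use the long-living per-peer state (rate_tokens, metrics, ...)
 *		inet_putpeer(peer);  // node is freed only after refcnt hits 0
 *	}
 */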

static struct kmem_cache *peer_cachep __ro_after_init;

void inet_peer_base_init(struct inet_peer_base *bp)
{
	bp->rb_root = RB_ROOT;
	seqlock_init(&bp->lock);
	bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);
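
/* Illustrative sketch (an assumption, not code from this file): a subsystem
 * that wants its own pool allocates an inet_peer_base and runs it through
 * inet_peer_base_init() once, before the first inet_getpeer() call, roughly
 * the way the per-netns IPv4 code sets up net->ipv4.peers.  The function
 * name below is hypothetical.
 *
 *	static int __net_init example_inetpeer_init(struct net *net)
 *	{
 *		struct inet_peer_base *bp = kzalloc(sizeof(*bp), GFP_KERNEL);
 *
 *		if (!bp)
 *			return -ENOMEM;
 *		inet_peer_base_init(bp);	// empty rb_root, seqlock, total = 0
 *		net->ipv4.peers = bp;
 *		return 0;
 *	}
 */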

#define PEER_MAX_GC 32

/* Exported for sysctl_net_ipv4.  */
int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
							 * aggressively at this stage */

int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */

/* Called from ip_output.c:ip_init  */
void __init inet_initpeers(void)
{
	struct sysinfo si;

	/* Use the straight interface to information about memory. */
	si_meminfo(&si);
	/* The values below were suggested by Alexey Kuznetsov
	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
	 * myself.  --SAW
	 */
	if (si.totalram <= (32768 * 1024) / PAGE_SIZE)
		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
	if (si.totalram <= (16384 * 1024) / PAGE_SIZE)
		inet_peer_threshold >>= 1; /* about 512KB */
	if (si.totalram <= (8192 * 1024) / PAGE_SIZE)
		inet_peer_threshold >>= 2; /* about 128KB */

	peer_cachep = kmem_cache_create("inet_peer_cache",
			sizeof(struct inet_peer),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
			NULL);
}
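
/* Worked example of the scaling above: the default threshold is
 * 65536 + 128 = 65664 entries.  Both sides of each comparison are in pages,
 * so the tests simply mean "at most 32 MB / 16 MB / 8 MB of RAM".  On an
 * 8 MB box all three match and the threshold becomes
 * ((65664 >> 1) >> 1) >> 2 = 4104; on a 16 MB box the first two match
 * (65664 >> 2 = 16416); on a 32 MB box only the first one does (32832).
 */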

/* Called with rcu_read_lock() or base->lock held */
static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
				struct inet_peer_base *base,
				unsigned int seq,
				struct inet_peer *gc_stack[],
				unsigned int *gc_cnt,
				struct rb_node **parent_p,
				struct rb_node ***pp_p)
{
	struct rb_node **pp, *parent, *next;
	struct inet_peer *p;

	pp = &base->rb_root.rb_node;
	parent = NULL;
	while (1) {
		int cmp;
		next = rcu_dereference_raw(*pp);
		if (!next)
			break;
		parent = next;
		p = rb_entry(parent, struct inet_peer, rb_node);
		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
		if (cmp == 0) {
			if (!refcount_inc_not_zero(&p->refcnt))
				break;
			return p;
		}
		if (gc_stack) {
			if (*gc_cnt < PEER_MAX_GC)
				gc_stack[(*gc_cnt)++] = p;
		} else if (unlikely(read_seqretry(&base->lock, seq))) {
			break;
		}
		if (cmp == -1)
			pp = &next->rb_left;
		else
			pp = &next->rb_right;
	}
	*parent_p = parent;
	*pp_p = pp;
	return NULL;
}

static void inetpeer_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
}

/* perform garbage collect on all items stacked during a lookup */
static void inet_peer_gc(struct inet_peer_base *base,
			 struct inet_peer *gc_stack[],
			 unsigned int gc_cnt)
{
	struct inet_peer *p;
	__u32 delta, ttl;
	int i;

	if (base->total >= inet_peer_threshold)
		ttl = 0; /* be aggressive */
	else
		ttl = inet_peer_maxttl
				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
					base->total / inet_peer_threshold * HZ;
	for (i = 0; i < gc_cnt; i++) {
		p = gc_stack[i];

		/* The READ_ONCE() pairs with the WRITE_ONCE()
		 * in inet_putpeer()
		 */
		delta = (__u32)jiffies - READ_ONCE(p->dtime);

		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
			gc_stack[i] = NULL;
	}
	for (i = 0; i < gc_cnt; i++) {
		p = gc_stack[i];
		if (p) {
			rb_erase(&p->rb_node, &base->rb_root);
			base->total--;
			call_rcu(&p->rcu, inetpeer_free_rcu);
		}
	}
}
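
/* Worked example of the ttl interpolation above, using the default
 * inet_peer_minttl (120 * HZ) and inet_peer_maxttl (600 * HZ):
 * (maxttl - minttl) / HZ == 480, so ttl = 600*HZ - 480 * total /
 * threshold * HZ.  An almost empty pool keeps entries for ~10 minutes,
 * a pool at half the threshold for 600 - 240 = 360 seconds, and a pool
 * just under the threshold for roughly minttl (120 s).  At or above the
 * threshold ttl is forced to 0, so every stacked entry whose refcount
 * can be dropped from 1 to 0 is reclaimed.
 */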

struct inet_peer *inet_getpeer(struct inet_peer_base *base,
			       const struct inetpeer_addr *daddr,
			       int create)
{
	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
	struct rb_node **pp, *parent;
	unsigned int gc_cnt, seq;
	int invalidated;

	/* Attempt a lockless lookup first.
	 * Because of a concurrent writer, we might not find an existing entry.
	 */
	rcu_read_lock();
	seq = read_seqbegin(&base->lock);
	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
	invalidated = read_seqretry(&base->lock, seq);
	rcu_read_unlock();

	if (p)
		return p;

	/* If no writer did a change during our lookup, we can return early. */
	if (!create && !invalidated)
		return NULL;

	/* retry an exact lookup, taking the lock before.
	 * At least, nodes should be hot in our cache.
	 */
	parent = NULL;
	write_seqlock_bh(&base->lock);

	gc_cnt = 0;
	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
	if (!p && create) {
		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
		if (p) {
			p->daddr = *daddr;
			p->dtime = (__u32)jiffies;
			refcount_set(&p->refcnt, 2);
			atomic_set(&p->rid, 0);
			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
			p->rate_tokens = 0;
			p->n_redirects = 0;
			/* 60*HZ is arbitrary, but chosen high enough so that
			 * the first calculation of tokens is at its maximum.
			 */
			p->rate_last = jiffies - 60*HZ;

			rb_link_node(&p->rb_node, parent, pp);
			rb_insert_color(&p->rb_node, &base->rb_root);
			base->total++;
		}
	}
	if (gc_cnt)
		inet_peer_gc(base, gc_stack, gc_cnt);
	write_sequnlock_bh(&base->lock);

	return p;
}
EXPORT_SYMBOL_GPL(inet_getpeer);
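
/* Illustrative sketch (not part of this file): how a caller such as the
 * ICMPv4 rate limiter typically drives inet_getpeer().  inet_getpeer_v4()
 * from net/inetpeer.h merely fills an inetpeer_addr key and forwards to
 * inet_getpeer(base, &daddr, create); the surrounding variables (net, fl4,
 * vif) are assumed caller context.
 *
 *	struct inet_peer *peer;
 *	bool allow;
 *
 *	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
 *	allow = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
 *	if (peer)
 *		inet_putpeer(peer);
 */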

void inet_putpeer(struct inet_peer *p)
{
	/* The WRITE_ONCE() pairs with itself (we run lockless)
	 * and the READ_ONCE() in inet_peer_gc()
	 */
	WRITE_ONCE(p->dtime, (__u32)jiffies);

	if (refcount_dec_and_test(&p->refcnt))
		call_rcu(&p->rcu, inetpeer_free_rcu);
}
EXPORT_SYMBOL_GPL(inet_putpeer);

/*
 *	Check transmit rate limitation for given message.
 *	The rate information is held in the inet_peer entries now.
 *	This function is generic and could be used for other purposes
 *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
 *
 *	Note that the same inet_peer fields are modified by functions in
 *	route.c too, but these work for packet destinations while xrlim_allow
 *	works for icmp destinations. This means the rate limiting information
 *	for one "ip object" is shared - and these ICMPs are twice limited:
 *	by source and by destination.
 *
 *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
 *			  SHOULD allow setting of rate limits
 *
 *	Shared between ICMPv4 and ICMPv6.
 */
#define XRLIM_BURST_FACTOR 6
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
{
	unsigned long now, token;
	bool rc = false;

	if (!peer)
		return true;

	token = peer->rate_tokens;
	now = jiffies;
	token += now - peer->rate_last;
	peer->rate_last = now;
	if (token > XRLIM_BURST_FACTOR * timeout)
		token = XRLIM_BURST_FACTOR * timeout;
	if (token >= timeout) {
		token -= timeout;
		rc = true;
	}
	peer->rate_tokens = token;
	return rc;
}
EXPORT_SYMBOL(inet_peer_xrlim_allow);
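
/* Worked example of the token bucket above, assuming timeout == 1 * HZ
 * (one message per second): tokens accumulate one per jiffy of idle time
 * and are capped at XRLIM_BURST_FACTOR * timeout == 6 * HZ, so a peer
 * that has been quiet may send a burst of six messages back to back;
 * after that each further message needs another full timeout worth of
 * accumulated idle time before inet_peer_xrlim_allow() returns true again.
 */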

void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
	struct rb_node *p = rb_first(&base->rb_root);

	while (p) {
		struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);

		p = rb_next(p);
		rb_erase(&peer->rb_node, &base->rb_root);
		inet_putpeer(peer);

		cond_resched();
	}

	base->total = 0;
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);