2005-04-17 02:20:36 +04:00
/*
*		INETPEER - A storage for permanent information about peers
*
* This source is covered by the GNU GPL, the same as all kernel sources.
*
* Authors:	Andrey V. Savochkin <saw@msu.ru>
*/
# include <linux/module.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/interrupt.h>
# include <linux/spinlock.h>
# include <linux/random.h>
# include <linux/timer.h>
# include <linux/time.h>
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/net.h>
2005-08-16 09:18:02 +04:00
# include <net/ip.h>
2005-04-17 02:20:36 +04:00
# include <net/inetpeer.h>
/*
* Theory of operations.
* We keep one entry for each peer IP address. The nodes contain long-living
* information about the peer which doesn't depend on routes.
* At this moment this information consists only of the ID field for the next
* outgoing IP packet. This field is incremented with each packet as encoded
* in the inet_getid() function (include/net/inetpeer.h).
* At the time of writing these notes, the identifier of IP packets is
* generated to be unpredictable using this code only for packets subjected
* (actually or potentially) to defragmentation, i.e. DF packets smaller than
* the PMTU use a constant ID and do not use this code (see
* ip_select_ident() in include/net/ip.h).
*
* Route cache entries hold references to our nodes.
* New cache entries get references via lookup by destination IP address in
* the avl tree. The reference is grabbed only when it's needed, i.e. only
* when we try to output an IP packet which needs an unpredictable ID (see
* __ip_select_ident() in net/ipv4/route.c).
* Nodes are removed only when the reference counter goes to 0.
* Once that has happened, the node may be removed when a sufficient amount of
* time has passed since its last use. The least-recently-used entry can
* also be removed if the pool is overloaded, i.e. if the total number of
* entries is greater than or equal to the threshold.
*
* Node pool is organised as an AVL tree.
* Such an implementation has been chosen not just for fun. It's a way to
* prevent easy and efficient DoS attacks by creating hash collisions. A huge
* number of long-living nodes in a single hash slot would significantly delay
* lookups performed with disabled BHs.
*
* Serialisation issues .
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
* 1. Nodes may appear in the tree only with the pool lock held.
* 2. Nodes may disappear from the tree only with the pool lock held
*    AND reference count being 0.
* 3. Nodes appear in and disappear from the unused node list only under
*    "unused_peers.lock".
* 4. The per-pool counter (inet_peer_base.total) is modified under the pool lock.
* 5. struct inet_peer fields modification:
*		avl_left, avl_right, avl_parent, avl_height: pool lock
*		unused: unused node list lock
*		refcnt: atomically against modifications on other CPU;
*		   usually under some other lock to prevent node disappearing
*		dtime: unused node list lock
*		daddr: unchangeable
*		ip_id_count: atomic value (no lock needed)
*/
2006-12-07 07:33:20 +03:00
/* Slab cache sized for struct inet_peer; created once in inet_initpeers(). */
static struct kmem_cache * peer_cachep __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
 * Height of the AVL subtree rooted at node x.
 * The argument and the whole expansion are parenthesized so that the macro
 * is safe with arbitrary pointer expressions (e.g. node_height(p + 1)).
 */
#define node_height(x) ((x)->avl_height)
2010-06-14 23:35:21 +04:00
# define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
2010-10-26 03:55:38 +04:00
# define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
2010-06-14 23:35:21 +04:00
static const struct inet_peer peer_fake_node = {
2010-10-26 03:55:38 +04:00
. avl_left = peer_avl_empty_rcu ,
. avl_right = peer_avl_empty_rcu ,
2005-04-17 02:20:36 +04:00
. avl_height = 0
} ;
2010-06-14 23:35:21 +04:00
2010-11-30 23:12:23 +03:00
struct inet_peer_base {
2010-10-26 03:55:38 +04:00
struct inet_peer __rcu * root ;
2011-03-05 01:33:59 +03:00
seqlock_t lock ;
2010-06-14 23:35:21 +04:00
int total ;
2010-11-30 23:12:23 +03:00
} ;
static struct inet_peer_base v4_peers = {
2010-10-26 03:55:38 +04:00
. root = peer_avl_empty_rcu ,
2011-03-05 01:33:59 +03:00
. lock = __SEQLOCK_UNLOCKED ( v4_peers . lock ) ,
2010-06-14 23:35:21 +04:00
. total = 0 ,
} ;
2010-11-30 23:12:23 +03:00
static struct inet_peer_base v6_peers = {
. root = peer_avl_empty_rcu ,
2011-03-05 01:33:59 +03:00
. lock = __SEQLOCK_UNLOCKED ( v6_peers . lock ) ,
2010-11-30 23:12:23 +03:00
. total = 0 ,
} ;
2005-04-17 02:20:36 +04:00
/* Upper bound on the tree depth: sufficient for about 2^27 nodes. */
#define PEER_MAXDEPTH 40

/* Exported for sysctl_net_ipv4. */

/* Start to throw entries more aggressively at this stage. */
int inet_peer_threshold __read_mostly = 65536 + 128;
/* TTL under high load: 120 sec */
int inet_peer_minttl __read_mostly = 120 * HZ;
/* Usual time to live: 10 min */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;
int inet_peer_gc_mintime __read_mostly = 10 * HZ;
int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
2005-04-17 02:20:36 +04:00
2010-06-14 23:35:21 +04:00
/* List of unused peer nodes together with the spinlock that guards it. */
static struct {
	struct list_head	list;
	spinlock_t		lock;
} unused_peers = {
	.lock	= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
	.list	= LIST_HEAD_INIT(unused_peers.list),
};
2005-04-17 02:20:36 +04:00
static void peer_check_expire ( unsigned long dummy ) ;
2005-09-10 00:10:40 +04:00
static DEFINE_TIMER ( peer_periodic_timer , peer_check_expire , 0 , 0 ) ;
2005-04-17 02:20:36 +04:00
/*
 * Boot-time initialisation; called from ip_output.c:ip_init.
 *
 * Scales inet_peer_threshold down on small-memory machines, creates the
 * slab cache for struct inet_peer and arms the periodic timer.
 */
void __init inet_initpeers(void)
{
	struct sysinfo si;
	unsigned long jitter;

	/* Use the straight interface to information about memory. */
	si_meminfo(&si);

	/* The values below were suggested by Alexey Kuznetsov
	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
	 * myself.  --SAW
	 *
	 * The limits are strictly decreasing, so the checks are nested: each
	 * inner reduction applies on top of the outer ones.
	 */
	if (si.totalram <= (32768 * 1024) / PAGE_SIZE) {
		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
		if (si.totalram <= (16384 * 1024) / PAGE_SIZE) {
			inet_peer_threshold >>= 1; /* about 512KB */
			if (si.totalram <= (8192 * 1024) / PAGE_SIZE)
				inet_peer_threshold >>= 2; /* about 128KB */
		}
	}

	peer_cachep = kmem_cache_create("inet_peer_cache",
					sizeof(struct inet_peer),
					0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
					NULL);

	/* All the timers, started at system startup tend
	 * to synchronize. Perturb it a bit.
	 */
	jitter = net_random() % inet_peer_gc_maxtime;
	peer_periodic_timer.expires = jiffies + jitter + inet_peer_gc_maxtime;
	add_timer(&peer_periodic_timer);
}
/* Called with or without local BH being disabled. */
static void unlink_from_unused ( struct inet_peer * p )
{
2010-06-14 23:35:21 +04:00
if ( ! list_empty ( & p - > unused ) ) {
spin_lock_bh ( & unused_peers . lock ) ;
list_del_init ( & p - > unused ) ;
spin_unlock_bh ( & unused_peers . lock ) ;
}
2005-04-17 02:20:36 +04:00
}
2010-12-02 04:28:18 +03:00
static int addr_compare ( const struct inetpeer_addr * a ,
const struct inetpeer_addr * b )
2010-11-30 23:08:53 +03:00
{
int i , n = ( a - > family = = AF_INET ? 1 : 4 ) ;
for ( i = 0 ; i < n ; i + + ) {
2011-02-10 01:30:26 +03:00
if ( a - > addr . a6 [ i ] = = b - > addr . a6 [ i ] )
2010-11-30 23:08:53 +03:00
continue ;
2011-02-10 01:30:26 +03:00
if ( a - > addr . a6 [ i ] < b - > addr . a6 [ i ] )
2010-11-30 23:08:53 +03:00
return - 1 ;
return 1 ;
}
return 0 ;
}
2011-03-05 01:33:59 +03:00
/*
 * Dereference the __rcu pointer PTR from update-side code; lockdep is told
 * that the spinlock embedded in POOL's seqlock must be held.
 */
#define rcu_deref_locked(PTR, POOL) \
	rcu_dereference_protected(PTR, lockdep_is_held(&(POOL)->lock.lock))
2007-03-07 07:23:10 +03:00
/*
 * Locked tree lookup.
 * Called with local BH disabled and the pool lock held.
 *
 * Walks the tree of _base looking for _daddr and evaluates to the matching
 * node, or to peer_avl_empty if there is none.
 *
 * Side effect: relies on a variable named `stackptr` in the caller's scope.
 * It is reset to _stack, and the address of every link followed (starting
 * with &_base->root) is pushed through it, so that afterwards
 * [_stack, stackptr) describes the path from the root to the final link.
 *
 * Every macro parameter is parenthesized at its point of use so that
 * non-trivial argument expressions bind as the caller intended.
 */
#define lookup(_daddr, _stack, _base) \
({ \
	struct inet_peer *u; \
	struct inet_peer __rcu **v; \
 \
	stackptr = (_stack); \
	*stackptr++ = &(_base)->root; \
	for (u = rcu_deref_locked((_base)->root, (_base)); \
	     u != peer_avl_empty; ) { \
		int cmp = addr_compare((_daddr), &u->daddr); \
		if (cmp == 0) \
			break; \
		if (cmp == -1) \
			v = &u->avl_left; \
		else \
			v = &u->avl_right; \
		*stackptr++ = v; \
		u = rcu_deref_locked(*v, (_base)); \
	} \
	u; \
})
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/*
2011-03-09 01:59:28 +03:00
* Called with rcu_read_lock ( )
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
* Because we hold no lock against a writer , its quite possible we fall
* in an endless loop .
* But every pointer we follow is guaranteed to be valid thanks to RCU .
* We exit from this function if number of links exceeds PEER_MAXDEPTH
*/
2011-03-09 01:59:28 +03:00
static struct inet_peer * lookup_rcu ( const struct inetpeer_addr * daddr ,
struct inet_peer_base * base )
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
{
2011-03-09 01:59:28 +03:00
struct inet_peer * u = rcu_dereference ( base - > root ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
int count = 0 ;
while ( u ! = peer_avl_empty ) {
2010-11-30 23:08:53 +03:00
int cmp = addr_compare ( daddr , & u - > daddr ) ;
if ( cmp = = 0 ) {
2010-06-16 08:47:39 +04:00
/* Before taking a reference, check if this entry was
* deleted , unlink_from_pool ( ) sets refcnt = - 1 to make
* distinction between an unused entry ( refcnt = 0 ) and
* a freed one .
*/
if ( unlikely ( ! atomic_add_unless ( & u - > refcnt , 1 , - 1 ) ) )
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
u = NULL ;
return u ;
}
2010-11-30 23:08:53 +03:00
if ( cmp = = - 1 )
2011-03-09 01:59:28 +03:00
u = rcu_dereference ( u - > avl_left ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
else
2011-03-09 01:59:28 +03:00
u = rcu_dereference ( u - > avl_right ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
if ( unlikely ( + + count = = PEER_MAXDEPTH ) )
break ;
}
return NULL ;
}
/*
 * Called with local BH disabled and the pool lock held.
 *
 * Starting from the left child of `start`, follows avl_right links until it
 * reaches a node whose right link is the empty sentinel, and evaluates to
 * that node.
 *
 * Side effect: pushes the address of every link followed (beginning with
 * &start->avl_left) through the caller-scope variable `stackptr`.
 *
 * Both macro parameters are parenthesized at their point of use so that
 * non-trivial argument expressions bind as the caller intended.
 */
#define lookup_rightempty(start, base) \
({ \
	struct inet_peer *u; \
	struct inet_peer __rcu **v; \
 \
	*stackptr++ = &(start)->avl_left; \
	v = &(start)->avl_left; \
	for (u = rcu_deref_locked(*v, (base)); \
	     u->avl_right != peer_avl_empty_rcu; ) { \
		v = &u->avl_right; \
		*stackptr++ = v; \
		u = rcu_deref_locked(*v, (base)); \
	} \
	u; \
})
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/* Called with local BH disabled and the pool lock held.
2005-04-17 02:20:36 +04:00
* Variable names are the proof of operation correctness .
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
* Look into mm / map_avl . c for more detail description of the ideas .
*/
2010-10-26 03:55:38 +04:00
static void peer_avl_rebalance ( struct inet_peer __rcu * * stack [ ] ,
2010-11-30 22:41:59 +03:00
struct inet_peer __rcu * * * stackend ,
struct inet_peer_base * base )
2005-04-17 02:20:36 +04:00
{
2010-10-26 03:55:38 +04:00
struct inet_peer __rcu * * nodep ;
struct inet_peer * node , * l , * r ;
2005-04-17 02:20:36 +04:00
int lh , rh ;
while ( stackend > stack ) {
nodep = * - - stackend ;
2011-03-05 01:33:59 +03:00
node = rcu_deref_locked ( * nodep , base ) ;
l = rcu_deref_locked ( node - > avl_left , base ) ;
r = rcu_deref_locked ( node - > avl_right , base ) ;
2005-04-17 02:20:36 +04:00
lh = node_height ( l ) ;
rh = node_height ( r ) ;
if ( lh > rh + 1 ) { /* l: RH+2 */
struct inet_peer * ll , * lr , * lrl , * lrr ;
int lrh ;
2011-03-05 01:33:59 +03:00
ll = rcu_deref_locked ( l - > avl_left , base ) ;
lr = rcu_deref_locked ( l - > avl_right , base ) ;
2005-04-17 02:20:36 +04:00
lrh = node_height ( lr ) ;
if ( lrh < = node_height ( ll ) ) { /* ll: RH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( node - > avl_left , lr ) ; /* lr: RH or RH+1 */
RCU_INIT_POINTER ( node - > avl_right , r ) ; /* r: RH */
2005-04-17 02:20:36 +04:00
node - > avl_height = lrh + 1 ; /* RH+1 or RH+2 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( l - > avl_left , ll ) ; /* ll: RH+1 */
RCU_INIT_POINTER ( l - > avl_right , node ) ; /* node: RH+1 or RH+2 */
2005-04-17 02:20:36 +04:00
l - > avl_height = node - > avl_height + 1 ;
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( * nodep , l ) ;
2005-04-17 02:20:36 +04:00
} else { /* ll: RH, lr: RH+1 */
2011-03-05 01:33:59 +03:00
lrl = rcu_deref_locked ( lr - > avl_left , base ) ; /* lrl: RH or RH-1 */
lrr = rcu_deref_locked ( lr - > avl_right , base ) ; /* lrr: RH or RH-1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( node - > avl_left , lrr ) ; /* lrr: RH or RH-1 */
RCU_INIT_POINTER ( node - > avl_right , r ) ; /* r: RH */
2005-04-17 02:20:36 +04:00
node - > avl_height = rh + 1 ; /* node: RH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( l - > avl_left , ll ) ; /* ll: RH */
RCU_INIT_POINTER ( l - > avl_right , lrl ) ; /* lrl: RH or RH-1 */
2005-04-17 02:20:36 +04:00
l - > avl_height = rh + 1 ; /* l: RH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( lr - > avl_left , l ) ; /* l: RH+1 */
RCU_INIT_POINTER ( lr - > avl_right , node ) ; /* node: RH+1 */
2005-04-17 02:20:36 +04:00
lr - > avl_height = rh + 2 ;
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( * nodep , lr ) ;
2005-04-17 02:20:36 +04:00
}
} else if ( rh > lh + 1 ) { /* r: LH+2 */
struct inet_peer * rr , * rl , * rlr , * rll ;
int rlh ;
2011-03-05 01:33:59 +03:00
rr = rcu_deref_locked ( r - > avl_right , base ) ;
rl = rcu_deref_locked ( r - > avl_left , base ) ;
2005-04-17 02:20:36 +04:00
rlh = node_height ( rl ) ;
if ( rlh < = node_height ( rr ) ) { /* rr: LH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( node - > avl_right , rl ) ; /* rl: LH or LH+1 */
RCU_INIT_POINTER ( node - > avl_left , l ) ; /* l: LH */
2005-04-17 02:20:36 +04:00
node - > avl_height = rlh + 1 ; /* LH+1 or LH+2 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( r - > avl_right , rr ) ; /* rr: LH+1 */
RCU_INIT_POINTER ( r - > avl_left , node ) ; /* node: LH+1 or LH+2 */
2005-04-17 02:20:36 +04:00
r - > avl_height = node - > avl_height + 1 ;
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( * nodep , r ) ;
2005-04-17 02:20:36 +04:00
} else { /* rr: RH, rl: RH+1 */
2011-03-05 01:33:59 +03:00
rlr = rcu_deref_locked ( rl - > avl_right , base ) ; /* rlr: LH or LH-1 */
rll = rcu_deref_locked ( rl - > avl_left , base ) ; /* rll: LH or LH-1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( node - > avl_right , rll ) ; /* rll: LH or LH-1 */
RCU_INIT_POINTER ( node - > avl_left , l ) ; /* l: LH */
2005-04-17 02:20:36 +04:00
node - > avl_height = lh + 1 ; /* node: LH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( r - > avl_right , rr ) ; /* rr: LH */
RCU_INIT_POINTER ( r - > avl_left , rlr ) ; /* rlr: LH or LH-1 */
2005-04-17 02:20:36 +04:00
r - > avl_height = lh + 1 ; /* r: LH+1 */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( rl - > avl_right , r ) ; /* r: LH+1 */
RCU_INIT_POINTER ( rl - > avl_left , node ) ; /* node: LH+1 */
2005-04-17 02:20:36 +04:00
rl - > avl_height = lh + 2 ;
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( * nodep , rl ) ;
2005-04-17 02:20:36 +04:00
}
} else {
node - > avl_height = ( lh > rh ? lh : rh ) + 1 ;
}
}
}
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if the target is in the AVL tree, because a
concurrent writer can leave the tree in an inconsistent state.
If this attempt fails, the lock is taken and a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/*
 * link_to_pool - insert the new node @n as a leaf and rebalance the tree.
 *
 * Called with local BH disabled and the pool lock held.
 *
 * Besides its arguments the macro uses two variables from the caller's
 * scope: "stack" (the array of visited links) and "stackptr", which must
 * point one past the link that is to receive @n.
 * NOTE(review): presumably a preceding lookup() miss leaves stackptr in
 * that state; lookup() is defined elsewhere, confirm there.
 *
 * @n is parenthesised so any pointer-valued expression can be passed; it is
 * still evaluated more than once, so it must have no side effects.
 */
#define link_to_pool(n, base)					\
do {								\
	(n)->avl_height = 1;					\
	(n)->avl_left = peer_avl_empty_rcu;			\
	(n)->avl_right = peer_avl_empty_rcu;			\
	/* lockless readers can catch us now */			\
	rcu_assign_pointer(**--stackptr, (n));			\
	peer_avl_rebalance(stack, stackptr, base);		\
} while (0)
2005-04-17 02:20:36 +04:00
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if the target is in the AVL tree, because a
concurrent writer can leave the tree in an inconsistent state.
If this attempt fails, the lock is taken and a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/*
 * RCU callback: give the inet_peer that embeds @head back to peer_cachep.
 * Passed to call_rcu() by unlink_from_pool().
 */
static void inetpeer_free_rcu(struct rcu_head *head)
{
	struct inet_peer *peer = container_of(head, struct inet_peer, rcu);

	kmem_cache_free(peer_cachep, peer);
}
2005-04-17 02:20:36 +04:00
/* May be called with local BH enabled. */
2011-04-12 02:39:40 +04:00
static void unlink_from_pool ( struct inet_peer * p , struct inet_peer_base * base ,
struct inet_peer __rcu * * stack [ PEER_MAXDEPTH ] )
2005-04-17 02:20:36 +04:00
{
int do_free ;
do_free = 0 ;
2011-03-05 01:33:59 +03:00
write_seqlock_bh ( & base - > lock ) ;
2005-04-17 02:20:36 +04:00
/* Check the reference counter. It was artificially incremented by 1
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
* in cleanup ( ) function to prevent sudden disappearing . If we can
* atomically ( because of lockless readers ) take this last reference ,
* it ' s safe to remove the node and free it later .
2010-06-16 08:47:39 +04:00
* We use refcnt = - 1 to alert lockless readers this entry is deleted .
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
*/
2010-06-16 08:47:39 +04:00
if ( atomic_cmpxchg ( & p - > refcnt , 1 , - 1 ) = = 1 ) {
2010-10-26 03:55:38 +04:00
struct inet_peer __rcu * * * stackptr , * * * delp ;
2010-11-30 23:08:53 +03:00
if ( lookup ( & p - > daddr , stack , base ) ! = p )
2005-04-17 02:20:36 +04:00
BUG ( ) ;
delp = stackptr - 1 ; /* *delp[0] == p */
2010-10-26 03:55:38 +04:00
if ( p - > avl_left = = peer_avl_empty_rcu ) {
2005-04-17 02:20:36 +04:00
* delp [ 0 ] = p - > avl_right ;
- - stackptr ;
} else {
/* look for a node to insert instead of p */
struct inet_peer * t ;
2010-11-30 22:41:59 +03:00
t = lookup_rightempty ( p , base ) ;
2011-03-05 01:33:59 +03:00
BUG_ON ( rcu_deref_locked ( * stackptr [ - 1 ] , base ) ! = t ) ;
2005-04-17 02:20:36 +04:00
* * - - stackptr = t - > avl_left ;
2010-11-30 22:53:55 +03:00
/* t is removed, t->daddr > x->daddr for any
2005-04-17 02:20:36 +04:00
* x in p - > avl_left subtree .
* Put t in the old place of p . */
2010-10-26 03:55:38 +04:00
RCU_INIT_POINTER ( * delp [ 0 ] , t ) ;
2005-04-17 02:20:36 +04:00
t - > avl_left = p - > avl_left ;
t - > avl_right = p - > avl_right ;
t - > avl_height = p - > avl_height ;
2006-01-09 09:24:28 +03:00
BUG_ON ( delp [ 1 ] ! = & p - > avl_left ) ;
2005-04-17 02:20:36 +04:00
delp [ 1 ] = & t - > avl_left ; /* was &p->avl_left */
}
2010-11-30 22:41:59 +03:00
peer_avl_rebalance ( stack , stackptr , base ) ;
base - > total - - ;
2005-04-17 02:20:36 +04:00
do_free = 1 ;
}
2011-03-05 01:33:59 +03:00
write_sequnlock_bh ( & base - > lock ) ;
2005-04-17 02:20:36 +04:00
if ( do_free )
2011-03-14 09:22:23 +03:00
call_rcu ( & p - > rcu , inetpeer_free_rcu ) ;
2005-04-17 02:20:36 +04:00
else
/* The node is used again. Decrease the reference counter
* back . The loop " cleanup -> unlink_from_unused
* - > unlink_from_pool - > putpeer - > link_to_unused
* - > cleanup ( for the same node ) "
* doesn ' t really exist because the entry will have a
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
* recent deletion time and will not be cleaned again soon .
*/
2005-04-17 02:20:36 +04:00
inet_putpeer ( p ) ;
}
2010-11-30 23:12:23 +03:00
/* Map an address family to its peer pool: v4_peers for AF_INET, else v6_peers. */
static struct inet_peer_base *family_to_base(int family)
{
	if (family == AF_INET)
		return &v4_peers;

	return &v6_peers;
}
2010-11-30 22:41:59 +03:00
static struct inet_peer_base * peer_to_base ( struct inet_peer * p )
{
2010-11-30 23:12:23 +03:00
return family_to_base ( p - > daddr . family ) ;
2010-11-30 22:41:59 +03:00
}
2005-04-17 02:20:36 +04:00
/* May be called with local BH enabled. */
2011-04-12 02:39:40 +04:00
static int cleanup_once ( unsigned long ttl , struct inet_peer __rcu * * stack [ PEER_MAXDEPTH ] )
2005-04-17 02:20:36 +04:00
{
2007-11-13 08:27:28 +03:00
struct inet_peer * p = NULL ;
2005-04-17 02:20:36 +04:00
/* Remove the first entry from the list of unused nodes. */
2010-06-14 23:35:21 +04:00
spin_lock_bh ( & unused_peers . lock ) ;
if ( ! list_empty ( & unused_peers . list ) ) {
2007-11-13 08:27:28 +03:00
__u32 delta ;
2010-06-14 23:35:21 +04:00
p = list_first_entry ( & unused_peers . list , struct inet_peer , unused ) ;
2007-11-13 08:27:28 +03:00
delta = ( __u32 ) jiffies - p - > dtime ;
2006-10-13 08:21:06 +04:00
if ( delta < ttl ) {
2005-04-17 02:20:36 +04:00
/* Do not prune fresh entries. */
2010-06-14 23:35:21 +04:00
spin_unlock_bh ( & unused_peers . lock ) ;
2005-04-17 02:20:36 +04:00
return - 1 ;
}
2007-11-13 08:27:28 +03:00
list_del_init ( & p - > unused ) ;
2005-04-17 02:20:36 +04:00
/* Grab an extra reference to prevent node disappearing
* before unlink_from_pool ( ) call . */
atomic_inc ( & p - > refcnt ) ;
}
2010-06-14 23:35:21 +04:00
spin_unlock_bh ( & unused_peers . lock ) ;
2005-04-17 02:20:36 +04:00
if ( p = = NULL )
/* It means that the total number of USED entries has
* grown over inet_peer_threshold . It shouldn ' t really
* happen because of entry limits in route cache . */
return - 1 ;
2011-04-12 02:39:40 +04:00
unlink_from_pool ( p , peer_to_base ( p ) , stack ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
/* Called with or without local BH being disabled. */
2010-12-02 04:28:18 +03:00
struct inet_peer * inet_getpeer ( struct inetpeer_addr * daddr , int create )
2005-04-17 02:20:36 +04:00
{
2010-10-26 03:55:38 +04:00
struct inet_peer __rcu * * stack [ PEER_MAXDEPTH ] , * * * stackptr ;
2011-01-25 01:37:46 +03:00
struct inet_peer_base * base = family_to_base ( daddr - > family ) ;
2010-11-30 22:41:59 +03:00
struct inet_peer * p ;
2011-03-05 01:33:59 +03:00
unsigned int sequence ;
int invalidated ;
2005-04-17 02:20:36 +04:00
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/* Look up for the address quickly, lockless.
* Because of a concurrent writer , we might not find an existing entry .
*/
2011-03-09 01:59:28 +03:00
rcu_read_lock ( ) ;
2011-03-05 01:33:59 +03:00
sequence = read_seqbegin ( & base - > lock ) ;
2011-03-09 01:59:28 +03:00
p = lookup_rcu ( daddr , base ) ;
2011-03-05 01:33:59 +03:00
invalidated = read_seqretry ( & base - > lock , sequence ) ;
2011-03-09 01:59:28 +03:00
rcu_read_unlock ( ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
if ( p ) {
/* The existing node has been found.
* Remove the entry from unused list if it was there .
*/
unlink_from_unused ( p ) ;
return p ;
}
2005-04-17 02:20:36 +04:00
2011-03-05 01:33:59 +03:00
/* If no writer did a change during our lookup, we can return early. */
if ( ! create & & ! invalidated )
return NULL ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
/* retry an exact lookup, taking the lock before.
* At least , nodes should be hot in our cache .
*/
2011-03-05 01:33:59 +03:00
write_seqlock_bh ( & base - > lock ) ;
2010-11-30 23:08:53 +03:00
p = lookup ( daddr , stack , base ) ;
2005-04-17 02:20:36 +04:00
if ( p ! = peer_avl_empty ) {
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
atomic_inc ( & p - > refcnt ) ;
2011-03-05 01:33:59 +03:00
write_sequnlock_bh ( & base - > lock ) ;
2005-04-17 02:20:36 +04:00
/* Remove the entry from unused list if it was there. */
unlink_from_unused ( p ) ;
return p ;
}
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
p = create ? kmem_cache_alloc ( peer_cachep , GFP_ATOMIC ) : NULL ;
if ( p ) {
2010-11-30 22:54:19 +03:00
p - > daddr = * daddr ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
atomic_set ( & p - > refcnt , 1 ) ;
atomic_set ( & p - > rid , 0 ) ;
2011-02-10 01:30:26 +03:00
atomic_set ( & p - > ip_id_count , secure_ip_id ( daddr - > addr . a4 ) ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
p - > tcp_ts_stamp = 0 ;
2011-01-28 00:52:16 +03:00
p - > metrics [ RTAX_LOCK - 1 ] = INETPEER_METRICS_NEW ;
2011-02-05 02:55:25 +03:00
p - > rate_tokens = 0 ;
p - > rate_last = 0 ;
2011-02-10 02:36:47 +03:00
p - > pmtu_expires = 0 ;
2011-03-09 23:09:58 +03:00
p - > pmtu_orig = 0 ;
2011-02-10 02:36:47 +03:00
memset ( & p - > redirect_learned , 0 , sizeof ( p - > redirect_learned ) ) ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
INIT_LIST_HEAD ( & p - > unused ) ;
/* Link the node. */
2010-11-30 22:41:59 +03:00
link_to_pool ( p , base ) ;
base - > total + + ;
inetpeer: RCU conversion
inetpeer currently uses an AVL tree protected by an rwlock.
It's possible to make most lookups use RCU
1) Add a struct rcu_head to struct inet_peer
2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().
3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.
4) add an smp_wmb() in link_to_pool() right before node insert.
5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.
6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.
7) inet_getpeer() first attempts lockless lookup.
Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
If this attemps fails, lock is taken a regular lookup is performed
again.
8) convert peers.lock from rwlock to a spinlock
9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-15 12:23:14 +04:00
}
2011-03-05 01:33:59 +03:00
write_sequnlock_bh ( & base - > lock ) ;
2005-04-17 02:20:36 +04:00
2010-11-30 22:41:59 +03:00
if ( base - > total > = inet_peer_threshold )
2005-04-17 02:20:36 +04:00
/* Remove one less-recently-used entry. */
2011-04-12 02:39:40 +04:00
cleanup_once ( 0 , stack ) ;
2005-04-17 02:20:36 +04:00
return p ;
}
2010-11-30 22:41:59 +03:00
static int compute_total ( void )
{
2010-11-30 23:12:23 +03:00
return v4_peers . total + v6_peers . total ;
2010-11-30 22:41:59 +03:00
}
2010-11-30 23:27:11 +03:00
EXPORT_SYMBOL_GPL ( inet_getpeer ) ;
2010-11-30 22:41:59 +03:00
2005-04-17 02:20:36 +04:00
/* Called with local BH disabled. */
static void peer_check_expire ( unsigned long dummy )
{
2006-10-13 08:21:06 +04:00
unsigned long now = jiffies ;
2010-11-30 22:41:59 +03:00
int ttl , total ;
2011-04-12 02:39:40 +04:00
struct inet_peer __rcu * * stack [ PEER_MAXDEPTH ] ;
2005-04-17 02:20:36 +04:00
2010-11-30 22:41:59 +03:00
total = compute_total ( ) ;
if ( total > = inet_peer_threshold )
2005-04-17 02:20:36 +04:00
ttl = inet_peer_minttl ;
else
ttl = inet_peer_maxttl
- ( inet_peer_maxttl - inet_peer_minttl ) / HZ *
2010-11-30 22:41:59 +03:00
total / inet_peer_threshold * HZ ;
2011-04-12 02:39:40 +04:00
while ( ! cleanup_once ( ttl , stack ) ) {
2006-10-13 08:21:06 +04:00
if ( jiffies ! = now )
break ;
}
2005-04-17 02:20:36 +04:00
/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
* interval depending on the total number of entries ( more entries ,
* less interval ) . */
2010-11-30 22:41:59 +03:00
total = compute_total ( ) ;
if ( total > = inet_peer_threshold )
2005-08-23 21:10:15 +04:00
peer_periodic_timer . expires = jiffies + inet_peer_gc_mintime ;
else
peer_periodic_timer . expires = jiffies
+ inet_peer_gc_maxtime
- ( inet_peer_gc_maxtime - inet_peer_gc_mintime ) / HZ *
2010-11-30 22:41:59 +03:00
total / inet_peer_threshold * HZ ;
2005-04-17 02:20:36 +04:00
add_timer ( & peer_periodic_timer ) ;
}
2006-10-13 08:21:06 +04:00
void inet_putpeer ( struct inet_peer * p )
{
2010-06-14 23:35:21 +04:00
local_bh_disable ( ) ;
if ( atomic_dec_and_lock ( & p - > refcnt , & unused_peers . lock ) ) {
list_add_tail ( & p - > unused , & unused_peers . list ) ;
2006-10-13 08:21:06 +04:00
p - > dtime = ( __u32 ) jiffies ;
2010-06-14 23:35:21 +04:00
spin_unlock ( & unused_peers . lock ) ;
2006-10-13 08:21:06 +04:00
}
2010-06-14 23:35:21 +04:00
local_bh_enable ( ) ;
2006-10-13 08:21:06 +04:00
}
2010-11-30 23:27:11 +03:00
EXPORT_SYMBOL_GPL ( inet_putpeer ) ;
2011-02-05 02:55:25 +03:00
/*
* Check transmit rate limitation for given message .
* The rate information is held in the inet_peer entries now .
* This function is generic and could be used for other purposes
* too . It uses a Token bucket filter as suggested by Alexey Kuznetsov .
*
* Note that the same inet_peer fields are modified by functions in
* route . c too , but these work for packet destinations while xrlim_allow
* works for icmp destinations . This means the rate limiting information
* for one " ip object " is shared - and these ICMPs are twice limited :
* by source and by destination .
*
* RFC 1812 : 4.3 .2 .8 SHOULD be able to limit error message rate
* SHOULD allow setting of rate limits
*
* Shared between ICMPv4 and ICMPv6 .
*/
# define XRLIM_BURST_FACTOR 6
bool inet_peer_xrlim_allow ( struct inet_peer * peer , int timeout )
{
unsigned long now , token ;
bool rc = false ;
if ( ! peer )
return true ;
token = peer - > rate_tokens ;
now = jiffies ;
token + = now - peer - > rate_last ;
peer - > rate_last = now ;
if ( token > XRLIM_BURST_FACTOR * timeout )
token = XRLIM_BURST_FACTOR * timeout ;
if ( token > = timeout ) {
token - = timeout ;
rc = true ;
}
peer - > rate_tokens = token ;
return rc ;
}
EXPORT_SYMBOL ( inet_peer_xrlim_allow ) ;