2005-04-17 02:20:36 +04:00
/*
* net / core / dst . c Protocol independent destination cache .
*
* Authors : Alexey Kuznetsov , < kuznet @ ms2 . inr . ac . ru >
*
*/
# include <linux/bitops.h>
# include <linux/errno.h>
# include <linux/init.h>
# include <linux/kernel.h>
# include <linux/mm.h>
# include <linux/module.h>
# include <linux/netdevice.h>
# include <linux/skbuff.h>
# include <linux/string.h>
# include <linux/types.h>
# include <net/dst.h>
/* Locking strategy:
* 1 ) Garbage collection state of dead destination cache
* entries is protected by dst_lock .
* 2 ) GC is run only from BH context , and is the only remover
* of entries .
* 3 ) Entries are added to the garbage list from both BH
* and non - BH context , so local BH disabling is needed .
* 4 ) All operations modify state , so a spinlock is used .
*/
static struct dst_entry * dst_garbage_list ;
2007-02-09 17:24:36 +03:00
# if RT_CACHE_DEBUG >= 2
2005-04-17 02:20:36 +04:00
static atomic_t dst_total = ATOMIC_INIT ( 0 ) ;
# endif
static DEFINE_SPINLOCK ( dst_lock ) ;
static unsigned long dst_gc_timer_expires ;
static unsigned long dst_gc_timer_inc = DST_GC_MAX ;
static void dst_run_gc ( unsigned long ) ;
static void ___dst_free ( struct dst_entry * dst ) ;
2005-09-10 00:10:40 +04:00
static DEFINE_TIMER ( dst_gc_timer , dst_run_gc , DST_GC_MIN , 0 ) ;
2005-04-17 02:20:36 +04:00
static void dst_run_gc ( unsigned long dummy )
{
int delayed = 0 ;
2005-07-31 04:47:25 +04:00
int work_performed ;
2005-04-17 02:20:36 +04:00
struct dst_entry * dst , * * dstp ;
if ( ! spin_trylock ( & dst_lock ) ) {
mod_timer ( & dst_gc_timer , jiffies + HZ / 10 ) ;
return ;
}
del_timer ( & dst_gc_timer ) ;
dstp = & dst_garbage_list ;
2005-07-31 04:47:25 +04:00
work_performed = 0 ;
2005-04-17 02:20:36 +04:00
while ( ( dst = * dstp ) ! = NULL ) {
if ( atomic_read ( & dst - > __refcnt ) ) {
dstp = & dst - > next ;
delayed + + ;
continue ;
}
* dstp = dst - > next ;
2005-07-31 04:47:25 +04:00
work_performed = 1 ;
2005-04-17 02:20:36 +04:00
dst = dst_destroy ( dst ) ;
if ( dst ) {
/* NOHASH and still referenced. Unless it is already
* on gc list , invalidate it and add to gc list .
*
* Note : this is temporary . Actually , NOHASH dst ' s
* must be obsoleted when parent is obsoleted .
* But we do not have state " obsoleted, but
* referenced by parent " , so it is right.
*/
if ( dst - > obsolete > 1 )
continue ;
___dst_free ( dst ) ;
dst - > next = * dstp ;
* dstp = dst ;
dstp = & dst - > next ;
}
}
if ( ! dst_garbage_list ) {
dst_gc_timer_inc = DST_GC_MAX ;
goto out ;
}
2005-07-31 04:47:25 +04:00
if ( ! work_performed ) {
if ( ( dst_gc_timer_expires + = dst_gc_timer_inc ) > DST_GC_MAX )
dst_gc_timer_expires = DST_GC_MAX ;
dst_gc_timer_inc + = DST_GC_INC ;
} else {
dst_gc_timer_inc = DST_GC_INC ;
dst_gc_timer_expires = DST_GC_MIN ;
}
2005-04-17 02:20:36 +04:00
# if RT_CACHE_DEBUG >= 2
printk ( " dst_total: %d/%d %ld \n " ,
atomic_read ( & dst_total ) , delayed , dst_gc_timer_expires ) ;
# endif
2007-02-06 04:59:51 +03:00
/* if the next desired timer is more than 4 seconds in the future
* then round the timer to whole seconds
*/
if ( dst_gc_timer_expires > 4 * HZ )
mod_timer ( & dst_gc_timer ,
round_jiffies ( jiffies + dst_gc_timer_expires ) ) ;
else
mod_timer ( & dst_gc_timer , jiffies + dst_gc_timer_expires ) ;
2005-04-17 02:20:36 +04:00
out :
spin_unlock ( & dst_lock ) ;
}
static int dst_discard_in ( struct sk_buff * skb )
{
kfree_skb ( skb ) ;
return 0 ;
}
static int dst_discard_out ( struct sk_buff * skb )
{
kfree_skb ( skb ) ;
return 0 ;
}
void * dst_alloc ( struct dst_ops * ops )
{
struct dst_entry * dst ;
if ( ops - > gc & & atomic_read ( & ops - > entries ) > ops - > gc_thresh ) {
if ( ops - > gc ( ) )
return NULL ;
}
2007-02-10 12:45:03 +03:00
dst = kmem_cache_zalloc ( ops - > kmem_cachep , GFP_ATOMIC ) ;
2005-04-17 02:20:36 +04:00
if ( ! dst )
return NULL ;
atomic_set ( & dst - > __refcnt , 0 ) ;
dst - > ops = ops ;
dst - > lastuse = jiffies ;
dst - > path = dst ;
dst - > input = dst_discard_in ;
dst - > output = dst_discard_out ;
2007-02-09 17:24:36 +03:00
# if RT_CACHE_DEBUG >= 2
2005-04-17 02:20:36 +04:00
atomic_inc ( & dst_total ) ;
# endif
atomic_inc ( & ops - > entries ) ;
return dst ;
}
static void ___dst_free ( struct dst_entry * dst )
{
/* The first case (dev==NULL) is required, when
protocol module is unloaded .
*/
if ( dst - > dev = = NULL | | ! ( dst - > dev - > flags & IFF_UP ) ) {
dst - > input = dst_discard_in ;
dst - > output = dst_discard_out ;
}
dst - > obsolete = 2 ;
}
void __dst_free ( struct dst_entry * dst )
{
spin_lock_bh ( & dst_lock ) ;
___dst_free ( dst ) ;
dst - > next = dst_garbage_list ;
dst_garbage_list = dst ;
if ( dst_gc_timer_inc > DST_GC_INC ) {
dst_gc_timer_inc = DST_GC_INC ;
dst_gc_timer_expires = DST_GC_MIN ;
mod_timer ( & dst_gc_timer , jiffies + dst_gc_timer_expires ) ;
}
spin_unlock_bh ( & dst_lock ) ;
}
struct dst_entry * dst_destroy ( struct dst_entry * dst )
{
struct dst_entry * child ;
struct neighbour * neigh ;
struct hh_cache * hh ;
smp_rmb ( ) ;
again :
neigh = dst - > neighbour ;
hh = dst - > hh ;
child = dst - > child ;
dst - > hh = NULL ;
if ( hh & & atomic_dec_and_test ( & hh - > hh_refcnt ) )
kfree ( hh ) ;
if ( neigh ) {
dst - > neighbour = NULL ;
neigh_release ( neigh ) ;
}
atomic_dec ( & dst - > ops - > entries ) ;
if ( dst - > ops - > destroy )
dst - > ops - > destroy ( dst ) ;
if ( dst - > dev )
dev_put ( dst - > dev ) ;
2007-02-09 17:24:36 +03:00
# if RT_CACHE_DEBUG >= 2
2005-04-17 02:20:36 +04:00
atomic_dec ( & dst_total ) ;
# endif
kmem_cache_free ( dst - > ops - > kmem_cachep , dst ) ;
dst = child ;
if ( dst ) {
2005-04-17 02:24:10 +04:00
int nohash = dst - > flags & DST_NOHASH ;
2005-04-17 02:20:36 +04:00
if ( atomic_dec_and_test ( & dst - > __refcnt ) ) {
/* We were real parent of this dst, so kill child. */
2005-04-17 02:24:10 +04:00
if ( nohash )
2005-04-17 02:20:36 +04:00
goto again ;
} else {
/* Child is still referenced, return it for freeing. */
2005-04-17 02:24:10 +04:00
if ( nohash )
2005-04-17 02:20:36 +04:00
return dst ;
/* Child is still in his hash table */
}
}
return NULL ;
}
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3 , but we have no choice
* now . _It_ _is_ _explicit_ _deliberate_
* _race_ _condition_ .
*
* Commented and originally written by Alexey .
*/
static inline void dst_ifdown ( struct dst_entry * dst , struct net_device * dev ,
int unregister )
{
if ( dst - > ops - > ifdown )
dst - > ops - > ifdown ( dst , dev , unregister ) ;
if ( dev ! = dst - > dev )
return ;
if ( ! unregister ) {
dst - > input = dst_discard_in ;
dst - > output = dst_discard_out ;
} else {
dst - > dev = & loopback_dev ;
dev_hold ( & loopback_dev ) ;
dev_put ( dev ) ;
if ( dst - > neighbour & & dst - > neighbour - > dev = = dev ) {
dst - > neighbour - > dev = & loopback_dev ;
dev_put ( dev ) ;
dev_hold ( & loopback_dev ) ;
}
}
}
static int dst_dev_event ( struct notifier_block * this , unsigned long event , void * ptr )
{
struct net_device * dev = ptr ;
struct dst_entry * dst ;
switch ( event ) {
case NETDEV_UNREGISTER :
case NETDEV_DOWN :
spin_lock_bh ( & dst_lock ) ;
for ( dst = dst_garbage_list ; dst ; dst = dst - > next ) {
dst_ifdown ( dst , dev , event ! = NETDEV_DOWN ) ;
}
spin_unlock_bh ( & dst_lock ) ;
break ;
}
return NOTIFY_DONE ;
}
static struct notifier_block dst_dev_notifier = {
. notifier_call = dst_dev_event ,
} ;
void __init dst_init ( void )
{
register_netdevice_notifier ( & dst_dev_notifier ) ;
}
EXPORT_SYMBOL ( __dst_free ) ;
EXPORT_SYMBOL ( dst_alloc ) ;
EXPORT_SYMBOL ( dst_destroy ) ;