2005-04-17 02:20:36 +04:00
/*
 * Device round robin policy for multipath.
 *
 *
 * Version:	$Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
 *
 * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
# include <asm/system.h>
# include <asm/uaccess.h>
# include <linux/types.h>
# include <linux/sched.h>
# include <linux/errno.h>
# include <linux/timer.h>
# include <linux/mm.h>
# include <linux/kernel.h>
# include <linux/fcntl.h>
# include <linux/stat.h>
# include <linux/socket.h>
# include <linux/in.h>
# include <linux/inet.h>
# include <linux/netdevice.h>
# include <linux/inetdevice.h>
# include <linux/igmp.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2005-06-14 01:29:06 +04:00
# include <linux/module.h>
2005-04-17 02:20:36 +04:00
# include <linux/mroute.h>
# include <linux/init.h>
# include <net/ip.h>
# include <net/protocol.h>
# include <linux/skbuff.h>
# include <net/sock.h>
# include <net/icmp.h>
# include <net/udp.h>
# include <net/raw.h>
# include <linux/notifier.h>
# include <linux/if_arp.h>
# include <linux/netfilter_ipv4.h>
# include <net/ipip.h>
# include <net/checksum.h>
# include <net/ip_mp_alg.h>
/*
 * One bookkeeping entry of the device round robin policy.
 */
struct multipath_device {
	/* interface index of the tracked device */
	int ifi;
	/* selection counter, bumped through drr_safe_inc() */
	atomic_t usecount;
	/* zero while the entry is free, non-zero while it is in use */
	int allocated;
};
/* Number of entries in state[], i.e. how many devices can be tracked. */
#define MULTIPATH_MAX_DEVICECANDIDATES 10

/* Table of tracked devices; an entry is free while its allocated is 0. */
static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];

/*
 * Taken (with bottom halves disabled) around allocation and release of
 * state[] entries and around the bulk reset of all use counters.
 */
static DEFINE_SPINLOCK(state_lock);
/*
 * Look for a free entry in state[].
 *
 * Returns the index of the first entry whose allocated field is 0, or -1
 * if every entry is in use.  No locking is done here.
 */
static inline int __multipath_findslot(void)
{
	int i;

	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
		if (state[i].allocated == 0)
			return i;
	}

	return -1;
}
/*
 * Look up the state[] entry that tracks a given interface.
 *
 * @ifindex: interface index to search for
 *
 * Returns the index of the first allocated entry whose ifi equals
 * @ifindex, or -1 if there is none.  No locking is done here.
 */
static inline int __multipath_finddev(int ifindex)
{
	int i;

	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
		if (state[i].allocated != 0 &&
		    state[i].ifi == ifindex)
			return i;
	}

	return -1;
}
static int drr_dev_event ( struct notifier_block * this ,
unsigned long event , void * ptr )
{
struct net_device * dev = ptr ;
int devidx ;
switch ( event ) {
case NETDEV_UNREGISTER :
case NETDEV_DOWN :
spin_lock_bh ( & state_lock ) ;
devidx = __multipath_finddev ( dev - > ifindex ) ;
if ( devidx ! = - 1 ) {
state [ devidx ] . allocated = 0 ;
state [ devidx ] . ifi = 0 ;
atomic_set ( & state [ devidx ] . usecount , 0 ) ;
}
spin_unlock_bh ( & state_lock ) ;
break ;
} ;
return NOTIFY_DONE ;
}
2005-08-10 06:35:47 +04:00
static struct notifier_block drr_dev_notifier = {
2005-04-17 02:20:36 +04:00
. notifier_call = drr_dev_event ,
} ;
/*
 * Increment a use counter.
 *
 * If the value read back after the increment is not positive (presumably
 * because the counter wrapped around), the use counters of all state[]
 * entries are reset to 0 under state_lock.
 */
static void drr_safe_inc(atomic_t *usecount)
{
	int slot;

	atomic_inc(usecount);
	if (atomic_read(usecount) > 0)
		return;

	spin_lock_bh(&state_lock);
	for (slot = 0; slot < MULTIPATH_MAX_DEVICECANDIDATES; slot++)
		atomic_set(&state[slot].usecount, 0);
	spin_unlock_bh(&state_lock);
}
static void drr_select_route ( const struct flowi * flp ,
struct rtable * first , struct rtable * * rp )
{
struct rtable * nh , * result , * cur_min ;
int min_usecount = - 1 ;
int devidx = - 1 ;
int cur_min_devidx = - 1 ;
/* 1. make sure all alt. nexthops have the same GC related data */
/* 2. determine the new candidate to be returned */
result = NULL ;
cur_min = NULL ;
for ( nh = rcu_dereference ( first ) ; nh ;
nh = rcu_dereference ( nh - > u . rt_next ) ) {
if ( ( nh - > u . dst . flags & DST_BALANCED ) ! = 0 & &
multipath_comparekeys ( & nh - > fl , flp ) ) {
int nh_ifidx = nh - > u . dst . dev - > ifindex ;
nh - > u . dst . lastuse = jiffies ;
nh - > u . dst . __use + + ;
if ( result ! = NULL )
continue ;
/* search for the output interface */
/* this is not SMP safe, only add/remove are
* SMP safe as wrong usecount updates have no big
* impact
*/
devidx = __multipath_finddev ( nh_ifidx ) ;
if ( devidx = = - 1 ) {
/* add the interface to the array
* SMP safe
*/
spin_lock_bh ( & state_lock ) ;
/* due to SMP: search again */
devidx = __multipath_finddev ( nh_ifidx ) ;
if ( devidx = = - 1 ) {
/* add entry for device */
devidx = __multipath_findslot ( ) ;
if ( devidx = = - 1 ) {
/* unlikely but possible */
continue ;
}
state [ devidx ] . allocated = 1 ;
state [ devidx ] . ifi = nh_ifidx ;
atomic_set ( & state [ devidx ] . usecount , 0 ) ;
min_usecount = 0 ;
}
spin_unlock_bh ( & state_lock ) ;
}
if ( min_usecount = = 0 ) {
/* if the device has not been used it is
* the primary target
*/
drr_safe_inc ( & state [ devidx ] . usecount ) ;
result = nh ;
} else {
int count =
atomic_read ( & state [ devidx ] . usecount ) ;
if ( min_usecount = = - 1 | |
count < min_usecount ) {
cur_min = nh ;
cur_min_devidx = devidx ;
min_usecount = count ;
}
}
}
}
if ( ! result ) {
if ( cur_min ) {
drr_safe_inc ( & state [ cur_min_devidx ] . usecount ) ;
result = cur_min ;
} else {
result = first ;
}
}
* rp = result ;
}
/* Operations handed to the multipath core; only route selection is set. */
static struct ip_mp_alg_ops drr_ops = {
	.mp_alg_select_route	= drr_select_route,
};
/*
 * Module initialisation.
 *
 * Registers the netdevice notifier first and then the algorithm under
 * IP_MP_ALG_DRR.  If the second registration fails the notifier is
 * unregistered again.
 *
 * Returns 0 on success, otherwise the error of the failing registration.
 */
static int __init drr_init(void)
{
	int err;

	err = register_netdevice_notifier(&drr_dev_notifier);
	if (err)
		return err;

	err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
	if (err)
		unregister_netdevice_notifier(&drr_dev_notifier);

	return err;
}
/*
 * Module teardown: unregisters the netdevice notifier, then the
 * algorithm registered under IP_MP_ALG_DRR.
 */
static void __exit drr_exit(void)
{
	unregister_netdevice_notifier(&drr_dev_notifier);
	multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
}
module_init ( drr_init ) ;
module_exit ( drr_exit ) ;
2005-06-14 01:29:06 +04:00
MODULE_LICENSE ( " GPL " ) ;