2005-04-17 02:20:36 +04:00
/*
* Weighted random policy for multipath .
*
*
* Version : $ Id : multipath_wrandom . c , v 1.1 .2 .3 2004 / 09 / 22 07 : 51 : 40 elueck Exp $
*
* Authors : Einar Lueck < elueck @ de . ibm . com > < lkml @ einar - lueck . de >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# include <asm/system.h>
# include <asm/uaccess.h>
# include <linux/types.h>
# include <linux/errno.h>
# include <linux/timer.h>
# include <linux/mm.h>
# include <linux/kernel.h>
# include <linux/fcntl.h>
# include <linux/stat.h>
# include <linux/socket.h>
# include <linux/in.h>
# include <linux/inet.h>
# include <linux/netdevice.h>
# include <linux/inetdevice.h>
# include <linux/igmp.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2005-06-14 01:29:06 +04:00
# include <linux/module.h>
2005-04-17 02:20:36 +04:00
# include <linux/mroute.h>
# include <linux/init.h>
# include <net/ip.h>
# include <net/protocol.h>
# include <linux/skbuff.h>
# include <net/sock.h>
# include <net/icmp.h>
# include <net/udp.h>
# include <net/raw.h>
# include <linux/notifier.h>
# include <linux/if_arp.h>
# include <linux/netfilter_ipv4.h>
# include <net/ipip.h>
# include <net/checksum.h>
# include <net/ip_fib.h>
# include <net/ip_mp_alg.h>
/*
 * Number of buckets in the state table below. Bucket indices are computed
 * as an interface index modulo this value.
 */
# define MULTIPATH_STATE_SIZE 15
/*
 * One node of the temporary, singly linked candidate list that
 * wrandom_select_route() builds and frees during a single selection.
 */
struct multipath_candidate {
/* next candidate in the list, NULL for the last node */
struct multipath_candidate * next ;
/* running total of the scaled weights up to and including this node */
int power ;
/* the route this candidate stands for */
struct rtable * rt ;
} ;
/*
 * Weight record for one destination, linked into the dests list of a
 * struct multipath_route.
 */
struct multipath_dest {
/* linkage in multipath_route.dests */
struct list_head list ;
/* next hop this record was stored for; its nh_weight is the weight returned by lookups */
const struct fib_nh * nh_info ;
/* netmask, network and prefixlen are matched against the flow's destination in __multipath_lookup_weight() */
2006-09-27 09:25:13 +04:00
__be32 netmask ;
__be32 network ;
2005-04-17 02:20:36 +04:00
unsigned char prefixlen ;
/* passed to call_rcu() so the record is freed by __multipath_free_dst() */
struct rcu_head rcu ;
} ;
/* One slot of the state table. */
struct multipath_bucket {
/* list of struct multipath_route entries stored in this slot */
struct list_head head ;
/* taken (with bottom halves disabled) by the code that adds to or flushes the list */
spinlock_t lock ;
} ;
/*
 * State for one (gateway, outgoing interface) pair; entries are matched
 * on both gw and oif.
 */
struct multipath_route {
/* linkage in multipath_bucket.head */
struct list_head list ;
/* outgoing interface index (compared with nh_oif / the route's device ifindex) */
int oif ;
2006-09-27 09:25:13 +04:00
/* gateway address (compared with nh_gw / rt_gateway) */
__be32 gw ;
2005-04-17 02:20:36 +04:00
/* list of struct multipath_dest records for this route */
struct list_head dests ;
/* passed to call_rcu() so the entry is freed by __multipath_free() */
struct rcu_head rcu ;
} ;
/*
 * state: primarily weight per route information.
 * The slot for a route is selected by its interface index modulo
 * MULTIPATH_STATE_SIZE.
 */
static struct multipath_bucket state [ MULTIPATH_STATE_SIZE ] ;
/* interface to random number generation */

/* Current generator state; rewritten on every call to random(). */
static unsigned int RANDOM_SEED = 93186752;

/*
 * Advance the generator and return a value reduced into [0, ubound).
 *
 * @ubound: exclusive upper bound of the result.
 *
 * Returns RANDOM_SEED % ubound after the seed update, or 0 when ubound
 * is 0 (a modulo by zero would otherwise be a division fault).
 *
 * The seed is a plain file-scope variable updated without any locking.
 */
static inline unsigned int random(unsigned int ubound)
{
	static const unsigned int a = 1588635695,
				  q = 2,
				  r = 1117695901;

	/* unsigned arithmetic: wrap-around is well defined and intended */
	RANDOM_SEED = a * (RANDOM_SEED % q) - r * (RANDOM_SEED / q);

	/* callers may pass 0 when there is nothing to choose from */
	if (ubound == 0)
		return 0;

	return RANDOM_SEED % ubound;
}
static unsigned char __multipath_lookup_weight ( const struct flowi * fl ,
const struct rtable * rt )
{
const int state_idx = rt - > idev - > dev - > ifindex % MULTIPATH_STATE_SIZE ;
struct multipath_route * r ;
struct multipath_route * target_route = NULL ;
struct multipath_dest * d ;
int weight = 1 ;
/* lookup the weight information for a certain route */
rcu_read_lock ( ) ;
/* find state entry for gateway or add one if necessary */
list_for_each_entry_rcu ( r , & state [ state_idx ] . head , list ) {
if ( r - > gw = = rt - > rt_gateway & &
r - > oif = = rt - > idev - > dev - > ifindex ) {
target_route = r ;
break ;
}
}
if ( ! target_route ) {
/* this should not happen... but we are prepared */
printk ( KERN_CRIT " %s: missing state for gateway: %u and " \
" device %d \n " , __FUNCTION__ , rt - > rt_gateway ,
rt - > idev - > dev - > ifindex ) ;
goto out ;
}
/* find state entry for destination */
list_for_each_entry_rcu ( d , & target_route - > dests , list ) {
2006-09-27 09:26:21 +04:00
__be32 targetnetwork = fl - > fl4_dst &
inet_make_mask ( d - > prefixlen ) ;
2005-04-17 02:20:36 +04:00
if ( ( targetnetwork & d - > netmask ) = = d - > network ) {
weight = d - > nh_info - > nh_weight ;
goto out ;
}
}
out :
rcu_read_unlock ( ) ;
return weight ;
}
2007-02-09 17:24:47 +03:00
static void wrandom_init_state ( void )
2005-04-17 02:20:36 +04:00
{
int i ;
for ( i = 0 ; i < MULTIPATH_STATE_SIZE ; + + i ) {
INIT_LIST_HEAD ( & state [ i ] . head ) ;
spin_lock_init ( & state [ i ] . lock ) ;
}
}
static void wrandom_select_route ( const struct flowi * flp ,
struct rtable * first ,
struct rtable * * rp )
{
struct rtable * rt ;
struct rtable * decision ;
struct multipath_candidate * first_mpc = NULL ;
struct multipath_candidate * mpc , * last_mpc = NULL ;
int power = 0 ;
int last_power ;
int selector ;
const size_t size_mpc = sizeof ( struct multipath_candidate ) ;
/* collect all candidates and identify their weights */
for ( rt = rcu_dereference ( first ) ; rt ;
2007-02-11 03:57:03 +03:00
rt = rcu_dereference ( rt - > u . dst . rt_next ) ) {
2005-04-17 02:20:36 +04:00
if ( ( rt - > u . dst . flags & DST_BALANCED ) ! = 0 & &
multipath_comparekeys ( & rt - > fl , flp ) ) {
struct multipath_candidate * mpc =
( struct multipath_candidate * )
2005-05-06 01:30:15 +04:00
kmalloc ( size_mpc , GFP_ATOMIC ) ;
2005-04-17 02:20:36 +04:00
if ( ! mpc )
return ;
power + = __multipath_lookup_weight ( flp , rt ) * 10000 ;
mpc - > power = power ;
mpc - > rt = rt ;
mpc - > next = NULL ;
if ( ! first_mpc )
first_mpc = mpc ;
else
last_mpc - > next = mpc ;
last_mpc = mpc ;
}
}
/* choose a weighted random candidate */
decision = first ;
selector = random ( power ) ;
last_power = 0 ;
/* select candidate, adjust GC data and cleanup local state */
decision = first ;
last_mpc = NULL ;
for ( mpc = first_mpc ; mpc ; mpc = mpc - > next ) {
mpc - > rt - > u . dst . lastuse = jiffies ;
if ( last_power < = selector & & selector < mpc - > power )
decision = mpc - > rt ;
last_power = mpc - > power ;
2005-11-08 20:41:34 +03:00
kfree ( last_mpc ) ;
2005-04-17 02:20:36 +04:00
last_mpc = mpc ;
}
2005-11-08 20:41:34 +03:00
/* concurrent __multipath_flush may lead to !last_mpc */
kfree ( last_mpc ) ;
2005-04-17 02:20:36 +04:00
decision - > u . dst . __use + + ;
* rp = decision ;
}
2006-09-27 09:22:50 +04:00
/*
 * Store the weight information for a certain route.
 *
 * @network:   network address of the destination
 * @netmask:   netmask of the destination
 * @prefixlen: prefix length of the destination
 * @nh:        next hop; nh_gw and nh_oif identify the route entry, and the
 *             pointer itself identifies the destination record
 *
 * Finds (or creates) the route entry for nh's gateway and outgoing
 * interface in the state slot selected by nh_oif, then finds (or creates)
 * the destination record that refers to nh. Everything happens under the
 * slot's spinlock with bottom halves disabled, hence the GFP_ATOMIC
 * allocations.
 *
 * The function cannot report errors. If an allocation fails it stops and
 * leaves the state as it is at that point; a route entry that was just
 * added stays in place with no record for nh. A later lookup that finds
 * no matching destination record falls back to weight 1.
 */
static void wrandom_set_nhinfo(__be32 network,
			       __be32 netmask,
			       unsigned char prefixlen,
			       const struct fib_nh *nh)
{
	const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
	struct multipath_route *r, *target_route = NULL;
	struct multipath_dest *d, *target_dest = NULL;

	spin_lock_bh(&state[state_idx].lock);

	/* find state entry for gateway or add one if necessary */
	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
		if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
			target_route = r;
			break;
		}
	}

	if (!target_route) {
		target_route = kmalloc(sizeof(*target_route), GFP_ATOMIC);
		if (!target_route)
			goto out;	/* atomic allocation may fail */

		target_route->gw = nh->nh_gw;
		target_route->oif = nh->nh_oif;
		memset(&target_route->rcu, 0, sizeof(struct rcu_head));
		INIT_LIST_HEAD(&target_route->dests);

		list_add_rcu(&target_route->list, &state[state_idx].head);
	}

	/* find state entry for destination or add one if necessary */
	list_for_each_entry_rcu(d, &target_route->dests, list) {
		if (d->nh_info == nh) {
			target_dest = d;
			break;
		}
	}

	if (!target_dest) {
		target_dest = kmalloc(sizeof(*target_dest), GFP_ATOMIC);
		if (!target_dest)
			goto out;	/* atomic allocation may fail */

		target_dest->nh_info = nh;
		target_dest->network = network;
		target_dest->netmask = netmask;
		target_dest->prefixlen = prefixlen;
		memset(&target_dest->rcu, 0, sizeof(struct rcu_head));

		list_add_rcu(&target_dest->list, &target_route->dests);
	}
	/* else: we already stored this info for another destination =>
	 * we are finished
	 */

out:
	spin_unlock_bh(&state[state_idx].lock);
}
/*
 * RCU callback: release the struct multipath_route that embeds head as
 * its rcu member.
 */
static void __multipath_free(struct rcu_head *head)
{
	kfree(container_of(head, struct multipath_route, rcu));
}
static void __multipath_free_dst ( struct rcu_head * head )
{
2007-02-09 17:24:47 +03:00
struct multipath_dest * dst = container_of ( head ,
2005-04-17 02:20:36 +04:00
struct multipath_dest ,
rcu ) ;
kfree ( dst ) ;
}
static void wrandom_flush ( void )
{
int i ;
/* defere delete to all entries */
for ( i = 0 ; i < MULTIPATH_STATE_SIZE ; + + i ) {
struct multipath_route * r ;
2006-02-03 03:59:16 +03:00
spin_lock_bh ( & state [ i ] . lock ) ;
2005-04-17 02:20:36 +04:00
list_for_each_entry_rcu ( r , & state [ i ] . head , list ) {
struct multipath_dest * d ;
list_for_each_entry_rcu ( d , & r - > dests , list ) {
list_del_rcu ( & d - > list ) ;
call_rcu ( & d - > rcu ,
__multipath_free_dst ) ;
}
list_del_rcu ( & r - > list ) ;
call_rcu ( & r - > rcu ,
__multipath_free ) ;
}
2006-02-03 03:59:16 +03:00
spin_unlock_bh ( & state [ i ] . lock ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
 * Callback table of this policy; registered and unregistered under
 * IP_MP_ALG_WRANDOM by wrandom_init() / wrandom_exit().
 */
static struct ip_mp_alg_ops wrandom_ops = {
. mp_alg_select_route = wrandom_select_route ,
. mp_alg_flush = wrandom_flush ,
. mp_alg_set_nhinfo = wrandom_set_nhinfo ,
} ;
/*
 * Module entry point: initialize the state table, then register the
 * callback table under IP_MP_ALG_WRANDOM.
 *
 * Returns whatever multipath_alg_register() returns.
 */
static int __init wrandom_init(void)
{
	int rc;

	wrandom_init_state();
	rc = multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);

	return rc;
}
/*
 * Module exit point: unregister the callback table that wrandom_init()
 * registered under IP_MP_ALG_WRANDOM.
 */
static void __exit wrandom_exit ( void )
{
multipath_alg_unregister ( & wrandom_ops , IP_MP_ALG_WRANDOM ) ;
}
/* Declare the module's entry and exit functions and its license. */
module_init ( wrandom_init ) ;
module_exit ( wrandom_exit ) ;
2005-06-14 01:29:06 +04:00
MODULE_LICENSE ( " GPL " ) ;