/*
 * net/sched/gen_estimator.c	Simple rate estimator.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *		Jamal Hadi Salim - moved it to net/core and reshuffled
 *		names to make it usable in the general net subsystem.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rbtree.h>
#include <net/sock.h>
#include <net/gen_stats.h>

/*
   This code is NOT intended to be used for statistics collection,
   its purpose is to provide a base for statistical multiplexing
   for controlled load service.
   If you need only statistics, run a user level daemon which
   periodically reads byte counters.

   Unfortunately, rate estimation is not a very easy task.
   F.e. I did not find a simple way to estimate the current peak rate
   and even failed to formulate the problem 8)8)

   So I preferred not to build an estimator into the scheduler,
   but to run this task separately.
   Ideally, it should be kernel thread(s), but for now it runs
   from timers, which puts an apparent upper bound on the number of rated
   flows, has minimal overhead for small numbers of flows, and is enough
   to handle controlled load service and sets of aggregates.

   We measure rate over A=(1<<interval) seconds and evaluate EWMA:

   avrate = avrate*(1-W) + rate*W

   where W is chosen as a negative power of 2: W = 2^(-ewma_log)

   The resulting time constant is:

   T = A/(-ln(1-W))

   NOTES.

   * avbps is scaled by 2^5, avpps is scaled by 2^10.
   * both values are reported as 32 bit unsigned values. bps can
     overflow for fast links: max speed being 34360 Mbit/sec
   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
     for HZ=100 and HZ=1024 8)), maximal interval
     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
     are too expensive, longer ones can be implemented
     at user level painlessly.
 */
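
/*
 * Worked example (illustrative, derived from est_timer() below): with
 * interval = 0 (A = 1 sec, so idx = 2) and ewma_log = 3 (W = 1/8), the
 * fixed-point update performed once per second amounts to
 *
 *	brate  = (nbytes - last_bytes) << 5;	bytes/sec, scaled by 2^5
 *	avbps += (brate - avbps) >> 3;		avbps = avbps*7/8 + brate/8
 *	bps    = (avbps + 0xF) >> 5;		undo the 2^5 scaling
 *
 * giving a time constant of T = A/(-ln(1-W)) = 1/(-ln(7/8)) ~= 7.5 sec.
 */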

#define EST_MAX_INTERVAL	5

struct gen_estimator
{
	struct list_head	list;
	struct gnet_stats_basic	*bstats;
	struct gnet_stats_rate_est	*rate_est;
	spinlock_t		*stats_lock;
	int			ewma_log;
	u64			last_bytes;
	u64			avbps;
	u32			last_packets;
	u32			avpps;
	struct rcu_head		e_rcu;
	struct rb_node		node;
};

struct gen_estimator_head
{
	struct timer_list	timer;
	struct list_head	list;
};

static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
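
/*
 * elist[] is indexed by idx = parm->interval + 2, so parm->interval in
 * [-2, 3] selects idx 0..EST_MAX_INTERVAL with a timer period of
 * (HZ/4) << idx jiffies: 250 ms, 500 ms, 1 s, 2 s, 4 s or 8 s.
 */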
/* Protects against NULL dereference */
static DEFINE_RWLOCK(est_lock);
/* Protects against soft lockup during large deletion */
static struct rb_root est_root = RB_ROOT;

static void est_timer(unsigned long arg)
{
	int idx = (int)arg;
	struct gen_estimator *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &elist[idx].list, list) {
		u64 nbytes;
		u64 brate;
		u32 npackets;
		u32 rate;

		spin_lock(e->stats_lock);
		read_lock(&est_lock);
		if (e->bstats == NULL)
			goto skip;

		nbytes = e->bstats->bytes;
		npackets = e->bstats->packets;
		brate = (nbytes - e->last_bytes)<<(7 - idx);
		e->last_bytes = nbytes;
		e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log;
		e->rate_est->bps = (e->avbps+0xF)>>5;

		rate = (npackets - e->last_packets)<<(12 - idx);
		e->last_packets = npackets;
		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
		e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
		read_unlock(&est_lock);
		spin_unlock(e->stats_lock);
	}

	if (!list_empty(&elist[idx].list))
		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
	rcu_read_unlock();
}
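
/*
 * Estimators are also kept in an rbtree keyed by the address of their
 * bstats block, so gen_kill_estimator() and gen_estimator_active() can
 * find them without scanning every elist[] list.
 */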
static void gen_add_node(struct gen_estimator *est)
{
	struct rb_node **p = &est_root.rb_node, *parent = NULL;

	while (*p) {
		struct gen_estimator *e;

		parent = *p;
		e = rb_entry(parent, struct gen_estimator, node);

		if (est->bstats > e->bstats)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&est->node, parent, p);
	rb_insert_color(&est->node, &est_root);
}

static
struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
				    const struct gnet_stats_rate_est *rate_est)
{
	struct rb_node *p = est_root.rb_node;

	while (p) {
		struct gen_estimator *e;

		e = rb_entry(p, struct gen_estimator, node);

		if (bstats > e->bstats)
			p = p->rb_right;
		else if (bstats < e->bstats || rate_est != e->rate_est)
			p = p->rb_left;
		else
			return e;
	}
	return NULL;
}

/**
 * gen_new_estimator - create a new rate estimator
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @opt: rate estimator configuration TLV
 *
 * Creates a new rate estimator with &bstats as source and &rate_est
 * as destination. A new timer with the interval specified in the
 * configuration TLV is created. Upon each interval, the latest statistics
 * will be read from &bstats and the estimated rate will be stored in
 * &rate_est with the statistics lock grabbed during this period.
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE: Called under rtnl_mutex
 */
int gen_new_estimator(struct gnet_stats_basic *bstats,
		      struct gnet_stats_rate_est *rate_est,
		      spinlock_t *stats_lock,
		      struct nlattr *opt)
{
	struct gen_estimator *est;
	struct gnet_estimator *parm = nla_data(opt);
	int idx;

	if (nla_len(opt) < sizeof(*parm))
		return -EINVAL;

	if (parm->interval < -2 || parm->interval > 3)
		return -EINVAL;

	est = kzalloc(sizeof(*est), GFP_KERNEL);
	if (est == NULL)
		return -ENOBUFS;

	idx = parm->interval + 2;
	est->bstats = bstats;
	est->rate_est = rate_est;
	est->stats_lock = stats_lock;
	est->ewma_log = parm->ewma_log;
	est->last_bytes = bstats->bytes;
	est->avbps = rate_est->bps<<5;
	est->last_packets = bstats->packets;
	est->avpps = rate_est->pps<<10;

	if (!elist[idx].timer.function) {
		INIT_LIST_HEAD(&elist[idx].list);
		setup_timer(&elist[idx].timer, est_timer, idx);
	}

	if (list_empty(&elist[idx].list))
		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));

	list_add_rcu(&est->list, &elist[idx].list);
	gen_add_node(est);

	return 0;
}
EXPORT_SYMBOL(gen_new_estimator);
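
/*
 * Usage sketch (an assumption, based on how callers such as
 * net/sched/sch_api.c typically use this API; the names below come from
 * that caller, not from this file): a qdisc attaches an estimator to its
 * counters roughly like
 *
 *	err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 *				qdisc_root_sleeping_lock(sch),
 *				tca[TCA_RATE]);
 *
 * and removes it with gen_kill_estimator(&sch->bstats, &sch->rate_est)
 * when the qdisc is destroyed.
 */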

static void __gen_kill_estimator(struct rcu_head *head)
{
	struct gen_estimator *e = container_of(head,
					struct gen_estimator, e_rcu);
	kfree(e);
}

/**
 * gen_kill_estimator - remove a rate estimator
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 *
 * Removes the rate estimator specified by &bstats and &rate_est.
 *
 * NOTE: Called under rtnl_mutex
 */
void gen_kill_estimator(struct gnet_stats_basic *bstats,
			struct gnet_stats_rate_est *rate_est)
{
	struct gen_estimator *e;

	while ((e = gen_find_node(bstats, rate_est))) {
		rb_erase(&e->node, &est_root);

		write_lock_bh(&est_lock);
		e->bstats = NULL;
		write_unlock_bh(&est_lock);

		list_del_rcu(&e->list);
		call_rcu(&e->e_rcu, __gen_kill_estimator);
	}
}
EXPORT_SYMBOL(gen_kill_estimator);

/**
 * gen_replace_estimator - replace rate estimator configuration
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @opt: rate estimator configuration TLV
 *
 * Replaces the configuration of a rate estimator by calling
 * gen_kill_estimator() and gen_new_estimator().
 *
 * Returns 0 on success or a negative error code.
 */
int gen_replace_estimator(struct gnet_stats_basic *bstats,
			  struct gnet_stats_rate_est *rate_est,
			  spinlock_t *stats_lock, struct nlattr *opt)
{
	gen_kill_estimator(bstats, rate_est);
	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

/**
 * gen_estimator_active - test if estimator is currently in use
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 *
 * Returns true if estimator is active, and false if not.
 */
bool gen_estimator_active(const struct gnet_stats_basic *bstats,
			  const struct gnet_stats_rate_est *rate_est)
{
	ASSERT_RTNL();

	return gen_find_node(bstats, rate_est) != NULL;
}
EXPORT_SYMBOL(gen_estimator_active);