/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f - t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i <= k:

	s_i + .... + s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in the queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.


	Actually, QoS requires two TBF to be applied to a data stream.
	One of them controls the steady-state burst size, the other
	one, with rate P (peak rate) and depth M (equal to link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P > R and B > M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awoken by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.

	This means that, with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/
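
/* Worked example (added for illustration, using the figures above): with
 * HZ=100 a throttled TBF may sleep for 1/HZ = 10ms. At R = 10Mbit/s
 * (~1.25Mbyte/s) about 12.5Kbytes worth of tokens accumulate in that time,
 * so the bucket depth B has to be roughly that large (the "~10Kbytes"
 * quoted above). For the peak bucket, depth M = MTU = 1500 bytes gives
 * P_crit = 1500 * HZ = 150Kbytes/sec, again matching the note above.
 */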

struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long		tokens;		/* Current number of B tokens */
	long		ptokens;	/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
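
/* Note (added): tokens are measured in time units (psched ticks), not
 * bytes. L2T()/L2T_P() look up, in the rate table supplied by userspace,
 * how long it takes to send L bytes at the configured rate/peakrate, so
 * "enough tokens" means "enough accumulated transmission time".
 */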

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

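	/* A packet larger than max_size could never gather enough tokens
	 * (its cost exceeds the bucket depth), so hand it to
	 * qdisc_reshape_fail() instead of queueing it.
	 */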
	if (qdisc_pkt_len(skb) > q->max_size)
		return qdisc_reshape_fail(skb, sch);

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != 0) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	sch->bstats.packets++;
	return 0;
}

static unsigned int tbf_drop(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

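		/* Both buckets must be non-negative for the packet to be
		 * sent now; as toks and ptoks are signed longs, their
		 * bitwise OR is >= 0 exactly when neither value is negative.
		 */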
		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

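		/* Not enough tokens: arm the watchdog to fire once the more
		 * depleted bucket will have refilled, i.e. after the larger
		 * of the two deficits (-toks, -ptoks), in psched time.
		 */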
		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but, however, this is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

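	/* Derive max_size: rate-table slot n holds the time needed to send a
	 * packet whose size falls in cell n (cells are 2^cell_log bytes
	 * wide), so the scans below find the largest packet whose cost still
	 * fits in the configured buffer (and, with a peak rate, in mtu).
	 */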
	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer) break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu) break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size) max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	swap(q->R_tab, rtab);
	swap(q->P_tab, ptab);

	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops =
{
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.get		=	tbf_get,
	.put		=	tbf_put,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};

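/* Note (added): ->peek below is served by the generic qdisc_peek_dequeued()
 * helper, which dequeues TBF's next packet and caches it, so an outer qdisc
 * can look at the head-of-line packet without losing it.
 */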
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	tbf_drop,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");