2005-04-17 02:20:36 +04:00
/*
* net / sched / sch_red . c Random Early Detection queue .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*
* Authors : Alexey Kuznetsov , < kuznet @ ms2 . inr . ac . ru >
*
* Changes :
2005-11-05 23:14:08 +03:00
* J Hadi Salim 980914 : computation fixes
2005-04-17 02:20:36 +04:00
* Alexey Makarenko < makar @ phoenix . kharkov . ua > 990814 : qave on idle link was calculated incorrectly .
2005-11-05 23:14:08 +03:00
* J Hadi Salim 980816 : ECN support
2005-04-17 02:20:36 +04:00
*/
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/skbuff.h>
# include <net/pkt_sched.h>
# include <net/inet_ecn.h>
2005-11-05 23:14:05 +03:00
# include <net/red.h>
2005-04-17 02:20:36 +04:00
2005-11-05 23:14:05 +03:00
/* Parameters, settable by user:
2005-04-17 02:20:36 +04:00
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	limit	- bytes (must be > qth_max + burst)
	Hard limit on queue length; it should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	In practice, this limit will never be reached
	if RED works correctly.
*/
2011-01-19 22:26:56 +03:00
struct red_sched_data {
2005-11-05 23:14:05 +03:00
u32 limit ; /* HARD maximal queue length */
unsigned char flags ;
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
struct timer_list adapt_timer ;
2005-11-05 23:14:05 +03:00
struct red_parms parms ;
2012-01-05 06:25:16 +04:00
struct red_vars vars ;
2005-11-05 23:14:05 +03:00
struct red_stats stats ;
2006-03-21 06:20:44 +03:00
struct Qdisc * qdisc ;
2005-04-17 02:20:36 +04:00
} ;
2005-11-05 23:14:05 +03:00
static inline int red_use_ecn ( struct red_sched_data * q )
2005-04-17 02:20:36 +04:00
{
2005-11-05 23:14:05 +03:00
return q - > flags & TC_RED_ECN ;
2005-04-17 02:20:36 +04:00
}
2005-11-05 23:14:28 +03:00
static inline int red_use_harddrop ( struct red_sched_data * q )
{
return q - > flags & TC_RED_HARDDROP ;
}
2011-01-19 22:26:56 +03:00
static int red_enqueue ( struct sk_buff * skb , struct Qdisc * sch )
2005-04-17 02:20:36 +04:00
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2006-03-21 06:20:44 +03:00
struct Qdisc * child = q - > qdisc ;
int ret ;
2005-04-17 02:20:36 +04:00
2012-01-05 06:25:16 +04:00
q - > vars . qavg = red_calc_qavg ( & q - > parms ,
& q - > vars ,
child - > qstats . backlog ) ;
2005-04-17 02:20:36 +04:00
2012-01-05 06:25:16 +04:00
if ( red_is_idling ( & q - > vars ) )
red_end_of_idle_period ( & q - > vars ) ;
2005-04-17 02:20:36 +04:00
2012-01-05 06:25:16 +04:00
switch ( red_action ( & q - > parms , & q - > vars , q - > vars . qavg ) ) {
2011-01-19 22:26:56 +03:00
case RED_DONT_MARK :
break ;
case RED_PROB_MARK :
sch - > qstats . overlimits + + ;
if ( ! red_use_ecn ( q ) | | ! INET_ECN_set_ce ( skb ) ) {
q - > stats . prob_drop + + ;
goto congestion_drop ;
}
q - > stats . prob_mark + + ;
break ;
case RED_HARD_MARK :
sch - > qstats . overlimits + + ;
if ( red_use_harddrop ( q ) | | ! red_use_ecn ( q ) | |
! INET_ECN_set_ce ( skb ) ) {
q - > stats . forced_drop + + ;
goto congestion_drop ;
}
q - > stats . forced_mark + + ;
break ;
2005-04-17 02:20:36 +04:00
}
2008-07-20 11:08:04 +04:00
ret = qdisc_enqueue ( skb , child ) ;
2006-03-21 06:20:44 +03:00
if ( likely ( ret = = NET_XMIT_SUCCESS ) ) {
sch - > q . qlen + + ;
2008-08-05 09:31:03 +04:00
} else if ( net_xmit_drop_count ( ret ) ) {
2006-03-21 06:20:44 +03:00
q - > stats . pdrop + + ;
sch - > qstats . drops + + ;
}
return ret ;
2005-11-05 23:14:05 +03:00
congestion_drop :
2005-11-05 23:14:06 +03:00
qdisc_drop ( skb , sch ) ;
2005-04-17 02:20:36 +04:00
return NET_XMIT_CN ;
}
2011-01-19 22:26:56 +03:00
static struct sk_buff * red_dequeue ( struct Qdisc * sch )
2005-04-17 02:20:36 +04:00
{
struct sk_buff * skb ;
struct red_sched_data * q = qdisc_priv ( sch ) ;
2006-03-21 06:20:44 +03:00
struct Qdisc * child = q - > qdisc ;
2005-04-17 02:20:36 +04:00
2006-03-21 06:20:44 +03:00
skb = child - > dequeue ( child ) ;
2011-01-21 10:31:33 +03:00
if ( skb ) {
qdisc_bstats_update ( sch , skb ) ;
2006-03-21 06:20:44 +03:00
sch - > q . qlen - - ;
2011-01-21 10:31:33 +03:00
} else {
2012-01-05 06:25:16 +04:00
if ( ! red_is_idling ( & q - > vars ) )
red_start_of_idle_period ( & q - > vars ) ;
2011-01-21 10:31:33 +03:00
}
2005-11-05 23:14:06 +03:00
return skb ;
2005-04-17 02:20:36 +04:00
}
2011-01-19 22:26:56 +03:00
static struct sk_buff * red_peek ( struct Qdisc * sch )
2008-10-31 10:45:55 +03:00
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
struct Qdisc * child = q - > qdisc ;
return child - > ops - > peek ( child ) ;
}
2011-01-19 22:26:56 +03:00
static unsigned int red_drop ( struct Qdisc * sch )
2005-04-17 02:20:36 +04:00
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2006-03-21 06:20:44 +03:00
struct Qdisc * child = q - > qdisc ;
unsigned int len ;
2005-04-17 02:20:36 +04:00
2006-03-21 06:20:44 +03:00
if ( child - > ops - > drop & & ( len = child - > ops - > drop ( child ) ) > 0 ) {
2005-11-05 23:14:05 +03:00
q - > stats . other + + ;
2006-03-21 06:20:44 +03:00
sch - > qstats . drops + + ;
sch - > q . qlen - - ;
2005-04-17 02:20:36 +04:00
return len ;
}
2005-11-05 23:14:05 +03:00
2012-01-05 06:25:16 +04:00
if ( ! red_is_idling ( & q - > vars ) )
red_start_of_idle_period ( & q - > vars ) ;
2005-11-05 23:14:07 +03:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
2011-01-19 22:26:56 +03:00
static void red_reset ( struct Qdisc * sch )
2005-04-17 02:20:36 +04:00
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2006-03-21 06:20:44 +03:00
qdisc_reset ( q - > qdisc ) ;
sch - > q . qlen = 0 ;
2012-01-05 06:25:16 +04:00
red_restart ( & q - > vars ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-21 06:20:44 +03:00
static void red_destroy ( struct Qdisc * sch )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
del_timer_sync ( & q - > adapt_timer ) ;
2006-03-21 06:20:44 +03:00
qdisc_destroy ( q - > qdisc ) ;
}
2008-01-24 07:35:39 +03:00
static const struct nla_policy red_policy [ TCA_RED_MAX + 1 ] = {
[ TCA_RED_PARMS ] = { . len = sizeof ( struct tc_red_qopt ) } ,
[ TCA_RED_STAB ] = { . len = RED_STAB_SIZE } ,
2011-12-09 06:46:45 +04:00
[ TCA_RED_MAX_P ] = { . type = NLA_U32 } ,
2008-01-24 07:35:39 +03:00
} ;
2008-01-23 09:11:17 +03:00
static int red_change ( struct Qdisc * sch , struct nlattr * opt )
2005-04-17 02:20:36 +04:00
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2008-01-23 09:11:17 +03:00
struct nlattr * tb [ TCA_RED_MAX + 1 ] ;
2005-04-17 02:20:36 +04:00
struct tc_red_qopt * ctl ;
2006-03-21 06:20:44 +03:00
struct Qdisc * child = NULL ;
2008-01-24 07:33:32 +03:00
int err ;
2011-12-09 06:46:45 +04:00
u32 max_P ;
2005-04-17 02:20:36 +04:00
2008-01-24 07:33:32 +03:00
if ( opt = = NULL )
2005-11-05 23:14:08 +03:00
return - EINVAL ;
2008-01-24 07:35:39 +03:00
err = nla_parse_nested ( tb , TCA_RED_MAX , opt , red_policy ) ;
2008-01-24 07:33:32 +03:00
if ( err < 0 )
return err ;
2008-01-23 09:11:17 +03:00
if ( tb [ TCA_RED_PARMS ] = = NULL | |
2008-01-24 07:35:39 +03:00
tb [ TCA_RED_STAB ] = = NULL )
2005-04-17 02:20:36 +04:00
return - EINVAL ;
2011-12-09 06:46:45 +04:00
max_P = tb [ TCA_RED_MAX_P ] ? nla_get_u32 ( tb [ TCA_RED_MAX_P ] ) : 0 ;
2008-01-23 09:11:17 +03:00
ctl = nla_data ( tb [ TCA_RED_PARMS ] ) ;
2005-04-17 02:20:36 +04:00
2006-03-21 06:20:44 +03:00
if ( ctl - > limit > 0 ) {
2008-07-06 10:40:21 +04:00
child = fifo_create_dflt ( sch , & bfifo_qdisc_ops , ctl - > limit ) ;
if ( IS_ERR ( child ) )
return PTR_ERR ( child ) ;
2006-03-21 06:20:44 +03:00
}
2005-04-17 02:20:36 +04:00
sch_tree_lock ( sch ) ;
q - > flags = ctl - > flags ;
q - > limit = ctl - > limit ;
2006-11-30 04:36:20 +03:00
if ( child ) {
qdisc_tree_decrease_qlen ( q - > qdisc , q - > qdisc - > q . qlen ) ;
2008-11-20 15:11:36 +03:00
qdisc_destroy ( q - > qdisc ) ;
q - > qdisc = child ;
2006-11-30 04:36:20 +03:00
}
2005-04-17 02:20:36 +04:00
2012-01-05 06:25:16 +04:00
red_set_parms ( & q - > parms ,
ctl - > qth_min , ctl - > qth_max , ctl - > Wlog ,
2011-12-09 06:46:45 +04:00
ctl - > Plog , ctl - > Scell_log ,
nla_data ( tb [ TCA_RED_STAB ] ) ,
max_P ) ;
2012-01-05 06:25:16 +04:00
red_set_vars ( & q - > vars ) ;
2005-11-05 23:14:05 +03:00
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
del_timer ( & q - > adapt_timer ) ;
if ( ctl - > flags & TC_RED_ADAPTATIVE )
mod_timer ( & q - > adapt_timer , jiffies + HZ / 2 ) ;
sch_red: fix red_change
Le mercredi 30 novembre 2011 à 14:36 -0800, Stephen Hemminger a écrit :
> (Almost) nobody uses RED because they can't figure it out.
> According to Wikipedia, VJ says that:
> "there are not one, but two bugs in classic RED."
RED is useful for high throughput routers, I doubt many linux machines
act as such devices.
I was considering adding Adaptative RED (Sally Floyd, Ramakrishna
Gummadi, Scott Shender), August 2001
In this version, maxp is dynamic (from 1% to 50%), and user only have to
setup min_th (target average queue size)
(max_th and wq (burst in linux RED) are automatically setup)
By the way it seems we have a small bug in red_change()
if (skb_queue_empty(&sch->q))
red_end_of_idle_period(&q->parms);
First, if queue is empty, we should call
red_start_of_idle_period(&q->parms);
Second, since we dont use anymore sch->q, but q->qdisc, the test is
meaningless.
Oh well...
[PATCH] sch_red: fix red_change()
Now RED is classful, we must check q->qdisc->q.qlen, and if queue is empty,
we start an idle period, not end it.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-01 15:06:34 +04:00
if ( ! q - > qdisc - > q . qlen )
2012-01-05 06:25:16 +04:00
red_start_of_idle_period ( & q - > vars ) ;
2005-11-05 23:14:08 +03:00
2005-04-17 02:20:36 +04:00
sch_tree_unlock ( sch ) ;
return 0 ;
}
sch_red: Adaptative RED AQM
Adaptative RED AQM for Linux, based on the paper by Sally Floyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(max_th - min_th),
min_th + 0.6*(max_th - min_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow the full range described in the Adaptative paper.
To deliver a random number, we now use a reciprocal divide (that's really
a multiply), but this operation is done once per marked/dropped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
static inline void red_adaptative_timer ( unsigned long arg )
{
struct Qdisc * sch = ( struct Qdisc * ) arg ;
struct red_sched_data * q = qdisc_priv ( sch ) ;
spinlock_t * root_lock = qdisc_lock ( qdisc_root_sleeping ( sch ) ) ;
spin_lock ( root_lock ) ;
2012-01-05 06:25:16 +04:00
red_adaptative_algo ( & q - > parms , & q - > vars ) ;
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
mod_timer ( & q - > adapt_timer , jiffies + HZ / 2 ) ;
spin_unlock ( root_lock ) ;
}
2011-01-19 22:26:56 +03:00
static int red_init ( struct Qdisc * sch , struct nlattr * opt )
2005-04-17 02:20:36 +04:00
{
2006-03-21 06:20:44 +03:00
struct red_sched_data * q = qdisc_priv ( sch ) ;
q - > qdisc = & noop_qdisc ;
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 10:06:03 +04:00
setup_timer ( & q - > adapt_timer , red_adaptative_timer , ( unsigned long ) sch ) ;
2005-04-17 02:20:36 +04:00
return red_change ( sch , opt ) ;
}
static int red_dump ( struct Qdisc * sch , struct sk_buff * skb )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2008-01-23 09:11:17 +03:00
struct nlattr * opts = NULL ;
2005-11-05 23:14:05 +03:00
struct tc_red_qopt opt = {
. limit = q - > limit ,
. flags = q - > flags ,
. qth_min = q - > parms . qth_min > > q - > parms . Wlog ,
. qth_max = q - > parms . qth_max > > q - > parms . Wlog ,
. Wlog = q - > parms . Wlog ,
. Plog = q - > parms . Plog ,
. Scell_log = q - > parms . Scell_log ,
} ;
2005-04-17 02:20:36 +04:00
2011-01-03 11:11:38 +03:00
sch - > qstats . backlog = q - > qdisc - > qstats . backlog ;
2008-01-23 09:11:17 +03:00
opts = nla_nest_start ( skb , TCA_OPTIONS ) ;
if ( opts = = NULL )
goto nla_put_failure ;
2012-03-29 13:11:39 +04:00
if ( nla_put ( skb , TCA_RED_PARMS , sizeof ( opt ) , & opt ) | |
nla_put_u32 ( skb , TCA_RED_MAX_P , q - > parms . max_P ) )
goto nla_put_failure ;
2008-01-23 09:11:17 +03:00
return nla_nest_end ( skb , opts ) ;
2005-04-17 02:20:36 +04:00
2008-01-23 09:11:17 +03:00
nla_put_failure :
2008-06-04 03:36:54 +04:00
nla_nest_cancel ( skb , opts ) ;
return - EMSGSIZE ;
2005-04-17 02:20:36 +04:00
}
static int red_dump_stats ( struct Qdisc * sch , struct gnet_dump * d )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
2005-11-05 23:14:05 +03:00
struct tc_red_xstats st = {
. early = q - > stats . prob_drop + q - > stats . forced_drop ,
. pdrop = q - > stats . pdrop ,
. other = q - > stats . other ,
. marked = q - > stats . prob_mark + q - > stats . forced_mark ,
} ;
return gnet_stats_copy_app ( d , & st , sizeof ( st ) ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-21 06:20:44 +03:00
static int red_dump_class ( struct Qdisc * sch , unsigned long cl ,
struct sk_buff * skb , struct tcmsg * tcm )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
tcm - > tcm_handle | = TC_H_MIN ( 1 ) ;
tcm - > tcm_info = q - > qdisc - > handle ;
return 0 ;
}
static int red_graft ( struct Qdisc * sch , unsigned long arg , struct Qdisc * new ,
struct Qdisc * * old )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
if ( new = = NULL )
new = & noop_qdisc ;
sch_tree_lock ( sch ) ;
2008-11-20 15:11:36 +03:00
* old = q - > qdisc ;
q - > qdisc = new ;
2006-11-30 04:36:20 +03:00
qdisc_tree_decrease_qlen ( * old , ( * old ) - > q . qlen ) ;
2006-03-21 06:20:44 +03:00
qdisc_reset ( * old ) ;
sch_tree_unlock ( sch ) ;
return 0 ;
}
static struct Qdisc * red_leaf ( struct Qdisc * sch , unsigned long arg )
{
struct red_sched_data * q = qdisc_priv ( sch ) ;
return q - > qdisc ;
}
/* red_get - class lookup; always returns 1, whatever @classid is. */
static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	const unsigned long only_class = 1;

	return only_class;
}
/* red_put - counterpart of red_get(); intentionally does nothing. */
static void red_put(struct Qdisc *sch, unsigned long arg)
{
	/* nothing to release */
}
static void red_walk ( struct Qdisc * sch , struct qdisc_walker * walker )
{
if ( ! walker - > stop ) {
if ( walker - > count > = walker - > skip )
if ( walker - > fn ( sch , 1 , walker ) < 0 ) {
walker - > stop = 1 ;
return ;
}
walker - > count + + ;
}
}
2007-11-14 12:44:41 +03:00
static const struct Qdisc_class_ops red_class_ops = {
2006-03-21 06:20:44 +03:00
. graft = red_graft ,
. leaf = red_leaf ,
. get = red_get ,
. put = red_put ,
. walk = red_walk ,
. dump = red_dump_class ,
} ;
2007-11-14 12:44:41 +03:00
/*
 * Qdisc operations table registered by red_module_init().
 *
 * The id must be exactly "red" (no surrounding blanks): that is the name
 * used on the tc command line ("tc qdisc add ... red ...").
 */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		= "red",
	.priv_size	= sizeof(struct red_sched_data),
	.cl_ops		= &red_class_ops,
	.enqueue	= red_enqueue,
	.dequeue	= red_dequeue,
	.peek		= red_peek,
	.drop		= red_drop,
	.init		= red_init,
	.reset		= red_reset,
	.destroy	= red_destroy,
	.change		= red_change,
	.dump		= red_dump,
	.dump_stats	= red_dump_stats,
	.owner		= THIS_MODULE,
};
/* Module entry point: register the RED qdisc operations. */
static int __init red_module_init(void)
{
	int err = register_qdisc(&red_qdisc_ops);

	return err;
}
2005-11-05 23:14:08 +03:00
/* Module exit point: unregister the RED qdisc operations. */
static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
2005-11-05 23:14:08 +03:00
2005-04-17 02:20:36 +04:00
/* Hook the init/exit routines into the module loader. */
module_init(red_module_init)
module_exit(red_module_exit)

/* The licence string must be spelled exactly, without padding blanks. */
MODULE_LICENSE("GPL");