2011-02-02 15:21:10 +00:00
/*
* net / sched / sch_choke . c CHOKE scheduler
*
* Copyright ( c ) 2011 Stephen Hemminger < shemminger @ vyatta . com >
* Copyright ( c ) 2011 Eric Dumazet < eric . dumazet @ gmail . com >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation .
*
*/
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/skbuff.h>
2011-02-02 23:06:31 -08:00
# include <linux/vmalloc.h>
2011-02-02 15:21:10 +00:00
# include <net/pkt_sched.h>
2017-02-09 14:38:56 +01:00
# include <net/pkt_cls.h>
2011-02-02 15:21:10 +00:00
# include <net/inet_ecn.h>
# include <net/red.h>
2015-05-12 14:56:07 +02:00
# include <net/flow_dissector.h>
2011-02-02 15:21:10 +00:00
/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004
 */
/*
 * Upper bound on size of sk_buff table (packets).
 * choke_change() rejects any configured limit above this value with -EINVAL.
 */
# define CHOKE_MAX_QUEUE (128*1024 - 1)
struct choke_sched_data {
/* Parameters */
u32 limit ;
unsigned char flags ;
struct red_parms parms ;
/* Variables */
2012-01-05 02:25:16 +00:00
struct red_vars vars ;
2011-02-02 15:21:10 +00:00
struct {
u32 prob_drop ; /* Early probability drops */
u32 prob_mark ; /* Early probability marks */
u32 forced_drop ; /* Forced drops, qavg > max_thresh */
u32 forced_mark ; /* Forced marks, qavg > max_thresh */
u32 pdrop ; /* Drops due to queue limits */
u32 other ; /* Drops due to drop() calls */
u32 matched ; /* Drops to flow match */
} stats ;
unsigned int head ;
unsigned int tail ;
unsigned int tab_mask ; /* size - 1 */
struct sk_buff * * tab ;
} ;
/* number of elements in queue including holes */
static unsigned int choke_len ( const struct choke_sched_data * q )
{
return ( q - > tail - q - > head ) & q - > tab_mask ;
}
/* Is ECN parameter configured */
static int use_ecn ( const struct choke_sched_data * q )
{
return q - > flags & TC_RED_ECN ;
}
/* Should packets over max just be dropped (versus marked) */
static int use_harddrop ( const struct choke_sched_data * q )
{
return q - > flags & TC_RED_HARDDROP ;
}
/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes ( struct choke_sched_data * q )
{
do {
q - > head = ( q - > head + 1 ) & q - > tab_mask ;
if ( q - > head = = q - > tail )
break ;
} while ( q - > tab [ q - > head ] = = NULL ) ;
}
/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes ( struct choke_sched_data * q )
{
do {
q - > tail = ( q - > tail - 1 ) & q - > tab_mask ;
if ( q - > head = = q - > tail )
break ;
} while ( q - > tab [ q - > tail ] = = NULL ) ;
}
/* Drop packet from queue array by creating a "hole" */
2016-06-21 23:16:49 -07:00
static void choke_drop_by_idx ( struct Qdisc * sch , unsigned int idx ,
struct sk_buff * * to_free )
2011-02-02 15:21:10 +00:00
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
struct sk_buff * skb = q - > tab [ idx ] ;
q - > tab [ idx ] = NULL ;
if ( idx = = q - > head )
choke_zap_head_holes ( q ) ;
if ( idx = = q - > tail )
choke_zap_tail_holes ( q ) ;
2014-09-28 11:53:29 -07:00
qdisc_qstats_backlog_dec ( sch , skb ) ;
2016-02-25 14:55:01 -08:00
qdisc_tree_reduce_backlog ( sch , 1 , qdisc_pkt_len ( skb ) ) ;
2016-06-21 23:16:49 -07:00
qdisc_drop ( skb , sch , to_free ) ;
2011-02-02 15:21:10 +00:00
- - sch - > q . qlen ;
}
2011-02-24 17:45:41 +00:00
struct choke_skb_cb {
2011-11-29 04:22:15 +00:00
u16 classid ;
u8 keys_valid ;
2015-05-01 11:30:18 -07:00
struct flow_keys_digest keys ;
2011-02-24 17:45:41 +00:00
} ;
static inline struct choke_skb_cb * choke_skb_cb ( const struct sk_buff * skb )
{
2012-02-06 15:14:37 -05:00
qdisc_cb_private_validate ( skb , sizeof ( struct choke_skb_cb ) ) ;
2011-02-24 17:45:41 +00:00
return ( struct choke_skb_cb * ) qdisc_skb_cb ( skb ) - > data ;
}
2011-02-02 15:21:10 +00:00
/* Store @classid in the CHOKe control block of @skb */
static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
	struct choke_skb_cb *cb = choke_skb_cb(skb);

	cb->classid = classid;
}
2011-11-29 04:22:15 +00:00
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match .
* false for special cases
*/
static bool choke_match_flow ( struct sk_buff * skb1 ,
struct sk_buff * skb2 )
{
2014-09-18 08:02:05 -07:00
struct flow_keys temp ;
2011-11-29 04:22:15 +00:00
if ( skb1 - > protocol ! = skb2 - > protocol )
return false ;
if ( ! choke_skb_cb ( skb1 ) - > keys_valid ) {
choke_skb_cb ( skb1 ) - > keys_valid = 1 ;
2015-09-01 09:24:27 -07:00
skb_flow_dissect_flow_keys ( skb1 , & temp , 0 ) ;
2015-05-01 11:30:18 -07:00
make_flow_keys_digest ( & choke_skb_cb ( skb1 ) - > keys , & temp ) ;
2011-11-29 04:22:15 +00:00
}
if ( ! choke_skb_cb ( skb2 ) - > keys_valid ) {
choke_skb_cb ( skb2 ) - > keys_valid = 1 ;
2015-09-01 09:24:27 -07:00
skb_flow_dissect_flow_keys ( skb2 , & temp , 0 ) ;
2015-05-01 11:30:18 -07:00
make_flow_keys_digest ( & choke_skb_cb ( skb2 ) - > keys , & temp ) ;
2011-11-29 04:22:15 +00:00
}
return ! memcmp ( & choke_skb_cb ( skb1 ) - > keys ,
& choke_skb_cb ( skb2 ) - > keys ,
2015-05-01 11:30:18 -07:00
sizeof ( choke_skb_cb ( skb1 ) - > keys ) ) ;
2011-11-29 04:22:15 +00:00
}
2011-02-02 15:21:10 +00:00
/*
* Select a packet at random from queue
* HACK : since queue can have holes from previous deletion ; retry several
* times to find a random skb but then just give up and return the head
* Will return NULL if queue is empty ( q - > head = = q - > tail )
*/
static struct sk_buff * choke_peek_random ( const struct choke_sched_data * q ,
unsigned int * pidx )
{
struct sk_buff * skb ;
int retrys = 3 ;
do {
random32: add prandom_u32_max and convert open coded users
Many functions have open coded a function that returns a random
number in range [0,N-1]. Under the assumption that we have a PRNG
such as taus113 with being well distributed in [0, ~0U] space,
we can implement such a function as uword t = (n*m')>>32, where
m' is a random number obtained from PRNG, n the right open interval
border and t our resulting random number, with n,m',t in u32 universe.
Lets go with Joe and simply call it prandom_u32_max(), although
technically we have an right open interval endpoint, but that we
have documented. Other users can further be migrated to the new
prandom_u32_max() function later on; for now, we need to make sure
to migrate reciprocal_divide() users for the reciprocal_divide()
follow-up fixup since their function signatures are going to change.
Joint work with Hannes Frederic Sowa.
Cc: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-01-22 02:29:39 +01:00
* pidx = ( q - > head + prandom_u32_max ( choke_len ( q ) ) ) & q - > tab_mask ;
2011-02-02 15:21:10 +00:00
skb = q - > tab [ * pidx ] ;
if ( skb )
return skb ;
} while ( - - retrys > 0 ) ;
return q - > tab [ * pidx = q - > head ] ;
}
/*
* Compare new packet with random packet in queue
* returns true if matched and sets * pidx
*/
static bool choke_match_random ( const struct choke_sched_data * q ,
struct sk_buff * nskb ,
unsigned int * pidx )
{
struct sk_buff * oskb ;
if ( q - > head = = q - > tail )
return false ;
oskb = choke_peek_random ( q , pidx ) ;
return choke_match_flow ( oskb , nskb ) ;
}
2016-06-21 23:16:49 -07:00
static int choke_enqueue ( struct sk_buff * skb , struct Qdisc * sch ,
struct sk_buff * * to_free )
2011-02-02 15:21:10 +00:00
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
2012-01-05 02:25:16 +00:00
const struct red_parms * p = & q - > parms ;
2011-02-02 15:21:10 +00:00
2011-11-29 04:22:15 +00:00
choke_skb_cb ( skb ) - > keys_valid = 0 ;
2011-02-02 15:21:10 +00:00
/* Compute average queue usage (see RED) */
2012-01-05 02:25:16 +00:00
q - > vars . qavg = red_calc_qavg ( p , & q - > vars , sch - > q . qlen ) ;
if ( red_is_idling ( & q - > vars ) )
red_end_of_idle_period ( & q - > vars ) ;
2011-02-02 15:21:10 +00:00
/* Is queue small? */
2012-01-05 02:25:16 +00:00
if ( q - > vars . qavg < = p - > qth_min )
q - > vars . qcount = - 1 ;
2011-02-02 15:21:10 +00:00
else {
unsigned int idx ;
/* Draw a packet at random from queue and compare flow */
if ( choke_match_random ( q , skb , & idx ) ) {
q - > stats . matched + + ;
2016-06-21 23:16:49 -07:00
choke_drop_by_idx ( sch , idx , to_free ) ;
2011-02-02 15:21:10 +00:00
goto congestion_drop ;
}
/* Queue is large, always mark/drop */
2012-01-05 02:25:16 +00:00
if ( q - > vars . qavg > p - > qth_max ) {
q - > vars . qcount = - 1 ;
2011-02-02 15:21:10 +00:00
2014-09-28 11:53:29 -07:00
qdisc_qstats_overlimit ( sch ) ;
2011-02-02 15:21:10 +00:00
if ( use_harddrop ( q ) | | ! use_ecn ( q ) | |
! INET_ECN_set_ce ( skb ) ) {
q - > stats . forced_drop + + ;
goto congestion_drop ;
}
q - > stats . forced_mark + + ;
2012-01-05 02:25:16 +00:00
} else if ( + + q - > vars . qcount ) {
if ( red_mark_probability ( p , & q - > vars , q - > vars . qavg ) ) {
q - > vars . qcount = 0 ;
q - > vars . qR = red_random ( p ) ;
2011-02-02 15:21:10 +00:00
2014-09-28 11:53:29 -07:00
qdisc_qstats_overlimit ( sch ) ;
2011-02-02 15:21:10 +00:00
if ( ! use_ecn ( q ) | | ! INET_ECN_set_ce ( skb ) ) {
q - > stats . prob_drop + + ;
goto congestion_drop ;
}
q - > stats . prob_mark + + ;
}
} else
2012-01-05 02:25:16 +00:00
q - > vars . qR = red_random ( p ) ;
2011-02-02 15:21:10 +00:00
}
/* Admit new packet */
if ( sch - > q . qlen < q - > limit ) {
q - > tab [ q - > tail ] = skb ;
q - > tail = ( q - > tail + 1 ) & q - > tab_mask ;
+ + sch - > q . qlen ;
2014-09-28 11:53:29 -07:00
qdisc_qstats_backlog_inc ( sch , skb ) ;
2011-02-02 15:21:10 +00:00
return NET_XMIT_SUCCESS ;
}
q - > stats . pdrop + + ;
2016-06-21 23:16:49 -07:00
return qdisc_drop ( skb , sch , to_free ) ;
2011-02-02 15:21:10 +00:00
2012-05-04 04:37:21 +00:00
congestion_drop :
2016-06-21 23:16:49 -07:00
qdisc_drop ( skb , sch , to_free ) ;
2011-02-02 15:21:10 +00:00
return NET_XMIT_CN ;
}
static struct sk_buff * choke_dequeue ( struct Qdisc * sch )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
struct sk_buff * skb ;
if ( q - > head = = q - > tail ) {
2012-01-05 02:25:16 +00:00
if ( ! red_is_idling ( & q - > vars ) )
red_start_of_idle_period ( & q - > vars ) ;
2011-02-02 15:21:10 +00:00
return NULL ;
}
skb = q - > tab [ q - > head ] ;
q - > tab [ q - > head ] = NULL ;
choke_zap_head_holes ( q ) ;
- - sch - > q . qlen ;
2014-09-28 11:53:29 -07:00
qdisc_qstats_backlog_dec ( sch , skb ) ;
2011-02-02 15:21:10 +00:00
qdisc_bstats_update ( sch , skb ) ;
return skb ;
}
static void choke_reset ( struct Qdisc * sch )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
2015-07-21 16:52:43 -07:00
while ( q - > head ! = q - > tail ) {
struct sk_buff * skb = q - > tab [ q - > head ] ;
q - > head = ( q - > head + 1 ) & q - > tab_mask ;
if ( ! skb )
continue ;
2016-06-13 20:21:51 -07:00
rtnl_qdisc_drop ( skb , sch ) ;
2015-07-21 16:52:43 -07:00
}
2016-06-13 20:21:51 -07:00
sch - > q . qlen = 0 ;
sch - > qstats . backlog = 0 ;
2015-07-21 16:52:43 -07:00
memset ( q - > tab , 0 , ( q - > tab_mask + 1 ) * sizeof ( struct sk_buff * ) ) ;
q - > head = q - > tail = 0 ;
2012-01-05 02:25:16 +00:00
red_restart ( & q - > vars ) ;
2011-02-02 15:21:10 +00:00
}
static const struct nla_policy choke_policy [ TCA_CHOKE_MAX + 1 ] = {
[ TCA_CHOKE_PARMS ] = { . len = sizeof ( struct tc_red_qopt ) } ,
[ TCA_CHOKE_STAB ] = { . len = RED_STAB_SIZE } ,
2011-12-09 02:46:45 +00:00
[ TCA_CHOKE_MAX_P ] = { . type = NLA_U32 } ,
2011-02-02 15:21:10 +00:00
} ;
/*
 * Release a packet table. The table comes from kvmalloc_array() in
 * choke_change(), so it is released with kvfree().
 */
static void choke_free(void *addr)
{
	kvfree(addr);
}
static int choke_change ( struct Qdisc * sch , struct nlattr * opt )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
struct nlattr * tb [ TCA_CHOKE_MAX + 1 ] ;
const struct tc_red_qopt * ctl ;
int err ;
struct sk_buff * * old = NULL ;
unsigned int mask ;
2011-12-09 02:46:45 +00:00
u32 max_P ;
2011-02-02 15:21:10 +00:00
if ( opt = = NULL )
return - EINVAL ;
2017-04-12 14:34:07 +02:00
err = nla_parse_nested ( tb , TCA_CHOKE_MAX , opt , choke_policy , NULL ) ;
2011-02-02 15:21:10 +00:00
if ( err < 0 )
return err ;
if ( tb [ TCA_CHOKE_PARMS ] = = NULL | |
tb [ TCA_CHOKE_STAB ] = = NULL )
return - EINVAL ;
2011-12-09 02:46:45 +00:00
max_P = tb [ TCA_CHOKE_MAX_P ] ? nla_get_u32 ( tb [ TCA_CHOKE_MAX_P ] ) : 0 ;
2011-02-02 15:21:10 +00:00
ctl = nla_data ( tb [ TCA_CHOKE_PARMS ] ) ;
if ( ctl - > limit > CHOKE_MAX_QUEUE )
return - EINVAL ;
mask = roundup_pow_of_two ( ctl - > limit + 1 ) - 1 ;
if ( mask ! = q - > tab_mask ) {
struct sk_buff * * ntab ;
2017-05-08 15:57:27 -07:00
ntab = kvmalloc_array ( ( mask + 1 ) , sizeof ( struct sk_buff * ) , GFP_KERNEL | __GFP_ZERO ) ;
2011-02-02 15:21:10 +00:00
if ( ! ntab )
return - ENOMEM ;
sch_tree_lock ( sch ) ;
old = q - > tab ;
if ( old ) {
unsigned int oqlen = sch - > q . qlen , tail = 0 ;
2016-02-25 14:55:01 -08:00
unsigned dropped = 0 ;
2011-02-02 15:21:10 +00:00
while ( q - > head ! = q - > tail ) {
struct sk_buff * skb = q - > tab [ q - > head ] ;
q - > head = ( q - > head + 1 ) & q - > tab_mask ;
if ( ! skb )
continue ;
if ( tail < mask ) {
ntab [ tail + + ] = skb ;
continue ;
}
2016-02-25 14:55:01 -08:00
dropped + = qdisc_pkt_len ( skb ) ;
2014-09-28 11:53:29 -07:00
qdisc_qstats_backlog_dec ( sch , skb ) ;
2011-02-02 15:21:10 +00:00
- - sch - > q . qlen ;
2016-06-13 20:21:51 -07:00
rtnl_qdisc_drop ( skb , sch ) ;
2011-02-02 15:21:10 +00:00
}
2016-02-25 14:55:01 -08:00
qdisc_tree_reduce_backlog ( sch , oqlen - sch - > q . qlen , dropped ) ;
2011-02-02 15:21:10 +00:00
q - > head = 0 ;
q - > tail = tail ;
}
q - > tab_mask = mask ;
q - > tab = ntab ;
} else
sch_tree_lock ( sch ) ;
q - > flags = ctl - > flags ;
q - > limit = ctl - > limit ;
red_set_parms ( & q - > parms , ctl - > qth_min , ctl - > qth_max , ctl - > Wlog ,
ctl - > Plog , ctl - > Scell_log ,
2011-12-09 02:46:45 +00:00
nla_data ( tb [ TCA_CHOKE_STAB ] ) ,
max_P ) ;
2012-01-05 02:25:16 +00:00
red_set_vars ( & q - > vars ) ;
2011-02-02 15:21:10 +00:00
if ( q - > head = = q - > tail )
2012-01-05 02:25:16 +00:00
red_end_of_idle_period ( & q - > vars ) ;
2011-02-02 15:21:10 +00:00
sch_tree_unlock ( sch ) ;
choke_free ( old ) ;
return 0 ;
}
/*
 * Qdisc init hook: initial setup is the same operation as a
 * configuration change, so the work is delegated to choke_change()
 * and its result is returned unchanged.
 */
static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	int err = choke_change(sch, opt);

	return err;
}
static int choke_dump ( struct Qdisc * sch , struct sk_buff * skb )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
struct nlattr * opts = NULL ;
struct tc_red_qopt opt = {
. limit = q - > limit ,
. flags = q - > flags ,
. qth_min = q - > parms . qth_min > > q - > parms . Wlog ,
. qth_max = q - > parms . qth_max > > q - > parms . Wlog ,
. Wlog = q - > parms . Wlog ,
. Plog = q - > parms . Plog ,
. Scell_log = q - > parms . Scell_log ,
} ;
opts = nla_nest_start ( skb , TCA_OPTIONS ) ;
if ( opts = = NULL )
goto nla_put_failure ;
2012-03-29 05:11:39 -04:00
if ( nla_put ( skb , TCA_CHOKE_PARMS , sizeof ( opt ) , & opt ) | |
nla_put_u32 ( skb , TCA_CHOKE_MAX_P , q - > parms . max_P ) )
goto nla_put_failure ;
2011-02-02 15:21:10 +00:00
return nla_nest_end ( skb , opts ) ;
nla_put_failure :
nla_nest_cancel ( skb , opts ) ;
return - EMSGSIZE ;
}
static int choke_dump_stats ( struct Qdisc * sch , struct gnet_dump * d )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
struct tc_choke_xstats st = {
. early = q - > stats . prob_drop + q - > stats . forced_drop ,
. marked = q - > stats . prob_mark + q - > stats . forced_mark ,
. pdrop = q - > stats . pdrop ,
. other = q - > stats . other ,
. matched = q - > stats . matched ,
} ;
return gnet_stats_copy_app ( d , & st , sizeof ( st ) ) ;
}
static void choke_destroy ( struct Qdisc * sch )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
choke_free ( q - > tab ) ;
}
static struct sk_buff * choke_peek_head ( struct Qdisc * sch )
{
struct choke_sched_data * q = qdisc_priv ( sch ) ;
return ( q - > head ! = q - > tail ) ? q - > tab [ q - > head ] : NULL ;
}
/*
 * Operations table registered for the "choke" qdisc.
 *
 * The id must be exactly "choke" with no surrounding blanks, otherwise
 * the kind name requested by user space would not match this table.
 */
static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		= "choke",
	.priv_size	= sizeof(struct choke_sched_data),

	.enqueue	= choke_enqueue,
	.dequeue	= choke_dequeue,
	.peek		= choke_peek_head,
	.init		= choke_init,
	.destroy	= choke_destroy,
	.reset		= choke_reset,
	.change		= choke_change,
	.dump		= choke_dump,
	.dump_stats	= choke_dump_stats,
	.owner		= THIS_MODULE,
};
/* Module entry point: register the choke qdisc operations */
static int __init choke_module_init(void)
{
	int err = register_qdisc(&choke_qdisc_ops);

	return err;
}
/* Module exit point: unregister the choke qdisc operations */
static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}
/* Hook the init/exit routines into module load and unload */
module_init(choke_module_init)
module_exit(choke_module_exit)

/* The license ident must be exactly "GPL", with no padding blanks */
MODULE_LICENSE("GPL");