2005-06-23 12:28:11 -07:00
/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R.N.Shorten, D.J.Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */
# include <linux/mm.h>
# include <linux/module.h>
# include <net/tcp.h>
/*
 * Fixed-point constants for the alpha (increase) and beta (backoff)
 * parameters. Values are scaled by 128, i.e. shifted left by 7 bits.
 */
# define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */
# define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
# define BETA_MAX 102 /* 0.8 with shift << 7 */
/*
 * Module parameter: when non-zero, htcp_alpha_update() divides the increase
 * factor by a scale derived from the minimum RTT.
 */
static int use_rtt_scaling = 1 ;
module_param ( use_rtt_scaling , int , 0644 ) ;
MODULE_PARM_DESC ( use_rtt_scaling , " turn on/off RTT scaling " ) ;
/*
 * Module parameter: when non-zero, measure_achieved_throughput() tracks the
 * achieved throughput and htcp_beta_update() falls back to BETA_MIN whenever
 * 5 * maxB lies outside [4 * old_maxB, 6 * old_maxB].
 */
static int use_bandwidth_switch = 1 ;
module_param ( use_bandwidth_switch , int , 0644 ) ;
MODULE_PARM_DESC ( use_bandwidth_switch , " turn on/off bandwidth switcher " ) ;
struct htcp {
2006-10-25 23:05:52 -07:00
u32 alpha ; /* Fixed point arith, << 7 */
2005-06-23 12:28:11 -07:00
u8 beta ; /* Fixed point arith, << 7 */
u8 modeswitch ; /* Delay modeswitch until we had at least one congestion event */
2006-03-20 22:22:47 -08:00
u16 pkts_acked ;
u32 packetcount ;
2005-06-23 12:28:11 -07:00
u32 minRTT ;
u32 maxRTT ;
2006-11-10 15:01:14 -08:00
u32 last_cong ; /* Time since last congestion event end */
u32 undo_last_cong ;
2005-06-23 12:28:11 -07:00
u32 undo_maxRTT ;
u32 undo_old_maxB ;
/* Bandwidth estimation */
u32 minB ;
u32 maxB ;
u32 old_maxB ;
u32 Bi ;
u32 lasttime ;
} ;
2006-03-20 22:23:10 -08:00
static inline u32 htcp_cong_time ( struct htcp * ca )
{
return jiffies - ca - > last_cong ;
}
static inline u32 htcp_ccount ( struct htcp * ca )
{
return htcp_cong_time ( ca ) / ca - > minRTT ;
}
2005-06-23 12:28:11 -07:00
static inline void htcp_reset ( struct htcp * ca )
{
2006-03-20 22:23:10 -08:00
ca - > undo_last_cong = ca - > last_cong ;
2005-06-23 12:28:11 -07:00
ca - > undo_maxRTT = ca - > maxRTT ;
ca - > undo_old_maxB = ca - > old_maxB ;
2006-03-20 22:23:10 -08:00
ca - > last_cong = jiffies ;
2005-06-23 12:28:11 -07:00
}
2005-08-10 04:03:31 -03:00
static u32 htcp_cwnd_undo ( struct sock * sk )
2005-06-23 12:28:11 -07:00
{
2005-08-10 04:03:31 -03:00
const struct tcp_sock * tp = tcp_sk ( sk ) ;
struct htcp * ca = inet_csk_ca ( sk ) ;
2006-03-20 22:23:10 -08:00
ca - > last_cong = ca - > undo_last_cong ;
2005-06-23 12:28:11 -07:00
ca - > maxRTT = ca - > undo_maxRTT ;
ca - > old_maxB = ca - > undo_old_maxB ;
return max ( tp - > snd_cwnd , ( tp - > snd_ssthresh < < 7 ) / ca - > beta ) ;
}
2005-08-10 04:03:31 -03:00
static inline void measure_rtt ( struct sock * sk )
2005-06-23 12:28:11 -07:00
{
2005-08-10 04:03:31 -03:00
const struct inet_connection_sock * icsk = inet_csk ( sk ) ;
const struct tcp_sock * tp = tcp_sk ( sk ) ;
struct htcp * ca = inet_csk_ca ( sk ) ;
2005-06-23 12:28:11 -07:00
u32 srtt = tp - > srtt > > 3 ;
/* keep track of minimum RTT seen so far, minRTT is zero at first */
if ( ca - > minRTT > srtt | | ! ca - > minRTT )
ca - > minRTT = srtt ;
/* max RTT */
2006-03-20 22:23:10 -08:00
if ( icsk - > icsk_ca_state = = TCP_CA_Open & & tp - > snd_ssthresh < 0xFFFF & & htcp_ccount ( ca ) > 3 ) {
2005-06-23 12:28:11 -07:00
if ( ca - > maxRTT < ca - > minRTT )
ca - > maxRTT = ca - > minRTT ;
2006-03-20 22:22:20 -08:00
if ( ca - > maxRTT < srtt & & srtt < = ca - > maxRTT + msecs_to_jiffies ( 20 ) )
2005-06-23 12:28:11 -07:00
ca - > maxRTT = srtt ;
}
}
2005-08-10 04:03:31 -03:00
/*
 * pkts_acked callback: remember how many packets the ACK covered and, when
 * the bandwidth switch is enabled, maintain the throughput estimate
 * (Bi, minB, maxB).
 *
 * A throughput sample is taken once roughly a window's worth of packets has
 * been acked and at least one (non-zero) minRTT has passed since the
 * previous sample.
 */
static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_sock *tp = tcp_sk(sk);
	struct htcp *ca = inet_csk_ca(sk);
	u32 now = tcp_time_stamp;
	u32 alpha_pkts;
	u32 elapsed;
	u32 cur_Bi;

	if (icsk->icsk_ca_state == TCP_CA_Open)
		ca->pkts_acked = pkts_acked;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	if (!(icsk->icsk_ca_state == TCP_CA_Open ||
	      icsk->icsk_ca_state == TCP_CA_Disorder)) {
		/* Outside Open/Disorder: restart the sampling window. */
		ca->packetcount = 0;
		ca->lasttime = now;
		return;
	}

	ca->packetcount += pkts_acked;

	/* Whole-packet part of alpha, but never less than one packet. */
	alpha_pkts = ca->alpha >> 7;
	if (!alpha_pkts)
		alpha_pkts = 1;

	elapsed = now - ca->lasttime;

	if (ca->packetcount < tp->snd_cwnd - alpha_pkts)
		return;
	if (elapsed < ca->minRTT || ca->minRTT == 0)
		return;

	cur_Bi = ca->packetcount * HZ / elapsed;

	if (htcp_ccount(ca) > 3) {
		/* Exponentially smoothed estimate: 3/4 old, 1/4 new. */
		ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
		if (ca->Bi > ca->maxB)
			ca->maxB = ca->Bi;
		if (ca->minB > ca->maxB)
			ca->minB = ca->maxB;
	} else {
		/* just after backoff */
		ca->Bi = cur_Bi;
		ca->maxB = cur_Bi;
		ca->minB = cur_Bi;
	}

	ca->packetcount = 0;
	ca->lasttime = now;
}
/*
 * Recompute the backoff factor beta (fixed point << 7).
 *
 * @ca:     H-TCP state to update
 * @minRTT: minimum RTT estimate
 * @maxRTT: maximum RTT estimate
 *
 * With the bandwidth switch enabled, old_maxB is refreshed from maxB and, if
 * 5 * maxB is not within [4 * old_maxB, 6 * old_maxB], beta falls back to
 * BETA_MIN and modeswitch is cleared.
 *
 * Otherwise, once modeswitch is set and minRTT exceeds 10 ms, beta becomes
 * minRTT / maxRTT clamped to [BETA_MIN, BETA_MAX]. The ratio is clamped in a
 * 32-bit temporary before being stored, because ca->beta is only 8 bits wide
 * and a ratio above 255 would otherwise be truncated before the clamp.
 */
static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
{
	if (use_bandwidth_switch) {
		u32 maxB = ca->maxB;
		u32 old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}

	if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
		u32 beta = (minRTT << 7) / maxRTT;

		if (beta < BETA_MIN)
			beta = BETA_MIN;
		else if (beta > BETA_MAX)
			beta = BETA_MAX;

		ca->beta = beta;
	} else {
		/* First congestion event: use the default and arm the switch. */
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
static inline void htcp_alpha_update ( struct htcp * ca )
{
u32 minRTT = ca - > minRTT ;
u32 factor = 1 ;
2006-03-20 22:23:10 -08:00
u32 diff = htcp_cong_time ( ca ) ;
2005-06-23 12:28:11 -07:00
if ( diff > HZ ) {
diff - = HZ ;
factor = 1 + ( 10 * diff + ( ( diff / 2 ) * ( diff / 2 ) / HZ ) ) / HZ ;
}
if ( use_rtt_scaling & & minRTT ) {
u32 scale = ( HZ < < 3 ) / ( 10 * minRTT ) ;
scale = min ( max ( scale , 1U < < 2 ) , 10U < < 3 ) ; /* clamping ratio to interval [0.5,10]<<3 */
factor = ( factor < < 3 ) / scale ;
if ( ! factor )
factor = 1 ;
}
ca - > alpha = 2 * factor * ( ( 1 < < 7 ) - ca - > beta ) ;
if ( ! ca - > alpha )
ca - > alpha = ALPHA_BASE ;
}
/* After we have the rtt data to calculate beta, we'd still prefer to wait one
 * rtt before we adjust our beta to ensure we are working from consistent
 * data.
 *
 * This function should be called when we hit a congestion event since only at
 * that point do we really have a real sense of maxRTT (the queues en route
 * were getting just too full now).
 */
2005-08-10 04:03:31 -03:00
static void htcp_param_update ( struct sock * sk )
2005-06-23 12:28:11 -07:00
{
2005-08-10 04:03:31 -03:00
struct htcp * ca = inet_csk_ca ( sk ) ;
2005-06-23 12:28:11 -07:00
u32 minRTT = ca - > minRTT ;
u32 maxRTT = ca - > maxRTT ;
htcp_beta_update ( ca , minRTT , maxRTT ) ;
htcp_alpha_update ( ca ) ;
/* add slowly fading memory for maxRTT to accommodate routing changes etc */
if ( minRTT > 0 & & maxRTT > minRTT )
ca - > maxRTT = minRTT + ( ( maxRTT - minRTT ) * 95 ) / 100 ;
}
2005-08-10 04:03:31 -03:00
static u32 htcp_recalc_ssthresh ( struct sock * sk )
2005-06-23 12:28:11 -07:00
{
2005-08-10 04:03:31 -03:00
const struct tcp_sock * tp = tcp_sk ( sk ) ;
const struct htcp * ca = inet_csk_ca ( sk ) ;
htcp_param_update ( sk ) ;
2005-06-23 12:28:11 -07:00
return max ( ( tp - > snd_cwnd * ca - > beta ) > > 7 , 2U ) ;
}
2005-08-10 04:03:31 -03:00
/*
 * cong_avoid callback: grow the congestion window.
 *
 * Nothing happens unless the connection is cwnd limited. At or below
 * ssthresh the generic slow start is used. Above it, RTT estimates are
 * refreshed and cwnd is increased by one each time the alpha-weighted ACK
 * counter reaches cwnd.
 */
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
			    u32 in_flight, int data_acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct htcp *ca = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		tcp_slow_start(tp);
		return;
	}

	measure_rtt(sk);

	/* In dangerous area, increase slowly.
	 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
	 */
	if (((tp->snd_cwnd_cnt * ca->alpha) >> 7) < tp->snd_cwnd) {
		/* Not enough credit yet: accumulate the acked packets. */
		tp->snd_cwnd_cnt += ca->pkts_acked;
	} else {
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
		tp->snd_cwnd_cnt = 0;
		htcp_alpha_update(ca);
	}

	ca->pkts_acked = 1;
}
2005-08-10 04:03:31 -03:00
static void htcp_init ( struct sock * sk )
2005-06-23 12:28:11 -07:00
{
2005-08-10 04:03:31 -03:00
struct htcp * ca = inet_csk_ca ( sk ) ;
2005-06-23 12:28:11 -07:00
memset ( ca , 0 , sizeof ( struct htcp ) ) ;
ca - > alpha = ALPHA_BASE ;
ca - > beta = BETA_MIN ;
2006-03-20 22:22:47 -08:00
ca - > pkts_acked = 1 ;
2006-03-20 22:23:10 -08:00
ca - > last_cong = jiffies ;
2005-06-23 12:28:11 -07:00
}
2005-08-10 04:03:31 -03:00
/*
 * set_state callback.
 *
 * Entering TCP_CA_Open restarts the congestion timer. Entering CWR,
 * Recovery or Loss saves the undo state and restarts the timer through
 * htcp_reset(). Any other state is ignored.
 */
static void htcp_state(struct sock *sk, u8 new_state)
{
	struct htcp *ca = inet_csk_ca(sk);

	if (new_state == TCP_CA_Open) {
		ca->last_cong = jiffies;
		return;
	}

	if (new_state == TCP_CA_CWR ||
	    new_state == TCP_CA_Recovery ||
	    new_state == TCP_CA_Loss)
		htcp_reset(ca);
}
/*
 * Callback table handed to the TCP stack by htcp_register().
 *
 * The name carries no surrounding whitespace: it is the identifier under
 * which the algorithm is registered (presumably matched verbatim when a
 * congestion control is selected by name), so padding would make "htcp"
 * unselectable.
 */
static struct tcp_congestion_ops htcp = {
	.init		= htcp_init,
	.ssthresh	= htcp_recalc_ssthresh,
	.cong_avoid	= htcp_cong_avoid,
	.set_state	= htcp_state,
	.undo_cwnd	= htcp_cwnd_undo,
	.pkts_acked	= measure_achieved_throughput,
	.owner		= THIS_MODULE,
	.name		= "htcp",
};
/*
 * Module init: check at build time that the beta bounds are ordered and that
 * struct htcp fits in ICSK_CA_PRIV_SIZE, then register the algorithm.
 * Returns whatever tcp_register_congestion_control() returns.
 */
static int __init htcp_register(void)
{
	BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
	BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);

	return tcp_register_congestion_control(&htcp);
}
/* Module exit: remove H-TCP from the set of registered congestion controls. */
static void __exit htcp_unregister(void)
{
	tcp_unregister_congestion_control(&htcp);
}
/* Hook htcp_register()/htcp_unregister() up as the module's init and exit handlers. */
module_init ( htcp_register ) ;
module_exit ( htcp_unregister ) ;
/* Module metadata. */
MODULE_AUTHOR ( " Baruch Even " ) ;
MODULE_LICENSE ( " GPL " ) ;
MODULE_DESCRIPTION ( " H-TCP " ) ;