/*
 * Pluggable TCP congestion control support and NewReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <net/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
/* Simple linear search, don't expect many entries! */
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
	struct tcp_congestion_ops *e;

	list_for_each_entry_rcu(e, &tcp_cong_list, list) {
		if (strcmp(e->name, name) == 0)
			return e;
	}

	return NULL;
}
/*
 * Attach new congestion control algorithm to the list
 * of available options.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
	int ret = 0;

	/* all algorithms must implement ssthresh and cong_avoid ops */
	if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) {
		printk(KERN_ERR "TCP %s does not implement required ops\n",
		       ca->name);
		return -EINVAL;
	}

	spin_lock(&tcp_cong_list_lock);
	if (tcp_ca_find(ca->name)) {
		printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
		ret = -EEXIST;
	} else {
		list_add_rcu(&ca->list, &tcp_cong_list);
		printk(KERN_INFO "TCP %s registered\n", ca->name);
	}
	spin_unlock(&tcp_cong_list_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
/*
 * Remove congestion control algorithm, called from
 * the module's remove function.  Module ref counts are used
 * to ensure that this can't be done till all sockets using
 * that method are closed.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
	spin_lock(&tcp_cong_list_lock);
	list_del_rcu(&ca->list);
	spin_unlock(&tcp_cong_list_lock);
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
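/*
 * Illustrative sketch, not code that belongs in this file: a congestion
 * control module would typically fill in a tcp_congestion_ops structure
 * and register/unregister it from its init/exit hooks.  The
 * "hypothetical" name and the reuse of the exported Reno helpers below
 * are assumptions made for the example only.
 *
 *	static struct tcp_congestion_ops tcp_hypothetical = {
 *		.name		= "hypothetical",
 *		.owner		= THIS_MODULE,
 *		.ssthresh	= tcp_reno_ssthresh,
 *		.cong_avoid	= tcp_reno_cong_avoid,
 *		.min_cwnd	= tcp_reno_min_cwnd,
 *	};
 *
 *	static int __init tcp_hypothetical_register(void)
 *	{
 *		return tcp_register_congestion_control(&tcp_hypothetical);
 *	}
 *
 *	static void __exit tcp_hypothetical_unregister(void)
 *	{
 *		tcp_unregister_congestion_control(&tcp_hypothetical);
 *	}
 *
 *	module_init(tcp_hypothetical_register);
 *	module_exit(tcp_hypothetical_unregister);
 */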
/* Assign choice of congestion control. */
void tcp_init_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_congestion_ops *ca;

	if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		if (try_module_get(ca->owner)) {
			icsk->icsk_ca_ops = ca;
			break;
		}
	}
	rcu_read_unlock();

	if (icsk->icsk_ca_ops->init)
		icsk->icsk_ca_ops->init(sk);
}
/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->release)
		icsk->icsk_ca_ops->release(sk);
	module_put(icsk->icsk_ca_ops->owner);
}
/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(const char *name)
{
	struct tcp_congestion_ops *ca;
	int ret = -ENOENT;

	spin_lock(&tcp_cong_list_lock);
	ca = tcp_ca_find(name);
#ifdef CONFIG_KMOD
	if (!ca) {
		spin_unlock(&tcp_cong_list_lock);

		request_module("tcp_%s", name);
		spin_lock(&tcp_cong_list_lock);
		ca = tcp_ca_find(name);
	}
#endif

	if (ca) {
		list_move(&ca->list, &tcp_cong_list);
		ret = 0;
	}
	spin_unlock(&tcp_cong_list_lock);

	return ret;
}
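/*
 * Illustrative sketch: the function above is the handler behind the
 * net.ipv4.tcp_congestion_control sysctl (assuming that sysctl is wired
 * up elsewhere), so an administrator would change the default with e.g.:
 *
 *	echo reno > /proc/sys/net/ipv4/tcp_congestion_control
 *
 * If the named algorithm is not yet available and CONFIG_KMOD is set,
 * the matching "tcp_<name>" module is requested first.
 */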
/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
	struct tcp_congestion_ops *ca;

	/* We will always have reno... */
	BUG_ON(list_empty(&tcp_cong_list));

	rcu_read_lock();
	ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
	strncpy(name, ca->name, TCP_CA_NAME_MAX);
	rcu_read_unlock();
}
/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_congestion_ops *ca;
	int err = 0;

	rcu_read_lock();
	ca = tcp_ca_find(name);
	if (ca == icsk->icsk_ca_ops)
		goto out;

	if (!ca)
		err = -ENOENT;

	else if (!try_module_get(ca->owner))
		err = -EBUSY;

	else {
		tcp_cleanup_congestion_control(sk);
		icsk->icsk_ca_ops = ca;
		if (icsk->icsk_ca_ops->init)
			icsk->icsk_ca_ops->init(sk);
	}
 out:
	rcu_read_unlock();
	return err;
}
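/*
 * Illustrative userspace sketch (assumes the function above is reached
 * via a TCP_CONGESTION setsockopt() on an already open TCP socket fd;
 * the variable names are placeholders):
 *
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "reno", 4) < 0)
 *		perror("setsockopt(TCP_CONGESTION)");
 */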
/*
 * TCP Reno congestion control
 * This is a special case used for fallback as well.
 */
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
			 int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	/* In "safe" area, increase. */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);

	/* In dangerous area, increase slowly. */
	else if (sysctl_tcp_abc) {
		/* RFC3465: Appropriate Byte Count
		 * increase once for each full cwnd acked
		 */
		if (tp->bytes_acked >= tp->snd_cwnd * tp->mss_cache) {
			tp->bytes_acked -= tp->snd_cwnd * tp->mss_cache;
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	} else {
		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
		} else
			tp->snd_cwnd_cnt++;
	}
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
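/*
 * Worked example of the non-ABC branch above: with snd_cwnd == 10,
 * snd_cwnd_cnt is bumped on each ack until it reaches 10, at which point
 * snd_cwnd becomes 11 and the counter resets.  Since roughly one cwnd's
 * worth of acks arrives per round trip, the window grows by about one
 * segment per RTT, i.e. the classic Reno additive increase.
 */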
/* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return max(tp->snd_cwnd >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
/* Lower bound on congestion window. */
u32 tcp_reno_min_cwnd(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->snd_ssthresh / 2;
}
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
	.name		= "reno",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};
/* Initial congestion control used (until SYN).
 * Really Reno under another name so we can tell the difference
 * during tcp_set_default_congestion_control().
 */
struct tcp_congestion_ops tcp_init_congestion_ops = {
	.name		= "",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);