2005-04-17 02:20:36 +04:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* Implementation of the Transmission Control Protocol ( TCP ) .
*
2005-05-06 03:16:16 +04:00
* Authors : Ross Biro
2005-04-17 02:20:36 +04:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
* Mark Evans , < evansmp @ uhura . aston . ac . uk >
* Corey Minyard < wf - rch ! minyard @ relay . EU . net >
* Florian La Roche , < flla @ stud . uni - sb . de >
* Charles Hedrick , < hedrick @ klinzhai . rutgers . edu >
* Linus Torvalds , < torvalds @ cs . helsinki . fi >
* Alan Cox , < gw4pts @ gw4pts . ampr . org >
* Matthew Dillon , < dillon @ apollo . west . oic . com >
* Arnt Gulbrandsen , < agulbra @ nvg . unit . no >
* Jorge Cwik , < jorge @ laser . satlink . net >
*/
# include <linux/module.h>
# include <net/tcp.h>
2006-09-23 01:15:41 +04:00
/*
 * Tunables for the TCP timer code. Each one starts out at the matching TCP_*
 * compile-time constant, except sysctl_tcp_orphan_retries, which has no
 * initializer and therefore starts at zero (tcp_orphan_retries() gives a
 * zero value special treatment).
 */
int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES ;
int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES ;
int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME ;
int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES ;
int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL ;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1 ;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2 ;
int sysctl_tcp_orphan_retries __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
 * Forward declarations of the three timer callbacks that
 * tcp_init_xmit_timers() registers; their definitions come later in the file.
 */
static void tcp_write_timer ( unsigned long ) ;
static void tcp_delack_timer ( unsigned long ) ;
static void tcp_keepalive_timer ( unsigned long data ) ;
2005-08-10 07:10:42 +04:00
/*
 * Hook up the TCP timer callbacks for @sk: tcp_write_timer (retransmit and
 * zero-window probe events), tcp_delack_timer and tcp_keepalive_timer are
 * handed to inet_csk_init_xmit_timers().
 */
void tcp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, tcp_write_timer, tcp_delack_timer,
				  tcp_keepalive_timer);
}

EXPORT_SYMBOL(tcp_init_xmit_timers);
2005-04-17 02:20:36 +04:00
static void tcp_write_err ( struct sock * sk )
{
sk - > sk_err = sk - > sk_err_soft ? : ETIMEDOUT ;
sk - > sk_error_report ( sk ) ;
tcp_done ( sk ) ;
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , LINUX_MIB_TCPABORTONTIMEOUT ) ;
2005-04-17 02:20:36 +04:00
}
/* Do not allow orphaned sockets to eat all our resources.
* This is direct violation of TCP specs , but it is required
* to prevent DoS attacks . It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket .
*
2005-11-11 04:13:47 +03:00
* Criteria is still not confirmed experimentally and may change .
2005-04-17 02:20:36 +04:00
* We kill the socket , if :
* 1. If number of orphaned sockets exceeds an administratively configured
* limit .
* 2. If we have strong memory pressure .
*/
static int tcp_out_of_resources ( struct sock * sk , int do_reset )
{
struct tcp_sock * tp = tcp_sk ( sk ) ;
2008-11-26 08:17:14 +03:00
int orphans = percpu_counter_read_positive ( & tcp_orphan_count ) ;
2005-04-17 02:20:36 +04:00
2007-02-09 17:24:47 +03:00
/* If peer does not open window for long time, or did not transmit
2005-04-17 02:20:36 +04:00
* anything for long time , penalize it . */
if ( ( s32 ) ( tcp_time_stamp - tp - > lsndtime ) > 2 * TCP_RTO_MAX | | ! do_reset )
orphans < < = 1 ;
/* If some dubious ICMP arrived, penalize even more. */
if ( sk - > sk_err_soft )
orphans < < = 1 ;
2007-05-30 00:19:18 +04:00
if ( tcp_too_many_orphans ( sk , orphans ) ) {
2005-04-17 02:20:36 +04:00
if ( net_ratelimit ( ) )
printk ( KERN_INFO " Out of socket memory \n " ) ;
/* Catch exceptional cases, when connection requires reset.
* 1. Last segment was sent recently . */
if ( ( s32 ) ( tcp_time_stamp - tp - > lsndtime ) < = TCP_TIMEWAIT_LEN | |
/* 2. Window is closed. */
( ! tp - > snd_wnd & & ! tp - > packets_out ) )
do_reset = 1 ;
if ( do_reset )
tcp_send_active_reset ( sk , GFP_ATOMIC ) ;
tcp_done ( sk ) ;
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , LINUX_MIB_TCPABORTONMEMORY ) ;
2005-04-17 02:20:36 +04:00
return 1 ;
}
return 0 ;
}
/* Calculate maximal number or retries on an orphaned socket. */
static int tcp_orphan_retries ( struct sock * sk , int alive )
{
int retries = sysctl_tcp_orphan_retries ; /* May be zero. */
/* We know from an ICMP that something is wrong. */
if ( sk - > sk_err_soft & & ! alive )
retries = 0 ;
/* However, if socket sent something recently, select some safe
* number of retries . 8 corresponds to > 100 seconds with minimal
* RTO of 200 msec . */
if ( retries = = 0 & & alive )
retries = 8 ;
return retries ;
}
2007-12-21 12:50:43 +03:00
static void tcp_mtu_probing ( struct inet_connection_sock * icsk , struct sock * sk )
{
/* Black hole detection */
if ( sysctl_tcp_mtu_probing ) {
if ( ! icsk - > icsk_mtup . enabled ) {
icsk - > icsk_mtup . enabled = 1 ;
tcp_sync_mss ( sk , icsk - > icsk_pmtu_cookie ) ;
} else {
struct tcp_sock * tp = tcp_sk ( sk ) ;
2007-12-21 15:29:16 +03:00
int mss ;
2007-12-21 16:58:29 +03:00
mss = tcp_mtu_to_mss ( sk , icsk - > icsk_mtup . search_low ) > > 1 ;
2007-12-21 12:50:43 +03:00
mss = min ( sysctl_tcp_base_mss , mss ) ;
mss = max ( mss , 68 - tp - > tcp_header_len ) ;
icsk - > icsk_mtup . search_low = tcp_mss_to_mtu ( sk , mss ) ;
tcp_sync_mss ( sk , icsk - > icsk_pmtu_cookie ) ;
}
}
}
2005-04-17 02:20:36 +04:00
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout ( struct sock * sk )
{
2006-03-21 04:53:41 +03:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
int retry_until ;
if ( ( 1 < < sk - > sk_state ) & ( TCPF_SYN_SENT | TCPF_SYN_RECV ) ) {
2005-08-10 07:10:42 +04:00
if ( icsk - > icsk_retransmits )
2005-04-17 02:20:36 +04:00
dst_negative_advice ( & sk - > sk_dst_cache ) ;
2005-08-10 07:10:42 +04:00
retry_until = icsk - > icsk_syn_retries ? : sysctl_tcp_syn_retries ;
2005-04-17 02:20:36 +04:00
} else {
2005-08-10 07:10:42 +04:00
if ( icsk - > icsk_retransmits > = sysctl_tcp_retries1 ) {
2006-03-21 04:53:41 +03:00
/* Black hole detection */
2007-12-21 12:50:43 +03:00
tcp_mtu_probing ( icsk , sk ) ;
2005-04-17 02:20:36 +04:00
dst_negative_advice ( & sk - > sk_dst_cache ) ;
}
retry_until = sysctl_tcp_retries2 ;
if ( sock_flag ( sk , SOCK_DEAD ) ) {
2005-08-10 07:10:42 +04:00
const int alive = ( icsk - > icsk_rto < TCP_RTO_MAX ) ;
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
retry_until = tcp_orphan_retries ( sk , alive ) ;
2005-08-10 07:10:42 +04:00
if ( tcp_out_of_resources ( sk , alive | | icsk - > icsk_retransmits < retry_until ) )
2005-04-17 02:20:36 +04:00
return 1 ;
}
}
2005-08-10 07:10:42 +04:00
if ( icsk - > icsk_retransmits > = retry_until ) {
2005-04-17 02:20:36 +04:00
/* Has it gone just too far? */
tcp_write_err ( sk ) ;
return 1 ;
}
return 0 ;
}
static void tcp_delack_timer ( unsigned long data )
{
2008-11-03 13:47:38 +03:00
struct sock * sk = ( struct sock * ) data ;
2005-04-17 02:20:36 +04:00
struct tcp_sock * tp = tcp_sk ( sk ) ;
2005-08-10 07:10:42 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
bh_lock_sock ( sk ) ;
if ( sock_owned_by_user ( sk ) ) {
/* Try again later. */
2005-08-10 07:10:42 +04:00
icsk - > icsk_ack . blocked = 1 ;
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , LINUX_MIB_DELAYEDACKLOCKED ) ;
2005-08-10 07:10:42 +04:00
sk_reset_timer ( sk , & icsk - > icsk_delack_timer , jiffies + TCP_DELACK_MIN ) ;
2005-04-17 02:20:36 +04:00
goto out_unlock ;
}
2008-01-11 08:56:38 +03:00
sk_mem_reclaim_partial ( sk ) ;
2005-04-17 02:20:36 +04:00
2005-08-10 07:10:42 +04:00
if ( sk - > sk_state = = TCP_CLOSE | | ! ( icsk - > icsk_ack . pending & ICSK_ACK_TIMER ) )
2005-04-17 02:20:36 +04:00
goto out ;
2005-08-10 07:10:42 +04:00
if ( time_after ( icsk - > icsk_ack . timeout , jiffies ) ) {
sk_reset_timer ( sk , & icsk - > icsk_delack_timer , icsk - > icsk_ack . timeout ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
2005-08-10 07:10:42 +04:00
icsk - > icsk_ack . pending & = ~ ICSK_ACK_TIMER ;
2005-04-17 02:20:36 +04:00
2005-07-09 01:57:23 +04:00
if ( ! skb_queue_empty ( & tp - > ucopy . prequeue ) ) {
2005-04-17 02:20:36 +04:00
struct sk_buff * skb ;
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , LINUX_MIB_TCPSCHEDULERFAILED ) ;
2005-04-17 02:20:36 +04:00
while ( ( skb = __skb_dequeue ( & tp - > ucopy . prequeue ) ) ! = NULL )
2008-10-08 01:18:42 +04:00
sk_backlog_rcv ( sk , skb ) ;
2005-04-17 02:20:36 +04:00
tp - > ucopy . memory = 0 ;
}
2005-08-10 07:10:42 +04:00
if ( inet_csk_ack_scheduled ( sk ) ) {
if ( ! icsk - > icsk_ack . pingpong ) {
2005-04-17 02:20:36 +04:00
/* Delayed ACK missed: inflate ATO. */
2005-08-10 07:10:42 +04:00
icsk - > icsk_ack . ato = min ( icsk - > icsk_ack . ato < < 1 , icsk - > icsk_rto ) ;
2005-04-17 02:20:36 +04:00
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO .
*/
2005-08-10 07:10:42 +04:00
icsk - > icsk_ack . pingpong = 0 ;
icsk - > icsk_ack . ato = TCP_ATO_MIN ;
2005-04-17 02:20:36 +04:00
}
tcp_send_ack ( sk ) ;
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , LINUX_MIB_DELAYEDACKS ) ;
2005-04-17 02:20:36 +04:00
}
TCP_CHECK_TIMER ( sk ) ;
out :
if ( tcp_memory_pressure )
2007-12-31 11:11:19 +03:00
sk_mem_reclaim ( sk ) ;
2005-04-17 02:20:36 +04:00
out_unlock :
bh_unlock_sock ( sk ) ;
sock_put ( sk ) ;
}
static void tcp_probe_timer ( struct sock * sk )
{
2005-08-10 11:03:31 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
struct tcp_sock * tp = tcp_sk ( sk ) ;
int max_probes ;
2007-03-07 23:12:44 +03:00
if ( tp - > packets_out | | ! tcp_send_head ( sk ) ) {
2005-08-10 11:03:31 +04:00
icsk - > icsk_probes_out = 0 ;
2005-04-17 02:20:36 +04:00
return ;
}
/* *WARNING* RFC 1122 forbids this
*
* It doesn ' t AFAIK , because we kill the retransmit timer - AK
*
* FIXME : We ought not to do it , Solaris 2.5 actually has fixing
* this behaviour in Solaris down as a bug fix . [ AC ]
*
2005-08-10 11:03:31 +04:00
* Let me to explain . icsk_probes_out is zeroed by incoming ACKs
2005-04-17 02:20:36 +04:00
* even if they advertise zero window . Hence , connection is killed only
* if we received no ACKs for normal connection timeout . It is not killed
* only because window stays zero for some time , window may be zero
* until armageddon and even later . We are in full accordance
* with RFCs , only probe timer combines both retransmission timeout
* and probe timeout in one bottle . - - ANK
*/
max_probes = sysctl_tcp_retries2 ;
if ( sock_flag ( sk , SOCK_DEAD ) ) {
2005-08-10 07:10:42 +04:00
const int alive = ( ( icsk - > icsk_rto < < icsk - > icsk_backoff ) < TCP_RTO_MAX ) ;
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
max_probes = tcp_orphan_retries ( sk , alive ) ;
2005-08-10 11:03:31 +04:00
if ( tcp_out_of_resources ( sk , alive | | icsk - > icsk_probes_out < = max_probes ) )
2005-04-17 02:20:36 +04:00
return ;
}
2005-08-10 11:03:31 +04:00
if ( icsk - > icsk_probes_out > max_probes ) {
2005-04-17 02:20:36 +04:00
tcp_write_err ( sk ) ;
} else {
/* Only send another probe if we didn't close things up. */
tcp_send_probe0 ( sk ) ;
}
}
/*
* The TCP retransmit timer .
*/
static void tcp_retransmit_timer ( struct sock * sk )
{
struct tcp_sock * tp = tcp_sk ( sk ) ;
2005-08-10 07:10:42 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
if ( ! tp - > packets_out )
goto out ;
2008-07-26 08:43:18 +04:00
WARN_ON ( tcp_write_queue_empty ( sk ) ) ;
2005-04-17 02:20:36 +04:00
if ( ! tp - > snd_wnd & & ! sock_flag ( sk , SOCK_DEAD ) & &
! ( ( 1 < < sk - > sk_state ) & ( TCPF_SYN_SENT | TCPF_SYN_RECV ) ) ) {
/* Receiver dastardly shrinks window. Our retransmits
* become zero probes , but we should not timeout this
* connection . If the socket is an orphan , time it out ,
* we cannot allow such beasts to hang infinitely .
*/
# ifdef TCP_DEBUG
2008-04-14 15:09:36 +04:00
struct inet_sock * inet = inet_sk ( sk ) ;
if ( sk - > sk_family = = AF_INET ) {
2008-12-19 06:54:22 +03:00
LIMIT_NETDEBUG ( KERN_DEBUG " TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired) \n " ,
2008-10-31 10:53:57 +03:00
& inet - > daddr , ntohs ( inet - > dport ) ,
2005-04-17 02:20:36 +04:00
inet - > num , tp - > snd_una , tp - > snd_nxt ) ;
}
2008-04-14 15:09:36 +04:00
# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if ( sk - > sk_family = = AF_INET6 ) {
struct ipv6_pinfo * np = inet6_sk ( sk ) ;
2008-12-19 06:54:22 +03:00
LIMIT_NETDEBUG ( KERN_DEBUG " TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired) \n " ,
2008-10-29 02:09:23 +03:00
& np - > daddr , ntohs ( inet - > dport ) ,
2008-04-14 15:09:36 +04:00
inet - > num , tp - > snd_una , tp - > snd_nxt ) ;
}
# endif
2005-04-17 02:20:36 +04:00
# endif
if ( tcp_time_stamp - tp - > rcv_tstamp > TCP_RTO_MAX ) {
tcp_write_err ( sk ) ;
goto out ;
}
tcp_enter_loss ( sk , 0 ) ;
2007-03-07 23:12:44 +03:00
tcp_retransmit_skb ( sk , tcp_write_queue_head ( sk ) ) ;
2005-04-17 02:20:36 +04:00
__sk_dst_reset ( sk ) ;
goto out_reset_timer ;
}
if ( tcp_write_timeout ( sk ) )
goto out ;
2005-08-10 07:10:42 +04:00
if ( icsk - > icsk_retransmits = = 0 ) {
2008-07-03 12:05:41 +04:00
int mib_idx ;
2009-02-28 07:44:34 +03:00
if ( icsk - > icsk_ca_state = = TCP_CA_Disorder ) {
if ( tcp_is_sack ( tp ) )
mib_idx = LINUX_MIB_TCPSACKFAILURES ;
else
mib_idx = LINUX_MIB_TCPRENOFAILURES ;
} else if ( icsk - > icsk_ca_state = = TCP_CA_Recovery ) {
if ( tcp_is_sack ( tp ) )
mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL ;
else
mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL ;
2005-08-10 11:03:31 +04:00
} else if ( icsk - > icsk_ca_state = = TCP_CA_Loss ) {
2008-07-03 12:05:41 +04:00
mib_idx = LINUX_MIB_TCPLOSSFAILURES ;
2005-04-17 02:20:36 +04:00
} else {
2008-07-03 12:05:41 +04:00
mib_idx = LINUX_MIB_TCPTIMEOUTS ;
2005-04-17 02:20:36 +04:00
}
2008-07-17 07:31:16 +04:00
NET_INC_STATS_BH ( sock_net ( sk ) , mib_idx ) ;
2005-04-17 02:20:36 +04:00
}
if ( tcp_use_frto ( sk ) ) {
tcp_enter_frto ( sk ) ;
} else {
tcp_enter_loss ( sk , 0 ) ;
}
2007-03-07 23:12:44 +03:00
if ( tcp_retransmit_skb ( sk , tcp_write_queue_head ( sk ) ) > 0 ) {
2005-04-17 02:20:36 +04:00
/* Retransmission failed because of local congestion,
* do not backoff .
*/
2005-08-10 07:10:42 +04:00
if ( ! icsk - > icsk_retransmits )
icsk - > icsk_retransmits = 1 ;
inet_csk_reset_xmit_timer ( sk , ICSK_TIME_RETRANS ,
2005-08-10 07:11:08 +04:00
min ( icsk - > icsk_rto , TCP_RESOURCE_PROBE_INTERVAL ) ,
TCP_RTO_MAX ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
/* Increase the timeout each time we retransmit. Note that
* we do not increase the rtt estimate . rto is initialized
* from rtt , but increases here . Jacobson ( SIGCOMM 88 ) suggests
* that doubling rto each time is the least we can get away with .
* In KA9Q , Karn uses this for the first few times , and then
* goes to quadratic . netBSD doubles , but only goes up to * 64 ,
* and clamps at 1 to 64 sec afterwards . Note that 120 sec is
* defined in the protocol as the maximum possible RTT . I guess
* we ' ll have to use something other than TCP to talk to the
* University of Mars .
*
* PAWS allows us longer timeouts and large windows , so once
* implemented ftp to mars will work nicely . We will have to fix
* the 120 second clamps though !
*/
2005-08-10 07:10:42 +04:00
icsk - > icsk_backoff + + ;
icsk - > icsk_retransmits + + ;
2005-04-17 02:20:36 +04:00
out_reset_timer :
2005-08-10 07:10:42 +04:00
icsk - > icsk_rto = min ( icsk - > icsk_rto < < 1 , TCP_RTO_MAX ) ;
2005-08-10 07:11:08 +04:00
inet_csk_reset_xmit_timer ( sk , ICSK_TIME_RETRANS , icsk - > icsk_rto , TCP_RTO_MAX ) ;
2005-08-10 07:10:42 +04:00
if ( icsk - > icsk_retransmits > sysctl_tcp_retries1 )
2005-04-17 02:20:36 +04:00
__sk_dst_reset ( sk ) ;
out : ;
}
static void tcp_write_timer ( unsigned long data )
{
2008-11-03 13:47:38 +03:00
struct sock * sk = ( struct sock * ) data ;
2005-08-10 07:10:42 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
int event ;
bh_lock_sock ( sk ) ;
if ( sock_owned_by_user ( sk ) ) {
/* Try again later */
2005-08-10 07:10:42 +04:00
sk_reset_timer ( sk , & icsk - > icsk_retransmit_timer , jiffies + ( HZ / 20 ) ) ;
2005-04-17 02:20:36 +04:00
goto out_unlock ;
}
2005-08-10 07:10:42 +04:00
if ( sk - > sk_state = = TCP_CLOSE | | ! icsk - > icsk_pending )
2005-04-17 02:20:36 +04:00
goto out ;
2005-08-10 07:10:42 +04:00
if ( time_after ( icsk - > icsk_timeout , jiffies ) ) {
sk_reset_timer ( sk , & icsk - > icsk_retransmit_timer , icsk - > icsk_timeout ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
2005-08-10 07:10:42 +04:00
event = icsk - > icsk_pending ;
icsk - > icsk_pending = 0 ;
2005-04-17 02:20:36 +04:00
switch ( event ) {
2005-08-10 07:10:42 +04:00
case ICSK_TIME_RETRANS :
2005-04-17 02:20:36 +04:00
tcp_retransmit_timer ( sk ) ;
break ;
2005-08-10 07:10:42 +04:00
case ICSK_TIME_PROBE0 :
2005-04-17 02:20:36 +04:00
tcp_probe_timer ( sk ) ;
break ;
}
TCP_CHECK_TIMER ( sk ) ;
out :
2007-12-31 11:11:19 +03:00
sk_mem_reclaim ( sk ) ;
2005-04-17 02:20:36 +04:00
out_unlock :
bh_unlock_sock ( sk ) ;
sock_put ( sk ) ;
}
2005-08-10 07:11:56 +04:00
/*
* Timer for listening sockets
*/
static void tcp_synack_timer ( struct sock * sk )
{
2005-08-10 07:15:09 +04:00
inet_csk_reqsk_queue_prune ( sk , TCP_SYNQ_INTERVAL ,
TCP_TIMEOUT_INIT , TCP_RTO_MAX ) ;
2005-04-17 02:20:36 +04:00
}
void tcp_set_keepalive ( struct sock * sk , int val )
{
if ( ( 1 < < sk - > sk_state ) & ( TCPF_CLOSE | TCPF_LISTEN ) )
return ;
if ( val & & ! sock_flag ( sk , SOCK_KEEPOPEN ) )
2005-08-10 07:10:42 +04:00
inet_csk_reset_keepalive_timer ( sk , keepalive_time_when ( tcp_sk ( sk ) ) ) ;
2005-04-17 02:20:36 +04:00
else if ( ! val )
2005-08-10 07:10:42 +04:00
inet_csk_delete_keepalive_timer ( sk ) ;
2005-04-17 02:20:36 +04:00
}
static void tcp_keepalive_timer ( unsigned long data )
{
struct sock * sk = ( struct sock * ) data ;
2005-08-10 11:03:31 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-17 02:20:36 +04:00
struct tcp_sock * tp = tcp_sk ( sk ) ;
__u32 elapsed ;
/* Only process if socket is not in use. */
bh_lock_sock ( sk ) ;
if ( sock_owned_by_user ( sk ) ) {
2007-02-09 17:24:47 +03:00
/* Try again later. */
2005-08-10 07:10:42 +04:00
inet_csk_reset_keepalive_timer ( sk , HZ / 20 ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
if ( sk - > sk_state = = TCP_LISTEN ) {
tcp_synack_timer ( sk ) ;
goto out ;
}
if ( sk - > sk_state = = TCP_FIN_WAIT2 & & sock_flag ( sk , SOCK_DEAD ) ) {
if ( tp - > linger2 > = 0 ) {
2005-08-10 07:10:42 +04:00
const int tmo = tcp_fin_time ( sk ) - TCP_TIMEWAIT_LEN ;
2005-04-17 02:20:36 +04:00
if ( tmo > 0 ) {
tcp_time_wait ( sk , TCP_FIN_WAIT2 , tmo ) ;
goto out ;
}
}
tcp_send_active_reset ( sk , GFP_ATOMIC ) ;
goto death ;
}
if ( ! sock_flag ( sk , SOCK_KEEPOPEN ) | | sk - > sk_state = = TCP_CLOSE )
goto out ;
elapsed = keepalive_time_when ( tp ) ;
/* It is alive without keepalive 8) */
2007-03-07 23:12:44 +03:00
if ( tp - > packets_out | | tcp_send_head ( sk ) )
2005-04-17 02:20:36 +04:00
goto resched ;
elapsed = tcp_time_stamp - tp - > rcv_tstamp ;
if ( elapsed > = keepalive_time_when ( tp ) ) {
2005-08-10 11:03:31 +04:00
if ( ( ! tp - > keepalive_probes & & icsk - > icsk_probes_out > = sysctl_tcp_keepalive_probes ) | |
( tp - > keepalive_probes & & icsk - > icsk_probes_out > = tp - > keepalive_probes ) ) {
2005-04-17 02:20:36 +04:00
tcp_send_active_reset ( sk , GFP_ATOMIC ) ;
tcp_write_err ( sk ) ;
goto out ;
}
if ( tcp_write_wakeup ( sk ) < = 0 ) {
2005-08-10 11:03:31 +04:00
icsk - > icsk_probes_out + + ;
2005-04-17 02:20:36 +04:00
elapsed = keepalive_intvl_when ( tp ) ;
} else {
/* If keepalive was lost due to local congestion,
* try harder .
*/
elapsed = TCP_RESOURCE_PROBE_INTERVAL ;
}
} else {
/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
elapsed = keepalive_time_when ( tp ) - elapsed ;
}
TCP_CHECK_TIMER ( sk ) ;
2007-12-31 11:11:19 +03:00
sk_mem_reclaim ( sk ) ;
2005-04-17 02:20:36 +04:00
resched :
2005-08-10 07:10:42 +04:00
inet_csk_reset_keepalive_timer ( sk , elapsed ) ;
2005-04-17 02:20:36 +04:00
goto out ;
2007-02-09 17:24:47 +03:00
death :
2005-04-17 02:20:36 +04:00
tcp_done ( sk ) ;
out :
bh_unlock_sock ( sk ) ;
sock_put ( sk ) ;
}