2005-04-16 15:20:36 -07:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* Definitions for the TCP module .
*
* Version : @ ( # ) tcp . h 1.0 .5 05 / 23 / 93
*
2005-05-05 16:16:16 -07:00
* Authors : Ross Biro
2005-04-16 15:20:36 -07:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# ifndef _TCP_H
# define _TCP_H
# define TCP_DEBUG 1
# define FASTRETRANS_DEBUG 1
# include <linux/list.h>
# include <linux/tcp.h>
# include <linux/slab.h>
# include <linux/cache.h>
# include <linux/percpu.h>
2005-11-10 13:01:24 -08:00
# include <linux/skbuff.h>
2006-05-23 17:55:33 -07:00
# include <linux/dmaengine.h>
2006-11-14 19:07:45 -08:00
# include <linux/crypto.h>
2005-08-09 20:11:08 -07:00
# include <net/inet_connection_sock.h>
2005-08-09 20:44:40 -07:00
# include <net/inet_timewait_sock.h>
2005-08-09 20:00:51 -07:00
# include <net/inet_hashtables.h>
2005-04-16 15:20:36 -07:00
# include <net/checksum.h>
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basically tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
# include <net/request_sock.h>
2005-04-16 15:20:36 -07:00
# include <net/sock.h>
# include <net/snmp.h>
# include <net/ip.h>
2005-08-09 20:08:28 -07:00
# include <net/tcp_states.h>
2005-04-16 15:20:36 -07:00
# include <linux/seq_file.h>
2005-08-09 20:07:35 -07:00
extern struct inet_hashinfo tcp_hashinfo ;
2005-04-16 15:20:36 -07:00
extern atomic_t tcp_orphan_count ;
extern void tcp_time_wait ( struct sock * sk , int state , int timeo ) ;
# define MAX_TCP_HEADER (128 + MAX_HEADER)
/*
* Never offer a window over 32767 without using window scaling . Some
* poor stacks do signed 16 bit maths !
*/
# define MAX_TCP_WINDOW 32767U
/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
# define TCP_MIN_MSS 88U
/* Minimal RCV_MSS. */
# define TCP_MIN_RCVMSS 536U
2006-03-20 17:53:41 -08:00
/* The least MTU to use for probing */
# define TCP_BASE_MSS 512
2005-04-16 15:20:36 -07:00
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
# define TCP_FASTRETRANS_THRESH 3
/* Maximal reordering. */
# define TCP_MAX_REORDERING 127
/* Maximal number of ACKs sent quickly to accelerate slow-start. */
# define TCP_MAX_QUICKACKS 16U
/* urg_data states */
# define TCP_URG_VALID 0x0100
# define TCP_URG_NOTYET 0x0200
# define TCP_URG_READ 0x0400
#define TCP_RETR1	3	/*
				 * This is how many retries it does before it
				 * tries to figure out if the gateway is
				 * down. Minimal RFC value is 3; it corresponds
				 * to ~3sec-8min depending on RTO.
				 */

#define TCP_RETR2	15	/*
				 * This should take at least
				 * 90 minutes to time out.
				 * RFC1122 says that the limit is 100 sec.
				 * 15 is ~13-30min depending on RTO.
				 */

#define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_ORPHAN_RETRIES 7	/* number of times to retry on an orphaned
				 * socket. 7 is ~50sec-16min.
				 */

#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
				  * state, about 60 seconds	*/
# define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
/* BSD style FIN_WAIT2 deadlock breaker.
* It used to be 3 min , new value is 60 sec ,
* to combine FIN - WAIT - 2 timeout with
* TIME - WAIT timer .
*/
# define TCP_DELACK_MAX ((unsigned)(HZ / 5)) /* maximal time to delay before sending an ACK */
# if HZ >= 100
# define TCP_DELACK_MIN ((unsigned)(HZ / 25)) /* minimal time to delay before sending an ACK */
# define TCP_ATO_MIN ((unsigned)(HZ / 25))
# else
# define TCP_DELACK_MIN 4U
# define TCP_ATO_MIN 4U
# endif
# define TCP_RTO_MAX ((unsigned)(120*HZ))
# define TCP_RTO_MIN ((unsigned)(HZ / 5))
# define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
							 * for local resources.
							 */
# define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
# define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
# define TCP_KEEPALIVE_INTVL (75*HZ)
# define MAX_TCP_KEEPIDLE 32767
# define MAX_TCP_KEEPINTVL 32767
# define MAX_TCP_KEEPCNT 127
# define MAX_TCP_SYNCNT 127
# define TCP_SYNQ_INTERVAL (HZ / 5) /* Period of SYNACK timer */
# define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL	60		/* Per-host timestamps are invalidated
					 * after this time. It should be equal
					 * (or greater than) TCP_TIMEWAIT_LEN
					 * to provide reliability equal to one
					 * provided by timewait state.
					 */
#define TCP_PAWS_WINDOW	1		/* Replay window for per-host
					 * timestamps. It must be less than
					 * minimal timewait lifetime.
					 */
/*
* TCP option
*/
# define TCPOPT_NOP 1 /* Padding */
# define TCPOPT_EOL 0 /* End of options */
# define TCPOPT_MSS 2 /* Segment size negotiating */
# define TCPOPT_WINDOW 3 /* Window scaling */
# define TCPOPT_SACK_PERM 4 /* SACK Permitted */
# define TCPOPT_SACK 5 /* SACK Block */
# define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
2006-11-14 19:07:45 -08:00
# define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
2005-04-16 15:20:36 -07:00
/*
* TCP option lengths
*/
# define TCPOLEN_MSS 4
# define TCPOLEN_WINDOW 3
# define TCPOLEN_SACK_PERM 2
# define TCPOLEN_TIMESTAMP 10
2006-11-14 19:07:45 -08:00
# define TCPOLEN_MD5SIG 18
2005-04-16 15:20:36 -07:00
/* But this is what stacks really send out. */
# define TCPOLEN_TSTAMP_ALIGNED 12
# define TCPOLEN_WSCALE_ALIGNED 4
# define TCPOLEN_SACKPERM_ALIGNED 4
# define TCPOLEN_SACK_BASE 2
# define TCPOLEN_SACK_BASE_ALIGNED 4
# define TCPOLEN_SACK_PERBLOCK 8
2006-11-14 19:07:45 -08:00
# define TCPOLEN_MD5SIG_ALIGNED 20
2005-04-16 15:20:36 -07:00
/* Flags in tp->nonagle */
# define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
# define TCP_NAGLE_CORK 2 /* Socket is corked */
2005-11-10 17:13:47 -08:00
# define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
2005-04-16 15:20:36 -07:00
2005-08-09 20:44:40 -07:00
extern struct inet_timewait_death_row tcp_death_row ;
2005-04-16 15:20:36 -07:00
/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps ;
extern int sysctl_tcp_window_scaling ;
extern int sysctl_tcp_sack ;
extern int sysctl_tcp_fin_timeout ;
extern int sysctl_tcp_keepalive_time ;
extern int sysctl_tcp_keepalive_probes ;
extern int sysctl_tcp_keepalive_intvl ;
extern int sysctl_tcp_syn_retries ;
extern int sysctl_tcp_synack_retries ;
extern int sysctl_tcp_retries1 ;
extern int sysctl_tcp_retries2 ;
extern int sysctl_tcp_orphan_retries ;
extern int sysctl_tcp_syncookies ;
extern int sysctl_tcp_retrans_collapse ;
extern int sysctl_tcp_stdurg ;
extern int sysctl_tcp_rfc1337 ;
extern int sysctl_tcp_abort_on_overflow ;
extern int sysctl_tcp_max_orphans ;
extern int sysctl_tcp_fack ;
extern int sysctl_tcp_reordering ;
extern int sysctl_tcp_ecn ;
extern int sysctl_tcp_dsack ;
extern int sysctl_tcp_mem [ 3 ] ;
extern int sysctl_tcp_wmem [ 3 ] ;
extern int sysctl_tcp_rmem [ 3 ] ;
extern int sysctl_tcp_app_win ;
extern int sysctl_tcp_adv_win_scale ;
extern int sysctl_tcp_tw_reuse ;
extern int sysctl_tcp_frto ;
extern int sysctl_tcp_low_latency ;
2006-05-23 18:02:55 -07:00
extern int sysctl_tcp_dma_copybreak ;
2005-04-16 15:20:36 -07:00
extern int sysctl_tcp_nometrics_save ;
extern int sysctl_tcp_moderate_rcvbuf ;
extern int sysctl_tcp_tso_win_divisor ;
2005-11-10 17:09:53 -08:00
extern int sysctl_tcp_abc ;
2006-03-20 17:53:41 -08:00
extern int sysctl_tcp_mtu_probing ;
extern int sysctl_tcp_base_mss ;
2006-03-20 22:40:29 -08:00
extern int sysctl_tcp_workaround_signed_windows ;
2006-06-13 22:33:04 -07:00
extern int sysctl_tcp_slow_start_after_idle ;
2005-04-16 15:20:36 -07:00
extern atomic_t tcp_memory_allocated ;
extern atomic_t tcp_sockets_allocated ;
extern int tcp_memory_pressure ;
/*
* The next routines deal with comparing 32 bit unsigned ints
* and worry about wraparound ( automatic with unsigned arithmetic ) .
*/
/*
 * Return non-zero when seq1 precedes seq2 in 32-bit sequence space.
 *
 * The difference seq1 - seq2 is computed in unsigned arithmetic (so it
 * wraps modulo 2^32) and then reinterpreted as a signed 32-bit value;
 * a negative result means seq1 is "before" seq2.  Equal values are not
 * before each other.
 */
static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
[TCP]: Fix ambiguity in the `before' relation.
While looking at DCCP sequence numbers, I stumbled over a problem with
the following definition of before in tcp.h:
static inline int before(__u32 seq1, __u32 seq2)
{
return (__s32)(seq1-seq2) < 0;
}
Problem: This definition suffers from an ambiguity, i.e. always
before(a, (a + 2^31) % 2^32) = 1
before((a + 2^31) % 2^32, a) = 1
In text: when the difference between a and b amounts to 2^31,
a is always considered `before' b, the function can not decide.
The reason is that implicitly 0 is `before' 1 ... 2^31-1 ... 2^31
Solution: There is a simple fix, by defining before in such a way that
0 is no longer `before' 2^31, i.e. 0 `before' 1 ... 2^31-1
By not using the middle between 0 and 2^32, before can be made
unambiguous.
This is achieved by testing whether seq2-seq1 > 0 (using signed
32-bit arithmetic).
I attach a patch to codify this. Also the `after' relation is basically
a redefinition of `before', it is now defined as a macro after before.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-12-20 10:25:55 -08:00
/* after(seq2, seq1) expands to before(seq1, seq2): the same comparison with the arguments swapped. */
# define after(seq2, seq1) before(seq1, seq2)
2005-04-16 15:20:36 -07:00
/*
 * is s2<=s1<=s3 ?
 *
 * Both sides are offsets from seq2 computed in unsigned (wrapping)
 * arithmetic: the test is true when seq1's offset from seq2 does not
 * exceed seq3's offset from seq2.
 */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}
extern struct proto tcp_prot ;
DECLARE_SNMP_STAT ( struct tcp_mib , tcp_statistics ) ;
# define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
# define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
# define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
# define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field)
# define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
# define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
extern void tcp_v4_err ( struct sk_buff * skb , u32 ) ;
extern void tcp_shutdown ( struct sock * sk , int how ) ;
extern int tcp_v4_rcv ( struct sk_buff * skb ) ;
extern int tcp_v4_remember_stamp ( struct sock * sk ) ;
2005-08-09 20:09:30 -07:00
extern int tcp_v4_tw_remember_stamp ( struct inet_timewait_sock * tw ) ;
2005-04-16 15:20:36 -07:00
extern int tcp_sendmsg ( struct kiocb * iocb , struct sock * sk ,
struct msghdr * msg , size_t size ) ;
extern ssize_t tcp_sendpage ( struct socket * sock , struct page * page , int offset , size_t size , int flags ) ;
extern int tcp_ioctl ( struct sock * sk ,
int cmd ,
unsigned long arg ) ;
extern int tcp_rcv_state_process ( struct sock * sk ,
struct sk_buff * skb ,
struct tcphdr * th ,
unsigned len ) ;
extern int tcp_rcv_established ( struct sock * sk ,
struct sk_buff * skb ,
struct tcphdr * th ,
unsigned len ) ;
extern void tcp_rcv_space_adjust ( struct sock * sk ) ;
2006-05-23 18:00:16 -07:00
extern void tcp_cleanup_rbuf ( struct sock * sk , int copied ) ;
2005-12-13 23:25:19 -08:00
extern int tcp_twsk_unique ( struct sock * sk ,
struct sock * sktw , void * twp ) ;
2006-11-14 19:07:45 -08:00
extern void tcp_twsk_destructor ( struct sock * sk ) ;
2005-08-09 20:10:42 -07:00
static inline void tcp_dec_quickack_mode ( struct sock * sk ,
const unsigned int pkts )
2005-04-16 15:20:36 -07:00
{
2005-08-09 20:10:42 -07:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-07-05 15:17:45 -07:00
2005-08-09 20:10:42 -07:00
if ( icsk - > icsk_ack . quick ) {
if ( pkts > = icsk - > icsk_ack . quick ) {
icsk - > icsk_ack . quick = 0 ;
2005-07-05 15:17:45 -07:00
/* Leaving quickack mode we deflate ATO. */
2005-08-09 20:10:42 -07:00
icsk - > icsk_ack . ato = TCP_ATO_MIN ;
2005-07-05 15:17:45 -07:00
} else
2005-08-09 20:10:42 -07:00
icsk - > icsk_ack . quick - = pkts ;
2005-04-16 15:20:36 -07:00
}
}
2005-08-09 20:10:42 -07:00
extern void tcp_enter_quickack_mode ( struct sock * sk ) ;
2005-04-16 15:20:36 -07:00
static inline void tcp_clear_options ( struct tcp_options_received * rx_opt )
{
rx_opt - > tstamp_ok = rx_opt - > sack_ok = rx_opt - > wscale_ok = rx_opt - > snd_wscale = 0 ;
}
/* Status codes; this is the return type of tcp_timewait_state_process(). */
enum tcp_tw_status {
	TCP_TW_SUCCESS = 0,	/* 0 */
	TCP_TW_RST,		/* 1 */
	TCP_TW_ACK,		/* 2 */
	TCP_TW_SYN		/* 3 */
};
2005-08-09 20:09:30 -07:00
extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
						     struct sk_buff *skb,
						     const struct tcphdr *th);

extern struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct request_sock **prev);

extern int tcp_child_process(struct sock *parent,
			     struct sock *child,
			     struct sk_buff *skb);
extern void tcp_enter_frto ( struct sock * sk ) ;
extern void tcp_enter_loss ( struct sock * sk , int how ) ;
extern void tcp_clear_retrans ( struct tcp_sock * tp ) ;
extern void tcp_update_metrics ( struct sock * sk ) ;
extern void tcp_close ( struct sock * sk ,
long timeout ) ;
extern unsigned int tcp_poll ( struct file * file , struct socket * sock , struct poll_table_struct * wait ) ;
extern int tcp_getsockopt ( struct sock * sk , int level ,
int optname ,
char __user * optval ,
int __user * optlen ) ;
extern int tcp_setsockopt ( struct sock * sk , int level ,
int optname , char __user * optval ,
int optlen ) ;
2006-03-20 22:45:21 -08:00
extern int compat_tcp_getsockopt ( struct sock * sk ,
int level , int optname ,
char __user * optval , int __user * optlen ) ;
extern int compat_tcp_setsockopt ( struct sock * sk ,
int level , int optname ,
char __user * optval , int optlen ) ;
2005-04-16 15:20:36 -07:00
extern void tcp_set_keepalive ( struct sock * sk , int val ) ;
extern int tcp_recvmsg ( struct kiocb * iocb , struct sock * sk ,
struct msghdr * msg ,
size_t len , int nonblock ,
int flags , int * addr_len ) ;
extern void tcp_parse_options ( struct sk_buff * skb ,
struct tcp_options_received * opt_rx ,
int estab ) ;
/*
* TCP v4 functions exported for the inet6 API
*/
2005-12-13 23:15:52 -08:00
extern void tcp_v4_send_check(struct sock *sk, int len,
			      struct sk_buff *skb);

extern int tcp_v4_conn_request(struct sock *sk,
			       struct sk_buff *skb);

extern struct sock *tcp_create_openreq_child(struct sock *sk,
					     struct request_sock *req,
					     struct sk_buff *skb);

extern struct sock *tcp_v4_syn_recv_sock(struct sock *sk,
					 struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst);

extern int tcp_v4_do_rcv(struct sock *sk,
			 struct sk_buff *skb);

extern int tcp_v4_connect(struct sock *sk,
			  struct sockaddr *uaddr,
			  int addr_len);

extern int tcp_connect(struct sock *sk);

extern struct sk_buff *tcp_make_synack(struct sock *sk,
				       struct dst_entry *dst,
				       struct request_sock *req);
2005-04-16 15:20:36 -07:00
extern int tcp_disconnect ( struct sock * sk , int flags ) ;
extern void tcp_unhash ( struct sock * sk ) ;
/* From syncookies.c */
extern struct sock * cookie_v4_check ( struct sock * sk , struct sk_buff * skb ,
struct ip_options * opt ) ;
extern __u32 cookie_v4_init_sequence ( struct sock * sk , struct sk_buff * skb ,
__u16 * mss ) ;
/* tcp_output.c */
2005-07-05 15:18:03 -07:00
extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
				      unsigned int cur_mss, int nonagle);
2005-07-05 15:18:03 -07:00
extern int tcp_may_send_now ( struct sock * sk , struct tcp_sock * tp ) ;
2005-04-16 15:20:36 -07:00
extern int tcp_retransmit_skb ( struct sock * , struct sk_buff * ) ;
extern void tcp_xmit_retransmit_queue ( struct sock * ) ;
extern void tcp_simple_retransmit ( struct sock * ) ;
extern int tcp_trim_head ( struct sock * , struct sk_buff * , u32 ) ;
2005-09-01 22:47:01 -07:00
extern int tcp_fragment ( struct sock * , struct sk_buff * , u32 , unsigned int ) ;
2005-04-16 15:20:36 -07:00
extern void tcp_send_probe0 ( struct sock * ) ;
extern void tcp_send_partial ( struct sock * ) ;
extern int tcp_write_wakeup ( struct sock * ) ;
extern void tcp_send_fin ( struct sock * sk ) ;
2005-10-07 07:46:04 +01:00
extern void tcp_send_active_reset ( struct sock * sk , gfp_t priority ) ;
2005-04-16 15:20:36 -07:00
extern int tcp_send_synack ( struct sock * ) ;
2005-07-05 15:24:38 -07:00
extern void tcp_push_one ( struct sock * , unsigned int mss_now ) ;
2005-04-16 15:20:36 -07:00
extern void tcp_send_ack ( struct sock * sk ) ;
extern void tcp_send_delayed_ack ( struct sock * sk ) ;
2005-07-05 15:18:51 -07:00
/* tcp_input.c */
extern void tcp_cwnd_application_limited ( struct sock * sk ) ;
2005-04-16 15:20:36 -07:00
/* tcp_timer.c */
extern void tcp_init_xmit_timers ( struct sock * ) ;
2005-08-09 20:10:42 -07:00
/* TCP-named wrapper: forwards @sk unchanged to inet_csk_clear_xmit_timers(). */
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
2005-04-16 15:20:36 -07:00
extern unsigned int tcp_sync_mss ( struct sock * sk , u32 pmtu ) ;
extern unsigned int tcp_current_mss ( struct sock * sk , int large ) ;
[INET_DIAG]: Move the tcp_diag interface to the proper place
With this the previous setup is back, i.e. tcp_diag can be built as a module,
as dccp_diag and both share the infrastructure available in inet_diag.
If one selects CONFIG_INET_DIAG as module CONFIG_INET_TCP_DIAG will also be
built as a module, as will CONFIG_INET_DCCP_DIAG, if CONFIG_IP_DCCP was
selected static or as a module, if CONFIG_INET_DIAG is y, being statically
linked CONFIG_INET_TCP_DIAG will follow suit and CONFIG_INET_DCCP_DIAG will be
built in the same manner as CONFIG_IP_DCCP.
Now to aim at UDP, converting it to use inet_hashinfo, so that we can use
iproute2 for UDP sockets as well.
Ah, just to show an example of this new infrastructure working for DCCP :-)
[root@qemu ~]# ./ss -dane
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 0 *:5001 *:* ino:942 sk:cfd503a0
ESTAB 0 0 127.0.0.1:5001 127.0.0.1:32770 ino:943 sk:cfd50a60
ESTAB 0 0 127.0.0.1:32770 127.0.0.1:5001 ino:947 sk:cfd50700
TIME-WAIT 0 0 127.0.0.1:32769 127.0.0.1:5001 timer:(timewait,3.430ms,0) ino:0 sk:cf209620
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-08-12 12:59:17 -03:00
/* tcp.c */
2005-04-16 15:20:36 -07:00
extern void tcp_get_info ( struct sock * , struct tcp_info * ) ;
/* Read 'sendfile()'-style from a TCP socket */
typedef int ( * sk_read_actor_t ) ( read_descriptor_t * , struct sk_buff * ,
unsigned int , size_t ) ;
extern int tcp_read_sock ( struct sock * sk , read_descriptor_t * desc ,
sk_read_actor_t recv_actor ) ;
2006-01-03 16:03:49 -08:00
extern void tcp_initialize_rcv_mss ( struct sock * sk ) ;
2005-04-16 15:20:36 -07:00
2006-03-20 17:53:41 -08:00
extern int tcp_mtu_to_mss ( struct sock * sk , int pmtu ) ;
extern int tcp_mss_to_mtu ( struct sock * sk , int mss ) ;
extern void tcp_mtup_init ( struct sock * sk ) ;
2006-01-03 16:03:49 -08:00
/*
 * Build tp->pred_flags from three parts OR-ed together in host byte
 * order - tp->tcp_header_len shifted left by 26 bits, TCP_FLAG_ACK
 * (converted with ntohl()) and @snd_wnd - and store the result after
 * converting it with htonl().
 */
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}
2006-01-03 16:03:49 -08:00
static inline void tcp_fast_path_on ( struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
__tcp_fast_path_on ( tp , tp - > snd_wnd > > tp - > rx_opt . snd_wscale ) ;
}
static inline void tcp_fast_path_check ( struct sock * sk , struct tcp_sock * tp )
{
2005-07-08 14:57:23 -07:00
if ( skb_queue_empty ( & tp - > out_of_order_queue ) & &
2005-04-16 15:20:36 -07:00
tp - > rcv_wnd & &
atomic_read ( & sk - > sk_rmem_alloc ) < sk - > sk_rcvbuf & &
! tp - > urg_data )
tcp_fast_path_on ( tp ) ;
}
/* Compute the actual receive window we are currently advertising.
* Rcv_nxt can be after the window if our peer push more data
* than the offered window .
*/
2006-01-03 16:03:49 -08:00
static inline u32 tcp_receive_window ( const struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
s32 win = tp - > rcv_wup + tp - > rcv_wnd - tp - > rcv_nxt ;
if ( win < 0 )
win = 0 ;
return ( u32 ) win ;
}
/* Choose a new window, without checks for shrinking, and without
* scaling applied to the result . The caller does these things
* if necessary . This is a " raw " window selection .
*/
extern u32 __tcp_select_window ( struct sock * sk ) ;
/* TCP timestamps are only 32-bits, this causes a slight
* complication on 64 - bit systems since we store a snapshot
2005-11-15 15:17:10 -08:00
* of jiffies in the buffer control blocks below . We decided
* to use only the low 32 - bits of jiffies and hide the ugly
2005-04-16 15:20:36 -07:00
* casts with the following macro .
*/
# define tcp_time_stamp ((__u32)(jiffies))
2005-11-10 17:13:47 -08:00
/* This is what the send packet queuing engine uses to pass
2005-04-16 15:20:36 -07:00
* TCP per - packet control information to the transmission
* code . We also store the host - order sequence numbers in
* here too . This is 36 bytes on 32 - bit architectures ,
* 40 bytes on 64 - bit machines , if this grows please adjust
* skbuff . h : skbuff - > cb [ xxx ] size appropriately .
*/
struct tcp_skb_cb {
union {
struct inet_skb_parm h4 ;
# if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
struct inet6_skb_parm h6 ;
# endif
} header ; /* For incoming frames */
__u32 seq ; /* Starting sequence number */
__u32 end_seq ; /* SEQ + FIN + SYN + datalen */
__u32 when ; /* used to compute rtt's */
__u8 flags ; /* TCP header flags. */
/* NOTE: These must match up to the flags byte in a
* real TCP header .
*/
# define TCPCB_FLAG_FIN 0x01
# define TCPCB_FLAG_SYN 0x02
# define TCPCB_FLAG_RST 0x04
# define TCPCB_FLAG_PSH 0x08
# define TCPCB_FLAG_ACK 0x10
# define TCPCB_FLAG_URG 0x20
# define TCPCB_FLAG_ECE 0x40
# define TCPCB_FLAG_CWR 0x80
__u8 sacked ; /* State flags for SACK/FACK. */
# define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
# define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
# define TCPCB_LOST 0x04 /* SKB is lost */
# define TCPCB_TAGBITS 0x07 /* All tag bits */
# define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
# define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
2005-11-10 17:13:47 -08:00
# define TCPCB_URG 0x20 /* Urgent pointer advanced here */
2005-04-16 15:20:36 -07:00
# define TCPCB_AT_TAIL (TCPCB_URG)
__u16 urg_ptr ; /* Valid w/URG flags is set. */
__u32 ack_seq ; /* Sequence number ACK'd */
} ;
# define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
# include <net/tcp_ecn.h>
/* Due to TSO, an SKB can be composed of multiple actual
* packets . To keep these tracked properly , we use this .
*/
static inline int tcp_skb_pcount ( const struct sk_buff * skb )
{
2006-06-22 02:40:14 -07:00
return skb_shinfo ( skb ) - > gso_segs ;
2005-04-16 15:20:36 -07:00
}
/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss ( const struct sk_buff * skb )
{
2006-06-22 02:40:14 -07:00
return skb_shinfo ( skb ) - > gso_size ;
2005-04-16 15:20:36 -07:00
}
/*
 * Subtract tcp_skb_pcount(@skb) from *@count, leaving *@count untouched
 * when it is already zero.  If the subtraction makes the value negative
 * when viewed as an int, *@count is reset to zero.
 */
static inline void tcp_dec_pcount_approx(__u32 *count,
					 const struct sk_buff *skb)
{
	if (*count) {
		*count -= tcp_skb_pcount(skb);
		if ((int)*count < 0)
			*count = 0;
	}
}
static inline void tcp_packets_out_inc ( struct sock * sk ,
struct tcp_sock * tp ,
const struct sk_buff * skb )
{
int orig = tp - > packets_out ;
tp - > packets_out + = tcp_skb_pcount ( skb ) ;
if ( ! orig )
2005-08-09 20:11:08 -07:00
inet_csk_reset_xmit_timer ( sk , ICSK_TIME_RETRANS ,
inet_csk ( sk ) - > icsk_rto , TCP_RTO_MAX ) ;
2005-04-16 15:20:36 -07:00
}
static inline void tcp_packets_out_dec ( struct tcp_sock * tp ,
const struct sk_buff * skb )
{
tp - > packets_out - = tcp_skb_pcount ( skb ) ;
}
2005-06-23 12:19:55 -07:00
/* Events passed to congestion control interface */
enum tcp_ca_event {
	CA_EVENT_TX_START = 0,	/* first transmit when no packets in flight */
	CA_EVENT_CWND_RESTART,	/* congestion window restart */
	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
	CA_EVENT_FRTO,		/* fast recovery timeout */
	CA_EVENT_LOSS,		/* loss timeout */
	CA_EVENT_FAST_ACK,	/* in sequence ack */
	CA_EVENT_SLOW_ACK	/* other ack */
};
/*
* Interface for adding new TCP congestion control handlers
*/
# define TCP_CA_NAME_MAX 16
2006-11-09 16:32:06 -08:00
# define TCP_CA_MAX 128
# define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
2005-06-23 12:19:55 -07:00
struct tcp_congestion_ops {
struct list_head list ;
2006-11-09 16:35:15 -08:00
int non_restricted ;
2005-06-23 12:19:55 -07:00
/* initialize private data (optional) */
2005-08-10 04:03:31 -03:00
void ( * init ) ( struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
/* cleanup private data (optional) */
2005-08-10 04:03:31 -03:00
void ( * release ) ( struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
/* return slow start threshold (required) */
2005-08-10 04:03:31 -03:00
u32 ( * ssthresh ) ( struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
/* lower bound for congestion window (optional) */
2006-06-05 17:30:08 -07:00
u32 ( * min_cwnd ) ( const struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
/* do new cwnd calculation (required) */
2005-08-10 04:03:31 -03:00
void ( * cong_avoid ) ( struct sock * sk , u32 ack ,
2005-06-23 12:19:55 -07:00
u32 rtt , u32 in_flight , int good_ack ) ;
/* round trip time sample per acked packet (optional) */
2005-08-10 04:03:31 -03:00
void ( * rtt_sample ) ( struct sock * sk , u32 usrtt ) ;
2005-06-23 12:19:55 -07:00
/* call before changing ca_state (optional) */
2005-08-10 04:03:31 -03:00
void ( * set_state ) ( struct sock * sk , u8 new_state ) ;
2005-06-23 12:19:55 -07:00
/* call when cwnd event occurs (optional) */
2005-08-10 04:03:31 -03:00
void ( * cwnd_event ) ( struct sock * sk , enum tcp_ca_event ev ) ;
2005-06-23 12:19:55 -07:00
/* new value of cwnd after loss (optional) */
2005-08-10 04:03:31 -03:00
u32 ( * undo_cwnd ) ( struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
/* hook for packet ack accounting (optional) */
2005-08-10 04:03:31 -03:00
void ( * pkts_acked ) ( struct sock * sk , u32 num_acked ) ;
2005-08-12 12:51:49 -03:00
/* get info for inet_diag (optional) */
2005-08-10 04:03:31 -03:00
void ( * get_info ) ( struct sock * sk , u32 ext , struct sk_buff * skb ) ;
2005-06-23 12:19:55 -07:00
char name [ TCP_CA_NAME_MAX ] ;
struct module * owner ;
} ;
extern int tcp_register_congestion_control ( struct tcp_congestion_ops * type ) ;
extern void tcp_unregister_congestion_control ( struct tcp_congestion_ops * type ) ;
2005-08-10 04:03:31 -03:00
extern void tcp_init_congestion_control ( struct sock * sk ) ;
extern void tcp_cleanup_congestion_control ( struct sock * sk ) ;
2005-06-23 12:19:55 -07:00
extern int tcp_set_default_congestion_control ( const char * name ) ;
extern void tcp_get_default_congestion_control ( char * name ) ;
2006-11-09 16:32:06 -08:00
extern void tcp_get_available_congestion_control ( char * buf , size_t len ) ;
2006-11-09 16:35:15 -08:00
extern void tcp_get_allowed_congestion_control ( char * buf , size_t len ) ;
extern int tcp_set_allowed_congestion_control ( char * allowed ) ;
2005-08-10 04:03:31 -03:00
extern int tcp_set_congestion_control ( struct sock * sk , const char * name ) ;
2006-01-03 16:03:49 -08:00
extern void tcp_slow_start ( struct tcp_sock * tp ) ;
2005-06-23 12:19:55 -07:00
2005-06-23 20:37:36 -07:00
extern struct tcp_congestion_ops tcp_init_congestion_ops ;
2005-08-10 04:03:31 -03:00
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
				u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(const struct sock *sk);
2005-06-23 23:45:02 -07:00
extern struct tcp_congestion_ops tcp_reno ;
2005-06-23 12:19:55 -07:00
2005-08-10 04:03:31 -03:00
static inline void tcp_set_ca_state ( struct sock * sk , const u8 ca_state )
2005-06-23 12:19:55 -07:00
{
2005-08-10 04:03:31 -03:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
if ( icsk - > icsk_ca_ops - > set_state )
icsk - > icsk_ca_ops - > set_state ( sk , ca_state ) ;
icsk - > icsk_ca_state = ca_state ;
2005-06-23 12:19:55 -07:00
}
2005-08-10 04:03:31 -03:00
static inline void tcp_ca_event ( struct sock * sk , const enum tcp_ca_event event )
2005-06-23 12:19:55 -07:00
{
2005-08-10 04:03:31 -03:00
const struct inet_connection_sock * icsk = inet_csk ( sk ) ;
if ( icsk - > icsk_ca_ops - > cwnd_event )
icsk - > icsk_ca_ops - > cwnd_event ( sk , event ) ;
2005-06-23 12:19:55 -07:00
}
2005-04-16 15:20:36 -07:00
/* This determines how many packets are "in the network" to the best
* of our knowledge . In many cases it is conservative , but where
* detailed information is available from the receiver ( via SACK
* blocks etc . ) we can make more aggressive calculations .
*
* Use this for decisions involving congestion control , use just
* tp - > packets_out to determine if the send queue is empty or not .
*
* Read this equation as :
*
* " Packets sent once on transmission queue " MINUS
* " Packets left network, but not honestly ACKed yet " PLUS
* " Packets fast retransmitted "
*/
2006-01-03 16:03:49 -08:00
static inline unsigned int tcp_packets_in_flight ( const struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
return ( tp - > packets_out - tp - > left_out + tp - > retrans_out ) ;
}
/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
* The exception is rate halving phase , when cwnd is decreasing towards
* ssthresh .
*/
2005-08-10 04:03:31 -03:00
static inline __u32 tcp_current_ssthresh ( const struct sock * sk )
2005-04-16 15:20:36 -07:00
{
2005-08-10 04:03:31 -03:00
const struct tcp_sock * tp = tcp_sk ( sk ) ;
if ( ( 1 < < inet_csk ( sk ) - > icsk_ca_state ) & ( TCPF_CA_CWR | TCPF_CA_Recovery ) )
2005-04-16 15:20:36 -07:00
return tp - > snd_ssthresh ;
else
return max ( tp - > snd_ssthresh ,
( ( tp - > snd_cwnd > > 1 ) +
( tp - > snd_cwnd > > 2 ) ) ) ;
}
static inline void tcp_sync_left_out ( struct tcp_sock * tp )
{
if ( tp - > rx_opt . sack_ok & &
( tp - > sacked_out > = tp - > packets_out - tp - > lost_out ) )
tp - > sacked_out = tp - > packets_out - tp - > lost_out ;
tp - > left_out = tp - > sacked_out + tp - > lost_out ;
}
2006-01-03 16:03:49 -08:00
extern void tcp_enter_cwr ( struct sock * sk ) ;
2005-04-16 15:20:36 -07:00
extern __u32 tcp_init_cwnd ( struct tcp_sock * tp , struct dst_entry * dst ) ;
/* Maximum burst size, in packets.
 *
 * Slow start with delack produces 3 packets of burst, so that it is
 * safe "de facto".  The tp argument is not consulted.
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
	const __u32 burst = 3;

	return burst;
}
2005-11-10 16:53:30 -08:00
/* RFC2861: check whether we are limited by the application or by the
 * congestion window.  This is the inverse of the cwnd check in
 * tcp_tso_should_defer.
 *
 * Returns 1 when in_flight has reached snd_cwnd.  Returns 0 when the
 * socket cannot do GSO.  Otherwise the remaining window room is compared
 * against snd_cwnd scaled by sysctl_tcp_tso_win_divisor or, if that sysctl
 * is zero, against tcp_max_burst().
 */
static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 room;

	if (in_flight >= tp->snd_cwnd)
		return 1;

	if (!sk_can_gso(sk))
		return 0;

	room = tp->snd_cwnd - in_flight;

	return sysctl_tcp_tso_win_divisor ?
		(room * sysctl_tcp_tso_win_divisor < tp->snd_cwnd) :
		(room <= tcp_max_burst(tp));
}
2006-01-03 16:03:49 -08:00
static inline void tcp_minshall_update ( struct tcp_sock * tp , int mss ,
const struct sk_buff * skb )
2005-04-16 15:20:36 -07:00
{
if ( skb - > len < mss )
tp - > snd_sml = TCP_SKB_CB ( skb ) - > end_seq ;
}
2006-01-03 16:03:49 -08:00
static inline void tcp_check_probe_timer ( struct sock * sk , struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
2005-08-09 20:10:42 -07:00
const struct inet_connection_sock * icsk = inet_csk ( sk ) ;
if ( ! tp - > packets_out & & ! icsk - > icsk_pending )
2005-08-09 20:11:08 -07:00
inet_csk_reset_xmit_timer ( sk , ICSK_TIME_PROBE0 ,
icsk - > icsk_rto , TCP_RTO_MAX ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-03 16:03:49 -08:00
static inline void tcp_push_pending_frames ( struct sock * sk ,
struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
__tcp_push_pending_frames ( sk , tp , tcp_current_mss ( sk , 1 ) , tp - > nonagle ) ;
}
2006-01-03 16:03:49 -08:00
/* Initialise tp->snd_wl1 to seq.  The ack argument is accepted but unused. */
static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
	(void)ack;

	tp->snd_wl1 = seq;
}
2006-01-03 16:03:49 -08:00
/* Update tp->snd_wl1 to seq.  The ack argument is accepted but unused. */
static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
	(void)ack;

	tp->snd_wl1 = seq;
}
/*
* Calculate ( / check ) TCP checksum
*/
2006-11-14 21:28:24 -08:00
static inline __sum16 tcp_v4_check ( struct tcphdr * th , int len ,
__be32 saddr , __be32 daddr ,
__wsum base )
2005-04-16 15:20:36 -07:00
{
return csum_tcpudp_magic ( saddr , daddr , len , IPPROTO_TCP , base ) ;
}
2006-11-14 21:40:42 -08:00
/* TCP-named wrapper: returns whatever __skb_checksum_complete(skb) returns. */
static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
	const __sum16 csum = __skb_checksum_complete(skb);

	return csum;
}
2006-01-03 16:03:49 -08:00
static inline int tcp_checksum_complete ( struct sk_buff * skb )
2005-04-16 15:20:36 -07:00
{
return skb - > ip_summed ! = CHECKSUM_UNNECESSARY & &
__tcp_checksum_complete ( skb ) ;
}
/* Prequeue for VJ style copy to user, combined with checksumming. */
2006-01-03 16:03:49 -08:00
static inline void tcp_prequeue_init ( struct tcp_sock * tp )
2005-04-16 15:20:36 -07:00
{
tp - > ucopy . task = NULL ;
tp - > ucopy . len = 0 ;
tp - > ucopy . memory = 0 ;
skb_queue_head_init ( & tp - > ucopy . prequeue ) ;
2006-05-23 17:55:33 -07:00
# ifdef CONFIG_NET_DMA
tp - > ucopy . dma_chan = NULL ;
tp - > ucopy . wakeup = 0 ;
tp - > ucopy . pinned_list = NULL ;
tp - > ucopy . dma_cookie = 0 ;
# endif
2005-04-16 15:20:36 -07:00
}
/* Packet is added to VJ-style prequeue for processing in process
* context , if a reader task is waiting . Apparently , this exciting
* idea ( VJ ' s mail " Re: query about TCP header on tcp-ip " of 07 Sep 93 )
* failed somewhere . Latency ? Burstiness ? Well , at least now we will
* see , why it failed . 8 ) 8 ) - - ANK
*
* NOTE : is this not too big to inline ?
*/
2006-01-03 16:03:49 -08:00
static inline int tcp_prequeue ( struct sock * sk , struct sk_buff * skb )
2005-04-16 15:20:36 -07:00
{
struct tcp_sock * tp = tcp_sk ( sk ) ;
if ( ! sysctl_tcp_low_latency & & tp - > ucopy . task ) {
__skb_queue_tail ( & tp - > ucopy . prequeue , skb ) ;
tp - > ucopy . memory + = skb - > truesize ;
if ( tp - > ucopy . memory > sk - > sk_rcvbuf ) {
struct sk_buff * skb1 ;
BUG_ON ( sock_owned_by_user ( sk ) ) ;
while ( ( skb1 = __skb_dequeue ( & tp - > ucopy . prequeue ) ) ! = NULL ) {
sk - > sk_backlog_rcv ( sk , skb1 ) ;
NET_INC_STATS_BH ( LINUX_MIB_TCPPREQUEUEDROPPED ) ;
}
tp - > ucopy . memory = 0 ;
} else if ( skb_queue_len ( & tp - > ucopy . prequeue ) = = 1 ) {
wake_up_interruptible ( sk - > sk_sleep ) ;
2005-08-09 20:10:42 -07:00
if ( ! inet_csk_ack_scheduled ( sk ) )
inet_csk_reset_xmit_timer ( sk , ICSK_TIME_DACK ,
2005-08-09 20:11:08 -07:00
( 3 * TCP_RTO_MIN ) / 4 ,
TCP_RTO_MAX ) ;
2005-04-16 15:20:36 -07:00
}
return 1 ;
}
return 0 ;
}
/* Optional state-transition tracing.  STATE_TRACE is undefined on the
 * line below, so the name table is compiled out as the file stands.
 */
# undef STATE_TRACE
# ifdef STATE_TRACE
/* State names for the trace message; tcp_set_state() indexes this table
 * with the old and new state values.
 */
static const char * statename [ ] = {
" Unused " , " Established " , " Syn Sent " , " Syn Recv " ,
" Fin Wait 1 " , " Fin Wait 2 " , " Time Wait " , " Close " ,
" Close Wait " , " Last ACK " , " Listen " , " Closing "
} ;
# endif
2006-01-03 16:03:49 -08:00
static inline void tcp_set_state ( struct sock * sk , int state )
2005-04-16 15:20:36 -07:00
{
int oldstate = sk - > sk_state ;
switch ( state ) {
case TCP_ESTABLISHED :
if ( oldstate ! = TCP_ESTABLISHED )
TCP_INC_STATS ( TCP_MIB_CURRESTAB ) ;
break ;
case TCP_CLOSE :
if ( oldstate = = TCP_CLOSE_WAIT | | oldstate = = TCP_ESTABLISHED )
TCP_INC_STATS ( TCP_MIB_ESTABRESETS ) ;
sk - > sk_prot - > unhash ( sk ) ;
2005-08-09 20:10:42 -07:00
if ( inet_csk ( sk ) - > icsk_bind_hash & &
2005-04-16 15:20:36 -07:00
! ( sk - > sk_userlocks & SOCK_BINDPORT_LOCK ) )
2005-08-09 20:07:13 -07:00
inet_put_port ( & tcp_hashinfo , sk ) ;
2005-04-16 15:20:36 -07:00
/* fall through */
default :
if ( oldstate = = TCP_ESTABLISHED )
TCP_DEC_STATS ( TCP_MIB_CURRESTAB ) ;
}
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables .
*/
sk - > sk_state = state ;
# ifdef STATE_TRACE
SOCK_DEBUG ( sk , " TCP sk=%p, State %s -> %s \n " , sk , statename [ oldstate ] , statename [ state ] ) ;
# endif
}
2006-01-03 16:03:49 -08:00
static inline void tcp_done ( struct sock * sk )
2005-04-16 15:20:36 -07:00
{
2006-07-30 20:35:54 -07:00
if ( sk - > sk_state = = TCP_SYN_SENT | | sk - > sk_state = = TCP_SYN_RECV )
TCP_INC_STATS_BH ( TCP_MIB_ATTEMPTFAILS ) ;
2005-04-16 15:20:36 -07:00
tcp_set_state ( sk , TCP_CLOSE ) ;
tcp_clear_xmit_timers ( sk ) ;
sk - > sk_shutdown = SHUTDOWN_MASK ;
if ( ! sock_flag ( sk , SOCK_DEAD ) )
sk - > sk_state_change ( sk ) ;
else
2005-08-09 20:11:41 -07:00
inet_csk_destroy_sock ( sk ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-03 16:03:49 -08:00
static inline void tcp_sack_reset ( struct tcp_options_received * rx_opt )
2005-04-16 15:20:36 -07:00
{
rx_opt - > dsack = 0 ;
rx_opt - > eff_sacks = 0 ;
rx_opt - > num_sacks = 0 ;
}
/* Determine a window scaling and initial window to offer.
 *
 * Prototype only.  rcv_wnd, window_clamp and rcv_wscale are pointers and
 * are presumably outputs - confirm against the definition.
 */
extern void tcp_select_initial_window ( int __space , __u32 mss ,
__u32 * rcv_wnd , __u32 * window_clamp ,
int wscale_ok , __u8 * rcv_wscale ) ;
/* Convert an amount of buffer space into a window size.
 *
 * With sysctl_tcp_adv_win_scale <= 0 the result is
 * space >> -sysctl_tcp_adv_win_scale; with a positive scale it is
 * space - (space >> sysctl_tcp_adv_win_scale).
 */
static inline int tcp_win_from_space(int space)
{
	if (sysctl_tcp_adv_win_scale <= 0)
		return space >> (-sysctl_tcp_adv_win_scale);

	return space - (space >> sysctl_tcp_adv_win_scale);
}
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space ( const struct sock * sk )
{
return tcp_win_from_space ( sk - > sk_rcvbuf -
atomic_read ( & sk - > sk_rmem_alloc ) ) ;
}
static inline int tcp_full_space ( const struct sock * sk )
{
return tcp_win_from_space ( sk - > sk_rcvbuf ) ;
}
2006-01-03 16:03:49 -08:00
static inline void tcp_openreq_init ( struct request_sock * req ,
struct tcp_options_received * rx_opt ,
struct sk_buff * skb )
2005-04-16 15:20:36 -07:00
{
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
struct inet_request_sock * ireq = inet_rsk ( req ) ;
2005-04-16 15:20:36 -07:00
req - > rcv_wnd = 0 ; /* So that tcp_send_synack() knows! */
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
tcp_rsk ( req ) - > rcv_isn = TCP_SKB_CB ( skb ) - > seq ;
2005-04-16 15:20:36 -07:00
req - > mss = rx_opt - > mss_clamp ;
req - > ts_recent = rx_opt - > saw_tstamp ? rx_opt - > rcv_tsval : 0 ;
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
ireq - > tstamp_ok = rx_opt - > tstamp_ok ;
ireq - > sack_ok = rx_opt - > sack_ok ;
ireq - > snd_wscale = rx_opt - > snd_wscale ;
ireq - > wscale_ok = rx_opt - > wscale_ok ;
ireq - > acked = 0 ;
ireq - > ecn_ok = 0 ;
ireq - > rmt_port = skb - > h . th - > source ;
2005-04-16 15:20:36 -07:00
}
/* Prototype only; takes no arguments and returns nothing. */
extern void tcp_enter_memory_pressure ( void ) ;
static inline int keepalive_intvl_when ( const struct tcp_sock * tp )
{
return tp - > keepalive_intvl ? : sysctl_tcp_keepalive_intvl ;
}
static inline int keepalive_time_when ( const struct tcp_sock * tp )
{
return tp - > keepalive_time ? : sysctl_tcp_keepalive_time ;
}
2005-08-09 20:10:42 -07:00
static inline int tcp_fin_time ( const struct sock * sk )
2005-04-16 15:20:36 -07:00
{
2005-08-09 20:10:42 -07:00
int fin_timeout = tcp_sk ( sk ) - > linger2 ? : sysctl_tcp_fin_timeout ;
const int rto = inet_csk ( sk ) - > icsk_rto ;
2005-04-16 15:20:36 -07:00
2005-08-09 20:10:42 -07:00
if ( fin_timeout < ( rto < < 2 ) - ( rto > > 1 ) )
fin_timeout = ( rto < < 2 ) - ( rto > > 1 ) ;
2005-04-16 15:20:36 -07:00
return fin_timeout ;
}
static inline int tcp_paws_check ( const struct tcp_options_received * rx_opt , int rst )
{
if ( ( s32 ) ( rx_opt - > rcv_tsval - rx_opt - > ts_recent ) > = 0 )
return 0 ;
if ( xtime . tv_sec > = rx_opt - > ts_recent_stamp + TCP_PAWS_24DAYS )
return 0 ;
/* RST segments are not recommended to carry timestamp,
and , if they do , it is recommended to ignore PAWS because
" their cleanup function should take precedence over timestamps. "
Certainly , it is mistake . It is necessary to understand the reasons
of this constraint to relax it : if peer reboots , clock may go
out - of - sync and half - open connections will not be reset .
Actually , the problem would be not existing if all
the implementations followed draft about maintaining clock
via reboots . Linux - 2.2 DOES NOT !
However , we can relax time bounds for RST segments to MSL .
*/
if ( rst & & xtime . tv_sec > = rx_opt - > ts_recent_stamp + TCP_PAWS_MSL )
return 0 ;
return 1 ;
}
/* Expands to an empty do/while(0) statement; sk is not evaluated. */
# define TCP_CHECK_TIMER(sk) do { } while (0)
static inline int tcp_use_frto ( const struct sock * sk )
{
const struct tcp_sock * tp = tcp_sk ( sk ) ;
/* F-RTO must be activated in sysctl and there must be some
* unsent new data , and the advertised window should allow
* sending it .
*/
return ( sysctl_tcp_frto & & sk - > sk_send_head & &
! after ( TCP_SKB_CB ( sk - > sk_send_head ) - > end_seq ,
tp - > snd_una + tp - > snd_wnd ) ) ;
}
/* Seed the TCP MIB entries described in RFC 2012.
 *
 * Adds 1 to RTOALGORITHM, the RTO bounds scaled by 1000 / HZ to
 * RTOMIN/RTOMAX, and -1 to MAXCONN.
 */
static inline void tcp_mib_init(void)
{
	/* See RFC 2012 */
	TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN * 1000 / HZ);
	TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX * 1000 / HZ);
	TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
}
2005-11-10 17:14:59 -08:00
/*from STCP */
static inline void clear_all_retrans_hints ( struct tcp_sock * tp ) {
tp - > lost_skb_hint = NULL ;
tp - > scoreboard_skb_hint = NULL ;
tp - > retransmit_skb_hint = NULL ;
tp - > forward_skb_hint = NULL ;
tp - > fastpath_skb_hint = NULL ;
}
2006-11-14 19:07:45 -08:00
/* MD5 Signature */
/* Forward declaration only; the type is not defined here. */
struct crypto_hash ;
/* - key database */
/* Address-family independent view of a key: pointer plus length.
 * NOTE(review): keylen is u8 here but u16 in tcp4_md5sig_key and
 * tcp6_md5sig_key - confirm whether these structs are meant to share a
 * common leading layout.
 */
struct tcp_md5sig_key {
u8 * key ;
u8 keylen ;
} ;
/* Key entry carrying an IPv4 address (__be32) next to the key pointer and
 * key length; presumably the peer address the key applies to - confirm.
 */
struct tcp4_md5sig_key {
u8 * key ;
u16 keylen ;
__be32 addr ;
} ;
/* Key entry carrying an IPv6 address (struct in6_addr) next to the key
 * pointer and key length.  The scope_id member is compiled out by #if 0.
 */
struct tcp6_md5sig_key {
u8 * key ;
u16 keylen ;
#if 0
u32 scope_id ; /* XXX */
# endif
struct in6_addr addr ;
} ;
/* - sock block */
/* Per-socket key tables: one pointer to tcp4_md5sig_key entries and, when
 * IPv6 is built in or as a module, one to tcp6_md5sig_key entries.
 * entriesN / allocedN are presumably the used and allocated element
 * counts of the matching table - confirm against the users.
 */
struct tcp_md5sig_info {
struct tcp4_md5sig_key * keys4 ;
# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct tcp6_md5sig_key * keys6 ;
u32 entries6 ;
u32 alloced6 ;
# endif
u32 entries4 ;
u32 alloced4 ;
} ;
/* - pseudo header */
/* IPv4 pseudo header: source and destination address, one pad byte, the
 * protocol number and a 16-bit big-endian length.
 */
struct tcp4_pseudohdr {
__be32 saddr ;
__be32 daddr ;
__u8 pad ;
__u8 protocol ;
__be16 len ;
} ;
/* IPv6 pseudo header: source and destination address followed by 32-bit
 * big-endian length and protocol fields.
 */
struct tcp6_pseudohdr {
struct in6_addr saddr ;
struct in6_addr daddr ;
__be32 len ;
__be32 protocol ; /* including padding */
} ;
/* Storage for either pseudo header; the IPv6 member exists only when IPv6
 * is built in or as a module.
 */
union tcp_md5sum_block {
struct tcp4_pseudohdr ip4 ;
# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct tcp6_pseudohdr ip6 ;
# endif
} ;
/* - pool: digest algorithm, hash description and scratch buffer */
/* One pool entry: a hash descriptor plus a pseudo-header block. */
struct tcp_md5sig_pool {
struct hash_desc md5_desc ;
union tcp_md5sum_block md5_blk ;
} ;
/* All-ones u32 value, i.e. the largest value a u32 can hold. */
# define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */
/* - functions */
/* Prototypes only; the behaviour notes below are inferred from names and
 * signatures and should be confirmed against the definitions.
 */
/* Presumably writes the computed digest to md5_hash - confirm. */
extern int tcp_v4_calc_md5_hash ( char * md5_hash ,
struct tcp_md5sig_key * key ,
struct sock * sk ,
struct dst_entry * dst ,
struct request_sock * req ,
struct tcphdr * th ,
int protocol , int tcplen ) ;
/* Returns a key pointer for the (sk, addr_sk) pair. */
extern struct tcp_md5sig_key * tcp_v4_md5_lookup ( struct sock * sk ,
struct sock * addr_sk ) ;
/* Add / delete a key on sk for an IPv4 address. */
extern int tcp_v4_md5_do_add ( struct sock * sk ,
__be32 addr ,
u8 * newkey ,
u8 newkeylen ) ;
extern int tcp_v4_md5_do_del ( struct sock * sk ,
2006-11-20 18:06:37 -08:00
__be32 addr ) ;
2006-11-14 19:07:45 -08:00
/* Pool management; the __ variants are wrapped by the inline
 * tcp_get_md5sig_pool() / tcp_put_md5sig_pool() helpers in this header.
 */
extern struct tcp_md5sig_pool * * tcp_alloc_md5sig_pool ( void ) ;
extern void tcp_free_md5sig_pool ( void ) ;
extern struct tcp_md5sig_pool * __tcp_get_md5sig_pool ( int cpu ) ;
extern void __tcp_put_md5sig_pool ( void ) ;
static inline
struct tcp_md5sig_pool * tcp_get_md5sig_pool ( void )
{
int cpu = get_cpu ( ) ;
struct tcp_md5sig_pool * ret = __tcp_get_md5sig_pool ( cpu ) ;
if ( ! ret )
put_cpu ( ) ;
return ret ;
}
/* Release a pool obtained with tcp_get_md5sig_pool(): calls
 * __tcp_put_md5sig_pool() and then the put_cpu() that pairs with the
 * get_cpu() done when the pool was fetched.
 */
static inline void tcp_put_md5sig_pool(void)
{
	__tcp_put_md5sig_pool();

	put_cpu();
}
2005-04-16 15:20:36 -07:00
/* /proc */
/* States stored in tcp_iter_state.state; presumably the kind of entry the
 * /proc iterator is currently walking - confirm against the iterator code.
 */
enum tcp_seq_states {
TCP_SEQ_STATE_LISTENING ,
TCP_SEQ_STATE_OPENREQ ,
TCP_SEQ_STATE_ESTABLISHED ,
TCP_SEQ_STATE_TIME_WAIT ,
} ;
/* Description passed to tcp_proc_register() / tcp_proc_unregister():
 * owning module, a name, the address family, a seq_file show callback and
 * a file_operations pointer.
 */
struct tcp_seq_afinfo {
struct module * owner ;
char * name ;
sa_family_t family ;
int ( * seq_show ) ( struct seq_file * m , void * v ) ;
struct file_operations * seq_fops ;
} ;
/* Iterator state: address family, current tcp_seq_states value, a socket
 * pointer (syn_wait_sk), four int members and an embedded seq_operations.
 * NOTE(review): the meaning of bucket/sbucket/num/uid is not visible here.
 */
struct tcp_iter_state {
sa_family_t family ;
enum tcp_seq_states state ;
struct sock * syn_wait_sk ;
int bucket , sbucket , num , uid ;
struct seq_operations seq_ops ;
} ;
/* Prototypes and one object declaration; nothing is defined here. */
/* Register / unregister a tcp_seq_afinfo description. */
extern int tcp_proc_register ( struct tcp_seq_afinfo * afinfo ) ;
extern void tcp_proc_unregister ( struct tcp_seq_afinfo * afinfo ) ;
2005-08-16 02:18:02 -03:00
extern struct request_sock_ops tcp_request_sock_ops ;
extern int tcp_v4_destroy_sock ( struct sock * sk ) ;
2006-07-08 13:34:56 -07:00
extern int tcp_v4_gso_send_check ( struct sk_buff * skb ) ;
2006-06-27 13:22:38 -07:00
extern struct sk_buff * tcp_tso_segment ( struct sk_buff * skb , int features ) ;
2006-06-22 03:02:40 -07:00
2005-08-16 02:18:02 -03:00
/* Declared only when CONFIG_PROC_FS is set. */
# ifdef CONFIG_PROC_FS
extern int tcp4_proc_init ( void ) ;
extern void tcp4_proc_exit ( void ) ;
# endif
2006-11-14 19:07:45 -08:00
/* TCP af-specific functions */
/* Table of address-family specific callbacks.  Every member is guarded by
 * CONFIG_TCP_MD5SIG, so the struct is empty without that option.
 */
struct tcp_sock_af_ops {
# ifdef CONFIG_TCP_MD5SIG
/* Same signature as tcp_v4_md5_lookup(). */
struct tcp_md5sig_key * ( * md5_lookup ) ( struct sock * sk ,
struct sock * addr_sk ) ;
/* Same parameter types as tcp_v4_calc_md5_hash(). */
int ( * calc_md5_hash ) ( char * location ,
struct tcp_md5sig_key * md5 ,
struct sock * sk ,
struct dst_entry * dst ,
struct request_sock * req ,
struct tcphdr * th ,
int protocol , int len ) ;
/* Takes a socket (addr_sk) rather than a raw address. */
int ( * md5_add ) ( struct sock * sk ,
struct sock * addr_sk ,
u8 * newkey ,
u8 len ) ;
/* optval is a user-space pointer (__user) of optlen bytes. */
int ( * md5_parse ) ( struct sock * sk ,
char __user * optval ,
int optlen ) ;
# endif
} ;
/* Request-sock callbacks.  The only member, md5_lookup, takes a
 * request_sock and exists only with CONFIG_TCP_MD5SIG.
 */
struct tcp_request_sock_ops {
# ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key * ( * md5_lookup ) ( struct sock * sk ,
struct request_sock * req ) ;
# endif
} ;
2005-08-16 02:18:02 -03:00
/* Initialisation entry points; prototypes only. */
extern void tcp_v4_init ( struct net_proto_family * ops ) ;
extern void tcp_init ( void ) ;
2005-04-16 15:20:36 -07:00
# endif /* _TCP_H */