/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP module.
 *
 * Version:	@(#)tcp.h	1.0.5	05/23/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H
#define _TCP_H

#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1

#include <linux/config.h>
#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>

#include <linux/seq_file.h>

extern struct inet_hashinfo tcp_hashinfo;

extern atomic_t tcp_orphan_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER	(128 + MAX_HEADER)

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16 bit maths!
 */
#define MAX_TCP_WINDOW		32767U

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS		88U

/* Minimal RCV_MSS. */
#define TCP_MIN_RCVMSS		536U

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING	127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS	16U

/* urg_data states */
#define TCP_URG_VALID	0x0100
#define TCP_URG_NOTYET	0x0200
#define TCP_URG_READ	0x0400
#define TCP_RETR1	3	/*
				 * This is how many retries it does before it
				 * tries to figure out if the gateway is
				 * down. Minimal RFC value is 3; it corresponds
				 * to ~3sec-8min depending on RTO.
				 */

#define TCP_RETR2	15	/*
				 * This should take at least
				 * 90 minutes to time out.
				 * RFC1122 says that the limit is 100 sec.
				 * 15 is ~13-30min depending on RTO.
				 */

#define TCP_SYN_RETRIES	5	/* number of times to retry active opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_ORPHAN_RETRIES 7	/* number of times to retry on an orphaned
				 * socket. 7 is ~50sec-16min.
				 */

#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
				  * state, about 60 seconds	*/
#define TCP_FIN_TIMEOUT	TCP_TIMEWAIT_LEN
				 /* BSD style FIN_WAIT2 deadlock breaker.
				  * It used to be 3min, new value is 60sec,
				  * to combine FIN-WAIT-2 timeout with
				  * TIME-WAIT timer.
				  */

#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
#if HZ >= 100
#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN	((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN	4U
#define TCP_ATO_MIN	4U
#endif
#define TCP_RTO_MAX	((unsigned)(120*HZ))
#define TCP_RTO_MIN	((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value	*/

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between
							 * probes for local resources.
							 */

#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
#define TCP_KEEPALIVE_INTVL	(75*HZ)

#define MAX_TCP_KEEPIDLE	32767
#define MAX_TCP_KEEPINTVL	32767
#define MAX_TCP_KEEPCNT		127
#define MAX_TCP_SYNCNT		127

#define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
#define TCP_SYNQ_HSIZE		512	/* Size of SYNACK hash table */

#define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)
#define TCP_PAWS_MSL	60		/* Per-host timestamps are invalidated
					 * after this time. It should be equal
					 * (or greater than) TCP_TIMEWAIT_LEN
					 * to provide reliability equal to one
					 * provided by timewait state.
					 */
#define TCP_PAWS_WINDOW	1		/* Replay window for per-host
					 * timestamps. It must be less than
					 * minimal timewait lifetime.
					 */
/*
 *	TCP option
 */

#define TCPOPT_NOP		1	/* Padding */
#define TCPOPT_EOL		0	/* End of options */
#define TCPOPT_MSS		2	/* Segment size negotiating */
#define TCPOPT_WINDOW		3	/* Window scaling */
#define TCPOPT_SACK_PERM	4	/* SACK Permitted */
#define TCPOPT_SACK		5	/* SACK Block */
#define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */

/*
 *	TCP option lengths
 */

#define TCPOLEN_MSS		4
#define TCPOLEN_WINDOW		3
#define TCPOLEN_SACK_PERM	2
#define TCPOLEN_TIMESTAMP	10

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED		12
#define TCPOLEN_WSCALE_ALIGNED		4
#define TCPOLEN_SACKPERM_ALIGNED	4
#define TCPOLEN_SACK_BASE		2
#define TCPOLEN_SACK_BASE_ALIGNED	4
#define TCPOLEN_SACK_PERBLOCK		8

/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
#define TCP_NAGLE_CORK		2	/* Socket is corked	    */
#define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */
extern struct inet_timewait_death_row tcp_death_row;

/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;

extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
/*
 *	Pointers to address related TCP functions
 *	(i.e. things that depend on the address family)
 */

struct tcp_func {
	int		(*queue_xmit)		(struct sk_buff *skb,
						 int ipfragok);

	void		(*send_check)		(struct sock *sk,
						 struct tcphdr *th,
						 int len,
						 struct sk_buff *skb);

	int		(*rebuild_header)	(struct sock *sk);

	int		(*conn_request)		(struct sock *sk,
						 struct sk_buff *skb);

	struct sock *	(*syn_recv_sock)	(struct sock *sk,
						 struct sk_buff *skb,
						 struct request_sock *req,
						 struct dst_entry *dst);

	int		(*remember_stamp)	(struct sock *sk);

	__u16		net_header_len;

	int		(*setsockopt)		(struct sock *sk,
						 int level,
						 int optname,
						 char __user *optval,
						 int optlen);

	int		(*getsockopt)		(struct sock *sk,
						 int level,
						 int optname,
						 char __user *optval,
						 int __user *optlen);

	void		(*addr2sockaddr)	(struct sock *sk,
						 struct sockaddr *);

	int sockaddr_len;
};
/*
 * The next routines deal with comparing 32 bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}

static inline int after(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq2 - seq1) < 0;
}

/* is s2<=s1<=s3 ? */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}
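
/* Illustrative note (not part of the original header): the cast to __s32
 * keeps these comparisons correct across sequence-number wraparound.
 * For example, with seq1 = 0xfffffff0 and seq2 = 0x00000010, the
 * difference seq1 - seq2 is 0xffffffe0, i.e. -32 as an __s32, so
 * before(seq1, seq2) holds even though seq1 > seq2 as plain unsigned values.
 */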
extern struct proto tcp_prot;

DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
#define TCP_INC_STATS_USER(field)	SNMP_INC_STATS_USER(tcp_statistics, field)
#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(tcp_statistics, field, val)
#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)

extern void tcp_v4_err(struct sk_buff *skb, u32);

extern void tcp_shutdown(struct sock *sk, int how);

extern int tcp_v4_rcv(struct sk_buff *skb);

extern int tcp_v4_remember_stamp(struct sock *sk);

extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);

extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
		       struct msghdr *msg, size_t size);
extern ssize_t tcp_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags);

extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);

extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
				 struct tcphdr *th, unsigned len);

extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			       struct tcphdr *th, unsigned len);

extern void tcp_rcv_space_adjust(struct sock *sk);
static inline void tcp_dec_quickack_mode(struct sock *sk,
					 const unsigned int pkts)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ack.quick) {
		if (pkts >= icsk->icsk_ack.quick) {
			icsk->icsk_ack.quick = 0;
			/* Leaving quickack mode we deflate ATO. */
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		} else
			icsk->icsk_ack.quick -= pkts;
	}
}

extern void tcp_enter_quickack_mode(struct sock *sk);

static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
}
enum tcp_tw_status
{
	TCP_TW_SUCCESS = 0,
	TCP_TW_RST = 1,
	TCP_TW_ACK = 2,
	TCP_TW_SYN = 3
};

extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
						     struct sk_buff *skb,
						     const struct tcphdr *th);

extern struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct request_sock **prev);
extern int tcp_child_process(struct sock *parent, struct sock *child,
			     struct sk_buff *skb);
extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk);

extern void tcp_close(struct sock *sk, long timeout);
extern unsigned int tcp_poll(struct file *file, struct socket *sock,
			     struct poll_table_struct *wait);

extern int tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen);
extern int tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen);
extern void tcp_set_keepalive(struct sock *sk, int val);

extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
		       struct msghdr *msg, size_t len, int nonblock,
		       int flags, int *addr_len);

extern void tcp_parse_options(struct sk_buff *skb,
			      struct tcp_options_received *opt_rx,
			      int estab);

/*
 *	TCP v4 functions exported for the inet6 API
 */

extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th,
			      int len, struct sk_buff *skb);

extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);

extern struct sock *tcp_create_openreq_child(struct sock *sk,
					     struct request_sock *req,
					     struct sk_buff *skb);

extern struct sock *tcp_v4_syn_recv_sock(struct sock *sk,
					 struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst);

extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);

extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len);

extern int tcp_connect(struct sock *sk);

extern struct sk_buff *tcp_make_synack(struct sock *sk,
				       struct dst_entry *dst,
				       struct request_sock *req);

extern int tcp_disconnect(struct sock *sk, int flags);

extern void tcp_unhash(struct sock *sk);

extern int tcp_v4_hash_connecting(struct sock *sk);

/* From syncookies.c */
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
				    struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
				     __u16 *mss);
/* tcp_output.c */

extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
				      unsigned int cur_mss, int nonagle);
extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);

extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
extern int tcp_send_synack(struct sock *);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);

/* tcp_input.c */
extern void tcp_cwnd_application_limited(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}

extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk, int large);
/* tcp.c */
extern void tcp_get_info(struct sock *, struct tcp_info *);

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
			       unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
			 sk_read_actor_t recv_actor);
/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We don't have any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */
static inline void tcp_initialize_rcv_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MIN_RCVMSS);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}

static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
{
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
{
	if (skb_queue_empty(&tp->out_of_order_queue) &&
	    tp->rcv_wnd &&
	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
	    !tp->urg_data)
		tcp_fast_path_on(tp);
}
/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp)
{
	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

	if (win < 0)
		win = 0;
	return (u32) win;
}

/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);
/* TCP timestamps are only 32-bits, this causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We decidedly
 * use only the low 32 bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp		((__u32)(jiffies))

/* This is what the send packet queueing engine uses to pass
 * TCP per-packet control information to the transmission
 * code.  We also store the host-order sequence numbers in
 * here too.  This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines, if this grows please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
	union {
		struct inet_skb_parm	h4;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		struct inet6_skb_parm	h6;
#endif
	} header;	/* For incoming frames		*/
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u32		when;		/* used to compute rtt's	*/
	__u8		flags;		/* TCP header flags.		*/

	/* NOTE: These must match up to the flags byte in a
	 *       real TCP header.
	 */
#define TCPCB_FLAG_FIN		0x01
#define TCPCB_FLAG_SYN		0x02
#define TCPCB_FLAG_RST		0x04
#define TCPCB_FLAG_PSH		0x08
#define TCPCB_FLAG_ACK		0x10
#define TCPCB_FLAG_URG		0x20
#define TCPCB_FLAG_ECE		0x40
#define TCPCB_FLAG_CWR		0x80

	__u8		sacked;		/* State flags for SACK/FACK.	*/
#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
#define TCPCB_LOST		0x04	/* SKB is lost			*/
#define TCPCB_TAGBITS		0x07	/* All tag bits			*/

#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

#define TCPCB_URG		0x20	/* Urgent pointer advanced here	*/

#define TCPCB_AT_TAIL		(TCPCB_URG)

	__u16		urg_ptr;	/* Valid w/URG flags is set.	*/
	__u32		ack_seq;	/* Sequence number ACK'd	*/
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

#include <net/tcp_ecn.h>
/* Due to TSO, an SKB can be composed of multiple actual
 * packets.  To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->tso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->tso_size;
}

static inline void tcp_dec_pcount_approx(__u32 *count,
					 const struct sk_buff *skb)
{
	if (*count) {
		*count -= tcp_skb_pcount(skb);
		if ((int)*count < 0)
			*count = 0;
	}
}

static inline void tcp_packets_out_inc(struct sock *sk,
				       struct tcp_sock *tp,
				       const struct sk_buff *skb)
{
	int orig = tp->packets_out;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!orig)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}

static inline void tcp_packets_out_dec(struct tcp_sock *tp,
				       const struct sk_buff *skb)
{
	tp->packets_out -= tcp_skb_pcount(skb);
}
/* Events passed to congestion control interface */
enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
	CA_EVENT_CWND_RESTART,	/* congestion window restart */
	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
	CA_EVENT_FRTO,		/* fast recovery timeout */
	CA_EVENT_LOSS,		/* loss timeout */
	CA_EVENT_FAST_ACK,	/* in sequence ack */
	CA_EVENT_SLOW_ACK,	/* other ack */
};

/*
 * Interface for adding new TCP congestion control handlers
 */
#define TCP_CA_NAME_MAX	16
struct tcp_congestion_ops {
	struct list_head	list;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data  (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	u32 (*ssthresh)(struct sock *sk);
	/* lower bound for congestion window (optional) */
	u32 (*min_cwnd)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, u32 ack,
			   u32 rtt, u32 in_flight, int good_ack);
	/* round trip time sample per acked packet (optional) */
	void (*rtt_sample)(struct sock *sk, u32 usrtt);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* new value of cwnd after loss (optional) */
	u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, u32 num_acked);
	/* get info for inet_diag (optional) */
	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);

	char		name[TCP_CA_NAME_MAX];
	struct module	*owner;
};
extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
				u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;

static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->set_state)
		icsk->icsk_ca_ops->set_state(sk, ca_state);
	icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->cwnd_event)
		icsk->icsk_ca_ops->cwnd_event(sk, event);
}
/* This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control, use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *	"Packets sent once on transmission queue" MINUS
 *	"Packets left network, but not honestly ACKed yet" PLUS
 *	"Packets fast retransmitted"
 */
static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
	return (tp->packets_out - tp->left_out + tp->retrans_out);
}
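
/* Illustrative note (not in the original header): since left_out is kept
 * equal to sacked_out + lost_out (see tcp_sync_left_out() below), this is
 *
 *	in_flight = packets_out - (sacked_out + lost_out) + retrans_out
 *
 * e.g. packets_out = 10, sacked_out = 2, lost_out = 1, retrans_out = 1
 * gives 8 segments still assumed to be in the network.
 */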
/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is rate halving phase, when cwnd is decreasing towards
 * ssthresh.
 */
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
			   ((tp->snd_cwnd >> 1) +
			    (tp->snd_cwnd >> 2)));
}
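
/* Illustrative note (not in the original header): (snd_cwnd >> 1) +
 * (snd_cwnd >> 2) is three quarters of snd_cwnd, so outside of CWR and
 * Recovery the reported ssthresh is never below 75% of the current
 * congestion window (e.g. snd_cwnd = 40 yields max(snd_ssthresh, 30)).
 */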
static inline void tcp_sync_left_out(struct tcp_sock *tp)
{
	if (tp->rx_opt.sack_ok &&
	    (tp->sacked_out >= tp->packets_out - tp->lost_out))
		tp->sacked_out = tp->packets_out - tp->lost_out;
	tp->left_out = tp->sacked_out + tp->lost_out;
}
/* Set slow start threshold and cwnd not falling to slow start */
static inline void __tcp_enter_cwr(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	tp->undo_marker = 0;
	tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp) + 1U);
	tp->snd_cwnd_cnt = 0;
	tp->high_seq = tp->snd_nxt;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	TCP_ECN_queue_cwr(tp);
}

static inline void tcp_enter_cwr(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->prior_ssthresh = 0;
	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
		__tcp_enter_cwr(sk);
		tcp_set_ca_state(sk, TCP_CA_CWR);
	}
}
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);

/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto".
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
	return 3;
}

static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
					   const struct sk_buff *skb)
{
	if (skb->len < mss)
		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (!tp->packets_out && !icsk->icsk_pending)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  icsk->icsk_rto, TCP_RTO_MAX);
}

static __inline__ void tcp_push_pending_frames(struct sock *sk,
					       struct tcp_sock *tp)
{
	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
}

static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
	tp->snd_wl1 = seq;
}

static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
	tp->snd_wl1 = seq;
}
/*
 * Calculate(/check) TCP checksum
 */
static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
				   unsigned long saddr, unsigned long daddr,
				   unsigned long base)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
{
	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
}

static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
{
	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
		__tcp_checksum_complete(skb);
}
/* Prequeue for VJ style copy to user, combined with checksumming. */

static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
{
	tp->ucopy.task = NULL;
	tp->ucopy.len = 0;
	tp->ucopy.memory = 0;
	skb_queue_head_init(&tp->ucopy.prequeue);
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)				  --ANK
 *
 * NOTE: is this not too big to inline?
 */
static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!sysctl_tcp_low_latency && tp->ucopy.task) {
		__skb_queue_tail(&tp->ucopy.prequeue, skb);
		tp->ucopy.memory += skb->truesize;
		if (tp->ucopy.memory > sk->sk_rcvbuf) {
			struct sk_buff *skb1;

			BUG_ON(sock_owned_by_user(sk));

			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
				sk->sk_backlog_rcv(sk, skb1);
				NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
			}

			tp->ucopy.memory = 0;
		} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
			wake_up_interruptible(sk->sk_sleep);
			if (!inet_csk_ack_scheduled(sk))
				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
							  (3 * TCP_RTO_MIN) / 4,
							  TCP_RTO_MAX);
		}
		return 1;
	}
	return 0;
}
#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[] = {
	"Unused", "Established", "Syn Sent", "Syn Recv",
	"Fin Wait 1", "Fin Wait 2", "Time Wait", "Close",
	"Close Wait", "Last ACK", "Listen", "Closing"
};
#endif

static __inline__ void tcp_set_state(struct sock *sk, int state)
{
	int oldstate = sk->sk_state;

	switch (state) {
	case TCP_ESTABLISHED:
		if (oldstate != TCP_ESTABLISHED)
			TCP_INC_STATS(TCP_MIB_CURRESTAB);
		break;

	case TCP_CLOSE:
		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
			TCP_INC_STATS(TCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&tcp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == TCP_ESTABLISHED)
			TCP_DEC_STATS(TCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;

#ifdef STATE_TRACE
	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
#endif
}
static __inline__ void tcp_done(struct sock *sk)
{
	tcp_set_state(sk, TCP_CLOSE);
	tcp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
	rx_opt->dsack = 0;
	rx_opt->eff_sacks = 0;
	rx_opt->num_sacks = 0;
}
static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
{
	if (tp->rx_opt.tstamp_ok) {
		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_TIMESTAMP << 8) |
					  TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tstamp);
		*ptr++ = htonl(tp->rx_opt.ts_recent);
	}
	if (tp->rx_opt.eff_sacks) {
		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
		int this_sack;

		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_SACK << 8) |
					  (TCPOLEN_SACK_BASE +
					   (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
			*ptr++ = htonl(sp[this_sack].start_seq);
			*ptr++ = htonl(sp[this_sack].end_seq);
		}
		if (tp->rx_opt.dsack) {
			tp->rx_opt.dsack = 0;
			tp->rx_opt.eff_sacks--;
		}
	}
}
/* Construct a tcp options header for a SYN or SYN_ACK packet.
 * If this is ever changed make sure to change the definition of
 * MAX_SYN_SIZE to match the new maximum number of options that you
 * can generate.
 */
static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
					 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
{
	/* We always get an MSS option.
	 * The option bytes which will be seen in normal data
	 * packets should timestamps be used, must be in the MSS
	 * advertised.  But we subtract them from tp->mss_cache so
	 * that calculations in tcp_sendmsg are simpler etc.
	 * So account for this fact here if necessary.  If we
	 * don't do this correctly, as a receiver we won't
	 * recognize data packets as being full sized when we
	 * should, and thus we won't abide by the delayed ACK
	 * rules correctly.
	 * SACKs don't matter, we never delay an ACK when we
	 * have any of those going out.
	 */
	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
	if (ts) {
		if (sack)
			*ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		else
			*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tstamp);		/* TSVAL */
		*ptr++ = htonl(ts_recent);	/* TSECR */
	} else if (sack)
		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
					  (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
	if (offer_wscale)
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
}
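
/* Illustrative layout (not in the original header), assuming timestamps,
 * SACK and window scaling are all offered: the block built above is
 * 20 bytes --
 *	word 1: MSS (kind 2, len 4, value)
 *	word 2: SACK-permitted (kind 4, len 2) + timestamp kind/len (kind 8, len 10)
 *	word 3: TSVAL
 *	word 4: TSECR
 *	word 5: NOP + window scale (kind 3, len 3, shift)
 */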
/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
				      __u32 *rcv_wnd, __u32 *window_clamp,
				      int wscale_ok, __u8 *rcv_wscale);

static inline int tcp_win_from_space(int space)
{
	return sysctl_tcp_adv_win_scale <= 0 ?
		(space >> (-sysctl_tcp_adv_win_scale)) :
		space - (space >> sysctl_tcp_adv_win_scale);
}
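
/* Illustrative note (not in the original header): assuming the usual
 * default sysctl_tcp_adv_win_scale of 2, a quarter of the space is
 * reserved as overhead, e.g. tcp_win_from_space(65536) = 65536 - 16384
 * = 49152.
 */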
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf);
}

static __inline__ void tcp_openreq_init(struct request_sock *req,
					struct tcp_options_received *rx_opt,
					struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
	req->mss = rx_opt->mss_clamp;
	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
	ireq->tstamp_ok = rx_opt->tstamp_ok;
	ireq->sack_ok = rx_opt->sack_ok;
	ireq->snd_wscale = rx_opt->snd_wscale;
	ireq->wscale_ok = rx_opt->wscale_ok;
	ireq->acked = 0;
	ireq->ecn_ok = 0;
	ireq->rmt_port = skb->h.th->source;
}

extern void tcp_enter_memory_pressure(void);
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int tcp_fin_time(const struct sock *sk)
{
	int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
		fin_timeout = (rto << 2) - (rto >> 1);

	return fin_timeout;
}
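
/* Illustrative note (not in the original header): (rto << 2) - (rto >> 1)
 * equals 3.5 * RTO, so the effective FIN-WAIT-2 timeout is never allowed
 * to drop below three and a half retransmission timeouts.
 */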
static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
{
	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
		return 0;
	if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
		return 0;

	/* RST segments are not recommended to carry timestamp,
	   and, if they do, it is recommended to ignore PAWS because
	   "their cleanup function should take precedence over timestamps."
	   Certainly, it is a mistake. It is necessary to understand the reasons
	   of this constraint to relax it: if peer reboots, clock may go
	   out-of-sync and half-open connections will not be reset.
	   Actually, the problem would not exist if all
	   the implementations followed the draft about maintaining clock
	   via reboots. Linux-2.2 DOES NOT!

	   However, we can relax time bounds for RST segments to MSL.
	 */
	if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
		return 0;
	return 1;
}
#define TCP_CHECK_TIMER(sk) do { } while (0)

static inline int tcp_use_frto(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* F-RTO must be activated in sysctl and there must be some
	 * unsent new data, and the advertised window should allow
	 * sending it.
	 */
	return (sysctl_tcp_frto && sk->sk_send_head &&
		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
		       tp->snd_una + tp->snd_wnd));
}

static inline void tcp_mib_init(void)
{
	/* See RFC 2012 */
	TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
	TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
	TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
}
/* /proc */
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_OPENREQ,
	TCP_SEQ_STATE_ESTABLISHED,
	TCP_SEQ_STATE_TIME_WAIT,
};

struct tcp_seq_afinfo {
	struct module		*owner;
	char			*name;
	sa_family_t		family;
	int			(*seq_show) (struct seq_file *m, void *v);
	struct file_operations	*seq_fops;
};

struct tcp_iter_state {
	sa_family_t		family;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	int			bucket, sbucket, num, uid;
	struct seq_operations	seq_ops;
};

extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;

extern int tcp_v4_destroy_sock(struct sock *sk);

#ifdef CONFIG_PROC_FS
extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif

extern void tcp_v4_init(struct net_proto_family *ops);
extern void tcp_init(void);

#endif	/* _TCP_H */