/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:	$Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>

#ifdef CONFIG_SYSCTL
#define SYNC_INIT 0 /* let the user enable it */
#else
#define SYNC_INIT 1
#endif
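
/* When sysctl support is compiled in, syncookies start off disabled and the
 * administrator enables them at runtime; without CONFIG_SYSCTL there is no
 * runtime knob, so they default to enabled.
 */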
int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
int sysctl_tcp_abort_on_overflow __read_mostly;

struct inet_timewait_death_row tcp_death_row = {
	.sysctl_max_tw_buckets = NR_FILE * 2,
	.period		= TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
	.death_lock	= __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
	.hashinfo	= &tcp_hashinfo,
	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
					    (unsigned long)&tcp_death_row),
	.twkill_work	= __WORK_INITIALIZER(tcp_death_row.twkill_work,
					     inet_twdr_twkill_work,
					     &tcp_death_row),
	/* Short-time timewait calendar */
	.twcal_hand	= -1,
	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
					    (unsigned long)&tcp_death_row),
};

EXPORT_SYMBOL_GPL(tcp_death_row);
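
/* Does [seq, end_seq) overlap the receive window [s_win, e_win)?  A segment
 * that starts exactly at the left window edge is always acceptable, as is a
 * zero-length segment sitting exactly at the right edge.
 */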
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
	if (seq == s_win)
		return 1;
	if (after(end_seq, s_win) && before(seq, e_win))
		return 1;
	return (seq == e_win && seq == end_seq);
}

/*
 * * The main purpose of the TIME-WAIT state is to close the connection
 *   gracefully when one of the ends sits in LAST-ACK or CLOSING,
 *   retransmitting its FIN (and, probably, a tail of data) while one or
 *   more of our ACKs are lost.
 * * What is the TIME-WAIT timeout?  It is associated with maximal packet
 *   lifetime in the internet, which leads to the wrong conclusion that
 *   it is set to catch "old duplicate segments" wandering off their path.
 *   That is not quite correct.  This timeout is calculated so that it
 *   exceeds the maximal retransmission timeout by enough to allow losing
 *   one (or more) segments sent by the peer and our ACKs.  This time may
 *   be calculated from the RTO.
 * * When a TIME-WAIT socket receives an RST, it means that the other end
 *   finally closed and we are allowed to kill TIME-WAIT too.
 * * The second purpose of TIME-WAIT is catching old duplicate segments.
 *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
 *   with this semantics, we MUST NOT kill the TIME-WAIT state with RSTs.
 * * If we invented some cleverer way to catch duplicates (e.g. based on
 *   PAWS), we could truncate TIME-WAIT to several RTOs.
 *
 * The algorithm below is based on a FORMAL INTERPRETATION of the RFCs.
 * When you compare it to the RFCs, please read the section SEGMENT ARRIVES
 * from the very beginning.
 *
 * NOTE. With recycling (and later with fin-wait-2) the TW bucket
 * is _not_ stateless.  Strictly speaking, we would have to spinlock it.
 * I do not want to!  The probability of misbehaviour is ridiculously low
 * and, it seems, we could use some mb() tricks to avoid misreading
 * sequence numbers, states etc.  --ANK
 */
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
			   const struct tcphdr *th)
{
	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
	struct tcp_options_received tmp_opt;
	int paws_reject = 0;

	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
			tmp_opt.ts_recent_stamp	= tcptw->tw_ts_recent_stamp;
			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
		}
	}
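
	/* A connection that was moved to a timewait bucket while still in
	 * FIN-WAIT-2 (see tcp_time_wait() below) is told apart from a real
	 * TIME-WAIT socket by tw_substate.
	 */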
	if (tw->tw_substate == TCP_FIN_WAIT2) {
		/* Just repeat all the checks of tcp_rcv_state_process() */

		/* Out of window, send ACK */
		if (paws_reject ||
		    !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
				   tcptw->tw_rcv_nxt,
				   tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
			return TCP_TW_ACK;

		if (th->rst)
			goto kill;

		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
			goto kill_with_rst;

		/* Dup ACK? */
		if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
			inet_twsk_put(tw);
			return TCP_TW_SUCCESS;
		}

		/* New data or FIN. If new data arrive after half-duplex close,
		 * reset.
		 */
		if (!th->fin ||
		    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			return TCP_TW_RST;
		}

		/* FIN arrived, enter true time-wait state. */
		tw->tw_substate	  = TCP_TIME_WAIT;
		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if (tmp_opt.saw_tstamp) {
			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
		}

		/* I am ashamed, but I failed to make it more elegant.
		 * Yes, it is a direct reference to IP, which is impossible
		 * to generalize to IPv6. Taking into account that IPv6
		 * does not understand recycling in any case, it is not
		 * a big problem in practice. --ANK */
		if (tw->tw_family == AF_INET &&
		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
		    tcp_v4_tw_remember_stamp(tw))
			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
					   TCP_TIMEWAIT_LEN);
		else
			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
					   TCP_TIMEWAIT_LEN);
		return TCP_TW_ACK;
	}

	/*
	 *	Now real TIME-WAIT state.
	 *
	 *	RFC 1122:
	 *	"When a connection is [...] on TIME-WAIT state [...]
	 *	[a TCP] MAY accept a new SYN from the remote TCP to
	 *	reopen the connection directly, if it:
	 *
	 *	(1)  assigns its initial sequence number for the new
	 *	connection to be larger than the largest sequence
	 *	number it used on the previous connection incarnation,
	 *	and
	 *
	 *	(2)  returns to TIME-WAIT state if the SYN turns out
	 *	to be an old duplicate".
	 */
	if (!paws_reject &&
	    (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
	     (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
		/* In window segment, it may be only reset or bare ack. */

		if (th->rst) {
			/* This is TIME_WAIT assassination, in two flavors.
			 * Oh well... nobody has a sufficient solution to this
			 * protocol bug yet.
			 */
			if (sysctl_tcp_rfc1337 == 0) {
kill:
				inet_twsk_deschedule(tw, &tcp_death_row);
				inet_twsk_put(tw);
				return TCP_TW_SUCCESS;
			}
		}
		inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
				   TCP_TIMEWAIT_LEN);

		if (tmp_opt.saw_tstamp) {
			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
		}

		inet_twsk_put(tw);
		return TCP_TW_SUCCESS;
	}

	/* Out of window segment.
	 *
	 * All such segments are ACKed immediately.
	 *
	 * The only exception is a new SYN. We accept it if it is not an old
	 * duplicate and we are not in danger of being killed by delayed old
	 * duplicates. The RFC check -- that it carries a newer sequence
	 * number -- only works at rates below ~40 Mbit/sec.
	 * However, if PAWS works, it is reliable and, even more, we may
	 * relax the silly sequence-space cutoff.
	 *
	 * RED-PEN: we violate the main RFC requirement: if this SYN turns
	 * out to be an old duplicate (i.e. we receive an RST in reply to
	 * our SYN-ACK), we must return the socket to time-wait state. That
	 * is not good, but not fatal yet.
	 */
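
	/* Accepting a SYN here starts a new incarnation of the connection.
	 * Its initial sequence number is chosen above anything the old
	 * incarnation can have used (tw_snd_nxt plus a full 64K window plus
	 * a little slack), per requirement (1) quoted above, and is handed
	 * to the SYN processing path via TCP_SKB_CB(skb)->when.
	 */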
	if (th->syn && !th->rst && !th->ack && !paws_reject &&
	    (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
	     (tmp_opt.saw_tstamp &&
	      (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
		u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
		if (isn == 0)
			isn++;
		TCP_SKB_CB(skb)->when = isn;
		return TCP_TW_SYN;
	}

	if (paws_reject)
		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);

	if (!th->rst) {
		/* In this case we must reset the TIMEWAIT timer.
		 *
		 * If it is an ACKless SYN it may be both an old duplicate
		 * and a new good SYN with a random sequence number < rcv_nxt.
		 * Do not reschedule in the latter case.
		 */
		if (paws_reject || th->ack)
			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
					   TCP_TIMEWAIT_LEN);

		/* Send ACK. Note, we do not put the bucket,
		 * it will be released by caller.
		 */
		return TCP_TW_ACK;
	}
	inet_twsk_put(tw);
	return TCP_TW_SUCCESS;
}

/*
 * Move a socket to time-wait or dead fin-wait-2 state.
 */
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
	struct inet_timewait_sock *tw = NULL;
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_sock *tp = tcp_sk(sk);
	int recycle_ok = 0;

	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);

	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
		tw = inet_twsk_alloc(sk, state);
2005-08-10 07:09:30 +04:00
if ( tw ! = NULL ) {
struct tcp_timewait_sock * tcptw = tcp_twsk ( ( struct sock * ) tw ) ;
2005-08-10 07:10:42 +04:00
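		/* (icsk_rto << 2) - (icsk_rto >> 1) is 3.5 * RTO: comfortably
		 * more than the peer needs to retransmit its final segments
		 * (see the TIME-WAIT discussion at the top of this file).
		 * It is the lower bound on the timeout below and, when
		 * tw_recycle applies, the whole timeout.
		 */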
		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);

		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
		tcptw->tw_snd_nxt	= tp->snd_nxt;
		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (tw->tw_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);
			struct inet6_timewait_sock *tw6;

			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
			tw6 = inet6_twsk((struct sock *)tw);
			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
			tw->tw_ipv6only = np->ipv6only;
		}
#endif

#ifdef CONFIG_TCP_MD5SIG
		/*
		 * The timewait bucket does not have the key DB from the
		 * sock structure. We just make a quick copy of the
		 * md5 key being used (if indeed we are using one)
		 * so the timewait ack generating code has the key.
		 */
		do {
			struct tcp_md5sig_key *key;
			memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
			tcptw->tw_md5_keylen = 0;
			key = tp->af_specific->md5_lookup(sk, sk);
			if (key != NULL) {
				memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
				tcptw->tw_md5_keylen = key->keylen;
				if (tcp_alloc_md5sig_pool() == NULL)
					BUG();
			}
		} while (0);
#endif

		/* Linkage updates. */
		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);

		/* Get the TIME_WAIT timeout firing. */
		if (timeo < rto)
			timeo = rto;

		if (recycle_ok) {
			tw->tw_timeout = rto;
		} else {
			tw->tw_timeout = TCP_TIMEWAIT_LEN;
			if (state == TCP_TIME_WAIT)
				timeo = TCP_TIMEWAIT_LEN;
		}

		inet_twsk_schedule(tw, &tcp_death_row, timeo,
				   TCP_TIMEWAIT_LEN);
		inet_twsk_put(tw);
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		if (net_ratelimit())
			printk(KERN_INFO "TCP: time wait bucket table overflow\n");
	}

	tcp_update_metrics(sk);
	tcp_done(sk);
}
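
/* Destructor for a TCP timewait sock: if tcp_time_wait() above copied an MD5
 * key into the bucket (and so took a reference on the MD5 signature pool),
 * drop that reference here.
 */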
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
	if (twsk->tw_md5_keylen)
		tcp_put_md5sig_pool();
#endif
}

EXPORT_SYMBOL_GPL(tcp_twsk_destructor);

/* This is not only more efficient than what we used to do, it eliminates
 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
 *
 * Actually, we could save lots of memory writes here: tp of the listening
 * socket contains all the necessary default parameters.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);

	if (newsk != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);
		struct tcp_request_sock *treq = tcp_rsk(req);
		struct inet_connection_sock *newicsk = inet_csk(newsk);
		struct tcp_sock *newtp;

		/* Now setup tcp_sock */
		newtp = tcp_sk(newsk);
		newtp->pred_flags = 0;
		newtp->rcv_nxt = treq->rcv_isn + 1;
		newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;

		tcp_prequeue_init(newtp);

		tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);

		newtp->srtt = 0;
		newtp->mdev = TCP_TIMEOUT_INIT;
		newicsk->icsk_rto = TCP_TIMEOUT_INIT;

		newtp->packets_out = 0;
		newtp->left_out = 0;
		newtp->retrans_out = 0;
		newtp->sacked_out = 0;
		newtp->fackets_out = 0;
		newtp->snd_ssthresh = 0x7fffffff;

		/* So many TCP implementations out there (incorrectly) count the
		 * initial SYN frame in their delayed-ACK and congestion control
		 * algorithms that we must have the following bandaid to talk
		 * efficiently to them.  -DaveM
		 */
		newtp->snd_cwnd = 2;
		newtp->snd_cwnd_cnt = 0;
		newtp->bytes_acked = 0;

		newtp->frto_counter = 0;
		newtp->frto_highmark = 0;

		newicsk->icsk_ca_ops = &tcp_init_congestion_ops;

		tcp_set_ca_state(newsk, TCP_CA_Open);
		tcp_init_xmit_timers(newsk);
		skb_queue_head_init(&newtp->out_of_order_queue);
		newtp->rcv_wup = treq->rcv_isn + 1;
		newtp->write_seq = treq->snt_isn + 1;
		newtp->pushed_seq = newtp->write_seq;
		newtp->copied_seq = treq->rcv_isn + 1;

		newtp->rx_opt.saw_tstamp = 0;

		newtp->rx_opt.dsack = 0;
		newtp->rx_opt.eff_sacks = 0;

		newtp->rx_opt.num_sacks = 0;
		newtp->urg_data = 0;

		if (sock_flag(newsk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(newsk,
						       keepalive_time_when(newtp));

		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
			if (sysctl_tcp_fack)
				newtp->rx_opt.sack_ok |= 2;
		}
		newtp->window_clamp = req->window_clamp;
		newtp->rcv_ssthresh = req->rcv_wnd;
		newtp->rcv_wnd = req->rcv_wnd;
		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
		if (newtp->rx_opt.wscale_ok) {
			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
		} else {
			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
			newtp->window_clamp = min(newtp->window_clamp, 65535U);
		}
		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
		newtp->max_window = newtp->snd_wnd;

		if (newtp->rx_opt.tstamp_ok) {
			newtp->rx_opt.ts_recent = req->ts_recent;
			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			newtp->rx_opt.ts_recent_stamp = 0;
			newtp->tcp_header_len = sizeof(struct tcphdr);
		}
#ifdef CONFIG_TCP_MD5SIG
		newtp->md5sig_info = NULL;	/*XXX*/
		if (newtp->af_specific->md5_lookup(sk, newsk))
			newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
		if (skb->len >= TCP_MIN_RCVMSS + newtp->tcp_header_len)
			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
		newtp->rx_opt.mss_clamp = req->mss;
		TCP_ECN_openreq_child(newtp, req);

		TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
	}
	return newsk;
}

/*
 * Process an incoming packet for SYN_RECV sockets represented
 * as a request_sock.
 */
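/*
 * Return value: a new child socket if the handshake completed; the listener
 * sk itself to hand the segment back to normal listener processing (used for
 * the invalid-ACK case, where the listener will send the reset); NULL if the
 * segment was dropped or fully handled here.
 */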
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req,
			   struct request_sock **prev)
{
	struct tcphdr *th = skb->h.th;
	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
	int paws_reject = 0;
	struct tcp_options_received tmp_opt;
	struct sock *child;

	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(struct tcphdr) >> 2)) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent = req->ts_recent;
			/* We do not store the true timestamp, but it is not
			 * required; it can be estimated (approximately)
			 * from other data.
			 */
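			/* Each SYN-ACK retransmission doubles the timeout,
			 * so (TCP_TIMEOUT_INIT / HZ) << req->retrans is a
			 * rough bound on how long ago that timestamp was
			 * actually received.
			 */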
			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ) << req->retrans);
			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
		}
	}

	/* Check for pure retransmitted SYN. */
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
	    flg == TCP_FLAG_SYN &&
	    !paws_reject) {
		/*
		 * RFC793 draws (incorrectly! it was fixed in RFC1122)
		 * this case in figure 6 and figure 8, but the formal
		 * protocol description says NOTHING. To be more exact,
		 * it says that we should send an ACK, because this
		 * segment (at least, if it has no data) is out of window.
		 *
		 * CONCLUSION: RFC793 (even with RFC1122) DOES NOT
		 * describe the SYN-RECV state. All the description
		 * is wrong; we cannot believe it and should rely only
		 * on common sense and implementation experience.
		 *
		 * Enforce "SYN-ACK" according to figure 8, figure 6
		 * of RFC793, fixed by RFC1122.
		 */
		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		return NULL;
	}

	/* Further reproduces section "SEGMENT ARRIVES"
	   for state SYN-RECEIVED of RFC793.
	   It is broken, however: it fails only when SYNs are crossed.

	   You would think that SYN crossing is impossible here, since
	   we should have a SYN_SENT socket (from connect()) on our end,
	   but this is not true if the crossed SYNs were sent to both
	   ends by a malicious third party.  We must defend against this,
	   and to do that we first verify the ACK (as per RFC793, page
	   36) and reset if it is invalid.  Is this a true full defense?
	   To convince ourselves, let us consider a way in which the ACK
	   test can still pass in this 'malicious crossed SYNs' case.
	   The malicious sender sends identical SYNs (and thus identical
	   sequence numbers) to both A and B:

		A: gets SYN, seq=7
		B: gets SYN, seq=7

	   By our good fortune, both A and B select the same initial
	   send sequence number of seven :-)

		A: sends SYN|ACK, seq=7, ack_seq=8
		B: sends SYN|ACK, seq=7, ack_seq=8

	   So we are now A eating this SYN|ACK; the ACK test passes.  So
	   does the sequence test; the SYN is truncated, and thus we consider
	   it a bare ACK.

	   If icsk->icsk_accept_queue.rskq_defer_accept is set, we silently
	   drop this bare ACK.  Otherwise, we create an established connection.
	   Both ends (listening sockets) accept the new incoming connection
	   and try to talk to each other. 8-)

	   Note: this case is both harmless and rare.  The probability is
	   about the same as us discovering intelligent life on another
	   planet tomorrow.

	   But generally, we should (the RFC lies!) accept an ACK from a
	   SYNACK both here and in tcp_rcv_state_process().
	   tcp_rcv_state_process() does not, hence we do not either.

	   Note that the case is absolutely generic:
	   we cannot optimize anything here without
	   violating the protocol. All the checks must be made
	   before an attempt to create a socket.
	 */

	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
	 * and the incoming segment acknowledges something not yet
	 * sent (the segment carries an unacceptable ACK) ...
	 * a reset is sent."
	 *
	 * Invalid ACK: reset will be sent by listening socket
	 */
	if ((flg & TCP_FLAG_ACK) &&
	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
		return sk;

	/* Also, it would not be a bad idea to check rcv_tsecr, which
	 * is essentially an ACK extension; too early or too late values
	 * should cause a reset in unsynchronized states.
	 */

	/* RFC793: "first check sequence number". */

	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
		/* Out of window: send ACK and drop. */
		if (!(flg & TCP_FLAG_RST))
			req->rsk_ops->send_ack(skb, req);
		if (paws_reject)
			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
		return NULL;
	}

	/* In sequence, PAWS is OK. */

	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
		req->ts_recent = tmp_opt.rcv_tsval;

	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
		/* Truncate SYN, it is out of window starting
		   at tcp_rsk(req)->rcv_isn + 1. */
		flg &= ~TCP_FLAG_SYN;
	}

	/* RFC793: "second check the RST bit" and
	 *	   "fourth, check the SYN bit"
	 */
	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
		goto embryonic_reset;
	}

	/* ACK sequence verified above, just make sure ACK is
	 * set. If ACK not set, just silently drop the packet.
	 */
	if (!(flg & TCP_FLAG_ACK))
		return NULL;

	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
		inet_rsk(req)->acked = 1;
		return NULL;
	}

	/* OK, ACK is valid, create big socket and
	 * feed this segment to it. It will repeat all
	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
	 * ESTABLISHED STATE. If it will be dropped after
	 * socket is created, wait for troubles.
	 */
	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
							 req, NULL);
	if (child == NULL)
		goto listen_overflow;
#ifdef CONFIG_TCP_MD5SIG
	else {
		/* Copy over the MD5 key from the original socket */
		struct tcp_md5sig_key *key;
		struct tcp_sock *tp = tcp_sk(sk);
		key = tp->af_specific->md5_lookup(sk, child);
		if (key != NULL) {
			/*
			 * We're using one, so create a matching key on the
			 * newsk structure. If we fail to get memory then we
			 * end up not copying the key across. Shucks.
			 */
			char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
			if (newkey) {
				if (!tcp_alloc_md5sig_pool())
					BUG();
				memcpy(newkey, key->key, key->keylen);
				tp->af_specific->md5_add(child, child,
							 newkey,
							 key->keylen);
			}
		}
	}
#endif
2005-08-10 07:10:42 +04:00
inet_csk_reqsk_queue_unlink ( sk , req , prev ) ;
inet_csk_reqsk_queue_removed ( sk , req ) ;
2005-04-17 02:20:36 +04:00
2005-08-10 07:10:42 +04:00
inet_csk_reqsk_queue_add ( sk , req , child ) ;
2005-04-17 02:20:36 +04:00
return child ;
listen_overflow :
if ( ! sysctl_tcp_abort_on_overflow ) {
		inet_rsk(req)->acked = 1;
		return NULL;
	}

embryonic_reset:
	NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
	if (!(flg & TCP_FLAG_RST))
		req->rsk_ops->send_reset(sk, skb);

	inet_csk_reqsk_queue_drop(sk, req, prev);
	return NULL;
}

/*
 * Queue segment on the new socket if the new socket is active,
 * otherwise we just shortcircuit this and continue with
 * the new socket.
 */
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb)
{
	int ret = 0;
	int state = child->sk_state;

	if (!sock_owned_by_user(child)) {
		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);

		/* Wakeup parent, send SIGIO */
		if (state == TCP_SYN_RECV && child->sk_state != state)
			parent->sk_data_ready(parent, 0);
	} else {
		/* Alas, it is possible again, because we do lookup
		 * in the main socket hash table and the lock on the
		 * listening socket does not protect us any more.
		 */
		sk_add_backlog(child, skb);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}

EXPORT_SYMBOL(tcp_check_req);
EXPORT_SYMBOL(tcp_child_process);
EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_timewait_state_process);