/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:     $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *              IPv4 specific functions
 *
 *
 *              code split from:
 *              linux/ipv4/tcp.c
 *              linux/ipv4/tcp_input.c
 *              linux/ipv4/tcp_output.c
 *
 *      See tcp.c for author information
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
/*
 * Changes:
 *              David S. Miller :       New socket lookup architecture.
 *                                      This code is dedicated to John Dyson.
 *              David S. Miller :       Change semantics of established hash,
 *                                      half is devoted to TIME_WAIT sockets
 *                                      and the rest go in the other half.
 *              Andi Kleen :            Add support for syncookies and fixed
 *                                      some bugs: ip options weren't passed to
 *                                      the TCP layer, missed a check for an
 *                                      ACK bit.
 *              Andi Kleen :            Implemented fast path mtu discovery.
 *                                      Fixed many serious bugs in the
 *                                      request_sock handling and moved
 *                                      most of it into the af independent code.
 *                                      Added tail drop and some other bugfixes.
 *                                      Added new listen semantics.
 *              Mike McLagan    :       Routing by source
 *      Juan Jose Ciarlante:            ip_dynaddr bits
 *              Andi Kleen:             various fixes.
 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
 *                                      coma.
 *      Andi Kleen              :       Fix new listen.
 *      Andi Kleen              :       Fix accept error reporting.
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 */
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct socket *tcp_socket;

void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
                                                   __be32 addr);
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
                                   __be32 saddr, __be32 daddr,
                                   struct tcphdr *th, int protocol,
                                   int tcplen);
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
        .lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
        .lhash_users = ATOMIC_INIT(0),
        .lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
        return inet_csk_get_port(&tcp_hashinfo, sk, snum,
                                 inet_csk_bind_conflict);
}

static void tcp_v4_hash(struct sock *sk)
{
        inet_hash(&tcp_hashinfo, sk);
}

void tcp_unhash(struct sock *sk)
{
        inet_unhash(&tcp_hashinfo, sk);
}
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
        return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                          skb->nh.iph->saddr,
                                          skb->h.th->dest,
                                          skb->h.th->source);
}
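
/* Decide whether an existing TIME-WAIT socket on the desired 4-tuple may be
 * reused for a new outgoing connection.  Returns 1 (and takes a reference on
 * the timewait socket) when reuse is allowed.
 */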
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);

        /* With PAWS, it is safe from the viewpoint
           of data integrity. Even without PAWS it is safe provided sequence
           spaces do not overlap i.e. at data rates <= 80Mbit/sec.

           Actually, the idea is close to VJ's one, only timestamp cache is
           held not per host, but per port pair and TW bucket is used as state
           holder.

           If TW bucket has been already destroyed we fall back to VJ's scheme
           and use initial timestamp retrieved from peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
            (twp == NULL || (sysctl_tcp_tw_reuse &&
                             xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
                        tp->write_seq = 1;
                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                sock_hold(sktw);
                return 1;
        }

        return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct rtable *rt;
        __be32 daddr, nexthop;
        int tmp;
        int err;

        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        if (usin->sin_family != AF_INET)
                return -EAFNOSUPPORT;

        nexthop = daddr = usin->sin_addr.s_addr;
        if (inet->opt && inet->opt->srr) {
                if (!daddr)
                        return -EINVAL;
                nexthop = inet->opt->faddr;
        }

        tmp = ip_route_connect(&rt, nexthop, inet->saddr,
                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                               IPPROTO_TCP,
                               inet->sport, usin->sin_port, sk);
        if (tmp < 0)
                return tmp;

        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
                ip_rt_put(rt);
                return -ENETUNREACH;
        }

        if (!inet->opt || !inet->opt->srr)
                daddr = rt->rt_dst;

        if (!inet->saddr)
                inet->saddr = rt->rt_src;
        inet->rcv_saddr = inet->saddr;

        if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
                /* Reset inherited state */
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq              = 0;
        }

        if (tcp_death_row.sysctl_tw_recycle &&
            !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
                struct inet_peer *peer = rt_get_peer(rt);
                /*
                 * VJ's idea. We save last timestamp seen from
                 * the destination in peer table, when entering state
                 * TIME-WAIT and initialize rx_opt.ts_recent from it,
                 * when trying new connection.
                 */
                if (peer != NULL &&
                    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
                        tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
                        tp->rx_opt.ts_recent = peer->tcp_ts;
                }
        }

        inet->dport = usin->sin_port;
        inet->daddr = daddr;

        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet->opt)
                inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

        tp->rx_opt.mss_clamp = 536;

        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and not releasing socket
         * lock select source port, enter ourselves into the hash tables and
         * complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;

        err = ip_route_newports(&rt, IPPROTO_TCP,
                                inet->sport, inet->dport, sk);
        if (err)
                goto failure;

        /* OK, now commit destination to socket.  */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->u.dst);

        if (!tp->write_seq)
                tp->write_seq = secure_tcp_sequence_number(inet->saddr,
                                                           inet->daddr,
                                                           inet->sport,
                                                           usin->sin_port);

        inet->id = tp->write_seq ^ jiffies;

        err = tcp_connect(sk);
        rt = NULL;
        if (err)
                goto failure;

        return 0;

failure:
        /*
         * This unhashes the socket and releases the local port,
         * if necessary.
         */
        tcp_set_state(sk, TCP_CLOSE);
        ip_rt_put(rt);
        sk->sk_route_caps = 0;
        inet->dport = 0;
        return err;
}
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
        struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);

        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
         * sent out by Linux are always < 576 bytes, so they should go through
         * unfragmented).
         */
        if (sk->sk_state == TCP_LISTEN)
                return;

        /* We don't check in the destentry if pmtu discovery is forbidden
         * on this route. We just assume that no packet_to_big packets
         * are sent back when pmtu discovery is not active.
         * There is a small race when the user changes this flag in the
         * route, but I think that's acceptable.
         */
        if ((dst = __sk_dst_check(sk, 0)) == NULL)
                return;

        dst->ops->update_pmtu(dst, mtu);

        /* Something is about to be wrong... Remember soft error
         * for the case, if this connection will not be able to recover.
         */
        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
                sk->sk_err_soft = EMSGSIZE;

        mtu = dst_mtu(dst);

        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                tcp_sync_mss(sk, mtu);

                /* Resend the TCP packet because it's
                 * clear that the old packet has been
                 * dropped. This is the new "fast" path mtu
                 * discovery.
                 */
                tcp_simple_retransmit(sk);
        } /* else let the usual retransmit timer handle it */
}
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
        struct iphdr *iph = (struct iphdr *)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
        struct tcp_sock *tp;
        struct inet_sock *inet;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        struct sock *sk;
        __u32 seq;
        int err;

        if (skb->len < (iph->ihl << 2) + 8) {
                ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
                return;
        }

        sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
                         th->source, inet_iif(skb));
        if (!sk) {
                ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
                return;
        }
        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return;
        }

        bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
         * servers this needs to be solved differently.
         */
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        switch (type) {
        case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
                goto out;
        case ICMP_PARAMETERPROB:
                err = EPROTO;
                break;
        case ICMP_DEST_UNREACH:
                if (code > NR_ICMP_UNREACH)
                        goto out;

                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
                        if (!sock_owned_by_user(sk))
                                do_pmtu_discovery(sk, iph, info);
                        goto out;
                }

                err = icmp_err_convert[code].errno;
                break;
        case ICMP_TIME_EXCEEDED:
                err = EHOSTUNREACH;
                break;
        default:
                goto out;
        }

        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = inet_csk_search_req(sk, &prev, th->dest,
                                          iph->daddr, iph->saddr);
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                   an established socket here.
                 */
                BUG_TRAP(!req->sk);
                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                /*
                 * Still in SYN_RECV, just remove it silently.
                 * There is no good way to pass the error to the newly
                 * created socket, and POSIX does not want network
                 * errors returned from accept().
                 */
                inet_csk_reqsk_queue_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can f.e. if SYNs crossed.
                             */
                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;

                        sk->sk_error_report(sk);

                        tcp_done(sk);
                } else {
                        sk->sk_err_soft = err;
                }
                goto out;
        }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * rfc1122 4.2.3.9 allows to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by pmtu discovery).
         *
         * Note, that in modern internet, where routing is unreliable
         * and in each dark corner broken firewalls sit, sending random
         * errors ordered by their masters even these two messages finally
         * lose their original sense (even Linux sends invalid PORT_UNREACHs)
         *
         * Now we are in compliance with RFCs.
         *                                                      --ANK (980905)
         */

        inet = inet_sk(sk);
        if (!sock_owned_by_user(sk) && inet->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else  { /* Only an error on timeout */
                sk->sk_err_soft = err;
        }

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
        struct inet_sock *inet = inet_sk(sk);
        struct tcphdr *th = skb->h.th;

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                th->check = ~tcp_v4_check(th, len,
                                          inet->saddr, inet->daddr, 0);
                skb->csum_offset = offsetof(struct tcphdr, check);
        } else {
                th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
                                         csum_partial((char *)th,
                                                      th->doff << 2,
                                                      skb->csum));
        }
}
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
        struct iphdr *iph;
        struct tcphdr *th;

        if (!pskb_may_pull(skb, sizeof(*th)))
                return -EINVAL;

        iph = skb->nh.iph;
        th = skb->h.th;

        th->check = 0;
        th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
        skb->csum_offset = offsetof(struct tcphdr, check);
        skb->ip_summed = CHECKSUM_PARTIAL;
        return 0;
}
/*
 *      This routine will send an RST to the other tcp.
 *
 *      Someone asks:  why I NEVER use socket parameters (TOS, TTL etc.)
 *                     for reset.
 *      Answer: if a packet caused RST, it is not for a socket
 *              existing in our system, if it is matched to a socket,
 *              it is just duplicate segment or bug in other side's TCP.
 *              So that we build reply only basing on parameters
 *              arrived with segment.
 *      Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th;
        struct {
                struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
        } rep;
        struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
#endif

        /* Never send a reset in response to a reset. */
        if (th->rst)
                return;

        if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
                return;

        /* Swap the send and the receive. */
        memset(&rep, 0, sizeof(rep));
        rep.th.dest   = th->source;
        rep.th.source = th->dest;
        rep.th.doff   = sizeof(struct tcphdr) / 4;
        rep.th.rst    = 1;

        if (th->ack) {
                rep.th.seq = th->ack_seq;
        } else {
                rep.th.ack = 1;
                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
                                       skb->len - (th->doff << 2));
        }

        memset(&arg, 0, sizeof(arg));
        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
        key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
        if (key) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
                                   (TCPOPT_NOP << 16) |
                                   (TCPOPT_MD5SIG << 8) |
                                   TCPOLEN_MD5SIG);
                /* Update length and the length the header thinks exists */
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len / 4;

                tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
                                        key,
                                        skb->nh.iph->daddr,
                                        skb->nh.iph->saddr,
                                        &rep.th, IPPROTO_TCP,
                                        arg.iov[0].iov_len);
        }
#endif
        arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
                                      skb->nh.iph->saddr, /* XXX */
                                      sizeof(struct tcphdr), IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;

        ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
        TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
                            struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th;
        struct {
                struct tcphdr th;
                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
                        ];
        } rep;
        struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        struct tcp_md5sig_key tw_key;
#endif

        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));

        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);
        if (ts) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) |
                                   TCPOLEN_TIMESTAMP);
                rep.opt[1] = htonl(tcp_time_stamp);
                rep.opt[2] = htonl(ts);
                arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
        }

        /* Swap the send and the receive. */
        rep.th.dest    = th->source;
        rep.th.source  = th->dest;
        rep.th.doff    = arg.iov[0].iov_len / 4;
        rep.th.seq     = htonl(seq);
        rep.th.ack_seq = htonl(ack);
        rep.th.ack     = 1;
        rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
        /*
         * The SKB holds an incoming packet, but may not have a valid ->sk
         * pointer. This is especially the case when we're dealing with a
         * TIME_WAIT ack, because the sk structure is long gone, and only
         * the tcp_timewait_sock remains. So the md5 key is stashed in that
         * structure, and we use it in preference.  I believe that (twsk ||
         * skb->sk) holds true, but we program defensively.
         */
        if (!twsk && skb->sk) {
                key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
        } else if (twsk && twsk->tw_md5_keylen) {
                tw_key.key = twsk->tw_md5_key;
                tw_key.keylen = twsk->tw_md5_keylen;
                key = &tw_key;
        } else
                key = NULL;

        if (key) {
                int offset = (ts) ? 3 : 0;

                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_MD5SIG << 8) |
                                          TCPOLEN_MD5SIG);
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len / 4;

                tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
                                        key,
                                        skb->nh.iph->daddr,
                                        skb->nh.iph->saddr,
                                        &rep.th, IPPROTO_TCP,
                                        arg.iov[0].iov_len);
        }
#endif
        arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
                                      skb->nh.iph->saddr, /* XXX */
                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;

        ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
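
/* Answer a segment that hit a TIME-WAIT socket with an ACK built from the
 * timewait state: snd_nxt, rcv_nxt, the scaled receive window and the most
 * recently seen timestamp.
 */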
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcptw->tw_ts_recent);

        inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
                                  struct request_sock *req)
{
        tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
                        req->ts_recent);
}
/*
 *      Send a SYN-ACK after having received a SYN.
 *      This still operates on a request_sock only, not on a big
 *      socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
                              struct dst_entry *dst)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        int err = -1;
        struct sk_buff *skb;

        /* First, grab a route. */
        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
                goto out;

        skb = tcp_make_synack(sk, dst, req);

        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v4_check(th, skb->len,
                                         ireq->loc_addr,
                                         ireq->rmt_addr,
                                         csum_partial((char *)th, skb->len,
                                                      skb->csum));
                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
                                            ireq->rmt_addr,
                                            ireq->opt);
                err = net_xmit_eval(err);
        }

out:
        dst_release(dst);
        return err;
}
/*
 *      IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->opt);
}
#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
        static unsigned long warntime;

        if (time_after(jiffies, (warntime + HZ * 60))) {
                warntime = jiffies;
                printk(KERN_INFO
                       "possible SYN flooding on port %d. Sending cookies.\n",
                       ntohs(skb->h.th->dest));
        }
}
#endif
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
                                              struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);
        struct ip_options *dopt = NULL;

        if (opt && opt->optlen) {
                int opt_size = optlength(opt);
                dopt = kmalloc(opt_size, GFP_ATOMIC);
                if (dopt) {
                        if (ip_options_echo(dopt, skb)) {
                                kfree(dopt);
                                dopt = NULL;
                        }
                }
        }
        return dopt;
}
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int i;

        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
                return NULL;
        for (i = 0; i < tp->md5sig_info->entries4; i++) {
                if (tp->md5sig_info->keys4[i].addr == addr)
                        return (struct tcp_md5sig_key *)
                                                &tp->md5sig_info->keys4[i];
        }
        return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
                                         struct sock *addr_sk)
{
        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
                                               struct request_sock *req)
{
        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}
/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
                      u8 *newkey, u8 newkeylen)
{
        /* Add Key to the list */
        struct tcp4_md5sig_key *key;
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp4_md5sig_key *keys;

        key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
        if (key) {
                /* Pre-existing entry - just update that one. */
                kfree(key->key);
                key->key = newkey;
                key->keylen = newkeylen;
        } else {
                struct tcp_md5sig_info *md5sig;

                if (!tp->md5sig_info) {
                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
                                                  GFP_ATOMIC);
                        if (!tp->md5sig_info) {
                                kfree(newkey);
                                return -ENOMEM;
                        }
                }
                if (tcp_alloc_md5sig_pool() == NULL) {
                        kfree(newkey);
                        return -ENOMEM;
                }
                md5sig = tp->md5sig_info;

                if (md5sig->alloced4 == md5sig->entries4) {
                        keys = kmalloc((sizeof(*keys) *
                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
                        if (!keys) {
                                kfree(newkey);
                                tcp_free_md5sig_pool();
                                return -ENOMEM;
                        }

                        if (md5sig->entries4)
                                memcpy(keys, md5sig->keys4,
                                       sizeof(*keys) * md5sig->entries4);

                        /* Free old key list, and reference new one */
                        if (md5sig->keys4)
                                kfree(md5sig->keys4);
                        md5sig->keys4 = keys;
                        md5sig->alloced4++;
                }

                md5sig->entries4++;
                md5sig->keys4[md5sig->entries4 - 1].addr   = addr;
                md5sig->keys4[md5sig->entries4 - 1].key    = newkey;
                md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen;
        }
        return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
                               u8 *newkey, u8 newkeylen)
{
        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
                                 newkey, newkeylen);
}
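
/* Remove the MD5 key registered for @addr, compacting the per-socket key
 * array and dropping one reference on the shared crypto pool.
 */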
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int i;

        for (i = 0; i < tp->md5sig_info->entries4; i++) {
                if (tp->md5sig_info->keys4[i].addr == addr) {
                        /* Free the key */
                        kfree(tp->md5sig_info->keys4[i].key);
                        tp->md5sig_info->entries4--;

                        if (tp->md5sig_info->entries4 == 0) {
                                kfree(tp->md5sig_info->keys4);
                                tp->md5sig_info->keys4 = NULL;
                        } else if (tp->md5sig_info->entries4 != i) {
                                /* Need to do some manipulation */
                                memcpy(&tp->md5sig_info->keys4[i],
                                       &tp->md5sig_info->keys4[i + 1],
                                       (tp->md5sig_info->entries4 - i) *
                                        sizeof(struct tcp4_md5sig_key));
                        }
                        tcp_free_md5sig_pool();
                        return 0;
                }
        }
        return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);
static void tcp_v4_clear_md5_list(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* Free each key, then the set of key keys,
         * the crypto element, and then decrement our
         * hold on the last resort crypto.
         */
        if (tp->md5sig_info->entries4) {
                int i;
                for (i = 0; i < tp->md5sig_info->entries4; i++)
                        kfree(tp->md5sig_info->keys4[i].key);
                tp->md5sig_info->entries4 = 0;
                tcp_free_md5sig_pool();
        }
        if (tp->md5sig_info->keys4) {
                kfree(tp->md5sig_info->keys4);
                tp->md5sig_info->keys4 = NULL;
                tp->md5sig_info->alloced4 = 0;
        }
}
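
/* setsockopt(TCP_MD5SIG) handler: copy the key description from user space
 * and add or delete the per-peer key accordingly.
 */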
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
                                 int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
        u8 *newkey;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_user(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin->sin_family != AF_INET)
                return -EINVAL;

        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
                if (!tcp_sk(sk)->md5sig_info)
                        return -ENOENT;
                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (!tcp_sk(sk)->md5sig_info) {
                struct tcp_sock *tp = tcp_sk(sk);
                struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

                if (!p)
                        return -EINVAL;

                tp->md5sig_info = p;
        }

        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
        if (!newkey)
                return -ENOMEM;
        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
                                 newkey, cmd.tcpm_keylen);
}
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
                                   __be32 saddr, __be32 daddr,
                                   struct tcphdr *th, int protocol,
                                   int tcplen)
{
        struct scatterlist sg[4];
        __u16 data_len;
        int block = 0;
#ifdef CONFIG_TCP_MD5SIG_DEBUG
        int i;
#endif
        __sum16 old_checksum;
        struct tcp_md5sig_pool *hp;
        struct tcp4_pseudohdr *bp;
        struct hash_desc *desc;
        int err;
        unsigned int nbytes = 0;

        /*
         * Okay, so RFC2385 is turned on for this connection,
         * so we need to generate the MD5 hash for the packet now.
         */

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;

        bp = &hp->md5_blk.ip4;
        desc = &hp->md5_desc;

        /*
         * 1. the TCP pseudo-header (in the order: source IP address,
         * destination IP address, zero-padded protocol number, and
         * segment length)
         */
        bp->saddr = saddr;
        bp->daddr = daddr;
        bp->pad = 0;
        bp->protocol = protocol;
        bp->len = htons(tcplen);
        sg_set_buf(&sg[block++], bp, sizeof(*bp));
        nbytes += sizeof(*bp);

#ifdef CONFIG_TCP_MD5SIG_DEBUG
        printk("Calculating hash for: ");
        for (i = 0; i < sizeof(*bp); i++)
                printk("%02x ", (unsigned int)((unsigned char *)bp)[i]);
        printk(" ");
#endif

        /* 2. the TCP header, excluding options, and assuming a
         * checksum of zero
         */
        old_checksum = th->check;
        th->check = 0;
        sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
        nbytes += sizeof(struct tcphdr);
#ifdef CONFIG_TCP_MD5SIG_DEBUG
        for (i = 0; i < sizeof(struct tcphdr); i++)
                printk(" %02x", (unsigned int)((unsigned char *)th)[i]);
#endif
        /* 3. the TCP segment data (if any) */
        data_len = tcplen - (th->doff << 2);
        if (data_len > 0) {
                unsigned char *data = (unsigned char *)th + (th->doff << 2);
                sg_set_buf(&sg[block++], data, data_len);
                nbytes += data_len;
        }

        /* 4. an independently-specified key or password, known to both
         * TCPs and presumably connection-specific
         */
        sg_set_buf(&sg[block++], key->key, key->keylen);
        nbytes += key->keylen;

#ifdef CONFIG_TCP_MD5SIG_DEBUG
        printk(" and password: ");
        for (i = 0; i < key->keylen; i++)
                printk("%02x ", (unsigned int)key->key[i]);
#endif

        /* Now store the Hash into the packet */
        err = crypto_hash_init(desc);
        if (err)
                goto clear_hash;
        err = crypto_hash_update(desc, sg, nbytes);
        if (err)
                goto clear_hash;
        err = crypto_hash_final(desc, md5_hash);
        if (err)
                goto clear_hash;

        /* Reset header, and free up the crypto */
        tcp_put_md5sig_pool();
        th->check = old_checksum;

out:
#ifdef CONFIG_TCP_MD5SIG_DEBUG
        printk(" result:");
        for (i = 0; i < 16; i++)
                printk(" %02x", (unsigned int)(((u8 *)md5_hash)[i]));
        printk("\n");
#endif
        return 0;
clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        goto out;
}
int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
                         struct sock *sk,
                         struct dst_entry *dst,
                         struct request_sock *req,
                         struct tcphdr *th, int protocol,
                         int tcplen)
{
        __be32 saddr, daddr;

        if (sk) {
                saddr = inet_sk(sk)->saddr;
                daddr = inet_sk(sk)->daddr;
        } else {
                struct rtable *rt = (struct rtable *)dst;
                BUG_ON(!rt);
                saddr = rt->rt_src;
                daddr = rt->rt_dst;
        }
        return tcp_v4_do_calc_md5_hash(md5_hash, key,
                                       saddr, daddr,
                                       th, protocol, tcplen);
}

EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
        /*
         * This gets called for each TCP segment that arrives
         * so we want to be efficient.
         * We have 3 drop cases:
         * o No MD5 hash and one expected.
         * o MD5 hash and we're not expecting one.
         * o MD5 hash and it's wrong.
         */
        __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        struct iphdr *iph = skb->nh.iph;
        struct tcphdr *th = skb->h.th;
        int length = (th->doff << 2) - sizeof(struct tcphdr);
        int genhash;
        unsigned char *ptr;
        unsigned char newhash[16];

        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);

        /*
         * If the TCP option length is less than the TCP_MD5SIG
         * option length, then we can shortcut
         */
        if (length < TCPOLEN_MD5SIG) {
                if (hash_expected)
                        return 1;
                else
                        return 0;
        }

        /* Okay, we can't shortcut - we have to grub through the options */
        ptr = (unsigned char *)(th + 1);
        while (length > 0) {
                int opcode = *ptr++;
                int opsize;

                switch (opcode) {
                case TCPOPT_EOL:
                        goto done_opts;
                case TCPOPT_NOP:
                        length--;
                        continue;
                default:
                        opsize = *ptr++;
                        if (opsize < 2)
                                goto done_opts;
                        if (opsize > length)
                                goto done_opts;

                        if (opcode == TCPOPT_MD5SIG) {
                                hash_location = ptr;
                                goto done_opts;
                        }
                }
                ptr += opsize - 2;
                length -= opsize;
        }
done_opts:
        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return 0;
        if (hash_expected && !hash_location) {
                LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
                               "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
                               NIPQUAD(iph->saddr), ntohs(th->source),
                               NIPQUAD(iph->daddr), ntohs(th->dest));
                return 1;
        }

        if (!hash_expected && hash_location) {
                LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
                               "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
                               NIPQUAD(iph->saddr), ntohs(th->source),
                               NIPQUAD(iph->daddr), ntohs(th->dest));
                return 1;
        }
        /* Okay, so this is hash_expected and hash_location -
         * so we need to calculate the checksum.
         */
        genhash = tcp_v4_do_calc_md5_hash(newhash,
                                          hash_expected,
                                          iph->saddr, iph->daddr,
                                          th, sk->sk_protocol,
                                          skb->len);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                if (net_ratelimit()) {
                        printk(KERN_INFO "MD5 Hash failed for "
                               "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
                               NIPQUAD(iph->saddr), ntohs(th->source),
                               NIPQUAD(iph->daddr), ntohs(th->dest),
                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
#ifdef CONFIG_TCP_MD5SIG_DEBUG
                        do {
                                int i;
                                printk("Received: ");
                                for (i = 0; i < 16; i++)
                                        printk("%02x ",
                                               0xff & (int)hash_location[i]);
                                printk("\n");
                                printk("Calculated: ");
                                for (i = 0; i < 16; i++)
                                        printk("%02x ", 0xff & (int)newhash[i]);
                                printk("\n");
                        } while (0);
#endif
                }
                return 1;
        }
        return 0;
}
#endif
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
        .family         =       PF_INET,
        .obj_size       =       sizeof(struct tcp_request_sock),
        .rtx_syn_ack    =       tcp_v4_send_synack,
        .send_ack       =       tcp_v4_reqsk_send_ack,
        .destructor     =       tcp_v4_reqsk_destructor,
        .send_reset     =       tcp_v4_send_reset,
};
struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
#endif
};

static struct timewait_sock_ops tcp_timewait_sock_ops = {
        .twsk_obj_size   = sizeof(struct tcp_timewait_sock),
        .twsk_unique     = tcp_twsk_unique,
        .twsk_destructor = tcp_twsk_destructor,
};
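
/* Handle an incoming SYN on a listening socket: allocate and queue a
 * request_sock (or fall back to syncookies) and answer with a SYN-ACK.
 */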
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct inet_request_sock *ireq;
        struct tcp_options_received tmp_opt;
        struct request_sock *req;
        __be32 saddr = skb->nh.iph->saddr;
        __be32 daddr = skb->nh.iph->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
        struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
        int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

        /* Never answer to SYNs sent to broadcast or multicast */
        if (((struct rtable *)skb->dst)->rt_flags &
            (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        /* TW buckets are converted to open requests without
         * limitations, they conserve resources and peer is
         * evidently real one.
         */
        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
                if (sysctl_tcp_syncookies) {
                        want_cookie = 1;
                } else
#endif
                goto drop;
        }

        /* Accept backlog is full. If we have already queued enough
         * of warm entries in syn queue, drop request. It is better than
         * clogging syn queue with openreqs with exponentially increasing
         * timeout.
         */
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                goto drop;

        req = reqsk_alloc(&tcp_request_sock_ops);
        if (!req)
                goto drop;

#ifdef CONFIG_TCP_MD5SIG
        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = 536;
        tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

        tcp_parse_options(skb, &tmp_opt, 0);

        if (want_cookie) {
                tcp_clear_options(&tmp_opt);
                tmp_opt.saw_tstamp = 0;
        }

        if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
                /* Some OSes (unknown ones, but I see them on web server,
                 * which contains information interesting only for windows'
                 * users) do not send their stamp in SYN. It is easy case.
                 * We simply do not advertise TS support.
                 */
                tmp_opt.saw_tstamp = 0;
                tmp_opt.tstamp_ok  = 0;
        }
        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

        tcp_openreq_init(req, &tmp_opt, skb);

        if (security_inet_conn_request(sk, skb, req))
                goto drop_and_free;
        ireq = inet_rsk(req);
        ireq->loc_addr = daddr;
        ireq->rmt_addr = saddr;
        ireq->opt = tcp_v4_save_options(sk, skb);
        if (!want_cookie)
                TCP_ECN_create_request(req, skb->h.th);

        if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
                syn_flood_warning(skb);
#endif
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
        } else if (!isn) {
                struct inet_peer *peer = NULL;

                /* VJ's idea. We save last timestamp seen
                 * from the destination in peer table, when entering
                 * state TIME-WAIT, and check against it before
                 * accepting new connection request.
                 *
                 * If "isn" is not zero, this request hit alive
                 * timewait bucket, so that all the necessary checks
                 * are made in the function processing timewait state.
                 */
                if (tmp_opt.saw_tstamp &&
                    tcp_death_row.sysctl_tw_recycle &&
                    (dst = inet_csk_route_req(sk, req)) != NULL &&
                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
                    peer->v4daddr == saddr) {
                        if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
                            (s32)(peer->tcp_ts - req->ts_recent) >
                                                        TCP_PAWS_WINDOW) {
                                NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
                                dst_release(dst);
                                goto drop_and_free;
                        }
                }
                /* Kill the following clause, if you dislike this way. */
                else if (!sysctl_tcp_syncookies &&
                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                          (sysctl_max_syn_backlog >> 2)) &&
                         (!peer || !peer->tcp_ts_stamp) &&
                         (!dst || !dst_metric(dst, RTAX_RTT))) {
                        /* Without syncookies last quarter of
                         * backlog is filled with destinations,
                         * proven to be alive.
                         * It means that we continue to communicate
                         * to destinations, already remembered
                         * to the moment of synflood.
                         */
                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
                                       "request from %u.%u.%u.%u/%u\n",
                                       NIPQUAD(saddr),
                                       ntohs(skb->h.th->source));
                        dst_release(dst);
                        goto drop_and_free;
                }

                isn = tcp_v4_init_sequence(skb);
        }
	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
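
/*
 * Editor's illustrative sketch (not part of the original file): the
 * tw_recycle branch above applies a PAWS-style check, trusting the cached
 * per-peer timestamp only while it is younger than TCP_PAWS_MSL and then
 * comparing 32-bit timestamps with wrap-safe signed arithmetic.  The
 * constants below are assumptions mirroring the usual kernel values
 * (TCP_PAWS_MSL = 60 s, TCP_PAWS_WINDOW = 1); the real definitions live
 * in net/tcp.h.
 */
#if 0	/* illustrative only, never compiled */
static int paws_reject_sketch(u32 peer_ts, u32 peer_ts_stamp,
			      u32 req_ts_recent, u32 now_sec)
{
	const u32 paws_msl = 60;	/* assumed TCP_PAWS_MSL */
	const s32 paws_win = 1;		/* assumed TCP_PAWS_WINDOW */

	/* Only trust the cached timestamp while it is recent enough. */
	if (now_sec >= peer_ts_stamp + paws_msl)
		return 0;
	/* Signed difference copes with 32-bit timestamp wrap-around. */
	return (s32)(peer_ts - req_ts_recent) > paws_win;
}
#endif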
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	newinet->opt = ireq->opt;
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
	}
#endif

	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
				      th->source, skb->nh.iph->daddr,
				      th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
				  skb->nh.iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
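
/*
 * Editor's illustrative sketch (not part of the original file):
 * tcp_v4_checksum_init() builds on the Internet one's-complement checksum
 * over a pseudo header (saddr, daddr, protocol, TCP length) followed by
 * the TCP segment.  The helper below shows only the generic summing and
 * folding step over a plain byte buffer; it is an assumption-level example,
 * not the kernel's optimised csum code.
 */
#if 0	/* illustrative only, never compiled */
static u16 ones_complement_csum_sketch(const u8 *data, int len, u32 sum)
{
	while (len > 1) {			/* add 16-bit words */
		sum += (data[0] << 8) | data[1];
		data += 2;
		len  -= 2;
	}
	if (len)				/* odd trailing byte */
		sum += data[0] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (u16)~sum;			/* 0 indicates the data verified */
}
#endif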
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
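
/*
 * Editor's illustrative sketch (not part of the original file): a compact
 * restatement of the dispatch done by tcp_v4_do_rcv() above -- established
 * sockets take the fast path, listeners may hand the segment to a newly
 * created child socket, and everything else goes through
 * tcp_rcv_state_process().
 */
#if 0	/* illustrative only, never compiled */
static const char *do_rcv_path_sketch(int sk_state)
{
	if (sk_state == TCP_ESTABLISHED)
		return "fast path (tcp_rcv_established)";
	if (sk_state == TCP_LISTEN)
		return "handshake path (tcp_v4_hnd_req / tcp_child_process)";
	return "slow path (tcp_rcv_state_process)";
}
#endif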
/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, th->dest,
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
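
/*
 * Editor's illustrative sketch (not part of the original file): the control
 * block setup in tcp_v4_rcv() accounts for SYN and FIN each consuming one
 * unit of sequence space on top of the payload bytes, which is exactly what
 * the end_seq assignment above computes.
 */
#if 0	/* illustrative only, never compiled */
static u32 tcp_end_seq_sketch(u32 seq, int syn, int fin, int payload_len)
{
	return seq + syn + fin + payload_len;
}
#endif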
/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */
int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}
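
/*
 * Editor's illustrative sketch (not part of the original file): the cached
 * per-peer timestamp above is advanced only when the new ts_recent is not
 * older than the cached one (wrap-safe compare), or when the cached entry
 * has aged beyond TCP_PAWS_MSL.  The constant here is an assumption
 * mirroring the usual kernel value of 60 seconds.
 */
#if 0	/* illustrative only, never compiled */
static int peer_ts_should_update_sketch(u32 cached_ts, u32 cached_stamp,
					u32 new_ts, u32 new_stamp,
					u32 now_sec)
{
	const u32 paws_msl = 60;	/* assumed TCP_PAWS_MSL */

	return (s32)(cached_ts - new_ts) <= 0 ||
	       (cached_stamp + paws_msl < now_sec &&
		cached_stamp <= new_stamp);
}
#endif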
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}
struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_calc_md5_hash,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
#endif
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	sk_stream_writequeue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(&tcp_hashinfo, sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk	  = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;

		/* We can reschedule _before_ having picked the target: */
		cond_resched_softirq();

		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
			if (tw->tw_family != st->family) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}
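
/*
 * Editor's illustrative sketch (not part of the original file): the
 * /proc iterator above walks the listening hash first (dipping into each
 * listener's SYN queue as OPENREQ entries), then every established-hash
 * bucket followed by its time-wait half.
 */
#if 0	/* illustrative only, never compiled */
static const char *tcp_seq_state_name_sketch(int state)
{
	switch (state) {
	case TCP_SEQ_STATE_LISTENING:	return "listening sockets";
	case TCP_SEQ_STATE_OPENREQ:	return "pending connection requests";
	case TCP_SEQ_STATE_ESTABLISHED:	return "established sockets";
	case TCP_SEQ_STATE_TIME_WAIT:	return "time-wait sockets";
	}
	return "?";
}
#endif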
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct seq_file *seq;
	struct tcp_iter_state *s;
	int rc;

	if (unlikely(afinfo == NULL))
		return -EINVAL;

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	s->family		= afinfo->family;
	s->seq_ops.start	= tcp_seq_start;
	s->seq_ops.next		= tcp_seq_next;
	s->seq_ops.show		= afinfo->seq_show;
	s->seq_ops.stop		= tcp_seq_stop;

	rc = seq_open(file, &s->seq_ops);
	if (rc)
		goto out_kfree;
	seq	     = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}
int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	if (!afinfo)
		return -EINVAL;
	afinfo->seq_fops->owner		= afinfo->owner;
	afinfo->seq_fops->open		= tcp_seq_open;
	afinfo->seq_fops->read		= seq_read;
	afinfo->seq_fops->llseek	= seq_lseek;
	afinfo->seq_fops->release	= seq_release_private;

	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
	if (p)
		p->data = afinfo;
	else
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
{
	if (!afinfo)
		return;
	proc_net_remove(afinfo->name);
	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 char *tmpbuf, int i, int uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}
static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una,
		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sp),
		icsk->icsk_probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		icsk->icsk_rto,
		icsk->icsk_ack.ato,
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       char *tmpbuf, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	char tmpbuf[TMPSZ + 1];

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, tmpbuf, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, tmpbuf, st->num);
		break;
	}
	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
out:
	return 0;
}
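
/*
 * Editor's illustrative sketch (not part of the original file): every row
 * emitted by tcp4_seq_show() formats endpoints as "%08X:%04X" -- the IPv4
 * address in network byte order printed as hex, and the port after ntohs().
 * A minimal restatement of that convention:
 */
#if 0	/* illustrative only, never compiled */
static int format_endpoint_sketch(char *buf, __be32 addr, __u16 host_port)
{
	/* matches the address:port convention used by get_tcp4_sock() */
	return sprintf(buf, "%08X:%04X", addr, host_port);
}
#endif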
static struct file_operations tcp4_seq_fops;
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};

int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}

void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
void __init tcp_v4_init(struct net_proto_family *ops)
{
	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
				     IPPROTO_TCP) < 0)
		panic("Failed to create the TCP control socket.\n");
}
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);