2005-04-16 15:20:36 -07:00
/*
 *  Syncookies implementation for the Linux kernel
 *
 *  Copyright (C) 1997 Andi Kleen
 *  Based on ideas by D.J.Bernstein and Eric Schenk.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 *  $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
 */
# include <linux/tcp.h>
# include <linux/slab.h>
# include <linux/random.h>
# include <linux/cryptohash.h>
# include <linux/kernel.h>
# include <net/tcp.h>
2008-04-10 03:12:40 -07:00
/*
 * Timestamps: lowest 9 bits store TCP options.
 * TSBITS is the number of timestamp bits used for that purpose and
 * TSMASK is the mask selecting exactly those low TSBITS bits.
 */
# define TSBITS 9
# define TSMASK (((__u32)1 << TSBITS) - 1)
2005-04-16 15:20:36 -07:00
/* Checked by cookie_v4_check(): when zero, no cookie validation is done. */
extern int sysctl_tcp_syncookies ;
2008-03-23 22:21:28 -07:00
/*
 * Two secret blocks, selected by the last argument of cookie_hash().
 * Filled with random bytes by init_syncookies().
 */
__u32 syncookie_secret [ 2 ] [ 16 - 4 + SHA_DIGEST_WORDS ] ;
2008-02-07 21:49:26 -08:00
EXPORT_SYMBOL ( syncookie_secret ) ;
2005-04-16 15:20:36 -07:00
/*
 * Seed both syncookie secrets with random bytes.
 * Registered through __initcall(); always returns 0.
 */
static __init int init_syncookies(void)
{
	get_random_bytes(syncookie_secret, sizeof(syncookie_secret));

	return 0;
}
__initcall(init_syncookies);
2005-04-16 15:20:36 -07:00
/*
 * secure_tcp_syn_cookie() shifts the counter left by COOKIEBITS and
 * masks the second hash plus the data value with COOKIEMASK.
 */
# define COOKIEBITS 24 /* Upper bits store count */
# define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
2008-02-07 10:40:19 +01:00
static DEFINE_PER_CPU ( __u32 , cookie_scratch ) [ 16 + 5 + SHA_WORKSPACE_WORDS ] ;
2006-11-14 20:51:49 -08:00
static u32 cookie_hash ( __be32 saddr , __be32 daddr , __be16 sport , __be16 dport ,
2005-04-16 15:20:36 -07:00
u32 count , int c )
{
2008-02-07 10:40:19 +01:00
__u32 * tmp = __get_cpu_var ( cookie_scratch ) ;
2005-04-16 15:20:36 -07:00
2008-03-23 22:21:28 -07:00
memcpy ( tmp + 4 , syncookie_secret [ c ] , sizeof ( syncookie_secret [ c ] ) ) ;
2006-11-14 20:51:49 -08:00
tmp [ 0 ] = ( __force u32 ) saddr ;
tmp [ 1 ] = ( __force u32 ) daddr ;
tmp [ 2 ] = ( ( __force u32 ) sport < < 16 ) + ( __force u32 ) dport ;
2005-04-16 15:20:36 -07:00
tmp [ 3 ] = count ;
sha_transform ( tmp + 16 , ( __u8 * ) tmp , tmp + 16 + 5 ) ;
return tmp [ 17 ] ;
}
2008-04-10 03:12:40 -07:00
/*
* when syncookies are in effect and tcp timestamps are enabled we encode
* tcp options in the lowest 9 bits of the timestamp value that will be
* sent in the syn - ack .
* Since subsequent timestamps use the normal tcp_time_stamp value , we
* must make sure that the resulting initial timestamp is < = tcp_time_stamp .
*/
__u32 cookie_init_timestamp ( struct request_sock * req )
{
struct inet_request_sock * ireq ;
u32 ts , ts_now = tcp_time_stamp ;
u32 options = 0 ;
ireq = inet_rsk ( req ) ;
if ( ireq - > wscale_ok ) {
options = ireq - > snd_wscale ;
options | = ireq - > rcv_wscale < < 4 ;
}
options | = ireq - > sack_ok < < 8 ;
ts = ts_now & ~ TSMASK ;
ts | = options ;
if ( ts > ts_now ) {
ts > > = TSBITS ;
ts - - ;
ts < < = TSBITS ;
ts | = options ;
}
return ts ;
}
2006-11-14 20:51:49 -08:00
/*
 * Compute the secure sequence number.
 *
 * The result is:
 *   HASH(sec1, saddr, sport, daddr, dport) + sseq + (count * 2^24)
 *	+ ((HASH(sec2, saddr, sport, daddr, dport, count) + data) % 2^24)
 *
 * sseq is the peer's sequence number and count is the counter supplied
 * by the caller.  As an extra hack, the small "data" value (which
 * encodes the MSS) is added to the second hash before masking.
 */
static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
				   __be16 dport, __u32 sseq, __u32 count,
				   __u32 data)
{
	const __u32 tuple_hash = cookie_hash(saddr, daddr, sport, dport, 0, 0);
	const __u32 count_hash = cookie_hash(saddr, daddr, sport, dport,
					     count, 1);

	return tuple_hash + sseq + (count << COOKIEBITS) +
	       ((count_hash + data) & COOKIEMASK);
}
/*
* This retrieves the small " data " value from the syncookie .
* If the syncookie is bad , the data returned will be out of
* range . This must be checked by the caller .
*
* The count value used to generate the cookie must be within
* " maxdiff " if the current ( passed - in ) " count " . The return value
* is ( __u32 ) - 1 if this test fails .
*/
2006-11-14 20:51:49 -08:00
static __u32 check_tcp_syn_cookie ( __u32 cookie , __be32 saddr , __be32 daddr ,
__be16 sport , __be16 dport , __u32 sseq ,
2005-04-16 15:20:36 -07:00
__u32 count , __u32 maxdiff )
{
__u32 diff ;
/* Strip away the layers from the cookie */
cookie - = cookie_hash ( saddr , daddr , sport , dport , 0 , 0 ) + sseq ;
/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */
diff = ( count - ( cookie > > COOKIEBITS ) ) & ( ( __u32 ) - 1 > > COOKIEBITS ) ;
if ( diff > = maxdiff )
return ( __u32 ) - 1 ;
return ( cookie -
cookie_hash ( saddr , daddr , sport , dport , count - diff , 1 ) )
& COOKIEMASK ; /* Leaving the data behind */
}
2007-02-09 23:24:47 +09:00
/*
2005-04-16 15:20:36 -07:00
* This table has to be sorted and terminated with ( __u16 ) - 1.
* XXX generate a better table .
* Unresolved Issues : HIPPI with a 64 k MSS is not well supported .
*/
static __u16 const msstab [ ] = {
64 - 1 ,
2007-02-09 23:24:47 +09:00
256 - 1 ,
2005-04-16 15:20:36 -07:00
512 - 1 ,
536 - 1 ,
2007-02-09 23:24:47 +09:00
1024 - 1 ,
2005-04-16 15:20:36 -07:00
1440 - 1 ,
1460 - 1 ,
4312 - 1 ,
( __u16 ) - 1
} ;
/* The number doesn't include the -1 terminator */
# define NUM_MSS (ARRAY_SIZE(msstab) - 1)
/*
 * Generate a syncookie for the SYN in @skb.
 *
 * @mssp points to the MSS; on return it holds the table value that was
 * encoded in the cookie.  The table is walked while the MSS is greater
 * than the next entry, so an MSS below the first entry still selects
 * the first entry.
 *
 * Also records the current jiffies in the socket's last_synq_overflow
 * and bumps the SYNCOOKIESSENT counter.
 */
__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	const __u16 wanted = *mssp;
	int idx = 0;

	tcp_sk(sk)->last_synq_overflow = jiffies;

	/* XXX sort msstab[] by probability?  Binary search? */
	while (wanted > msstab[idx + 1])
		idx++;
	*mssp = msstab[idx] + 1;

	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);

	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
				     th->source, th->dest, ntohl(th->seq),
				     jiffies / (HZ * 60), idx);
}
2007-02-09 23:24:47 +09:00
/*
 * This (misnamed) value is the age of syncookie which is permitted.
 * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
 * sysctl_tcp_retries1.  It's a rather complicated formula (exponential
 * backoff) to compute at runtime so it's currently hardcoded here.
 *
 * cookie_check() passes it as the "maxdiff" argument of
 * check_tcp_syn_cookie(), together with a count of jiffies / (HZ * 60).
 */
# define COUNTER_TRIES 4
2007-02-09 23:24:47 +09:00
/*
 * Check whether an ACK sequence number is a valid syncookie.
 *
 * @cookie is the candidate cookie value; addresses, ports and the
 * sequence number (minus one) are taken from the headers of @skb.
 *
 * Returns the decoded MSS if the cookie is valid, or 0 if not.
 */
static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	__u32 idx;

	idx = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
				   th->source, th->dest, ntohl(th->seq) - 1,
				   jiffies / (HZ * 60), COUNTER_TRIES);
	if (idx >= NUM_MSS)
		return 0;

	return msstab[idx] + 1;
}
static inline struct sock * get_cookie_sock ( struct sock * sk , struct sk_buff * skb ,
2005-06-18 22:47:21 -07:00
struct request_sock * req ,
2005-04-16 15:20:36 -07:00
struct dst_entry * dst )
{
2005-12-13 23:15:52 -08:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-04-16 15:20:36 -07:00
struct sock * child ;
2005-12-13 23:15:52 -08:00
child = icsk - > icsk_af_ops - > syn_recv_sock ( sk , skb , req , dst ) ;
2005-04-16 15:20:36 -07:00
if ( child )
2005-08-09 20:10:42 -07:00
inet_csk_reqsk_queue_add ( sk , req , child ) ;
2005-04-16 15:20:36 -07:00
else
2005-06-18 22:47:21 -07:00
reqsk_free ( req ) ;
2005-04-16 15:20:36 -07:00
return child ;
}
2008-04-10 03:12:40 -07:00
/*
* when syncookies are in effect and tcp timestamps are enabled we stored
* additional tcp options in the timestamp .
* This extracts these options from the timestamp echo .
*
* The lowest 4 bits are for snd_wscale
* The next 4 lsb are for rcv_wscale
* The next lsb is for sack_ok
*/
void cookie_check_timestamp ( struct tcp_options_received * tcp_opt )
{
/* echoed timestamp, 9 lowest bits contain options */
u32 options = tcp_opt - > rcv_tsecr & TSMASK ;
tcp_opt - > snd_wscale = options & 0xf ;
options > > = 4 ;
tcp_opt - > rcv_wscale = options & 0xf ;
tcp_opt - > sack_ok = ( options > > 4 ) & 0x1 ;
if ( tcp_opt - > sack_ok )
tcp_sack_reset ( tcp_opt ) ;
if ( tcp_opt - > snd_wscale | | tcp_opt - > rcv_wscale )
tcp_opt - > wscale_ok = 1 ;
}
EXPORT_SYMBOL ( cookie_check_timestamp ) ;
2005-04-16 15:20:36 -07:00
struct sock * cookie_v4_check ( struct sock * sk , struct sk_buff * skb ,
struct ip_options * opt )
{
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
struct inet_request_sock * ireq ;
struct tcp_request_sock * treq ;
2005-04-16 15:20:36 -07:00
struct tcp_sock * tp = tcp_sk ( sk ) ;
2007-04-10 21:04:22 -07:00
const struct tcphdr * th = tcp_hdr ( skb ) ;
__u32 cookie = ntohl ( th - > ack_seq ) - 1 ;
2005-04-16 15:20:36 -07:00
struct sock * ret = sk ;
2007-02-09 23:24:47 +09:00
struct request_sock * req ;
int mss ;
struct rtable * rt ;
2005-04-16 15:20:36 -07:00
__u8 rcv_wscale ;
2008-04-10 03:12:40 -07:00
struct tcp_options_received tcp_opt ;
2005-04-16 15:20:36 -07:00
2007-04-10 21:04:22 -07:00
if ( ! sysctl_tcp_syncookies | | ! th - > ack )
2005-04-16 15:20:36 -07:00
goto out ;
2007-02-09 23:24:47 +09:00
if ( time_after ( jiffies , tp - > last_synq_overflow + TCP_TIMEOUT_INIT ) | |
2005-04-16 15:20:36 -07:00
( mss = cookie_check ( skb , cookie ) ) = = 0 ) {
2007-02-09 23:24:47 +09:00
NET_INC_STATS_BH ( LINUX_MIB_SYNCOOKIESFAILED ) ;
2005-04-16 15:20:36 -07:00
goto out ;
}
NET_INC_STATS_BH ( LINUX_MIB_SYNCOOKIESRECV ) ;
2008-04-10 03:12:40 -07:00
/* check for timestamp cookie support */
memset ( & tcp_opt , 0 , sizeof ( tcp_opt ) ) ;
tcp_parse_options ( skb , & tcp_opt , 0 ) ;
if ( tcp_opt . saw_tstamp )
cookie_check_timestamp ( & tcp_opt ) ;
2005-04-16 15:20:36 -07:00
ret = NULL ;
2008-06-10 12:39:35 -07:00
req = inet_reqsk_alloc ( & tcp_request_sock_ops ) ; /* for safety */
2005-04-16 15:20:36 -07:00
if ( ! req )
goto out ;
2006-07-24 23:32:50 -07:00
if ( security_inet_conn_request ( sk , skb , req ) ) {
reqsk_free ( req ) ;
goto out ;
}
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
ireq = inet_rsk ( req ) ;
treq = tcp_rsk ( req ) ;
2007-04-10 21:04:22 -07:00
treq - > rcv_isn = ntohl ( th - > seq ) - 1 ;
2007-02-09 23:24:47 +09:00
treq - > snt_isn = cookie ;
2005-04-16 15:20:36 -07:00
req - > mss = mss ;
2007-04-10 21:04:22 -07:00
ireq - > rmt_port = th - > source ;
2007-04-20 22:47:35 -07:00
ireq - > loc_addr = ip_hdr ( skb ) - > daddr ;
ireq - > rmt_addr = ip_hdr ( skb ) - > saddr ;
2008-04-10 03:12:40 -07:00
ireq - > snd_wscale = tcp_opt . snd_wscale ;
ireq - > rcv_wscale = tcp_opt . rcv_wscale ;
ireq - > sack_ok = tcp_opt . sack_ok ;
ireq - > wscale_ok = tcp_opt . wscale_ok ;
ireq - > tstamp_ok = tcp_opt . saw_tstamp ;
req - > ts_recent = tcp_opt . saw_tstamp ? tcp_opt . rcv_tsval : 0 ;
2005-04-16 15:20:36 -07:00
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again ( see RFC1122 4.2 .3 .8 )
*/
if ( opt & & opt - > optlen ) {
int opt_size = sizeof ( struct ip_options ) + opt - > optlen ;
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
ireq - > opt = kmalloc ( opt_size , GFP_ATOMIC ) ;
if ( ireq - > opt ! = NULL & & ip_options_echo ( ireq - > opt , skb ) ) {
kfree ( ireq - > opt ) ;
ireq - > opt = NULL ;
2005-04-16 15:20:36 -07:00
}
}
2007-02-09 23:24:47 +09:00
req - > expires = 0UL ;
req - > retrans = 0 ;
2005-04-16 15:20:36 -07:00
/*
* We need to lookup the route here to get at the correct
* window size . We should better make sure that the window size
* hasn ' t changed since we received the original syn , but I see
2007-02-09 23:24:47 +09:00
* no easy way to do this .
2005-04-16 15:20:36 -07:00
*/
{
struct flowi fl = { . nl_u = { . ip4_u =
{ . daddr = ( ( opt & & opt - > srr ) ?
opt - > faddr :
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-18 22:46:52 -07:00
ireq - > rmt_addr ) ,
. saddr = ireq - > loc_addr ,
2005-04-16 15:20:36 -07:00
. tos = RT_CONN_FLAGS ( sk ) } } ,
. proto = IPPROTO_TCP ,
. uli_u = { . ports =
2007-04-10 21:04:22 -07:00
{ . sport = th - > dest ,
. dport = th - > source } } } ;
2006-07-24 23:32:50 -07:00
security_req_classify_flow ( req , & fl ) ;
2008-01-22 22:07:34 -08:00
if ( ip_route_output_key ( & init_net , & rt , & fl ) ) {
2005-06-18 22:47:21 -07:00
reqsk_free ( req ) ;
2007-02-09 23:24:47 +09:00
goto out ;
2005-04-16 15:20:36 -07:00
}
}
/* Try to redo what tcp_v4_send_synack did. */
2008-04-10 03:12:40 -07:00
req - > window_clamp = tp - > window_clamp ? : dst_metric ( & rt - > u . dst , RTAX_WINDOW ) ;
2005-04-16 15:20:36 -07:00
tcp_select_initial_window ( tcp_full_space ( sk ) , req - > mss ,
2007-02-09 23:24:47 +09:00
& req - > rcv_wnd , & req - > window_clamp ,
2008-04-10 03:12:40 -07:00
ireq - > wscale_ok , & rcv_wscale ) ;
2007-02-09 23:24:47 +09:00
ireq - > rcv_wscale = rcv_wscale ;
2005-04-16 15:20:36 -07:00
ret = get_cookie_sock ( sk , skb , req , & rt - > u . dst ) ;
out : return ret ;
}