2005-08-10 07:14:34 +04:00
/*
* net / dccp / minisocks . c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo < acme @ conectiva . com . br >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# include <linux/dccp.h>
2006-11-10 17:32:01 +03:00
# include <linux/kernel.h>
2005-08-10 07:14:34 +04:00
# include <linux/skbuff.h>
# include <linux/timer.h>
# include <net/sock.h>
# include <net/xfrm.h>
# include <net/inet_timewait_sock.h>
2005-09-18 11:17:51 +04:00
# include "ackvec.h"
2005-08-10 07:14:34 +04:00
# include "ccid.h"
# include "dccp.h"
[DCCP]: Initial feature negotiation implementation
Still needs more work, but boots and doesn't crash, even
does some negotiation!
18:38:52.174934 127.0.0.1.43458 > 127.0.0.1.5001: request <change_l ack_ratio 2, change_r ccid 2, change_l ccid 2>
18:38:52.218526 127.0.0.1.5001 > 127.0.0.1.43458: response <nop, nop, change_l ack_ratio 2, confirm_r ccid 2 2, confirm_l ccid 2 2, confirm_r ack_ratio 2>
18:38:52.185398 127.0.0.1.43458 > 127.0.0.1.5001: <nop, confirm_r ack_ratio 2, ack_vector0 0x00, elapsed_time 212>
:-)
Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-21 04:43:56 +03:00
# include "feat.h"
2005-08-10 07:14:34 +04:00
2005-08-10 07:45:21 +04:00
struct inet_timewait_death_row dccp_death_row = {
. sysctl_max_tw_buckets = NR_FILE * 2 ,
. period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS ,
2007-04-26 12:37:44 +04:00
. death_lock = __SPIN_LOCK_UNLOCKED ( dccp_death_row . death_lock ) ,
2005-08-10 07:45:21 +04:00
. hashinfo = & dccp_hashinfo ,
. tw_timer = TIMER_INITIALIZER ( inet_twdr_hangman , 0 ,
( unsigned long ) & dccp_death_row ) ,
. twkill_work = __WORK_INITIALIZER ( dccp_death_row . twkill_work ,
2006-11-22 17:57:56 +03:00
inet_twdr_twkill_work ) ,
2005-08-10 07:45:21 +04:00
/* Short-time timewait calendar */
. twcal_hand = - 1 ,
. twcal_timer = TIMER_INITIALIZER ( inet_twdr_twcal_tick , 0 ,
( unsigned long ) & dccp_death_row ) ,
} ;
2005-12-14 10:24:16 +03:00
EXPORT_SYMBOL_GPL ( dccp_death_row ) ;
2007-10-05 01:43:42 +04:00
void dccp_minisock_init ( struct dccp_minisock * dmsk )
{
dmsk - > dccpms_sequence_window = sysctl_dccp_feat_sequence_window ;
}
2005-08-10 07:14:34 +04:00
void dccp_time_wait ( struct sock * sk , int state , int timeo )
{
2005-08-10 07:45:21 +04:00
struct inet_timewait_sock * tw = NULL ;
2005-08-10 07:14:34 +04:00
2005-08-10 07:45:21 +04:00
if ( dccp_death_row . tw_count < dccp_death_row . sysctl_max_tw_buckets )
tw = inet_twsk_alloc ( sk , state ) ;
if ( tw ! = NULL ) {
const struct inet_connection_sock * icsk = inet_csk ( sk ) ;
const int rto = ( icsk - > icsk_rto < < 2 ) - ( icsk - > icsk_rto > > 1 ) ;
2005-12-14 10:24:53 +03:00
# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if ( tw - > tw_family = = PF_INET6 ) {
const struct ipv6_pinfo * np = inet6_sk ( sk ) ;
struct inet6_timewait_sock * tw6 ;
2005-08-10 07:45:21 +04:00
2005-12-14 10:24:53 +03:00
tw - > tw_ipv6_offset = inet6_tw_offset ( sk - > sk_prot ) ;
tw6 = inet6_twsk ( ( struct sock * ) tw ) ;
ipv6_addr_copy ( & tw6 - > tw_v6_daddr , & np - > daddr ) ;
ipv6_addr_copy ( & tw6 - > tw_v6_rcv_saddr , & np - > rcv_saddr ) ;
tw - > tw_ipv6only = np - > ipv6only ;
}
# endif
2005-08-10 07:45:21 +04:00
/* Linkage updates. */
__inet_twsk_hashdance ( tw , sk , & dccp_hashinfo ) ;
/* Get the TIME_WAIT timeout firing. */
if ( timeo < rto )
timeo = rto ;
tw - > tw_timeout = DCCP_TIMEWAIT_LEN ;
if ( state = = DCCP_TIME_WAIT )
timeo = DCCP_TIMEWAIT_LEN ;
inet_twsk_schedule ( tw , & dccp_death_row , timeo ,
DCCP_TIMEWAIT_LEN ) ;
inet_twsk_put ( tw ) ;
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up . We ' ve got bigger problems than
* non - graceful socket closings .
*/
2006-11-20 23:39:23 +03:00
DCCP_WARN ( " time wait bucket table overflow \n " ) ;
2005-08-10 07:45:21 +04:00
}
dccp_done ( sk ) ;
2005-08-10 07:14:34 +04:00
}
struct sock * dccp_create_openreq_child ( struct sock * sk ,
const struct request_sock * req ,
const struct sk_buff * skb )
{
/*
* Step 3 : Process LISTEN state
*
2006-11-10 21:29:14 +03:00
* ( * Generate a new socket and switch to that socket * )
* Set S : = new socket for this port pair
2005-08-10 07:14:34 +04:00
*/
struct sock * newsk = inet_csk_clone ( sk , req , GFP_ATOMIC ) ;
if ( newsk ! = NULL ) {
2008-12-08 12:15:55 +03:00
struct dccp_request_sock * dreq = dccp_rsk ( req ) ;
2007-03-07 01:24:44 +03:00
struct inet_connection_sock * newicsk = inet_csk ( newsk ) ;
2005-08-10 07:14:34 +04:00
struct dccp_sock * newdp = dccp_sk ( newsk ) ;
2006-03-21 09:50:58 +03:00
struct dccp_minisock * newdmsk = dccp_msk ( newsk ) ;
2005-08-10 07:14:34 +04:00
[DCCP]: Handle timestamps on Request/Response exchange separately
In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) on the Request/Response
exchange. This patch addresses the following situation:
* timestamps are recorded on the listening socket;
* Responses are sent from dccp_request_sockets;
* suppose two connections reach the listening socket with very small time in between:
* the first timestamp value gets overwritten by the second connection request.
This is not really good, so this patch separates timestamps into
* those which are received by the server during the initial handshake (on dccp_request_sock);
* those which are received by the client or the client after connection establishment.
As before, a timestamp of 0 is regarded as indicating that no (meaningful) timestamp has been
received (in addition, a warning message is printed if hosts send 0-valued timestamps).
The timestamp-echoing now works as follows:
* when a timestamp is present on the initial Request, it is placed into dreq, due to the
call to dccp_parse_options in dccp_v{4,6}_conn_request;
* when a timestamp is present on the Ack leading from RESPOND => OPEN, it is copied over
from the request_sock into the child cocket in dccp_create_openreq_child;
* timestamps received on an (established) dccp_sock are treated as before.
Since Elapsed Time is measured in hundredths of milliseconds (13.2), the new dccp_timestamp()
function is used, as it is expected that the time between receiving the timestamp and
sending the timestamp echo will be very small against the wrap-around time. As a byproduct,
this allows smaller timestamping-time fields.
Furthermore, inserting the Timestamp Echo option has been taken out of the block starting with
'!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet (5.8 and 13.3).
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-12-13 17:37:19 +03:00
newdp - > dccps_role = DCCP_ROLE_SERVER ;
newdp - > dccps_hc_rx_ackvec = NULL ;
newdp - > dccps_service_list = NULL ;
newdp - > dccps_service = dreq - > dreq_service ;
newdp - > dccps_timestamp_echo = dreq - > dreq_timestamp_echo ;
newdp - > dccps_timestamp_time = dreq - > dreq_timestamp_time ;
newicsk - > icsk_rto = DCCP_TIMEOUT_INIT ;
2005-08-10 07:14:34 +04:00
2008-11-05 10:55:49 +03:00
INIT_LIST_HEAD ( & newdp - > dccps_featneg ) ;
2005-08-10 07:14:34 +04:00
/*
* Step 3 : Process LISTEN state
*
2006-11-10 21:29:14 +03:00
* Choose S . ISS ( initial seqno ) or set from Init Cookies
* Initialize S . GAR : = S . ISS
* Set S . ISR , S . GSR , S . SWL , S . SWH from packet or Init Cookies
2005-08-10 07:14:34 +04:00
*/
/* See dccp_v4_conn_request */
2006-03-21 09:50:58 +03:00
newdmsk - > dccpms_sequence_window = req - > rcv_wnd ;
2005-08-10 07:14:34 +04:00
2008-06-11 14:19:10 +04:00
newdp - > dccps_gar = newdp - > dccps_iss = dreq - > dreq_iss ;
2005-08-10 07:14:34 +04:00
dccp_update_gss ( newsk , dreq - > dreq_iss ) ;
2008-06-11 14:19:10 +04:00
newdp - > dccps_isr = dreq - > dreq_isr ;
dccp_update_gsr ( newsk , dreq - > dreq_isr ) ;
2005-08-21 12:36:45 +04:00
/*
* SWL and AWL are initially adjusted so that they are not less than
* the initial Sequence Numbers received and sent , respectively :
* SWL : = max ( GSR + 1 - floor ( W / 4 ) , ISR ) ,
* AWL : = max ( GSS - W ' + 1 , ISS ) .
* These adjustments MUST be applied only at the beginning of the
* connection .
*/
dccp_set_seqno ( & newdp - > dccps_swl ,
max48 ( newdp - > dccps_swl , newdp - > dccps_isr ) ) ;
dccp_set_seqno ( & newdp - > dccps_awl ,
max48 ( newdp - > dccps_awl , newdp - > dccps_iss ) ) ;
2008-12-08 12:15:55 +03:00
/*
* Activate features after initialising the sequence numbers ,
* since CCID initialisation may depend on GSS , ISR , ISS etc .
*/
if ( dccp_feat_activate_values ( newsk , & dreq - > dreq_featneg ) ) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free ( ) */
newsk - > sk_destruct = NULL ;
sk_free ( newsk ) ;
return NULL ;
}
2005-08-10 07:14:34 +04:00
dccp_init_xmit_timers ( newsk ) ;
DCCP_INC_STATS_BH ( DCCP_MIB_PASSIVEOPENS ) ;
}
return newsk ;
}
2005-12-14 10:24:16 +03:00
EXPORT_SYMBOL_GPL ( dccp_create_openreq_child ) ;
2006-12-10 21:01:18 +03:00
/*
2005-08-10 07:14:34 +04:00
* Process an incoming packet for RESPOND sockets represented
* as an request_sock .
*/
struct sock * dccp_check_req ( struct sock * sk , struct sk_buff * skb ,
struct request_sock * req ,
struct request_sock * * prev )
{
struct sock * child = NULL ;
2007-12-13 17:31:26 +03:00
struct dccp_request_sock * dreq = dccp_rsk ( req ) ;
2005-08-10 07:14:34 +04:00
/* Check for retransmitted REQUEST */
if ( dccp_hdr ( skb ) - > dccph_type = = DCCP_PKT_REQUEST ) {
2006-11-13 18:12:07 +03:00
if ( after48 ( DCCP_SKB_CB ( skb ) - > dccpd_seq , dreq - > dreq_isr ) ) {
2005-08-10 07:14:34 +04:00
dccp_pr_debug ( " Retransmitted REQUEST \n " ) ;
2006-11-13 18:12:07 +03:00
dreq - > dreq_isr = DCCP_SKB_CB ( skb ) - > dccpd_seq ;
/*
* Send another RESPONSE packet
* To protect against Request floods , increment retrans
* counter ( backoff , monitored by dccp_response_timer ) .
*/
req - > retrans + + ;
2008-02-29 22:43:03 +03:00
req - > rsk_ops - > rtx_syn_ack ( sk , req ) ;
2005-08-10 07:14:34 +04:00
}
/* Network Duplicate, discard packet */
return NULL ;
}
DCCP_SKB_CB ( skb ) - > dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR ;
if ( dccp_hdr ( skb ) - > dccph_type ! = DCCP_PKT_ACK & &
dccp_hdr ( skb ) - > dccph_type ! = DCCP_PKT_DATAACK )
goto drop ;
/* Invalid ACK */
2007-12-13 17:31:26 +03:00
if ( DCCP_SKB_CB ( skb ) - > dccpd_ack_seq ! = dreq - > dreq_iss ) {
2005-08-14 03:34:54 +04:00
dccp_pr_debug ( " Invalid ACK number: ack_seq=%llu, "
" dreq_iss=%llu \n " ,
2005-08-10 07:27:14 +04:00
( unsigned long long )
DCCP_SKB_CB ( skb ) - > dccpd_ack_seq ,
2007-12-13 17:31:26 +03:00
( unsigned long long ) dreq - > dreq_iss ) ;
2005-08-10 07:14:34 +04:00
goto drop ;
}
2007-12-13 17:31:26 +03:00
if ( dccp_parse_options ( sk , dreq , skb ) )
goto drop ;
2005-12-14 10:16:16 +03:00
child = inet_csk ( sk ) - > icsk_af_ops - > syn_recv_sock ( sk , skb , req , NULL ) ;
2005-08-10 07:14:34 +04:00
if ( child = = NULL )
goto listen_overflow ;
inet_csk_reqsk_queue_unlink ( sk , req , prev ) ;
inet_csk_reqsk_queue_removed ( sk , req ) ;
inet_csk_reqsk_queue_add ( sk , req , child ) ;
out :
return child ;
listen_overflow :
dccp_pr_debug ( " listen_overflow! \n " ) ;
DCCP_SKB_CB ( skb ) - > dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY ;
drop :
if ( dccp_hdr ( skb ) - > dccph_type ! = DCCP_PKT_RESET )
2006-11-15 06:07:45 +03:00
req - > rsk_ops - > send_reset ( sk , skb ) ;
2005-08-10 07:14:34 +04:00
inet_csk_reqsk_queue_drop ( sk , req , prev ) ;
goto out ;
}
2005-12-14 10:24:16 +03:00
EXPORT_SYMBOL_GPL ( dccp_check_req ) ;
2005-08-10 07:14:34 +04:00
/*
* Queue segment on the new socket if the new socket is active ,
* otherwise we just shortcircuit this and continue with
* the new socket .
*/
int dccp_child_process ( struct sock * parent , struct sock * child ,
struct sk_buff * skb )
{
int ret = 0 ;
const int state = child - > sk_state ;
if ( ! sock_owned_by_user ( child ) ) {
2005-08-14 03:34:54 +04:00
ret = dccp_rcv_state_process ( child , skb , dccp_hdr ( skb ) ,
skb - > len ) ;
2005-08-10 07:14:34 +04:00
/* Wakeup parent, send SIGIO */
if ( state = = DCCP_RESPOND & & child - > sk_state ! = state )
parent - > sk_data_ready ( parent , 0 ) ;
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
* socket does not protect us more .
*/
sk_add_backlog ( child , skb ) ;
}
bh_unlock_sock ( child ) ;
sock_put ( child ) ;
return ret ;
}
2005-12-14 10:24:16 +03:00
EXPORT_SYMBOL_GPL ( dccp_child_process ) ;
2006-11-10 17:32:01 +03:00
2008-08-07 10:50:04 +04:00
/*
 * dccp_reqsk_send_ack  -  request_sock "send ack" hook for DCCP
 * @sk:  unused
 * @skb: unused
 * @rsk: unused
 *
 * This hook must never be invoked; it only reports a bug if it is.
 *
 * NOTE(review): the message literal is kept exactly as found, including
 * its padding spaces - confirm the intended text.
 */
void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
			 struct request_sock *rsk)
{
	DCCP_BUG(" DCCP-ACK packets are never sent in LISTEN/RESPOND state ");
}

EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
2006-11-10 21:08:37 +03:00
2008-11-05 10:55:49 +03:00
int dccp_reqsk_init ( struct request_sock * req ,
struct dccp_sock const * dp , struct sk_buff const * skb )
2006-11-10 21:08:37 +03:00
{
[DCCP]: Handle timestamps on Request/Response exchange separately
In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) on the Request/Response
exchange. This patch addresses the following situation:
* timestamps are recorded on the listening socket;
* Responses are sent from dccp_request_sockets;
* suppose two connections reach the listening socket with very small time in between:
* the first timestamp value gets overwritten by the second connection request.
This is not really good, so this patch separates timestamps into
* those which are received by the server during the initial handshake (on dccp_request_sock);
* those which are received by the client or the client after connection establishment.
As before, a timestamp of 0 is regarded as indicating that no (meaningful) timestamp has been
received (in addition, a warning message is printed if hosts send 0-valued timestamps).
The timestamp-echoing now works as follows:
* when a timestamp is present on the initial Request, it is placed into dreq, due to the
call to dccp_parse_options in dccp_v{4,6}_conn_request;
* when a timestamp is present on the Ack leading from RESPOND => OPEN, it is copied over
from the request_sock into the child cocket in dccp_create_openreq_child;
* timestamps received on an (established) dccp_sock are treated as before.
Since Elapsed Time is measured in hundredths of milliseconds (13.2), the new dccp_timestamp()
function is used, as it is expected that the time between receiving the timestamp and
sending the timestamp echo will be very small against the wrap-around time. As a byproduct,
this allows smaller timestamping-time fields.
Furthermore, inserting the Timestamp Echo option has been taken out of the block starting with
'!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet (5.8 and 13.3).
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-12-13 17:37:19 +03:00
struct dccp_request_sock * dreq = dccp_rsk ( req ) ;
inet_rsk ( req ) - > rmt_port = dccp_hdr ( skb ) - > dccph_sport ;
2008-10-20 10:36:47 +04:00
inet_rsk ( req ) - > loc_port = dccp_hdr ( skb ) - > dccph_dport ;
[DCCP]: Handle timestamps on Request/Response exchange separately
In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) on the Request/Response
exchange. This patch addresses the following situation:
* timestamps are recorded on the listening socket;
* Responses are sent from dccp_request_sockets;
* suppose two connections reach the listening socket with very small time in between:
* the first timestamp value gets overwritten by the second connection request.
This is not really good, so this patch separates timestamps into
* those which are received by the server during the initial handshake (on dccp_request_sock);
* those which are received by the client or the client after connection establishment.
As before, a timestamp of 0 is regarded as indicating that no (meaningful) timestamp has been
received (in addition, a warning message is printed if hosts send 0-valued timestamps).
The timestamp-echoing now works as follows:
* when a timestamp is present on the initial Request, it is placed into dreq, due to the
call to dccp_parse_options in dccp_v{4,6}_conn_request;
* when a timestamp is present on the Ack leading from RESPOND => OPEN, it is copied over
from the request_sock into the child cocket in dccp_create_openreq_child;
* timestamps received on an (established) dccp_sock are treated as before.
Since Elapsed Time is measured in hundredths of milliseconds (13.2), the new dccp_timestamp()
function is used, as it is expected that the time between receiving the timestamp and
sending the timestamp echo will be very small against the wrap-around time. As a byproduct,
this allows smaller timestamping-time fields.
Furthermore, inserting the Timestamp Echo option has been taken out of the block starting with
'!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet (5.8 and 13.3).
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-12-13 17:37:19 +03:00
inet_rsk ( req ) - > acked = 0 ;
req - > rcv_wnd = sysctl_dccp_feat_sequence_window ;
dreq - > dreq_timestamp_echo = 0 ;
2008-11-05 10:55:49 +03:00
/* inherit feature negotiation options from listening socket */
return dccp_feat_clone_list ( & dp - > dccps_featneg , & dreq - > dreq_featneg ) ;
2006-11-10 21:08:37 +03:00
}
EXPORT_SYMBOL_GPL ( dccp_reqsk_init ) ;