2020-01-22 03:56:16 +03:00
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
*
* Copyright ( c ) 2017 - 2019 , Intel Corporation .
*/
# include <linux/kernel.h>
# include <net/tcp.h>
# include <net/mptcp.h>
# include "protocol.h"
void mptcp_parse_option ( const unsigned char * ptr , int opsize ,
struct tcp_options_received * opt_rx )
{
struct mptcp_options_received * mp_opt = & opt_rx - > mptcp ;
u8 subtype = * ptr > > 4 ;
2020-01-22 03:56:24 +03:00
int expected_opsize ;
2020-01-22 03:56:16 +03:00
u8 version ;
u8 flags ;
switch ( subtype ) {
case MPTCPOPT_MP_CAPABLE :
if ( opsize ! = TCPOLEN_MPTCP_MPC_SYN & &
opsize ! = TCPOLEN_MPTCP_MPC_ACK )
break ;
version = * ptr + + & MPTCP_VERSION_MASK ;
if ( version ! = MPTCP_SUPPORTED_VERSION )
break ;
flags = * ptr + + ;
if ( ! ( ( flags & MPTCP_CAP_FLAG_MASK ) = = MPTCP_CAP_HMAC_SHA1 ) | |
( flags & MPTCP_CAP_EXTENSIBILITY ) )
break ;
/* RFC 6824, Section 3.1:
* " For the Checksum Required bit (labeled " A " ), if either
* host requires the use of checksums , checksums MUST be used .
* In other words , the only way for checksums not to be used
* is if both hosts in their SYNs set A = 0. "
*
* Section 3.3 .0 :
* " If a checksum is not present when its use has been
* negotiated , the receiver MUST close the subflow with a RST as
* it is considered broken . "
*
* We don ' t implement DSS checksum - fall back to TCP .
*/
if ( flags & MPTCP_CAP_CHECKSUM_REQD )
break ;
mp_opt - > mp_capable = 1 ;
mp_opt - > sndr_key = get_unaligned_be64 ( ptr ) ;
ptr + = 8 ;
if ( opsize = = TCPOLEN_MPTCP_MPC_ACK ) {
mp_opt - > rcvr_key = get_unaligned_be64 ( ptr ) ;
ptr + = 8 ;
pr_debug ( " MP_CAPABLE sndr=%llu, rcvr=%llu " ,
mp_opt - > sndr_key , mp_opt - > rcvr_key ) ;
} else {
pr_debug ( " MP_CAPABLE sndr=%llu " , mp_opt - > sndr_key ) ;
}
break ;
case MPTCPOPT_DSS :
pr_debug ( " DSS " ) ;
2020-01-22 03:56:24 +03:00
ptr + + ;
flags = ( * ptr + + ) & MPTCP_DSS_FLAG_MASK ;
mp_opt - > data_fin = ( flags & MPTCP_DSS_DATA_FIN ) ! = 0 ;
mp_opt - > dsn64 = ( flags & MPTCP_DSS_DSN64 ) ! = 0 ;
mp_opt - > use_map = ( flags & MPTCP_DSS_HAS_MAP ) ! = 0 ;
mp_opt - > ack64 = ( flags & MPTCP_DSS_ACK64 ) ! = 0 ;
mp_opt - > use_ack = ( flags & MPTCP_DSS_HAS_ACK ) ;
pr_debug ( " data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d " ,
mp_opt - > data_fin , mp_opt - > dsn64 ,
mp_opt - > use_map , mp_opt - > ack64 ,
mp_opt - > use_ack ) ;
expected_opsize = TCPOLEN_MPTCP_DSS_BASE ;
if ( mp_opt - > use_ack ) {
if ( mp_opt - > ack64 )
expected_opsize + = TCPOLEN_MPTCP_DSS_ACK64 ;
else
expected_opsize + = TCPOLEN_MPTCP_DSS_ACK32 ;
}
if ( mp_opt - > use_map ) {
if ( mp_opt - > dsn64 )
expected_opsize + = TCPOLEN_MPTCP_DSS_MAP64 ;
else
expected_opsize + = TCPOLEN_MPTCP_DSS_MAP32 ;
}
/* RFC 6824, Section 3.3:
* If a checksum is present , but its use had
* not been negotiated in the MP_CAPABLE handshake ,
* the checksum field MUST be ignored .
*/
if ( opsize ! = expected_opsize & &
opsize ! = expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM )
break ;
2020-01-22 03:56:16 +03:00
mp_opt - > dss = 1 ;
2020-01-22 03:56:24 +03:00
if ( mp_opt - > use_ack ) {
if ( mp_opt - > ack64 ) {
mp_opt - > data_ack = get_unaligned_be64 ( ptr ) ;
ptr + = 8 ;
} else {
mp_opt - > data_ack = get_unaligned_be32 ( ptr ) ;
ptr + = 4 ;
}
pr_debug ( " data_ack=%llu " , mp_opt - > data_ack ) ;
}
if ( mp_opt - > use_map ) {
if ( mp_opt - > dsn64 ) {
mp_opt - > data_seq = get_unaligned_be64 ( ptr ) ;
ptr + = 8 ;
} else {
mp_opt - > data_seq = get_unaligned_be32 ( ptr ) ;
ptr + = 4 ;
}
mp_opt - > subflow_seq = get_unaligned_be32 ( ptr ) ;
ptr + = 4 ;
mp_opt - > data_len = get_unaligned_be16 ( ptr ) ;
ptr + = 2 ;
pr_debug ( " data_seq=%llu subflow_seq=%u data_len=%u " ,
mp_opt - > data_seq , mp_opt - > subflow_seq ,
mp_opt - > data_len ) ;
}
2020-01-22 03:56:16 +03:00
break ;
default :
break ;
}
}
2020-01-22 03:56:18 +03:00
void mptcp_get_options ( const struct sk_buff * skb ,
struct tcp_options_received * opt_rx )
{
const unsigned char * ptr ;
const struct tcphdr * th = tcp_hdr ( skb ) ;
int length = ( th - > doff * 4 ) - sizeof ( struct tcphdr ) ;
ptr = ( const unsigned char * ) ( th + 1 ) ;
while ( length > 0 ) {
int opcode = * ptr + + ;
int opsize ;
switch ( opcode ) {
case TCPOPT_EOL :
return ;
case TCPOPT_NOP : /* Ref: RFC 793 section 3.1 */
length - - ;
continue ;
default :
opsize = * ptr + + ;
if ( opsize < 2 ) /* "silly options" */
return ;
if ( opsize > length )
return ; /* don't parse partial options */
if ( opcode = = TCPOPT_MPTCP )
mptcp_parse_option ( ptr , opsize , opt_rx ) ;
ptr + = opsize - 2 ;
length - = opsize ;
}
}
}
bool mptcp_syn_options ( struct sock * sk , unsigned int * size ,
struct mptcp_out_options * opts )
{
struct mptcp_subflow_context * subflow = mptcp_subflow_ctx ( sk ) ;
if ( subflow - > request_mptcp ) {
pr_debug ( " local_key=%llu " , subflow - > local_key ) ;
opts - > suboptions = OPTION_MPTCP_MPC_SYN ;
opts - > sndr_key = subflow - > local_key ;
* size = TCPOLEN_MPTCP_MPC_SYN ;
return true ;
}
return false ;
}
void mptcp_rcv_synsent ( struct sock * sk )
{
struct mptcp_subflow_context * subflow = mptcp_subflow_ctx ( sk ) ;
struct tcp_sock * tp = tcp_sk ( sk ) ;
pr_debug ( " subflow=%p " , subflow ) ;
if ( subflow - > request_mptcp & & tp - > rx_opt . mptcp . mp_capable ) {
subflow - > mp_capable = 1 ;
subflow - > remote_key = tp - > rx_opt . mptcp . sndr_key ;
} else {
tcp_sk ( sk ) - > is_mptcp = 0 ;
}
}
2020-01-22 03:56:23 +03:00
static bool mptcp_established_options_mp ( struct sock * sk , unsigned int * size ,
unsigned int remaining ,
struct mptcp_out_options * opts )
2020-01-22 03:56:18 +03:00
{
struct mptcp_subflow_context * subflow = mptcp_subflow_ctx ( sk ) ;
2020-01-22 03:56:23 +03:00
if ( ! subflow - > fourth_ack ) {
2020-01-22 03:56:18 +03:00
opts - > suboptions = OPTION_MPTCP_MPC_ACK ;
opts - > sndr_key = subflow - > local_key ;
opts - > rcvr_key = subflow - > remote_key ;
* size = TCPOLEN_MPTCP_MPC_ACK ;
subflow - > fourth_ack = 1 ;
pr_debug ( " subflow=%p, local_key=%llu, remote_key=%llu " ,
subflow , subflow - > local_key , subflow - > remote_key ) ;
return true ;
}
return false ;
}
2020-01-22 03:56:23 +03:00
static void mptcp_write_data_fin ( struct mptcp_subflow_context * subflow ,
struct mptcp_ext * ext )
{
ext - > data_fin = 1 ;
if ( ! ext - > use_map ) {
/* RFC6824 requires a DSS mapping with specific values
* if DATA_FIN is set but no data payload is mapped
*/
ext - > use_map = 1 ;
ext - > dsn64 = 1 ;
ext - > data_seq = mptcp_sk ( subflow - > conn ) - > write_seq ;
ext - > subflow_seq = 0 ;
ext - > data_len = 1 ;
} else {
/* If there's an existing DSS mapping, DATA_FIN consumes
* 1 additional byte of mapping space .
*/
ext - > data_len + + ;
}
}
static bool mptcp_established_options_dss ( struct sock * sk , struct sk_buff * skb ,
unsigned int * size ,
unsigned int remaining ,
struct mptcp_out_options * opts )
{
struct mptcp_subflow_context * subflow = mptcp_subflow_ctx ( sk ) ;
unsigned int dss_size = 0 ;
struct mptcp_ext * mpext ;
struct mptcp_sock * msk ;
unsigned int ack_size ;
u8 tcp_fin ;
if ( skb ) {
mpext = mptcp_get_ext ( skb ) ;
tcp_fin = TCP_SKB_CB ( skb ) - > tcp_flags & TCPHDR_FIN ;
} else {
mpext = NULL ;
tcp_fin = 0 ;
}
if ( ! skb | | ( mpext & & mpext - > use_map ) | | tcp_fin ) {
unsigned int map_size ;
map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64 ;
remaining - = map_size ;
dss_size = map_size ;
if ( mpext )
opts - > ext_copy = * mpext ;
if ( skb & & tcp_fin & &
subflow - > conn - > sk_state ! = TCP_ESTABLISHED )
mptcp_write_data_fin ( subflow , & opts - > ext_copy ) ;
}
ack_size = TCPOLEN_MPTCP_DSS_ACK64 ;
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if ( dss_size = = 0 )
ack_size + = TCPOLEN_MPTCP_DSS_BASE ;
dss_size + = ack_size ;
msk = mptcp_sk ( mptcp_subflow_ctx ( sk ) - > conn ) ;
if ( msk ) {
opts - > ext_copy . data_ack = msk - > ack_seq ;
} else {
mptcp_crypto_key_sha ( mptcp_subflow_ctx ( sk ) - > remote_key ,
NULL , & opts - > ext_copy . data_ack ) ;
opts - > ext_copy . data_ack + + ;
}
opts - > ext_copy . ack64 = 1 ;
opts - > ext_copy . use_ack = 1 ;
* size = ALIGN ( dss_size , 4 ) ;
return true ;
}
bool mptcp_established_options ( struct sock * sk , struct sk_buff * skb ,
unsigned int * size , unsigned int remaining ,
struct mptcp_out_options * opts )
{
unsigned int opt_size = 0 ;
bool ret = false ;
if ( mptcp_established_options_mp ( sk , & opt_size , remaining , opts ) )
ret = true ;
else if ( mptcp_established_options_dss ( sk , skb , & opt_size , remaining ,
opts ) )
ret = true ;
/* we reserved enough space for the above options, and exceeding the
* TCP option space would be fatal
*/
if ( WARN_ON_ONCE ( opt_size > remaining ) )
return false ;
* size + = opt_size ;
remaining - = opt_size ;
return ret ;
}
2020-01-22 03:56:18 +03:00
bool mptcp_synack_options ( const struct request_sock * req , unsigned int * size ,
struct mptcp_out_options * opts )
{
struct mptcp_subflow_request_sock * subflow_req = mptcp_subflow_rsk ( req ) ;
if ( subflow_req - > mp_capable ) {
opts - > suboptions = OPTION_MPTCP_MPC_SYNACK ;
opts - > sndr_key = subflow_req - > local_key ;
* size = TCPOLEN_MPTCP_MPC_SYNACK ;
pr_debug ( " subflow_req=%p, local_key=%llu " ,
subflow_req , subflow_req - > local_key ) ;
return true ;
}
return false ;
}
2020-01-22 03:56:24 +03:00
void mptcp_incoming_options ( struct sock * sk , struct sk_buff * skb ,
struct tcp_options_received * opt_rx )
{
struct mptcp_options_received * mp_opt ;
struct mptcp_ext * mpext ;
mp_opt = & opt_rx - > mptcp ;
if ( ! mp_opt - > dss )
return ;
mpext = skb_ext_add ( skb , SKB_EXT_MPTCP ) ;
if ( ! mpext )
return ;
memset ( mpext , 0 , sizeof ( * mpext ) ) ;
if ( mp_opt - > use_map ) {
mpext - > data_seq = mp_opt - > data_seq ;
mpext - > subflow_seq = mp_opt - > subflow_seq ;
mpext - > data_len = mp_opt - > data_len ;
mpext - > use_map = 1 ;
mpext - > dsn64 = mp_opt - > dsn64 ;
}
if ( mp_opt - > use_ack ) {
mpext - > data_ack = mp_opt - > data_ack ;
mpext - > use_ack = 1 ;
mpext - > ack64 = mp_opt - > ack64 ;
}
mpext - > data_fin = mp_opt - > data_fin ;
}
2020-01-22 03:56:16 +03:00
void mptcp_write_options ( __be32 * ptr , struct mptcp_out_options * opts )
{
if ( ( OPTION_MPTCP_MPC_SYN |
2020-01-22 03:56:18 +03:00
OPTION_MPTCP_MPC_SYNACK |
2020-01-22 03:56:16 +03:00
OPTION_MPTCP_MPC_ACK ) & opts - > suboptions ) {
u8 len ;
if ( OPTION_MPTCP_MPC_SYN & opts - > suboptions )
len = TCPOLEN_MPTCP_MPC_SYN ;
2020-01-22 03:56:18 +03:00
else if ( OPTION_MPTCP_MPC_SYNACK & opts - > suboptions )
len = TCPOLEN_MPTCP_MPC_SYNACK ;
2020-01-22 03:56:16 +03:00
else
len = TCPOLEN_MPTCP_MPC_ACK ;
* ptr + + = htonl ( ( TCPOPT_MPTCP < < 24 ) | ( len < < 16 ) |
( MPTCPOPT_MP_CAPABLE < < 12 ) |
( MPTCP_SUPPORTED_VERSION < < 8 ) |
MPTCP_CAP_HMAC_SHA1 ) ;
put_unaligned_be64 ( opts - > sndr_key , ptr ) ;
ptr + = 2 ;
if ( OPTION_MPTCP_MPC_ACK & opts - > suboptions ) {
put_unaligned_be64 ( opts - > rcvr_key , ptr ) ;
ptr + = 2 ;
}
}
2020-01-22 03:56:23 +03:00
if ( opts - > ext_copy . use_ack | | opts - > ext_copy . use_map ) {
struct mptcp_ext * mpext = & opts - > ext_copy ;
u8 len = TCPOLEN_MPTCP_DSS_BASE ;
u8 flags = 0 ;
if ( mpext - > use_ack ) {
len + = TCPOLEN_MPTCP_DSS_ACK64 ;
flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64 ;
}
if ( mpext - > use_map ) {
len + = TCPOLEN_MPTCP_DSS_MAP64 ;
/* Use only 64-bit mapping flags for now, add
* support for optional 32 - bit mappings later .
*/
flags | = MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64 ;
if ( mpext - > data_fin )
flags | = MPTCP_DSS_DATA_FIN ;
}
* ptr + + = htonl ( ( TCPOPT_MPTCP < < 24 ) |
( len < < 16 ) |
( MPTCPOPT_DSS < < 12 ) |
( flags ) ) ;
if ( mpext - > use_ack ) {
put_unaligned_be64 ( mpext - > data_ack , ptr ) ;
ptr + = 2 ;
}
if ( mpext - > use_map ) {
put_unaligned_be64 ( mpext - > data_seq , ptr ) ;
ptr + = 2 ;
put_unaligned_be32 ( mpext - > subflow_seq , ptr ) ;
ptr + = 1 ;
put_unaligned_be32 ( mpext - > data_len < < 16 |
TCPOPT_NOP < < 8 | TCPOPT_NOP , ptr ) ;
}
}
2020-01-22 03:56:16 +03:00
}