Merge branch 'sctp-rfc7829'

Xin Long says:

====================
sctp: update from rfc7829

SCTP-PF was implemented based on a Internet-Draft in 2012:

  https://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05

It's been updated quite a few by rfc7829 in 2016.

This patchset adds the following features:

  1. add SCTP_ADDR_POTENTIALLY_FAILED notification
  2. add pf_expose per netns/sock/asoc
  3. add SCTP_EXPOSE_POTENTIALLY_FAILED_STATE sockopt
  4. add ps_retrans per netns/sock/asoc/transport
     (Primary Path Switchover)
  5. add spt_pathcpthld for SCTP_PEER_ADDR_THLDS sockopt

v1->v2:
  - See Patch 2/5 and Patch 5/5.
v2->v3:
  - See Patch 1/5, 2/5 and 3/5.
v3->v4:
  - See Patch 1/5, 2/5, 3/5 and 4/5.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-11-08 14:18:32 -08:00
commit 92da362c07
10 changed files with 266 additions and 38 deletions

View File

@ -2091,6 +2091,28 @@ pf_enable - INTEGER
Default: 1
pf_expose - INTEGER
Unset or enable/disable pf (pf is short for potentially failed) state
exposure. Applications can control the exposure of the PF path state
in the SCTP_PEER_ADDR_CHANGE event and the SCTP_GET_PEER_ADDR_INFO
sockopt. When it's unset, no SCTP_PEER_ADDR_CHANGE event with
SCTP_ADDR_PF state will be sent and a SCTP_PF-state transport info
can be got via SCTP_GET_PEER_ADDR_INFO sockopt; When it's enabled,
a SCTP_PEER_ADDR_CHANGE event will be sent for a transport becoming
SCTP_PF state and a SCTP_PF-state transport info can be got via
SCTP_GET_PEER_ADDR_INFO sockopt; When it's diabled, no
SCTP_PEER_ADDR_CHANGE event will be sent and it returns -EACCES when
trying to get a SCTP_PF-state transport info via SCTP_GET_PEER_ADDR_INFO
sockopt.
0: Unset pf state exposure, Compatible with old applications.
1: Disable pf state exposure.
2: Enable pf state exposure.
Default: 0
addip_noauth_enable - BOOLEAN
Dynamic Address Reconfiguration (ADD-IP) requires the use of
authentication to protect the operations of adding or removing new
@ -2173,6 +2195,18 @@ pf_retrans - INTEGER
Default: 0
ps_retrans - INTEGER
Primary.Switchover.Max.Retrans (PSMR), it's a tunable parameter coming
from section-5 "Primary Path Switchover" in rfc7829. The primary path
will be changed to another active path when the path error counter on
the old primary path exceeds PSMR, so that "the SCTP sender is allowed
to continue data transmission on a new working path even when the old
primary destination address becomes active again". Note this feature
is disabled by initializing 'ps_retrans' per netns as 0xffff by default,
and its value can't be less than 'pf_retrans' when changing by sysctl.
Default: 0xffff
rto_initial - INTEGER
The initial round trip timeout value in milliseconds that will be used
in calculating round trip times. This is the initial time interval

View File

@ -89,6 +89,12 @@ struct netns_sctp {
*/
int pf_retrans;
/* Primary.Switchover.Max.Retrans sysctl value
* taken from:
* https://tools.ietf.org/html/rfc7829
*/
int ps_retrans;
/*
* Disable Potentially-Failed feature, the feature is enabled by default
* pf_enable - 0 : disable pf
@ -96,6 +102,14 @@ struct netns_sctp {
*/
int pf_enable;
/*
* Disable Potentially-Failed state exposure, ignored by default
* pf_expose - 0 : compatible with old applications (by default)
* - 1 : disable pf state exposure
* - 2 : enable pf state exposure
*/
int pf_expose;
/*
* Policy for preforming sctp/socket accounting
* 0 - do socket level accounting, all assocs share sk_sndbuf

View File

@ -286,6 +286,18 @@ enum { SCTP_MAX_GABS = 16 };
* functions simpler to write.
*/
/* These are the values for pf exposure, UNUSED is to keep compatible with old
* applications by default.
*/
enum {
SCTP_PF_EXPOSE_UNSET,
SCTP_PF_EXPOSE_DISABLE,
SCTP_PF_EXPOSE_ENABLE,
};
#define SCTP_PF_EXPOSE_MAX SCTP_PF_EXPOSE_ENABLE
#define SCTP_PS_RETRANS_MAX 0xffff
/* These return values describe the success or failure of a number of
* routines which form the lower interface to SCTP_outqueue.
*/

View File

@ -184,7 +184,8 @@ struct sctp_sock {
__u32 flowlabel;
__u8 dscp;
int pf_retrans;
__u16 pf_retrans;
__u16 ps_retrans;
/* The initial Path MTU to use for new associations. */
__u32 pathmtu;
@ -215,6 +216,7 @@ struct sctp_sock {
__u32 adaptation_ind;
__u32 pd_point;
__u16 nodelay:1,
pf_expose:2,
reuse:1,
disable_fragments:1,
v4mapped:1,
@ -896,7 +898,9 @@ struct sctp_transport {
* and will be initialized from the assocs value. This can be changed
* using the SCTP_PEER_ADDR_THLDS socket option
*/
int pf_retrans;
__u16 pf_retrans;
/* Used for primary path switchover. */
__u16 ps_retrans;
/* PMTU : The current known path MTU. */
__u32 pathmtu;
@ -1772,7 +1776,9 @@ struct sctp_association {
* and will be initialized from the assocs value. This can be
* changed using the SCTP_PEER_ADDR_THLDS socket option
*/
int pf_retrans;
__u16 pf_retrans;
/* Used for primary path switchover. */
__u16 ps_retrans;
/* Maximum number of times the endpoint will retransmit INIT */
__u16 max_init_attempts;
@ -2053,6 +2059,7 @@ struct sctp_association {
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
temp:1, /* Is it a temporary association? */
pf_expose:2, /* Expose pf state? */
force_delay:1;
__u8 strreset_enable;

View File

@ -105,6 +105,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_DEFAULT_SNDINFO 34
#define SCTP_AUTH_DEACTIVATE_KEY 35
#define SCTP_REUSE_PORT 36
#define SCTP_PEER_ADDR_THLDS_V2 37
/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
@ -137,6 +138,8 @@ typedef __s32 sctp_assoc_t;
#define SCTP_ASCONF_SUPPORTED 128
#define SCTP_AUTH_SUPPORTED 129
#define SCTP_ECN_SUPPORTED 130
#define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131
#define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@ -410,6 +413,8 @@ enum sctp_spc_state {
SCTP_ADDR_ADDED,
SCTP_ADDR_MADE_PRIM,
SCTP_ADDR_CONFIRMED,
SCTP_ADDR_POTENTIALLY_FAILED,
#define SCTP_ADDR_PF SCTP_ADDR_POTENTIALLY_FAILED
};
@ -933,6 +938,7 @@ struct sctp_paddrinfo {
enum sctp_spinfo_state {
SCTP_INACTIVE,
SCTP_PF,
#define SCTP_POTENTIALLY_FAILED SCTP_PF
SCTP_ACTIVE,
SCTP_UNCONFIRMED,
SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */
@ -1082,6 +1088,15 @@ struct sctp_paddrthlds {
__u16 spt_pathpfthld;
};
/* Use a new structure with spt_pathcpthld for back compatibility */
struct sctp_paddrthlds_v2 {
sctp_assoc_t spt_assoc_id;
struct sockaddr_storage spt_address;
__u16 spt_pathmaxrxt;
__u16 spt_pathpfthld;
__u16 spt_pathcpthld;
};
/*
* Socket Option for Getting the Association/Stream-Specific PR-SCTP Status
*/

View File

@ -86,6 +86,8 @@ static struct sctp_association *sctp_association_init(
*/
asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
asoc->pf_retrans = sp->pf_retrans;
asoc->ps_retrans = sp->ps_retrans;
asoc->pf_expose = sp->pf_expose;
asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@ -627,6 +629,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* And the partial failure retrans threshold */
peer->pf_retrans = asoc->pf_retrans;
/* And the primary path switchover retrans threshold */
peer->ps_retrans = asoc->ps_retrans;
/* Initialize the peer's SACK delay timeout based on the
* association configured value.
@ -786,8 +790,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
enum sctp_transport_cmd command,
sctp_sn_error_t error)
{
int spc_state = SCTP_ADDR_AVAILABLE;
bool ulp_notify = true;
int spc_state = 0;
/* Record the transition on the transport. */
switch (command) {
@ -796,19 +800,13 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* to heartbeat success, report the SCTP_ADDR_CONFIRMED
* state to the user, otherwise report SCTP_ADDR_AVAILABLE.
*/
if (SCTP_UNCONFIRMED == transport->state &&
SCTP_HEARTBEAT_SUCCESS == error)
spc_state = SCTP_ADDR_CONFIRMED;
else
spc_state = SCTP_ADDR_AVAILABLE;
/* Don't inform ULP about transition from PF to
* active state and set cwnd to 1 MTU, see SCTP
* Quick failover draft section 5.1, point 5
*/
if (transport->state == SCTP_PF) {
if (transport->state == SCTP_PF &&
asoc->pf_expose != SCTP_PF_EXPOSE_ENABLE)
ulp_notify = false;
transport->cwnd = asoc->pathmtu;
}
else if (transport->state == SCTP_UNCONFIRMED &&
error == SCTP_HEARTBEAT_SUCCESS)
spc_state = SCTP_ADDR_CONFIRMED;
transport->state = SCTP_ACTIVE;
break;
@ -817,19 +815,21 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* to inactive state. Also, release the cached route since
* there may be a better route next time.
*/
if (transport->state != SCTP_UNCONFIRMED)
if (transport->state != SCTP_UNCONFIRMED) {
transport->state = SCTP_INACTIVE;
else {
spc_state = SCTP_ADDR_UNREACHABLE;
} else {
sctp_transport_dst_release(transport);
ulp_notify = false;
}
spc_state = SCTP_ADDR_UNREACHABLE;
break;
case SCTP_TRANSPORT_PF:
transport->state = SCTP_PF;
ulp_notify = false;
if (asoc->pf_expose != SCTP_PF_EXPOSE_ENABLE)
ulp_notify = false;
else
spc_state = SCTP_ADDR_POTENTIALLY_FAILED;
break;
default:

View File

@ -1217,9 +1217,15 @@ static int __net_init sctp_defaults_init(struct net *net)
/* Max.Burst - 4 */
net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
/* Disable of Primary Path Switchover by default */
net->sctp.ps_retrans = SCTP_PS_RETRANS_MAX;
/* Enable pf state by default */
net->sctp.pf_enable = 1;
/* Ignore pf exposure feature by default */
net->sctp.pf_expose = SCTP_PF_EXPOSE_UNSET;
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
* Max.Init.Retransmits - 8 attempts

View File

@ -567,6 +567,11 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
SCTP_FAILED_THRESHOLD);
}
if (transport->error_count > transport->ps_retrans &&
asoc->peer.primary_path == transport &&
asoc->peer.active_path != transport)
sctp_assoc_set_primary(asoc, asoc->peer.active_path);
/* E2) For the destination address for which the timer
* expires, set RTO <- RTO * 2 ("back off the timer"). The
* maximum value discussed in rule C7 above (RTO.max) may be

View File

@ -3943,18 +3943,22 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
*/
static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
char __user *optval,
unsigned int optlen)
unsigned int optlen, bool v2)
{
struct sctp_paddrthlds val;
struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
int len;
if (optlen < sizeof(struct sctp_paddrthlds))
len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
if (optlen < len)
return -EINVAL;
if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval,
sizeof(struct sctp_paddrthlds)))
if (copy_from_user(&val, optval, len))
return -EFAULT;
if (v2 && val.spt_pathpfthld > val.spt_pathcpthld)
return -EINVAL;
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
trans = sctp_addr_id2transport(sk, &val.spt_address,
val.spt_assoc_id);
@ -3963,6 +3967,8 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
if (v2)
trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
return 0;
@ -3978,17 +3984,23 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
transports) {
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
if (v2)
trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
}
if (val.spt_pathmaxrxt)
asoc->pathmaxrxt = val.spt_pathmaxrxt;
if (v2)
asoc->ps_retrans = val.spt_pathcpthld;
asoc->pf_retrans = val.spt_pathpfthld;
} else {
struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt)
sp->pathmaxrxt = val.spt_pathmaxrxt;
if (v2)
sp->ps_retrans = val.spt_pathcpthld;
sp->pf_retrans = val.spt_pathpfthld;
}
@ -4589,6 +4601,40 @@ out:
return retval;
}
static int sctp_setsockopt_pf_expose(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
if (optlen != sizeof(params))
goto out;
if (copy_from_user(&params, optval, optlen)) {
retval = -EFAULT;
goto out;
}
if (params.assoc_value > SCTP_PF_EXPOSE_MAX)
goto out;
asoc = sctp_id2assoc(sk, params.assoc_id);
if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
if (asoc)
asoc->pf_expose = params.assoc_value;
else
sctp_sk(sk)->pf_expose = params.assoc_value;
retval = 0;
out:
return retval;
}
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@ -4744,7 +4790,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
false);
break;
case SCTP_PEER_ADDR_THLDS_V2:
retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
true);
break;
case SCTP_RECVRCVINFO:
retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
@ -4798,6 +4849,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ECN_SUPPORTED:
retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
break;
case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
retval = sctp_setsockopt_pf_expose(sk, optval, optlen);
break;
default:
retval = -ENOPROTOOPT;
break;
@ -5041,6 +5095,8 @@ static int sctp_init_sock(struct sock *sk)
sp->hbinterval = net->sctp.hb_interval;
sp->pathmaxrxt = net->sctp.max_retrans_path;
sp->pf_retrans = net->sctp.pf_retrans;
sp->ps_retrans = net->sctp.ps_retrans;
sp->pf_expose = net->sctp.pf_expose;
sp->pathmtu = 0; /* allow default discovery */
sp->sackdelay = net->sctp.sack_timeout;
sp->sackfreq = 2;
@ -5521,8 +5577,16 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address,
pinfo.spinfo_assoc_id);
if (!transport)
return -EINVAL;
if (!transport) {
retval = -EINVAL;
goto out;
}
if (transport->state == SCTP_PF &&
transport->asoc->pf_expose == SCTP_PF_EXPOSE_DISABLE) {
retval = -EACCES;
goto out;
}
pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
pinfo.spinfo_state = transport->state;
@ -7170,18 +7234,19 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
* http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
*/
static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
char __user *optval,
int len,
int __user *optlen)
char __user *optval, int len,
int __user *optlen, bool v2)
{
struct sctp_paddrthlds val;
struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
int min;
if (len < sizeof(struct sctp_paddrthlds))
min = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
if (len < min)
return -EINVAL;
len = sizeof(struct sctp_paddrthlds);
if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len))
len = min;
if (copy_from_user(&val, optval, len))
return -EFAULT;
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
@ -7192,6 +7257,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
val.spt_pathmaxrxt = trans->pathmaxrxt;
val.spt_pathpfthld = trans->pf_retrans;
val.spt_pathcpthld = trans->ps_retrans;
goto out;
}
@ -7204,11 +7270,13 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
if (asoc) {
val.spt_pathpfthld = asoc->pf_retrans;
val.spt_pathmaxrxt = asoc->pathmaxrxt;
val.spt_pathcpthld = asoc->ps_retrans;
} else {
struct sctp_sock *sp = sctp_sk(sk);
val.spt_pathpfthld = sp->pf_retrans;
val.spt_pathmaxrxt = sp->pathmaxrxt;
val.spt_pathcpthld = sp->ps_retrans;
}
out:
@ -7900,6 +7968,45 @@ out:
return retval;
}
static int sctp_getsockopt_pf_expose(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
{
struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EFAULT;
if (len < sizeof(params)) {
retval = -EINVAL;
goto out;
}
len = sizeof(params);
if (copy_from_user(&params, optval, len))
goto out;
asoc = sctp_id2assoc(sk, params.assoc_id);
if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP)) {
retval = -EINVAL;
goto out;
}
params.assoc_value = asoc ? asoc->pf_expose
: sctp_sk(sk)->pf_expose;
if (put_user(len, optlen))
goto out;
if (copy_to_user(optval, &params, len))
goto out;
retval = 0;
out:
return retval;
}
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@ -8049,7 +8156,12 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen);
retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
optlen, false);
break;
case SCTP_PEER_ADDR_THLDS_V2:
retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
optlen, true);
break;
case SCTP_GET_ASSOC_STATS:
retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
@ -8112,6 +8224,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_ECN_SUPPORTED:
retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
break;
case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen);
break;
default:
retval = -ENOPROTOOPT;
break;

View File

@ -34,6 +34,8 @@ static int rto_alpha_min = 0;
static int rto_beta_min = 0;
static int rto_alpha_max = 1000;
static int rto_beta_max = 1000;
static int pf_expose_max = SCTP_PF_EXPOSE_MAX;
static int ps_retrans_max = SCTP_PS_RETRANS_MAX;
static unsigned long max_autoclose_min = 0;
static unsigned long max_autoclose_max =
@ -212,7 +214,16 @@ static struct ctl_table sctp_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
.extra2 = &init_net.sctp.ps_retrans,
},
{
.procname = "ps_retrans",
.data = &init_net.sctp.ps_retrans,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &init_net.sctp.pf_retrans,
.extra2 = &ps_retrans_max,
},
{
.procname = "sndbuf_policy",
@ -318,6 +329,15 @@ static struct ctl_table sctp_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "pf_expose",
.data = &init_net.sctp.pf_expose,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &pf_expose_max,
},
{ /* sentinel */ }
};