tcp: set TCP_USER_TIMEOUT locklessly
icsk->icsk_user_timeout can be set locklessly, if all read sides use READ_ONCE().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
d44fd4a767
commit
d58f2e15aa
@ -564,6 +564,6 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on);
|
|||||||
void tcp_sock_set_nodelay(struct sock *sk);
|
void tcp_sock_set_nodelay(struct sock *sk);
|
||||||
void tcp_sock_set_quickack(struct sock *sk, int val);
|
void tcp_sock_set_quickack(struct sock *sk, int val);
|
||||||
int tcp_sock_set_syncnt(struct sock *sk, int val);
|
int tcp_sock_set_syncnt(struct sock *sk, int val);
|
||||||
void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
|
int tcp_sock_set_user_timeout(struct sock *sk, int val);
|
||||||
|
|
||||||
#endif /* _LINUX_TCP_H */
|
#endif /* _LINUX_TCP_H */
|
||||||
|
@ -3296,11 +3296,16 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_sock_set_syncnt);
|
EXPORT_SYMBOL(tcp_sock_set_syncnt);
|
||||||
|
|
||||||
void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
|
int tcp_sock_set_user_timeout(struct sock *sk, int val)
|
||||||
{
|
{
|
||||||
lock_sock(sk);
|
/* Cap the max time in ms TCP will retry or probe the window
|
||||||
|
* before giving up and aborting (ETIMEDOUT) a connection.
|
||||||
|
*/
|
||||||
|
if (val < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
|
WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
|
||||||
release_sock(sk);
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
|
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
|
||||||
|
|
||||||
@ -3464,6 +3469,8 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
switch (optname) {
|
switch (optname) {
|
||||||
case TCP_SYNCNT:
|
case TCP_SYNCNT:
|
||||||
return tcp_sock_set_syncnt(sk, val);
|
return tcp_sock_set_syncnt(sk, val);
|
||||||
|
case TCP_USER_TIMEOUT:
|
||||||
|
return tcp_sock_set_user_timeout(sk, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
sockopt_lock_sock(sk);
|
sockopt_lock_sock(sk);
|
||||||
@ -3611,16 +3618,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
|
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case TCP_USER_TIMEOUT:
|
|
||||||
/* Cap the max time in ms TCP will retry or probe the window
|
|
||||||
* before giving up and aborting (ETIMEDOUT) a connection.
|
|
||||||
*/
|
|
||||||
if (val < 0)
|
|
||||||
err = -EINVAL;
|
|
||||||
else
|
|
||||||
WRITE_ONCE(icsk->icsk_user_timeout, val);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case TCP_FASTOPEN:
|
case TCP_FASTOPEN:
|
||||||
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
|
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
|
||||||
TCPF_LISTEN))) {
|
TCPF_LISTEN))) {
|
||||||
|
@ -26,14 +26,15 @@
|
|||||||
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
u32 elapsed, start_ts;
|
u32 elapsed, start_ts, user_timeout;
|
||||||
s32 remaining;
|
s32 remaining;
|
||||||
|
|
||||||
start_ts = tcp_sk(sk)->retrans_stamp;
|
start_ts = tcp_sk(sk)->retrans_stamp;
|
||||||
if (!icsk->icsk_user_timeout)
|
user_timeout = READ_ONCE(icsk->icsk_user_timeout);
|
||||||
|
if (!user_timeout)
|
||||||
return icsk->icsk_rto;
|
return icsk->icsk_rto;
|
||||||
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
|
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
|
||||||
remaining = icsk->icsk_user_timeout - elapsed;
|
remaining = user_timeout - elapsed;
|
||||||
if (remaining <= 0)
|
if (remaining <= 0)
|
||||||
return 1; /* user timeout has passed; fire ASAP */
|
return 1; /* user timeout has passed; fire ASAP */
|
||||||
|
|
||||||
@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
|||||||
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
|
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
u32 remaining;
|
u32 remaining, user_timeout;
|
||||||
s32 elapsed;
|
s32 elapsed;
|
||||||
|
|
||||||
if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
|
user_timeout = READ_ONCE(icsk->icsk_user_timeout);
|
||||||
|
if (!user_timeout || !icsk->icsk_probes_tstamp)
|
||||||
return when;
|
return when;
|
||||||
|
|
||||||
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
|
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
|
||||||
if (unlikely(elapsed < 0))
|
if (unlikely(elapsed < 0))
|
||||||
elapsed = 0;
|
elapsed = 0;
|
||||||
remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
|
remaining = msecs_to_jiffies(user_timeout) - elapsed;
|
||||||
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
|
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
|
||||||
|
|
||||||
return min_t(u32, remaining, when);
|
return min_t(u32, remaining, when);
|
||||||
@ -270,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
|
|||||||
}
|
}
|
||||||
if (!expired)
|
if (!expired)
|
||||||
expired = retransmits_timed_out(sk, retry_until,
|
expired = retransmits_timed_out(sk, retry_until,
|
||||||
icsk->icsk_user_timeout);
|
READ_ONCE(icsk->icsk_user_timeout));
|
||||||
tcp_fastopen_active_detect_blackhole(sk, expired);
|
tcp_fastopen_active_detect_blackhole(sk, expired);
|
||||||
|
|
||||||
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
|
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
|
||||||
@ -384,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
|
|||||||
* corresponding system limit. We also implement similar policy when
|
* corresponding system limit. We also implement similar policy when
|
||||||
* we use RTO to probe window in tcp_retransmit_timer().
|
* we use RTO to probe window in tcp_retransmit_timer().
|
||||||
*/
|
*/
|
||||||
if (!icsk->icsk_probes_tstamp)
|
if (!icsk->icsk_probes_tstamp) {
|
||||||
icsk->icsk_probes_tstamp = tcp_jiffies32;
|
icsk->icsk_probes_tstamp = tcp_jiffies32;
|
||||||
else if (icsk->icsk_user_timeout &&
|
} else {
|
||||||
(s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
|
u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
|
||||||
msecs_to_jiffies(icsk->icsk_user_timeout))
|
|
||||||
goto abort;
|
|
||||||
|
|
||||||
|
if (user_timeout &&
|
||||||
|
(s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
|
||||||
|
msecs_to_jiffies(user_timeout))
|
||||||
|
goto abort;
|
||||||
|
}
|
||||||
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
|
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
|
||||||
if (sock_flag(sk, SOCK_DEAD)) {
|
if (sock_flag(sk, SOCK_DEAD)) {
|
||||||
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
|
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
|
||||||
@ -734,13 +739,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
|
|||||||
elapsed = keepalive_time_elapsed(tp);
|
elapsed = keepalive_time_elapsed(tp);
|
||||||
|
|
||||||
if (elapsed >= keepalive_time_when(tp)) {
|
if (elapsed >= keepalive_time_when(tp)) {
|
||||||
|
u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
|
||||||
|
|
||||||
/* If the TCP_USER_TIMEOUT option is enabled, use that
|
/* If the TCP_USER_TIMEOUT option is enabled, use that
|
||||||
* to determine when to timeout instead.
|
* to determine when to timeout instead.
|
||||||
*/
|
*/
|
||||||
if ((icsk->icsk_user_timeout != 0 &&
|
if ((user_timeout != 0 &&
|
||||||
elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
|
elapsed >= msecs_to_jiffies(user_timeout) &&
|
||||||
icsk->icsk_probes_out > 0) ||
|
icsk->icsk_probes_out > 0) ||
|
||||||
(icsk->icsk_user_timeout == 0 &&
|
(user_timeout == 0 &&
|
||||||
icsk->icsk_probes_out >= keepalive_probes(tp))) {
|
icsk->icsk_probes_out >= keepalive_probes(tp))) {
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
tcp_send_active_reset(sk, GFP_ATOMIC);
|
||||||
tcp_write_err(sk);
|
tcp_write_err(sk);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user