From 3d4762639dd36a5f0f433f0c9d82e9743dc21a33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Apr 2017 09:45:51 -0700 Subject: [PATCH 1/2] tcp: remove poll() flakes when receiving RST When a RST packet is processed, we send two wakeup events to interested polling users. First one by a sk->sk_error_report(sk) from tcp_reset(), followed by a sk->sk_state_change(sk) from tcp_done(). Depending on machine load and luck, poll() can either return POLLERR, or POLLIN|POLLOUT|POLLERR|POLLHUP (this happens on 99 % of the cases) This is probably fine, but we can avoid the confusion by reordering things so that we have more TCP fields updated before the first wakeup. This might even allow us to remove some barriers we added in the past. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5838858c362..37e2aa925f62 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4008,10 +4008,10 @@ void tcp_reset(struct sock *sk) /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); + tcp_done(sk); + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); - - tcp_done(sk); } /* From 0f9fa831aecfc297b7b45d4f046759bcefcf87f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Apr 2017 09:45:52 -0700 Subject: [PATCH 2/2] tcp: remove poll() flakes with FastOpen When using TCP FastOpen for an active session, we send one wakeup event from tcp_finish_connect(), right before the data eventually contained in the received SYNACK is queued to sk->sk_receive_queue. This means that depending on machine load or luck, poll() users might receive POLLOUT events instead of POLLIN|POLLOUT To fix this, we need to move the call to sk->sk_state_change() after the (optional) call to tcp_rcv_fastopen_synack() Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 37e2aa925f62..341f021f02a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5580,10 +5580,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) else tp->pred_flags = 0; - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - } } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5652,6 +5648,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; + bool fastopen_fail; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) @@ -5755,10 +5752,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); - if ((tp->syn_fastopen || tp->syn_data) && - tcp_rcv_fastopen_synack(sk, skb, &foc)) - return -1; + fastopen_fail = (tp->syn_fastopen || tp->syn_data) && + tcp_rcv_fastopen_synack(sk, skb, &foc); + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + } + if (fastopen_fail) + return -1; if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) {