Merge branch 'mptcp-fixes-for-6-4'
Matthieu Baerts says:

====================
mptcp: fixes for 6.4

Patch 1 correctly handles disconnect() failures that can happen in some specific cases: the socket state is now set to unconnected, as expected. That fixes an issue introduced in v6.2.

Patch 2 fixes a divide-by-zero bug in mptcp_recvmsg() with a fix similar to a recent one from Eric Dumazet for TCP, which introduced the sk_wait_pending flag. It should address an issue present in MPTCP almost from the beginning, since v5.9.

Patch 3 fixes a possible list corruption on passive MPJ. Even if the race seems very unlikely, better safe than sorry. The possible issue has been present since v5.17.

Patch 4 consolidates the fallback and non-fallback state machines to avoid leaking some MPTCP sockets. The fix is likely needed for versions from v5.11 onwards.

Patch 5 drops code that is no longer used after the introduction of patch 4/6. This is not really a fix, but this patch can probably land in the -net tree as well, so as not to leave unused code.

Patch 6 ensures listeners are unhashed before updating their sk status, to avoid possible deadlocks when diag info is retrieved with a lock. Even if it should not be visible with the way we are currently getting diag info, the issue has been present since v5.17.
====================

Link: https://lore.kernel.org/r/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
533aa0baaa
@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
inet_sk_state_store(newsk, TCP_LISTEN);
|
||||
err = kernel_listen(ssock, backlog);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -44,7 +44,7 @@ enum {
|
||||
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
|
||||
|
||||
static void __mptcp_destroy_sock(struct sock *sk);
|
||||
static void __mptcp_check_send_data_fin(struct sock *sk);
|
||||
static void mptcp_check_send_data_fin(struct sock *sk);
|
||||
|
||||
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
|
||||
static struct net_device mptcp_napi_dev;
|
||||
@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
return !__mptcp_check_fallback(msk) &&
|
||||
((1 << sk->sk_state) &
|
||||
return ((1 << sk->sk_state) &
|
||||
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
|
||||
msk->write_seq == READ_ONCE(msk->snd_una);
|
||||
}
|
||||
@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
|
||||
u64 rcv_data_fin_seq;
|
||||
bool ret = false;
|
||||
|
||||
if (__mptcp_check_fallback(msk))
|
||||
return ret;
|
||||
|
||||
/* Need to ack a DATA_FIN received from a peer while this side
|
||||
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
|
||||
* msk->rcv_data_fin was set when parsing the incoming options
|
||||
@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
|
||||
}
|
||||
|
||||
ret = true;
|
||||
mptcp_send_ack(msk);
|
||||
if (!__mptcp_check_fallback(msk))
|
||||
mptcp_send_ack(msk);
|
||||
mptcp_close_wake_up(sk);
|
||||
}
|
||||
return ret;
|
||||
@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __mptcp_flush_join_list(struct sock *sk)
|
||||
static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
|
||||
{
|
||||
struct mptcp_subflow_context *tmp, *subflow;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
|
||||
list_for_each_entry_safe(subflow, tmp, join_list, node) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
bool slow = lock_sock_fast(ssk);
|
||||
|
||||
@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
|
||||
return false;
|
||||
}
|
||||
|
||||
void mptcp_subflow_eof(struct sock *sk)
|
||||
{
|
||||
if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
|
||||
mptcp_schedule_work(sk);
|
||||
}
|
||||
|
||||
static void mptcp_check_for_eof(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
int receivers = 0;
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow)
|
||||
receivers += !subflow->rx_eof;
|
||||
if (receivers)
|
||||
return;
|
||||
|
||||
if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
|
||||
/* hopefully temporary hack: propagate shutdown status
|
||||
* to msk, when all subflows agree on it
|
||||
*/
|
||||
WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
|
||||
|
||||
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
|
||||
sk->sk_data_ready(sk);
|
||||
}
|
||||
|
||||
switch (sk->sk_state) {
|
||||
case TCP_ESTABLISHED:
|
||||
inet_sk_state_store(sk, TCP_CLOSE_WAIT);
|
||||
break;
|
||||
case TCP_FIN_WAIT1:
|
||||
inet_sk_state_store(sk, TCP_CLOSING);
|
||||
break;
|
||||
case TCP_FIN_WAIT2:
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
mptcp_close_wake_up(sk);
|
||||
}
|
||||
|
||||
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
@ -1609,7 +1563,7 @@ out:
|
||||
if (!mptcp_timer_pending(sk))
|
||||
mptcp_reset_timer(sk);
|
||||
if (do_check_data_fin)
|
||||
__mptcp_check_send_data_fin(sk);
|
||||
mptcp_check_send_data_fin(sk);
|
||||
}
|
||||
|
||||
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
|
||||
@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
|
||||
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
|
||||
*copied_syn = 0;
|
||||
} else if (ret && ret != -EINPROGRESS) {
|
||||
mptcp_disconnect(sk, 0);
|
||||
/* The disconnect() op called by tcp_sendmsg_fastopen()/
|
||||
* __inet_stream_connect() can fail, due to locking check,
|
||||
* see mptcp_disconnect().
|
||||
* Attempt it again outside the problematic scope.
|
||||
*/
|
||||
if (!mptcp_disconnect(sk, 0))
|
||||
sk->sk_socket->state = SS_UNCONNECTED;
|
||||
}
|
||||
inet_sk(sk)->defer_connect = 0;
|
||||
|
||||
@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
break;
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
|
||||
mptcp_check_for_eof(msk);
|
||||
|
||||
if (sk->sk_shutdown & RCV_SHUTDOWN) {
|
||||
/* race breaker: the shutdown could be after the
|
||||
* previous receive queue check
|
||||
@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
|
||||
|
||||
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
|
||||
if (!dispose_it) {
|
||||
tcp_disconnect(ssk, 0);
|
||||
/* The MPTCP code never waits on the subflow sockets, TCP-level
|
||||
* disconnect should never fail
|
||||
*/
|
||||
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
|
||||
msk->subflow->state = SS_UNCONNECTED;
|
||||
mptcp_subflow_ctx_reset(subflow);
|
||||
release_sock(ssk);
|
||||
@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
|
||||
kfree_rcu(subflow, rcu);
|
||||
} else {
|
||||
/* otherwise tcp will dispose of the ssk and subflow ctx */
|
||||
if (ssk->sk_state == TCP_LISTEN) {
|
||||
tcp_set_state(ssk, TCP_CLOSE);
|
||||
mptcp_subflow_queue_clean(sk, ssk);
|
||||
inet_csk_listen_stop(ssk);
|
||||
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
|
||||
}
|
||||
|
||||
__tcp_close(ssk, 0);
|
||||
|
||||
/* close acquired an extra ref */
|
||||
@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
|
||||
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
|
||||
goto unlock;
|
||||
|
||||
mptcp_check_data_fin_ack(sk);
|
||||
|
||||
mptcp_check_fastclose(msk);
|
||||
|
||||
mptcp_pm_nl_work(msk);
|
||||
|
||||
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
|
||||
mptcp_check_for_eof(msk);
|
||||
|
||||
__mptcp_check_send_data_fin(sk);
|
||||
mptcp_check_send_data_fin(sk);
|
||||
mptcp_check_data_fin_ack(sk);
|
||||
mptcp_check_data_fin(sk);
|
||||
|
||||
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
|
||||
@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
|
||||
break;
|
||||
fallthrough;
|
||||
case TCP_SYN_SENT:
|
||||
tcp_disconnect(ssk, O_NONBLOCK);
|
||||
WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
|
||||
break;
|
||||
default:
|
||||
if (__mptcp_check_fallback(mptcp_sk(sk))) {
|
||||
pr_debug("Fallback");
|
||||
ssk->sk_shutdown |= how;
|
||||
tcp_shutdown(ssk, how);
|
||||
|
||||
/* simulate the data_fin ack reception to let the state
|
||||
* machine move forward
|
||||
*/
|
||||
WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
|
||||
mptcp_schedule_work(sk);
|
||||
} else {
|
||||
pr_debug("Sending DATA_FIN on subflow %p", ssk);
|
||||
tcp_send_ack(ssk);
|
||||
@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
|
||||
return next & TCP_ACTION_FIN;
|
||||
}
|
||||
|
||||
static void __mptcp_check_send_data_fin(struct sock *sk)
|
||||
static void mptcp_check_send_data_fin(struct sock *sk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
|
||||
|
||||
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
|
||||
|
||||
/* fallback socket will not get data_fin/ack, can move to the next
|
||||
* state now
|
||||
*/
|
||||
if (__mptcp_check_fallback(msk)) {
|
||||
WRITE_ONCE(msk->snd_una, msk->write_seq);
|
||||
if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
mptcp_close_wake_up(sk);
|
||||
} else if (sk->sk_state == TCP_FIN_WAIT1) {
|
||||
inet_sk_state_store(sk, TCP_FIN_WAIT2);
|
||||
}
|
||||
}
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
|
||||
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
|
||||
WRITE_ONCE(msk->snd_data_fin_enable, 1);
|
||||
|
||||
__mptcp_check_send_data_fin(sk);
|
||||
mptcp_check_send_data_fin(sk);
|
||||
}
|
||||
|
||||
static void __mptcp_destroy_sock(struct sock *sk)
|
||||
@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
|
||||
return EPOLLIN | EPOLLRDNORM;
|
||||
}
|
||||
|
||||
static void mptcp_listen_inuse_dec(struct sock *sk)
|
||||
static void mptcp_check_listen_stop(struct sock *sk)
|
||||
{
|
||||
if (inet_sk_state_load(sk) == TCP_LISTEN)
|
||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||
struct sock *ssk;
|
||||
|
||||
if (inet_sk_state_load(sk) != TCP_LISTEN)
|
||||
return;
|
||||
|
||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||
ssk = mptcp_sk(sk)->first;
|
||||
if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
|
||||
return;
|
||||
|
||||
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
|
||||
mptcp_subflow_queue_clean(sk, ssk);
|
||||
inet_csk_listen_stop(ssk);
|
||||
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
|
||||
tcp_set_state(ssk, TCP_CLOSE);
|
||||
release_sock(ssk);
|
||||
}
|
||||
|
||||
bool __mptcp_close(struct sock *sk, long timeout)
|
||||
@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
|
||||
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
|
||||
|
||||
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
|
||||
mptcp_listen_inuse_dec(sk);
|
||||
mptcp_check_listen_stop(sk);
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
goto cleanup;
|
||||
}
|
||||
@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
/* Deny disconnect if other threads are blocked in sk_wait_event()
|
||||
* or inet_wait_for_connect().
|
||||
*/
|
||||
if (sk->sk_wait_pending)
|
||||
return -EBUSY;
|
||||
|
||||
/* We are on the fastopen error path. We can't call straight into the
|
||||
* subflows cleanup code due to lock nesting (we are already under
|
||||
* msk->first socket lock). Do nothing and leave the cleanup to the
|
||||
* caller.
|
||||
* msk->first socket lock).
|
||||
*/
|
||||
if (msk->fastopening)
|
||||
return 0;
|
||||
return -EBUSY;
|
||||
|
||||
mptcp_listen_inuse_dec(sk);
|
||||
mptcp_check_listen_stop(sk);
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
|
||||
mptcp_stop_timer(sk);
|
||||
@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
|
||||
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
|
||||
#endif
|
||||
|
||||
nsk->sk_wait_pending = 0;
|
||||
__mptcp_init_sock(nsk);
|
||||
|
||||
msk = mptcp_sk(nsk);
|
||||
@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
|
||||
for (;;) {
|
||||
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
|
||||
msk->push_pending;
|
||||
struct list_head join_list;
|
||||
|
||||
if (!flags)
|
||||
break;
|
||||
|
||||
INIT_LIST_HEAD(&join_list);
|
||||
list_splice_init(&msk->join_list, &join_list);
|
||||
|
||||
/* the following actions acquire the subflow socket lock
|
||||
*
|
||||
* 1) can't be invoked in atomic scope
|
||||
@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
|
||||
msk->push_pending = 0;
|
||||
msk->cb_flags &= ~flags;
|
||||
spin_unlock_bh(&sk->sk_lock.slock);
|
||||
|
||||
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
|
||||
__mptcp_flush_join_list(sk);
|
||||
__mptcp_flush_join_list(sk, &join_list);
|
||||
if (flags & BIT(MPTCP_PUSH_PENDING))
|
||||
__mptcp_push_pending(sk, 0);
|
||||
if (flags & BIT(MPTCP_RETRANSMIT))
|
||||
|
@ -113,7 +113,6 @@
|
||||
/* MPTCP socket atomic flags */
|
||||
#define MPTCP_NOSPACE 1
|
||||
#define MPTCP_WORK_RTX 2
|
||||
#define MPTCP_WORK_EOF 3
|
||||
#define MPTCP_FALLBACK_DONE 4
|
||||
#define MPTCP_WORK_CLOSE_SUBFLOW 5
|
||||
|
||||
@ -476,14 +475,13 @@ struct mptcp_subflow_context {
|
||||
send_mp_fail : 1,
|
||||
send_fastclose : 1,
|
||||
send_infinite_map : 1,
|
||||
rx_eof : 1,
|
||||
remote_key_valid : 1, /* received the peer key from */
|
||||
disposable : 1, /* ctx can be free at ulp release time */
|
||||
stale : 1, /* unable to snd/rcv data, do not use for xmit */
|
||||
local_id_valid : 1, /* local_id is correctly initialized */
|
||||
valid_csum_seen : 1, /* at least one csum validated */
|
||||
is_mptfo : 1, /* subflow is doing TFO */
|
||||
__unused : 8;
|
||||
__unused : 9;
|
||||
enum mptcp_data_avail data_avail;
|
||||
u32 remote_nonce;
|
||||
u64 thmac;
|
||||
@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
|
||||
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
|
||||
void __mptcp_data_acked(struct sock *sk);
|
||||
void __mptcp_error_report(struct sock *sk);
|
||||
void mptcp_subflow_eof(struct sock *sk);
|
||||
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
|
||||
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
|
||||
{
|
||||
|
@ -1749,14 +1749,16 @@ static void subflow_state_change(struct sock *sk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
struct sock *parent = subflow->conn;
|
||||
struct mptcp_sock *msk;
|
||||
|
||||
__subflow_state_change(sk);
|
||||
|
||||
msk = mptcp_sk(parent);
|
||||
if (subflow_simultaneous_connect(sk)) {
|
||||
mptcp_propagate_sndbuf(parent, sk);
|
||||
mptcp_do_fallback(sk);
|
||||
mptcp_rcv_space_init(mptcp_sk(parent), sk);
|
||||
pr_fallback(mptcp_sk(parent));
|
||||
mptcp_rcv_space_init(msk, sk);
|
||||
pr_fallback(msk);
|
||||
subflow->conn_finished = 1;
|
||||
mptcp_set_connected(parent);
|
||||
}
|
||||
@ -1772,11 +1774,12 @@ static void subflow_state_change(struct sock *sk)
|
||||
|
||||
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
|
||||
|
||||
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
|
||||
!subflow->rx_eof && subflow_is_done(sk)) {
|
||||
subflow->rx_eof = 1;
|
||||
mptcp_subflow_eof(parent);
|
||||
}
|
||||
/* when the fallback subflow closes the rx side, trigger a 'dummy'
|
||||
* ingress data fin, so that the msk state will follow along
|
||||
*/
|
||||
if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
|
||||
mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
|
||||
mptcp_schedule_work(parent);
|
||||
}
|
||||
|
||||
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
|
||||
|
Loading…
x
Reference in New Issue
Block a user