From 7a486c443c89bd949f7a64e0040f704e02710b3c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Apr 2023 16:08:00 +0200 Subject: [PATCH 1/5] mptcp: drop unneeded argument After commit 3a236aef280e ("mptcp: refactor passive socket initialization"), every mptcp_pm_fully_established() call is always invoked with a GFP_ATOMIC argument. We can then drop it. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 4 ++-- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 355f798d575a..cd3b885c8faa 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1001,7 +1001,7 @@ check_notify: clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { - mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); + mptcp_pm_fully_established(msk, ssk); } return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 70f0ced3ca86..78c924506e83 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return true; } -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) { struct mptcp_pm_data *pm = &msk->pm; bool announce = false; @@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); if (announce) - mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC); } void mptcp_pm_connection_closed(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e1310bc113be..a9eb0e428a6b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -782,7 +782,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index f46d8f6c40aa..80bbe96c0694 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -850,7 +850,7 @@ create_child: */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); - mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } } else if (ctx->mp_join) { From 617612316953093bc859890e405e1b550c27d840 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Apr 2023 16:08:01 +0200 Subject: [PATCH 2/5] mptcp: avoid unneeded __mptcp_nmpc_socket() usage In a few spots, the mptcp code invokes the __mptcp_nmpc_socket() helper multiple times under the same socket lock scope. Additionally, in such places, the socket status ensures that there is no MP capable handshake running. Under the above condition we can replace the later __mptcp_nmpc_socket() helper invocation with direct access to the msk->subflow pointer and better document such access is not supposed to fail with WARN(). Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e6cb36784a68..9cdcfdb44aee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3143,7 +3143,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct socket *listener; struct sock *newsk; - listener = __mptcp_nmpc_socket(msk); + listener = msk->subflow; if (WARN_ON_ONCE(!listener)) { *err = -EINVAL; return NULL; @@ -3363,7 +3363,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum) struct mptcp_sock *msk = mptcp_sk(sk); struct socket *ssock; - ssock = __mptcp_nmpc_socket(msk); + ssock = msk->subflow; pr_debug("msk=%p, subflow=%p", msk, ssock); if (WARN_ON_ONCE(!ssock)) return -EINVAL; @@ -3709,7 +3709,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, pr_debug("msk=%p", msk); - ssock = __mptcp_nmpc_socket(msk); + /* buggy applications can call accept on socket states other then LISTEN + * but no need to allocate the first subflow just to error out. + */ + ssock = msk->subflow; if (!ssock) return -EINVAL; From a2702a076e73787db660fb8fd07d26a1a3108358 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Apr 2023 16:08:02 +0200 Subject: [PATCH 3/5] mptcp: move fastopen subflow check inside mptcp_sendmsg_fastopen() So that we can avoid a bunch of check in fastpath. Additionally we can specialize such check according to the specific fastopen method - defer_connect vs MSG_FASTOPEN. The latter bits will simplify the next patches. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9cdcfdb44aee..22e073b373af 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1662,13 +1662,27 @@ static void mptcp_set_nospace(struct sock *sk) static int mptcp_disconnect(struct sock *sk, int flags); -static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, +static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, size_t len, int *copied_syn) { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; int ret; + /* on flags based fastopen the mptcp is supposed to create the + * first subflow right now. Otherwise we are in the defer_connect + * path, and the first subflow must be already present. + * Since the defer_connect flag is cleared after the first succsful + * fastopen attempt, no need to check for additional subflow status. + */ + if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) + return -EINVAL; + if (!msk->first) + return -EINVAL; + + ssk = msk->first; + lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; msk->connect_flags = O_NONBLOCK; @@ -1691,6 +1705,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh } else if (ret && ret != -EINPROGRESS) { mptcp_disconnect(sk, 0); } + inet_sk(sk)->defer_connect = 0; return ret; } @@ -1699,7 +1714,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied = 0; int ret = 0; long timeo; @@ -1709,12 +1723,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || - msg->msg_flags & MSG_FASTOPEN))) { + if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) { int copied_syn = 0; - ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); + ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn); copied += copied_syn; if (ret == -EINPROGRESS && copied_syn > 0) goto out; From ddb1a072f858704b3555876877ca38c5b103a215 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Apr 2023 16:08:03 +0200 Subject: [PATCH 4/5] mptcp: move first subflow allocation at mpc access time In the long run this will simplify the mptcp code and will allow for more consistent behavior. Move the first subflow allocation out of the sock->init ops into the __mptcp_nmpc_socket() helper. Since the first subflow creation can now happen after the first setsockopt() we additionally need to invoke mptcp_sockopt_sync() on it. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 4 +-- net/mptcp/protocol.c | 61 +++++++++++++++++++++++++----------------- net/mptcp/protocol.h | 2 +- net/mptcp/sockopt.c | 24 ++++++++++------- 4 files changed, 54 insertions(+), 37 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1c42bebca39e..bc343dab5e3f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1035,8 +1035,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, lock_sock(newsk); ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); release_sock(newsk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 22e073b373af..a676ac1bb9f1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; -/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not - * completed yet or has failed, return the subflow socket. - * Otherwise return NULL. - */ -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) -{ - if (!msk->subflow || READ_ONCE(msk->can_ack)) - return NULL; - - return msk->subflow; -} - /* Returns end sequence number of the receiver's advertised window */ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { @@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return 0; } +/* If the MPC handshake is not started, returns the first subflow, + * eventually allocating it. + */ +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + int ret; + + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + return ERR_PTR(-EINVAL); + + if (!msk->subflow) { + if (msk->first) + return ERR_PTR(-EINVAL); + + ret = __mptcp_socket_create(msk); + if (ret) + return ERR_PTR(ret); + + mptcp_sockopt_sync(msk, msk->first); + } + + return msk->subflow; +} + static void mptcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -1667,6 +1680,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *ssock; struct sock *ssk; int ret; @@ -1676,8 +1690,11 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * Since the defer_connect flag is cleared after the first succsful * fastopen attempt, no need to check for additional subflow status. */ - if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) - return -EINVAL; + if (msg->msg_flags & MSG_FASTOPEN) { + ssock = __mptcp_nmpc_socket(msk); + if (IS_ERR(ssock)) + return PTR_ERR(ssock); + } if (!msk->first) return -EINVAL; @@ -2740,10 +2757,6 @@ static int mptcp_init_sock(struct sock *sk) if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) return -ENOMEM; - ret = __mptcp_socket_create(mptcp_sk(sk)); - if (ret) - return ret; - set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will @@ -3563,8 +3576,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) int err = -EINVAL; ssock = __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_SYN_SENT); @@ -3652,8 +3665,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sock->sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } @@ -3689,8 +3702,8 @@ static int mptcp_listen(struct socket *sock, int backlog) lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a9eb0e428a6b..21eda9cd0c52 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -627,7 +627,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index b655cebda0f3..d4258869ac48 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BINDTOIFINDEX: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); @@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, case IPV6_FREEBIND: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); @@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } issk = inet_sk(ssock->sk); @@ -762,13 +762,15 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int { struct sock *sk = (struct sock *)msk; struct socket *sock; - int ret = -EINVAL; + int ret; /* Limit to first subflow, before the connection establishment */ lock_sock(sk); sock = __mptcp_nmpc_socket(msk); - if (!sock) + if (IS_ERR(sock)) { + ret = PTR_ERR(sock); goto unlock; + } ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen); @@ -861,7 +863,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int { struct sock *sk = (struct sock *)msk; struct socket *ssock; - int ret = -EINVAL; + int ret; struct sock *ssk; lock_sock(sk); @@ -872,8 +874,10 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int } ssock = __mptcp_nmpc_socket(msk); - if (!ssock) + if (IS_ERR(ssock)) { + ret = PTR_ERR(ssock); goto out; + } ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); From 8d547809a5d74aacf022d1df7a508c631088aaec Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Apr 2023 16:08:04 +0200 Subject: [PATCH 5/5] mptcp: fastclose msk when cleaning unaccepted sockets When cleaning up unaccepted mptcp socket still laying inside the listener queue at listener close time, such sockets will go through a regular close, waiting for a timeout before shutting down the subflows. There is no need to keep the kernel resources in use for such a possibly long time: short-circuit to fast-close. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a676ac1bb9f1..1926b81a9538 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2953,10 +2953,13 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_check_readable(msk)) { - /* the msk has read data, do the MPTCP equivalent of TCP reset */ + if (mptcp_check_readable(msk) || timeout < 0) { + /* If the msk has read data, or the caller explicitly ask it, + * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose + */ inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); + timeout = 0; } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); }