Merge branch 'mptcp-fixes'

Mat Martineau says:

====================
mptcp: Fixes for v5.12

These patches from the MPTCP tree fix a few multipath TCP issues:

Patches 1 and 5 clear some stale pointers when subflows close.

Patches 2, 4, and 9 plug some memory leaks.

Patch 3 fixes a memory accounting error identified by syzkaller.

Patches 6 and 7 fix a race condition that slowed data transmission.

Patch 8 adds missing wakeups when write buffer space is freed.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-03-04 14:30:13 -08:00
commit bdda7dfab1
2 changed files with 112 additions and 67 deletions

View File

@ -1061,6 +1061,12 @@ out:
}
}
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
__mptcp_clean_una(sk);
mptcp_write_space(sk);
}
static void mptcp_enter_memory_pressure(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@ -1189,6 +1195,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
*/
while (skbs->qlen > 1) {
skb = __skb_dequeue_tail(skbs);
*total_ts -= skb->truesize;
__kfree_skb(skb);
}
return skbs->qlen > 0;
@ -1444,7 +1451,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
release_sock(ssk);
}
static void mptcp_push_pending(struct sock *sk, unsigned int flags)
static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
@ -1696,14 +1703,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
wait_for_memory:
mptcp_set_nospace(sk);
mptcp_push_pending(sk, msg->msg_flags);
__mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
}
if (copied)
mptcp_push_pending(sk, msg->msg_flags);
__mptcp_push_pending(sk, msg->msg_flags);
out:
release_sock(sk);
@ -2115,6 +2122,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
return backup;
}
static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
{
if (msk->subflow) {
iput(SOCK_INODE(msk->subflow));
msk->subflow = NULL;
}
}
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@ -2126,6 +2141,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
struct mptcp_sock *msk = mptcp_sk(sk);
list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@ -2154,6 +2171,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
release_sock(ssk);
sock_put(ssk);
if (ssk == msk->last_snd)
msk->last_snd = NULL;
if (ssk == msk->ack_hint)
msk->ack_hint = NULL;
if (ssk == msk->first)
msk->first = NULL;
if (msk->subflow && ssk == msk->subflow->sk)
mptcp_dispose_initial_subflow(msk);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@ -2238,14 +2267,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
mptcp_close_wake_up(sk);
}
static void mptcp_worker(struct work_struct *work)
static void __mptcp_retrans(struct sock *sk)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
size_t copied = 0;
int state, ret;
struct sock *ssk;
int ret;
__mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
return;
ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
goto reset_timer;
lock_sock(ssk);
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
info.limit = dfrag->already_sent;
while (info.sent < dfrag->already_sent) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
break;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
copied += ret;
info.sent += ret;
}
if (copied)
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
reset_timer:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
}
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
struct sock *sk = &msk->sk.icsk_inet.sk;
int state;
lock_sock(sk);
state = sk->sk_state;
@ -2280,45 +2353,8 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
__mptcp_clean_una(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
goto unlock;
ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
goto reset_unlock;
lock_sock(ssk);
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
info.limit = dfrag->already_sent;
while (info.sent < dfrag->already_sent) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
break;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
copied += ret;
info.sent += ret;
}
if (copied)
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
reset_unlock:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
unlock:
release_sock(sk);
@ -2523,12 +2559,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
/* dispose the ancillatory tcp socket, if any */
if (msk->subflow) {
iput(SOCK_INODE(msk->subflow));
msk->subflow = NULL;
}
/* be sure to always acquire the join list lock, to sync vs
* mptcp_finish_join().
*/
@ -2553,6 +2583,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@ -2934,13 +2965,14 @@ static void mptcp_release_cb(struct sock *sk)
{
unsigned long flags, nflags;
/* push_pending may touch wmem_reserved, do it before the later
* cleanup
*/
if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
__mptcp_clean_una(sk);
if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
/* mptcp_push_pending() acquires the subflow socket lock
for (;;) {
flags = 0;
if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
flags |= MPTCP_PUSH_PENDING;
if (!flags)
break;
/* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
* 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@ -2949,13 +2981,21 @@ static void mptcp_release_cb(struct sock *sk)
*/
spin_unlock_bh(&sk->sk_lock.slock);
mptcp_push_pending(sk, 0);
if (flags & MPTCP_PUSH_PENDING)
__mptcp_push_pending(sk, 0);
cond_resched();
spin_lock_bh(&sk->sk_lock.slock);
}
if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
__mptcp_clean_una_wakeup(sk);
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
/* clear any wmem reservation and errors */
/* push_pending may touch wmem_reserved, ensure we do the cleanup
* later
*/
__mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
@ -3285,6 +3325,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
* we can notify it, if needed.
*
* Even if remote has reset the initial subflow by now
* the refcnt is still at least one.
*/
subflow = mptcp_subflow_ctx(msk->first);
list_add(&subflow->node, &msk->conn_list);

View File

@ -687,11 +687,6 @@ create_child:
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
if (!mptcp_finish_join(child))
goto dispose_child;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
if (subflow_use_different_sport(owner, sk)) {
pr_debug("ack inet_sport=%d %d",
@ -699,10 +694,16 @@ create_child:
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
goto out;
goto dispose_child;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
if (!mptcp_finish_join(child))
goto dispose_child;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
}
}
@ -1297,6 +1298,7 @@ failed_unlink:
spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
spin_unlock_bh(&msk->join_list_lock);
sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
subflow->disposable = 1;