net: tls: avoid hanging tasks on the tx_lock

syzbot sent a hung task report and Eric explains that adversarial
receiver may keep RWIN at 0 for a long time, so we are not guaranteed
to make forward progress. Thread which took tx_lock and went to sleep
may not release tx_lock for hours. Use interruptible sleep where
possible and reschedule the work if it can't take the lock.

Testing: existing selftest passes

Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com
Fixes: 79ffe6087e91 ("net/tls: add a TX lock")
Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/
Cc: stable@vger.kernel.org # wait 4 weeks
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-02-28 16:28:57 -08:00
parent 49c47cc21b
commit f3221361dc

View File

@ -956,7 +956,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
MSG_CMSG_COMPAT))
return -EOPNOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
if (ret)
return ret;
lock_sock(sk);
if (unlikely(msg->msg_controllen)) {
@ -1290,7 +1292,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
if (ret)
return ret;
lock_sock(sk);
ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
release_sock(sk);
@ -2435,11 +2439,19 @@ static void tx_work_handler(struct work_struct *work)
if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
return;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
if (mutex_trylock(&tls_ctx->tx_lock)) {
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
} else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
/* Someone is holding the tx_lock, they will likely run Tx
* and cancel the work on their way out of the lock section.
* Schedule a long delay just in case.
*/
schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
}
}
static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)