62ec33b44e
Christoph Paasch reported that commit b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") started triggering WARN_ON_ONCE(sk->sk_forward_alloc) in sk_stream_kill_queues(). [0 - 2] Also, we can reproduce it by a program in [3]. In the commit, we delay freeing ipv6_pinfo.pktoptions from sk->destroy() to sk->sk_destruct(), so sk->sk_forward_alloc is no longer zero in inet_csk_destroy_sock(). The same check has been in inet_sock_destruct() from at least v2.6, we can just remove the WARN_ON_ONCE(). However, among the users of sk_stream_kill_queues(), only CAIF is not calling inet_sock_destruct(). Thus, we add the same WARN_ON_ONCE() to caif_sock_destructor(). [0]: https://lore.kernel.org/netdev/39725AB4-88F1-41B3-B07F-949C5CAEFF4F@icloud.com/ [1]: https://github.com/multipath-tcp/mptcp_net-next/issues/341 [2]: WARNING: CPU: 0 PID: 3232 at net/core/stream.c:212 sk_stream_kill_queues+0x2f9/0x3e0 Modules linked in: CPU: 0 PID: 3232 Comm: syz-executor.0 Not tainted 6.2.0-rc5ab24eb4698afbe147b424149c529e2a43ec24eb5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_stream_kill_queues+0x2f9/0x3e0 Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ec 00 00 00 8b ab 08 01 00 00 e9 60 ff ff ff e8 d0 5f b6 fe 0f 0b eb 97 e8 c7 5f b6 fe <0f> 0b eb a0 e8 be 5f b6 fe 0f 0b e9 6a fe ff ff e8 02 07 e3 fe e9 RSP: 0018:ffff88810570fc68 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888101f38f40 RSI: ffffffff8285e529 RDI: 0000000000000005 RBP: 0000000000000ce0 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000ce0 R11: 0000000000000001 R12: ffff8881009e9488 R13: ffffffff84af2cc0 R14: 0000000000000000 R15: ffff8881009e9458 FS: 00007f7fdfbd5800(0000) GS:ffff88811b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32923000 CR3: 00000001062fc006 CR4: 0000000000170ef0 Call Trace: <TASK> inet_csk_destroy_sock+0x1a1/0x320 __tcp_close+0xab6/0xe90 tcp_close+0x30/0xc0 inet_release+0xe9/0x1f0 inet6_release+0x4c/0x70 __sock_release+0xd2/0x280 sock_close+0x15/0x20 __fput+0x252/0xa20 task_work_run+0x169/0x250 exit_to_user_mode_prepare+0x113/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f7fdf7ae28d Code: c1 20 00 00 75 10 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee fb ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 37 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00000000007dfbb0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7fdf7ae28d RDX: 0000000000000000 RSI: ffffffffffffffff RDI: 0000000000000003 RBP: 0000000000000000 R08: 000000007f338e0f R09: 0000000000000e0f R10: 000000007f338e13 R11: 0000000000000293 R12: 00007f7fdefff000 R13: 00007f7fdefffcd8 R14: 00007f7fdefffce0 R15: 00007f7fdefffcd8 </TASK> [3]: https://lore.kernel.org/netdev/20230208004245.83497-1-kuniyu@amazon.com/ Fixes: b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") Reported-by: syzbot <syzkaller@googlegroups.com> Reported-by: Christoph Paasch <christophpaasch@icloud.com> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
219 lines
5.5 KiB
C
219 lines
5.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* SUCS NET3:
|
|
*
|
|
* Generic stream handling routines. These are generic for most
|
|
* protocols. Even IP. Tonight 8-).
|
|
* This is used because TCP, LLC (others too) layer all have mostly
|
|
* identical sendmsg() and recvmsg() code.
|
|
* So we (will) share it here.
|
|
*
|
|
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
|
|
* (from old tcp.c code)
|
|
* Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/net.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/wait.h>
|
|
#include <net/sock.h>
|
|
|
|
/**
|
|
* sk_stream_write_space - stream socket write_space callback.
|
|
* @sk: socket
|
|
*
|
|
* FIXME: write proper description
|
|
*/
|
|
void sk_stream_write_space(struct sock *sk)
|
|
{
|
|
struct socket *sock = sk->sk_socket;
|
|
struct socket_wq *wq;
|
|
|
|
if (__sk_stream_is_writeable(sk, 1) && sock) {
|
|
clear_bit(SOCK_NOSPACE, &sock->flags);
|
|
|
|
rcu_read_lock();
|
|
wq = rcu_dereference(sk->sk_wq);
|
|
if (skwq_has_sleeper(wq))
|
|
wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
|
|
EPOLLWRNORM | EPOLLWRBAND);
|
|
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
|
|
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
|
|
rcu_read_unlock();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* sk_stream_wait_connect - Wait for a socket to get into the connected state
|
|
* @sk: sock to wait on
|
|
* @timeo_p: for how long to wait
|
|
*
|
|
* Must be called with the socket locked.
|
|
*/
|
|
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
|
|
{
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
struct task_struct *tsk = current;
|
|
int done;
|
|
|
|
do {
|
|
int err = sock_error(sk);
|
|
if (err)
|
|
return err;
|
|
if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
|
|
return -EPIPE;
|
|
if (!*timeo_p)
|
|
return -EAGAIN;
|
|
if (signal_pending(tsk))
|
|
return sock_intr_errno(*timeo_p);
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
sk->sk_write_pending++;
|
|
done = sk_wait_event(sk, timeo_p,
|
|
!sk->sk_err &&
|
|
!((1 << sk->sk_state) &
|
|
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
sk->sk_write_pending--;
|
|
} while (!done);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_connect);
|
|
|
|
/**
|
|
* sk_stream_closing - Return 1 if we still have things to send in our buffers.
|
|
* @sk: socket to verify
|
|
*/
|
|
static inline int sk_stream_closing(struct sock *sk)
|
|
{
|
|
return (1 << sk->sk_state) &
|
|
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
|
|
}
|
|
|
|
void sk_stream_wait_close(struct sock *sk, long timeout)
|
|
{
|
|
if (timeout) {
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
do {
|
|
if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait))
|
|
break;
|
|
} while (!signal_pending(current) && timeout);
|
|
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_close);
|
|
|
|
/**
|
|
* sk_stream_wait_memory - Wait for more memory for a socket
|
|
* @sk: socket to wait for memory
|
|
* @timeo_p: for how long
|
|
*/
|
|
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
|
|
{
|
|
int err = 0;
|
|
long vm_wait = 0;
|
|
long current_timeo = *timeo_p;
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
|
|
if (sk_stream_memory_free(sk))
|
|
current_timeo = vm_wait = get_random_u32_below(HZ / 5) + 2;
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
while (1) {
|
|
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
|
|
|
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
|
|
goto do_error;
|
|
if (!*timeo_p)
|
|
goto do_eagain;
|
|
if (signal_pending(current))
|
|
goto do_interrupted;
|
|
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
|
if (sk_stream_memory_free(sk) && !vm_wait)
|
|
break;
|
|
|
|
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
|
sk->sk_write_pending++;
|
|
sk_wait_event(sk, ¤t_timeo, sk->sk_err ||
|
|
(sk->sk_shutdown & SEND_SHUTDOWN) ||
|
|
(sk_stream_memory_free(sk) &&
|
|
!vm_wait), &wait);
|
|
sk->sk_write_pending--;
|
|
|
|
if (vm_wait) {
|
|
vm_wait -= current_timeo;
|
|
current_timeo = *timeo_p;
|
|
if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
|
|
(current_timeo -= vm_wait) < 0)
|
|
current_timeo = 0;
|
|
vm_wait = 0;
|
|
}
|
|
*timeo_p = current_timeo;
|
|
}
|
|
out:
|
|
if (!sock_flag(sk, SOCK_DEAD))
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
return err;
|
|
|
|
do_error:
|
|
err = -EPIPE;
|
|
goto out;
|
|
do_eagain:
|
|
/* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
|
|
* be generated later.
|
|
* When TCP receives ACK packets that make room, tcp_check_space()
|
|
* only calls tcp_new_space() if SOCK_NOSPACE is set.
|
|
*/
|
|
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
|
err = -EAGAIN;
|
|
goto out;
|
|
do_interrupted:
|
|
err = sock_intr_errno(*timeo_p);
|
|
goto out;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_memory);
|
|
|
|
int sk_stream_error(struct sock *sk, int flags, int err)
|
|
{
|
|
if (err == -EPIPE)
|
|
err = sock_error(sk) ? : -EPIPE;
|
|
if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
|
|
send_sig(SIGPIPE, current, 0);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_error);
|
|
|
|
void sk_stream_kill_queues(struct sock *sk)
|
|
{
|
|
/* First the read buffer. */
|
|
__skb_queue_purge(&sk->sk_receive_queue);
|
|
|
|
/* Next, the error queue.
|
|
* We need to use queue lock, because other threads might
|
|
* add packets to the queue without socket lock being held.
|
|
*/
|
|
skb_queue_purge(&sk->sk_error_queue);
|
|
|
|
/* Next, the write queue. */
|
|
WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue));
|
|
|
|
/* Account for returned memory. */
|
|
sk_mem_reclaim_final(sk);
|
|
|
|
WARN_ON_ONCE(sk->sk_wmem_queued);
|
|
|
|
/* It is _impossible_ for the backlog to contain anything
|
|
* when we get here. All user references to this socket
|
|
* have gone away, only the net layer knows can touch it.
|
|
*/
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_kill_queues);
|