A recent change triggers a KMSAN warning, because request sockets do not initialize @sk_rx_queue_mapping field. Add sk_rx_queue_update() helper to make our intent clear. BUG: KMSAN: uninit-value in sk_rx_queue_set include/net/sock.h:1922 [inline] BUG: KMSAN: uninit-value in tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922 sk_rx_queue_set include/net/sock.h:1922 [inline] tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 invoke_softirq+0xa4/0x130 kernel/softirq.c:432 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x76/0x130 kernel/softirq.c:648 common_interrupt+0xb6/0xd0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 smap_restore 
arch/x86/include/asm/smap.h:67 [inline] get_shadow_origin_ptr mm/kmsan/instrumentation.c:31 [inline] __msan_metadata_ptr_for_load_1+0x28/0x30 mm/kmsan/instrumentation.c:63 tomoyo_check_acl+0x1b0/0x630 security/tomoyo/domain.c:173 tomoyo_path_permission security/tomoyo/file.c:586 [inline] tomoyo_check_open_permission+0x61f/0xe10 security/tomoyo/file.c:777 tomoyo_file_open+0x24f/0x2d0 security/tomoyo/tomoyo.c:311 security_file_open+0xb1/0x1f0 security/security.c:1635 do_dentry_open+0x4e4/0x1bf0 fs/open.c:809 vfs_open+0xaf/0xe0 fs/open.c:957 do_open fs/namei.c:3426 [inline] path_openat+0x52f1/0x5dd0 fs/namei.c:3559 do_filp_open+0x306/0x760 fs/namei.c:3586 do_sys_openat2+0x263/0x8f0 fs/open.c:1212 do_sys_open fs/open.c:1228 [inline] __do_sys_open fs/open.c:1236 [inline] __se_sys_open fs/open.c:1232 [inline] __x64_sys_open+0x314/0x380 fs/open.c:1232 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409 alloc_pages+0x8a5/0xb80 alloc_slab_page mm/slub.c:1810 [inline] allocate_slab+0x287/0x1c20 mm/slub.c:1947 new_slab mm/slub.c:2010 [inline] ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039 __slab_alloc mm/slub.c:3126 [inline] slab_alloc_node mm/slub.c:3217 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264 reqsk_alloc include/net/request_sock.h:91 [inline] inet_reqsk_alloc+0xaf/0x8b0 net/ipv4/tcp_input.c:6712 tcp_conn_request+0x910/0x4dc0 net/ipv4/tcp_input.c:6852 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 
net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20211130182939.2584764-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
165 lines
3.9 KiB
C
165 lines
3.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* net busy poll support
|
|
* Copyright(c) 2013 Intel Corporation.
|
|
*
|
|
* Author: Eliezer Tamir
|
|
*
|
|
* Contact Information:
|
|
* e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
|
*/
|
|
|
|
#ifndef _LINUX_NET_BUSY_POLL_H
|
|
#define _LINUX_NET_BUSY_POLL_H
|
|
|
|
#include <linux/netdevice.h>
|
|
#include <linux/sched/clock.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <net/ip.h>
|
|
|
|
/* 0 - Reserved to indicate value not set
|
|
* 1..NR_CPUS - Reserved for sender_cpu
|
|
* NR_CPUS+1..~0 - Region available for NAPI IDs
|
|
*/
|
|
/* Lowest value of the NAPI ID region: one past the 1..NR_CPUS sender_cpu range. */
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
|
|
|
|
/* Budget sk_busy_loop() passes to napi_busy_loop() when sk->sk_busy_poll_budget is 0. */
#define BUSY_POLL_BUDGET 8
|
|
|
|
#ifdef CONFIG_NET_RX_BUSY_POLL
|
|
|
|
struct napi_struct;
|
|
extern unsigned int sysctl_net_busy_read __read_mostly;
|
|
extern unsigned int sysctl_net_busy_poll __read_mostly;
|
|
|
|
static inline bool net_busy_loop_on(void)
|
|
{
|
|
return sysctl_net_busy_poll;
|
|
}
|
|
|
|
static inline bool sk_can_busy_loop(const struct sock *sk)
|
|
{
|
|
return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
|
|
}
|
|
|
|
/* Loop-end callback that sk_busy_loop() hands to napi_busy_loop(), with the socket as @p. */
bool sk_busy_loop_end(void *p, unsigned long start_time);
|
|
|
|
/*
 * Busy-poll the NAPI context identified by @napi_id (defined outside this
 * header).
 *
 * @napi_id:          NAPI ID to poll.
 * @loop_end:         callback taking @loop_end_arg and a start time; may be
 *                    NULL (sk_busy_loop() passes NULL for non-blocking use).
 * @loop_end_arg:     opaque pointer forwarded to @loop_end.
 * @prefer_busy_poll: caller's busy-poll preference flag.
 * @budget:           poll budget for the loop.
 */
void napi_busy_loop(unsigned int napi_id,
		    bool (*loop_end)(void *, unsigned long),
		    void *loop_end_arg,
		    bool prefer_busy_poll,
		    u16 budget);
|
|
|
|
#else /* CONFIG_NET_RX_BUSY_POLL */
|
|
/*
 * Stub used when CONFIG_NET_RX_BUSY_POLL is not set: busy polling is never
 * enabled.
 *
 * Returns bool so the signature matches the CONFIG_NET_RX_BUSY_POLL variant.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_RX_BUSY_POLL is not set: no socket can busy-poll.
 *
 * @sk is unused. It is const-qualified so the signature matches the
 * CONFIG_NET_RX_BUSY_POLL variant and callers holding a const socket
 * pointer build in both configurations.
 */
static inline bool sk_can_busy_loop(const struct sock *sk)
{
	return false;
}
|
|
|
|
#endif /* CONFIG_NET_RX_BUSY_POLL */
|
|
|
|
/*
 * Current time in the unit used by the busy-poll timeout helpers.
 *
 * With CONFIG_NET_RX_BUSY_POLL this is local_clock() shifted right by 10
 * bits (i.e. divided by 1024); presumably an approximate ns -> usec
 * conversion, to be confirmed against local_clock(). Without it the
 * function always returns 0.
 */
static inline unsigned long busy_loop_current_time(void)
{
	unsigned long now = 0;

#ifdef CONFIG_NET_RX_BUSY_POLL
	now = (unsigned long)(local_clock() >> 10);
#endif
	return now;
}
|
|
|
|
/*
 * In poll/select we use the global sysctl_net_busy_poll value.
 *
 * Returns true once the current busy-loop time is after
 * @start_time + sysctl_net_busy_poll. Also returns true when the sysctl is
 * zero or CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline bool busy_loop_timeout(unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long window = READ_ONCE(sysctl_net_busy_poll);

	if (window) {
		unsigned long now = busy_loop_current_time();

		return time_after(now, start_time + window);
	}
#endif
	/* Busy polling is off or compiled out: report "timed out". */
	return true;
}
|
|
|
|
/*
 * Per-socket counterpart of busy_loop_timeout(): the window comes from
 * sk->sk_ll_usec instead of the global sysctl.
 *
 * Returns true once the current busy-loop time is after
 * @start_time + sk->sk_ll_usec. Also returns true when sk_ll_usec is zero
 * or CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline bool sk_busy_loop_timeout(struct sock *sk,
					unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long window = READ_ONCE(sk->sk_ll_usec);

	if (window) {
		unsigned long now = busy_loop_current_time();

		return time_after(now, start_time + window);
	}
#endif
	/* No per-socket window configured, or support compiled out. */
	return true;
}
|
|
|
|
/*
 * Busy-poll the NAPI context recorded in sk->sk_napi_id, if there is one.
 *
 * @sk:       socket whose sk_napi_id, sk_prefer_busy_poll and
 *            sk_busy_poll_budget are read.
 * @nonblock: when non-zero, no loop-end callback is passed to
 *            napi_busy_loop(); otherwise sk_busy_loop_end is used.
 *
 * Does nothing when sk_napi_id is below MIN_NAPI_ID or when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void sk_busy_loop(struct sock *sk, int nonblock)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int id = READ_ONCE(sk->sk_napi_id);
	bool (*end_fn)(void *, unsigned long);
	bool prefer;
	u16 budget;

	/* Values below MIN_NAPI_ID are not NAPI IDs: nothing to poll. */
	if (id < MIN_NAPI_ID)
		return;

	end_fn = nonblock ? NULL : sk_busy_loop_end;
	prefer = READ_ONCE(sk->sk_prefer_busy_poll);
	/* Fall back to the default budget when the socket has none set. */
	budget = READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET;

	napi_busy_loop(id, end_fn, sk, prefer, budget);
#endif
}
|
|
|
|
/*
 * Used in the NIC receive handler to mark the skb with the NAPI ID of the
 * context that received it.
 *
 * No-op when CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* Keep a valid NAPI ID that is already recorded in the skb. */
	if (skb->napi_id >= MIN_NAPI_ID)
		return;

	skb->napi_id = napi->napi_id;
#endif
}
|
|
|
|
/*
 * Used in the protocol handler to propagate the napi_id from the skb to
 * the socket.
 *
 * With CONFIG_NET_RX_BUSY_POLL, sk->sk_napi_id is written only when it
 * differs from skb->napi_id. sk_rx_queue_update() is called in every
 * configuration.
 */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int id = skb->napi_id;

	/* Skip the store when the socket already holds this ID. */
	if (unlikely(READ_ONCE(sk->sk_napi_id) != id))
		WRITE_ONCE(sk->sk_napi_id, id);
#endif
	sk_rx_queue_update(sk, skb);
}
|
|
|
|
/*
 * Record @napi_id in sk->sk_napi_id only if the socket has none yet
 * (the field is currently zero).
 *
 * No-op when CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* A non-zero value means an ID was already recorded: keep it. */
	if (READ_ONCE(sk->sk_napi_id))
		return;

	WRITE_ONCE(sk->sk_napi_id, napi_id);
#endif
}
|
|
|
|
/*
 * Variant used for unconnected sockets: the skb's napi_id is recorded on
 * the socket only if the socket does not have one yet.
 *
 * No-op when CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void sk_mark_napi_id_once(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int id = skb->napi_id;

	__sk_mark_napi_id_once(sk, id);
#endif
}
|
|
|
|
/*
 * XDP variant of sk_mark_napi_id_once(): the NAPI ID is taken from the
 * buffer's receive-queue info (xdp->rxq->napi_id) and recorded on the
 * socket only if the socket does not have one yet.
 *
 * No-op when CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
					    const struct xdp_buff *xdp)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int id = xdp->rxq->napi_id;

	__sk_mark_napi_id_once(sk, id);
#endif
}
|
|
|
|
#endif /* _LINUX_NET_BUSY_POLL_H */
|