c5114710c8
Currently when packet is shrunk via bpf_xdp_adjust_tail() and memory type is set to MEM_TYPE_XSK_BUFF_POOL, null ptr dereference happens: [1136314.192256] BUG: kernel NULL pointer dereference, address: 0000000000000034 [1136314.203943] #PF: supervisor read access in kernel mode [1136314.213768] #PF: error_code(0x0000) - not-present page [1136314.223550] PGD 0 P4D 0 [1136314.230684] Oops: 0000 [#1] PREEMPT SMP NOPTI [1136314.239621] CPU: 8 PID: 54203 Comm: xdpsock Not tainted 6.6.0+ #257 [1136314.250469] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [1136314.265615] RIP: 0010:__xdp_return+0x6c/0x210 [1136314.274653] Code: ad 00 48 8b 47 08 49 89 f8 a8 01 0f 85 9b 01 00 00 0f 1f 44 00 00 f0 41 ff 48 34 75 32 4c 89 c7 e9 79 cd 80 ff 83 fe 03 75 17 <f6> 41 34 01 0f 85 02 01 00 00 48 89 cf e9 22 cc 1e 00 e9 3d d2 86 [1136314.302907] RSP: 0018:ffffc900089f8db0 EFLAGS: 00010246 [1136314.312967] RAX: ffffc9003168aed0 RBX: ffff8881c3300000 RCX: 0000000000000000 [1136314.324953] RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffffc9003168c000 [1136314.336929] RBP: 0000000000000ae0 R08: 0000000000000002 R09: 0000000000010000 [1136314.348844] R10: ffffc9000e495000 R11: 0000000000000040 R12: 0000000000000001 [1136314.360706] R13: 0000000000000524 R14: ffffc9003168aec0 R15: 0000000000000001 [1136314.373298] FS: 00007f8df8bbcb80(0000) GS:ffff8897e0e00000(0000) knlGS:0000000000000000 [1136314.386105] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1136314.396532] CR2: 0000000000000034 CR3: 00000001aa912002 CR4: 00000000007706f0 [1136314.408377] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1136314.420173] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1136314.431890] PKRU: 55555554 [1136314.439143] Call Trace: [1136314.446058] <IRQ> [1136314.452465] ? __die+0x20/0x70 [1136314.459881] ? page_fault_oops+0x15b/0x440 [1136314.468305] ? exc_page_fault+0x6a/0x150 [1136314.476491] ? 
asm_exc_page_fault+0x22/0x30 [1136314.484927] ? __xdp_return+0x6c/0x210 [1136314.492863] bpf_xdp_adjust_tail+0x155/0x1d0 [1136314.501269] bpf_prog_ccc47ae29d3b6570_xdp_sock_prog+0x15/0x60 [1136314.511263] ice_clean_rx_irq_zc+0x206/0xc60 [ice] [1136314.520222] ? ice_xmit_zc+0x6e/0x150 [ice] [1136314.528506] ice_napi_poll+0x467/0x670 [ice] [1136314.536858] ? ttwu_do_activate.constprop.0+0x8f/0x1a0 [1136314.546010] __napi_poll+0x29/0x1b0 [1136314.553462] net_rx_action+0x133/0x270 [1136314.561619] __do_softirq+0xbe/0x28e [1136314.569303] do_softirq+0x3f/0x60 This comes from __xdp_return() call with xdp_buff argument passed as NULL which is supposed to be consumed by xsk_buff_free() call. To address this properly, in ZC case, a node that represents the frag being removed has to be pulled out of xskb_list. Introduce appropriate xsk helpers to do such node operation and use them accordingly within bpf_xdp_adjust_tail(). Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Acked-by: Magnus Karlsson <magnus.karlsson@intel.com> # For the xsk header part Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Link: https://lore.kernel.org/r/20240124191602.566724-4-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
418 lines
9.3 KiB
C
418 lines
9.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* Interface for implementing AF_XDP zero-copy support in drivers.
|
|
* Copyright(c) 2020 Intel Corporation.
|
|
*/
|
|
|
|
#ifndef _LINUX_XDP_SOCK_DRV_H
|
|
#define _LINUX_XDP_SOCK_DRV_H
|
|
|
|
#include <net/xdp_sock.h>
|
|
#include <net/xsk_buff_pool.h>
|
|
|
|
/* Minimum UMEM chunk size, as a shift and as the value (1 << 11 == 2048). */
#define XDP_UMEM_MIN_CHUNK_SHIFT	11
#define XDP_UMEM_MIN_CHUNK_SIZE		(1 << XDP_UMEM_MIN_CHUNK_SHIFT)
|
|
|
|
struct xsk_cb_desc {
|
|
void *src;
|
|
u8 off;
|
|
u8 bytes;
|
|
};
|
|
|
|
#ifdef CONFIG_XDP_SOCKETS
|
|
|
|
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
|
|
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
|
|
u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
|
|
void xsk_tx_release(struct xsk_buff_pool *pool);
|
|
struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
|
|
u16 queue_id);
|
|
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
|
|
void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
|
|
void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
|
|
void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
|
|
bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
|
|
|
|
static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
|
|
{
|
|
return XDP_PACKET_HEADROOM + pool->headroom;
|
|
}
|
|
|
|
static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
|
|
{
|
|
return pool->chunk_size;
|
|
}
|
|
|
|
/* RX frame size: what is left of a chunk once the headroom is subtracted. */
static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
	u32 chunk = xsk_pool_get_chunk_size(pool);
	u32 headroom = xsk_pool_get_headroom(pool);

	return chunk - headroom;
}
|
|
|
|
/* Driver-facing wrapper: forwards @pool and @rxq to xp_set_rxq_info(). */
static inline void
xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
	xp_set_rxq_info(pool, rxq);
}
|
|
|
|
/* Driver-facing wrapper: forwards @pool and @desc to xp_fill_cb(). */
static inline void
xsk_pool_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
{
	xp_fill_cb(pool, desc);
}
|
|
|
|
/*
 * With CONFIG_NET_RX_BUSY_POLL, returns the napi_id of the rxq referenced by
 * the pool's first buffer head; otherwise returns 0.
 */
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
	unsigned int napi_id = 0;

#ifdef CONFIG_NET_RX_BUSY_POLL
	napi_id = pool->heads[0].xdp.rxq->napi_id;
#endif

	return napi_id;
}
|
|
|
|
/* Driver-facing wrapper: forwards @pool and @attrs to xp_dma_unmap(). */
static inline void
xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
	xp_dma_unmap(pool, attrs);
}
|
|
|
|
static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
|
|
struct device *dev, unsigned long attrs)
|
|
{
|
|
struct xdp_umem *umem = pool->umem;
|
|
|
|
return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
|
|
}
|
|
|
|
static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
|
|
|
return xp_get_dma(xskb);
|
|
}
|
|
|
|
static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
|
|
|
return xp_get_frame_dma(xskb);
|
|
}
|
|
|
|
/* Driver-facing wrapper: returns the result of xp_alloc() on @pool. */
static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff *buff = xp_alloc(pool);

	return buff;
}
|
|
|
|
static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
|
|
{
|
|
return !xp_mb_desc(desc);
|
|
}
|
|
|
|
/* Returns as many entries as possible up to max. 0 <= N <= max. */
|
|
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
|
|
{
|
|
return xp_alloc_batch(pool, xdp, max);
|
|
}
|
|
|
|
/* Driver-facing wrapper: returns the verdict of xp_can_alloc() for @count. */
static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	bool can_alloc = xp_can_alloc(pool, count);

	return can_alloc;
}
|
|
|
|
static inline void xsk_buff_free(struct xdp_buff *xdp)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
|
struct list_head *xskb_list = &xskb->pool->xskb_list;
|
|
struct xdp_buff_xsk *pos, *tmp;
|
|
|
|
if (likely(!xdp_buff_has_frags(xdp)))
|
|
goto out;
|
|
|
|
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
|
|
list_del(&pos->xskb_list_node);
|
|
xp_free(pos);
|
|
}
|
|
|
|
xdp_get_shared_info_from_buff(xdp)->nr_frags = 0;
|
|
out:
|
|
xp_free(xskb);
|
|
}
|
|
|
|
static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
|
|
{
|
|
struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
|
|
|
|
list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
|
|
}
|
|
|
|
static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
|
|
struct xdp_buff *ret = NULL;
|
|
struct xdp_buff_xsk *frag;
|
|
|
|
frag = list_first_entry_or_null(&xskb->pool->xskb_list,
|
|
struct xdp_buff_xsk, xskb_list_node);
|
|
if (frag) {
|
|
list_del(&frag->xskb_list_node);
|
|
ret = &frag->xdp;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline void xsk_buff_del_tail(struct xdp_buff *tail)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
|
|
|
|
list_del(&xskb->xskb_list_node);
|
|
}
|
|
|
|
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
|
|
struct xdp_buff_xsk *frag;
|
|
|
|
frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
|
|
xskb_list_node);
|
|
return &frag->xdp;
|
|
}
|
|
|
|
/*
 * Reset @xdp to describe @size bytes placed XDP_PACKET_HEADROOM past
 * data_hard_start: data and data_meta both point at that start, data_end is
 * @size beyond it, and flags are cleared.
 */
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
	void *start = xdp->data_hard_start + XDP_PACKET_HEADROOM;

	xdp->flags = 0;
	xdp->data = start;
	xdp->data_meta = start;
	xdp->data_end = start + size;
}
|
|
|
|
static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
|
|
u64 addr)
|
|
{
|
|
return xp_raw_get_dma(pool, addr);
|
|
}
|
|
|
|
/* Driver-facing wrapper: returns xp_raw_get_data() for @addr in @pool. */
static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	void *data = xp_raw_get_data(pool, addr);

	return data;
}
|
|
|
|
/*
 * Mask of every TX metadata flag accepted by xsk_buff_valid_tx_metadata().
 * The trailing "| 0" lets new flags be added as whole lines.
 */
#define XDP_TXMD_FLAGS_VALID (			\
		XDP_TXMD_FLAGS_TIMESTAMP |	\
		XDP_TXMD_FLAGS_CHECKSUM |	\
	0)
|
|
|
|
static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
|
|
{
|
|
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
|
|
}
|
|
|
|
/*
 * Locate the TX metadata stored tx_metadata_len bytes before the data that
 * @addr resolves to. Returns NULL when the pool has no TX metadata length
 * configured or when the metadata fails xsk_buff_valid_tx_metadata().
 */
static inline struct xsk_tx_metadata *
xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
	struct xsk_tx_metadata *meta = NULL;

	if (pool->tx_metadata_len) {
		meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
		if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
			meta = NULL; /* no way to signal the error to the user */
	}

	return meta;
}
|
|
|
|
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
|
|
{
|
|
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
|
|
|
if (!pool->dma_need_sync)
|
|
return;
|
|
|
|
xp_dma_sync_for_cpu(xskb);
|
|
}
|
|
|
|
static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
|
|
dma_addr_t dma,
|
|
size_t size)
|
|
{
|
|
xp_dma_sync_for_device(pool, dma, size);
|
|
}
|
|
|
|
#else
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns false. */
static inline bool
xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
	return false;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline u32
xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void xsk_tx_release(struct xsk_buff_pool *pool)
{
}
|
|
|
|
static inline struct xsk_buff_pool *
|
|
xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns false. */
static inline bool
xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
	return false;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline u32
xsk_pool_get_headroom(struct xsk_buff_pool *pool)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline u32
xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline u32
xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_pool_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline unsigned int
xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: performs no mapping and always returns 0. */
static inline int
xsk_pool_dma_map(struct xsk_buff_pool *pool, struct device *dev,
		 unsigned long attrs)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline dma_addr_t
xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns 0. */
static inline dma_addr_t
xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: never allocates, always returns NULL. */
static inline struct xdp_buff *
xsk_buff_alloc(struct xsk_buff_pool *pool)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns false. */
static inline bool
xsk_is_eop_desc(struct xdp_desc *desc)
{
	return false;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: allocates nothing and always returns 0. */
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool,
				       struct xdp_buff **xdp, u32 max)
{
	return 0;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns false. */
static inline bool
xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	return false;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_buff_free(struct xdp_buff *xdp)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_buff_add_frag(struct xdp_buff *xdp)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns NULL. */
static inline struct xdp_buff *
xsk_buff_get_frag(struct xdp_buff *first)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_buff_del_tail(struct xdp_buff *tail)
{
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns NULL. */
static inline struct xdp_buff *
xsk_buff_get_tail(struct xdp_buff *first)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing; @xdp is left untouched. */
static inline void
xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
|
|
|
|
static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
|
|
u64 addr)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns NULL. */
static inline void *
xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns false. */
static inline bool
xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
	return false;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: always returns NULL. */
static inline struct xsk_tx_metadata *
xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_XDP_SOCKETS stub: does nothing. */
static inline void
xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
|
|
|
|
static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
|
|
dma_addr_t dma,
|
|
size_t size)
|
|
{
|
|
}
|
|
|
|
#endif /* CONFIG_XDP_SOCKETS */
|
|
|
|
#endif /* _LINUX_XDP_SOCK_DRV_H */
|