From 487deb3e3393cccff0f148c4703efb185d46e314 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:50 +0100 Subject: [PATCH 1/4] selftests/bpf: robustify test_xdp_do_redirect with more payload magics Currently, the test relies on that only dropped ("xmitted") frames will be recycled and if a frame became an skb, it will be freed later by the stack and never come back to its page_pool. So, it easily gets broken by trying to recycle skbs[0]: test_xdp_do_redirect:PASS:pkt_count_xdp 0 nsec test_xdp_do_redirect:FAIL:pkt_count_zero unexpected pkt_count_zero: actual 9936 != expected 2 test_xdp_do_redirect:PASS:pkt_count_tc 0 nsec That huge mismatch happened because after the TC ingress hook zeroes the magic, the page gets recycled when skb is freed, not returned to the MM layer. "Live frames" mode initializes only new pages and keeps the recycled ones as is by design, so they appear with zeroed magic on the Rx path again. Expand the possible magic values from two: 0 (was "xmitted"/dropped or did hit the TC hook) and 0x42 (hit the input XDP prog) to three: the new one will mark frames hit the TC hook, so that they will elide both @pkt_count_zero and @pkt_count_xdp. They can then be recycled to their page_pool or returned to the page allocator, this won't affect the counters anyhow. Just make sure to mark them as "input" (0x42) when they appear on the Rx path again. Also make an enum from those magics, so that they will be always visible and can be changed in just one place anytime. This also eases adding any new marks later on. Link: https://github.com/kernel-patches/bpf/actions/runs/4386538411/jobs/7681081789 Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-2-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/test_xdp_do_redirect.c | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 77a123071940..cd2d4e3258b8 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -4,6 +4,19 @@ #define ETH_ALEN 6 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr)) + +/** + * enum frame_mark - magics to distinguish page/packet paths + * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC. + * @MARK_IN: frame is being processed by the input XDP prog. + * @MARK_SKB: frame did hit the TC ingress hook as an skb. + */ +enum frame_mark { + MARK_XMIT = 0U, + MARK_IN = 0x42, + MARK_SKB = 0x45, +}; + const volatile int ifindex_out; const volatile int ifindex_in; const volatile __u8 expect_dst[ETH_ALEN]; @@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp) if (*metadata != 0x42) return XDP_ABORTED; - if (*payload == 0) { - *payload = 0x42; + if (*payload == MARK_XMIT) pkts_seen_zero++; - } + + *payload = MARK_IN; if (bpf_xdp_adjust_meta(xdp, 4)) return XDP_ABORTED; @@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp) return ret; } -static bool check_pkt(void *data, void *data_end) +static bool check_pkt(void *data, void *data_end, const __u32 mark) { struct ipv6hdr *iph = data + sizeof(struct ethhdr); __u8 *payload = data + HDR_SZ; @@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end) if (payload + 1 > data_end) return false; - if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42) + if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN) return false; /* reset the payload so the same packet doesn't get counted twice when * it cycles back through the kernel path and out the dst veth */ - *payload = 0; + *payload = mark; return true; } @@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; void *data_end = (void *)(long)xdp->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_XMIT)) pkts_seen_xdp++; - /* Return XDP_DROP to make sure the data page is recycled, like when it - * exits a physical NIC. Recycled pages will be counted in the + /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like + * it exited a physical NIC. Those pages will be counted in the * pkts_seen_zero counter above. */ return XDP_DROP; @@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb) void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_SKB)) pkts_seen_tc++; + /* Will be either recycled or freed, %MARK_SKB makes sure it won't + * hit any of the counters above. + */ return 0; } From 2c854e5fcd7e243f5a7cf6a6afa0ef83060c903c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:51 +0100 Subject: [PATCH 2/4] net: page_pool, skbuff: make skb_mark_for_recycle() always available skb_mark_for_recycle() is guarded with CONFIG_PAGE_POOL, this creates unneeded complication when using it in the generic code. For now, it's only used in the drivers always selecting Page Pool, so this works. Move the guards so that preprocessor will cut out only the operation itself and the function will still be a noop on !PAGE_POOL systems, but available there as well. No functional changes. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303020342.Wi2PRFFH-lkp@intel.com Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-3-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe661011644b..3f3a2a82a86b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ From 9c94bbf9a87b264294f42e6cc0f76d87854733ec Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:52 +0100 Subject: [PATCH 3/4] xdp: recycle Page Pool backed skbs built from XDP frames __xdp_build_skb_from_frame() state(d): /* Until page_pool get SKB return path, release DMA here */ Page Pool got skb pages recycling in April 2021, but missed this function. xdp_release_frame() is relevant only for Page Pool backed frames and it detaches the page from the corresponding page_pool in order to make it freeable via page_frag_free(). It can instead just mark the output skb as eligible for recycling if the frame is backed by a pp. No change for other memory model types (the same condition check as before). cpumap redirect and veth on Page Pool drivers now become zero-alloc (or almost). Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-4-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- net/core/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc553317..a2237cfca8e9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -658,8 +658,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); From d4e492338d11937c55841b1279287280d6e35894 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:53 +0100 Subject: [PATCH 4/4] xdp: remove unused {__,}xdp_release_frame() __xdp_build_skb_from_frame() was the last user of {__,}xdp_release_frame(), which detaches pages from the page_pool. All the consumers now recycle Page Pool skbs and page, except mlx5, stmmac and tsnep drivers, which use page_pool_release_page() directly (might change one day). It's safe to assume this functionality is not needed anymore and can be removed (in favor of recycling). Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-5-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 29 ----------------------------- net/core/xdp.c | 15 --------------- 2 files changed, 44 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index d517bfac937b..5393b3ebe56e 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -/* When sending xdp_frame into the network stack, then there is no - * return point callback, which is needed to release e.g. DMA-mapping - * resources with page_pool. Thus, have explicit function to release - * frame resources. - */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem); -static inline void xdp_release_frame(struct xdp_frame *xdpf) -{ - struct xdp_mem_info *mem = &xdpf->mem; - struct skb_shared_info *sinfo; - int i; - - /* Curr only page_pool needs this */ - if (mem->type != MEM_TYPE_PAGE_POOL) - return; - - if (likely(!xdp_frame_has_frags(xdpf))) - goto out; - - sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); - - __xdp_release_frame(page_address(page), mem); - } -out: - __xdp_release_frame(xdpf->data, mem); -} - static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; diff --git a/net/core/xdp.c b/net/core/xdp.c index a2237cfca8e9..8d3ad315f18d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ out: } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) {