Merge branch 'xdp-page_pool-fixes-and-in-flight-accounting'

Jesper Dangaard Brouer says:

====================
xdp: page_pool fixes and in-flight accounting

This patchset fix page_pool API and users, such that drivers can use it for
DMA-mapping. A number of places exist, where the DMA-mapping would not get
released/unmapped, all these are fixed. This occurs e.g. when an xdp_frame
gets converted to an SKB. As network stack doesn't have any callback for XDP
memory models.

The patchset also address a shutdown race-condition. Today removing a XDP
memory model, based on page_pool, is only delayed one RCU grace period. This
isn't enough as redirected xdp_frames can still be in-flight on different
queues (remote driver TX, cpumap or veth).

We stress that when drivers use page_pool for DMA-mapping, then they MUST
use one packet per page. This might change in the future, but more work lies
ahead, before we can lift this restriction.

This patchset change the page_pool API to be more strict, as in-flight page
accounting is added.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-06-19 11:23:14 -04:00
commit 2a54003e7a
12 changed files with 506 additions and 49 deletions

View File

@ -545,8 +545,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
if (err)
if (err) {
page_pool_free(rq->page_pool);
goto err_free;
}
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@ -611,8 +613,6 @@ err_rq_wq_destroy:
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
if (rq->page_pool)
page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
@ -625,10 +625,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
if (rq->page_pool)
page_pool_destroy(rq->page_pool);
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
@ -645,6 +641,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
mlx5e_page_release(rq, dma_info, false);
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
mlx5_wq_destroy(&rq->wq_ctrl);
}

View File

@ -248,7 +248,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
PAGE_SIZE, rq->buff.map_dir);
if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
put_page(dma_info->page);
page_pool_recycle_direct(rq->page_pool, dma_info->page);
dma_info->page = NULL;
return -ENOMEM;
}
@ -272,6 +272,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
page_pool_recycle_direct(rq->page_pool, dma_info->page);
} else {
mlx5e_page_dma_unmap(rq, dma_info);
page_pool_release_page(rq->page_pool, dma_info->page);
put_page(dma_info->page);
}
}

View File

@ -547,6 +547,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
goto err;
}
xdp_release_frame(frame);
xdp_scrub_frame(frame);
skb->protocol = eth_type_trans(skb, rq->dev);
err:

View File

@ -16,14 +16,16 @@
* page_pool_alloc_pages() call. Drivers should likely use
* page_pool_dev_alloc_pages() replacing dev_alloc_pages().
*
* If page_pool handles DMA mapping (use page->private), then API user
* is responsible for invoking page_pool_put_page() once. In-case of
* elevated refcnt, the DMA state is released, assuming other users of
* the page will eventually call put_page().
* API keeps track of in-flight pages, in-order to let API user know
* when it is safe to dealloactor page_pool object. Thus, API users
* must make sure to call page_pool_release_page() when a page is
* "leaving" the page_pool. Or call page_pool_put_page() where
* appropiate. For maintaining correct accounting.
*
* If no DMA mapping is done, then it can act as shim-layer that
* fall-through to alloc_page. As no state is kept on the page, the
* regular put_page() call is sufficient.
* API user must only call page_pool_put_page() once on a page, as it
* will either recycle the page, or in case of elevated refcnt, it
* will release the DMA mapping and in-flight state accounting. We
* hope to lift this requirement in the future.
*/
#ifndef _NET_PAGE_POOL_H
#define _NET_PAGE_POOL_H
@ -66,9 +68,10 @@ struct page_pool_params {
};
struct page_pool {
struct rcu_head rcu;
struct page_pool_params p;
u32 pages_state_hold_cnt;
/*
* Data structure for allocation side
*
@ -96,6 +99,8 @@ struct page_pool {
* TODO: Implement bulk return pages into this structure.
*/
struct ptr_ring ring;
atomic_t pages_state_release_cnt;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@ -109,7 +114,16 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
struct page_pool *page_pool_create(const struct page_pool_params *params);
void page_pool_destroy(struct page_pool *pool);
void __page_pool_free(struct page_pool *pool);
static inline void page_pool_free(struct page_pool *pool)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
__page_pool_free(pool);
#endif
}
/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
@ -132,6 +146,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
__page_pool_put_page(pool, page, true);
}
/* API user MUST have disconnected alloc-side (not allowed to call
* page_pool_alloc_pages()) before calling this. The free-side can
* still run concurrently, to handle in-flight packet-pages.
*
* A request to shutdown can fail (with false) if there are still
* in-flight packet-pages.
*/
bool __page_pool_request_shutdown(struct page_pool *pool);
static inline bool page_pool_request_shutdown(struct page_pool *pool)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
return __page_pool_request_shutdown(pool);
#endif
}
/* Disconnects a page (from a page_pool). API users can have a need
* to disconnect a page (from a page_pool), to allow it to be used as
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
void page_pool_unmap_page(struct page_pool *pool, struct page *page);
static inline void page_pool_release_page(struct page_pool *pool,
struct page *page)
{
#ifdef CONFIG_PAGE_POOL
page_pool_unmap_page(pool, page);
#endif
}
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
return page->dma_addr;
}
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL

View File

@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
/* When sending xdp_frame into the network stack, then there is no
* return point callback, which is needed to release e.g. DMA-mapping
* resources with page_pool. Thus, have explicit function to release
* frame resources.
*/
void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;
/* Curr only page_pool needs this */
if (mem->type == MEM_TYPE_PAGE_POOL)
__xdp_release_frame(xdpf->data, mem);
}
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);

23
include/net/xdp_priv.h Normal file
View File

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_NET_XDP_PRIV_H__
#define __LINUX_NET_XDP_PRIV_H__
#include <linux/rhashtable.h>
/* Private to net/core/xdp.c, but used by trace/events/xdp.h */
struct xdp_mem_allocator {
struct xdp_mem_info mem;
union {
void *allocator;
struct page_pool *page_pool;
struct zero_copy_allocator *zc_alloc;
};
int disconnect_cnt;
unsigned long defer_start;
struct rhash_head node;
struct rcu_head rcu;
struct delayed_work defer_wq;
unsigned long defer_warn;
};
#endif /* __LINUX_NET_XDP_PRIV_H__ */

View File

@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM page_pool
#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PAGE_POOL_H
#include <linux/types.h>
#include <linux/tracepoint.h>
#include <net/page_pool.h>
TRACE_EVENT(page_pool_inflight,
TP_PROTO(const struct page_pool *pool,
s32 inflight, u32 hold, u32 release),
TP_ARGS(pool, inflight, hold, release),
TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(s32, inflight)
__field(u32, hold)
__field(u32, release)
),
TP_fast_assign(
__entry->pool = pool;
__entry->inflight = inflight;
__entry->hold = hold;
__entry->release = release;
),
TP_printk("page_pool=%p inflight=%d hold=%u release=%u",
__entry->pool, __entry->inflight, __entry->hold, __entry->release)
);
TRACE_EVENT(page_pool_state_release,
TP_PROTO(const struct page_pool *pool,
const struct page *page, u32 release),
TP_ARGS(pool, page, release),
TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(const struct page *, page)
__field(u32, release)
),
TP_fast_assign(
__entry->pool = pool;
__entry->page = page;
__entry->release = release;
),
TP_printk("page_pool=%p page=%p release=%u",
__entry->pool, __entry->page, __entry->release)
);
TRACE_EVENT(page_pool_state_hold,
TP_PROTO(const struct page_pool *pool,
const struct page *page, u32 hold),
TP_ARGS(pool, page, hold),
TP_STRUCT__entry(
__field(const struct page_pool *, pool)
__field(const struct page *, page)
__field(u32, hold)
),
TP_fast_assign(
__entry->pool = pool;
__entry->page = page;
__entry->hold = hold;
),
TP_printk("page_pool=%p page=%p hold=%u",
__entry->pool, __entry->page, __entry->hold)
);
#endif /* _TRACE_PAGE_POOL_H */
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -269,6 +269,121 @@ TRACE_EVENT(xdp_devmap_xmit,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
/* Expect users already include <net/xdp.h>, but not xdp_priv.h */
#include <net/xdp_priv.h>
#define __MEM_TYPE_MAP(FN) \
FN(PAGE_SHARED) \
FN(PAGE_ORDER0) \
FN(PAGE_POOL) \
FN(ZERO_COPY)
#define __MEM_TYPE_TP_FN(x) \
TRACE_DEFINE_ENUM(MEM_TYPE_##x);
#define __MEM_TYPE_SYM_FN(x) \
{ MEM_TYPE_##x, #x },
#define __MEM_TYPE_SYM_TAB \
__MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 }
__MEM_TYPE_MAP(__MEM_TYPE_TP_FN)
TRACE_EVENT(mem_disconnect,
TP_PROTO(const struct xdp_mem_allocator *xa,
bool safe_to_remove, bool force),
TP_ARGS(xa, safe_to_remove, force),
TP_STRUCT__entry(
__field(const struct xdp_mem_allocator *, xa)
__field(u32, mem_id)
__field(u32, mem_type)
__field(const void *, allocator)
__field(bool, safe_to_remove)
__field(bool, force)
__field(int, disconnect_cnt)
),
TP_fast_assign(
__entry->xa = xa;
__entry->mem_id = xa->mem.id;
__entry->mem_type = xa->mem.type;
__entry->allocator = xa->allocator;
__entry->safe_to_remove = safe_to_remove;
__entry->force = force;
__entry->disconnect_cnt = xa->disconnect_cnt;
),
TP_printk("mem_id=%d mem_type=%s allocator=%p"
" safe_to_remove=%s force=%s disconnect_cnt=%d",
__entry->mem_id,
__print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
__entry->allocator,
__entry->safe_to_remove ? "true" : "false",
__entry->force ? "true" : "false",
__entry->disconnect_cnt
)
);
TRACE_EVENT(mem_connect,
TP_PROTO(const struct xdp_mem_allocator *xa,
const struct xdp_rxq_info *rxq),
TP_ARGS(xa, rxq),
TP_STRUCT__entry(
__field(const struct xdp_mem_allocator *, xa)
__field(u32, mem_id)
__field(u32, mem_type)
__field(const void *, allocator)
__field(const struct xdp_rxq_info *, rxq)
__field(int, ifindex)
),
TP_fast_assign(
__entry->xa = xa;
__entry->mem_id = xa->mem.id;
__entry->mem_type = xa->mem.type;
__entry->allocator = xa->allocator;
__entry->rxq = rxq;
__entry->ifindex = rxq->dev->ifindex;
),
TP_printk("mem_id=%d mem_type=%s allocator=%p"
" ifindex=%d",
__entry->mem_id,
__print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
__entry->allocator,
__entry->ifindex
)
);
TRACE_EVENT(mem_return_failed,
TP_PROTO(const struct xdp_mem_info *mem,
const struct page *page),
TP_ARGS(mem, page),
TP_STRUCT__entry(
__field(const struct page *, page)
__field(u32, mem_id)
__field(u32, mem_type)
),
TP_fast_assign(
__entry->page = page;
__entry->mem_id = mem->id;
__entry->mem_type = mem->type;
),
TP_printk("mem_id=%d mem_type=%s page=%p",
__entry->mem_id,
__print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
__entry->page
)
);
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>

View File

@ -208,6 +208,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* - RX ring dev queue index (skb_record_rx_queue)
*/
/* Until page_pool get SKB return path, release DMA here */
xdp_release_frame(xdpf);
/* Allow SKB to reuse area used by xdp_frame */
xdp_scrub_frame(xdpf);

View File

@ -43,6 +43,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
#include <trace/events/page_pool.h>
#endif
#include <trace/events/neigh.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done);

View File

@ -4,9 +4,11 @@
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
* Copyright (C) 2016 Red Hat, Inc.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <net/page_pool.h>
#include <linux/dma-direction.h>
@ -14,6 +16,8 @@
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
#include <trace/events/page_pool.h>
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@ -43,6 +47,11 @@ static int page_pool_init(struct page_pool *pool,
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
atomic_set(&pool->pages_state_release_cnt, 0);
if (pool->p.flags & PP_FLAG_DMA_MAP)
get_device(pool->p.dev);
return 0;
}
@ -151,6 +160,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
page->dma_addr = dma;
skip_dma_map:
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
/* When page just alloc'ed is should/must have refcnt 1. */
return page;
}
@ -173,6 +187,33 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
}
EXPORT_SYMBOL(page_pool_alloc_pages);
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
* https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
*/
#define _distance(a, b) (s32)((a) - (b))
static s32 page_pool_inflight(struct page_pool *pool)
{
u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
s32 distance;
distance = _distance(hold_cnt, release_cnt);
trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt);
return distance;
}
static bool __page_pool_safe_to_destroy(struct page_pool *pool)
{
s32 inflight = page_pool_inflight(pool);
/* The distance should not be able to become negative */
WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
return (inflight == 0);
}
/* Cleanup page_pool state from page */
static void __page_pool_clean_page(struct page_pool *pool,
struct page *page)
@ -180,7 +221,7 @@ static void __page_pool_clean_page(struct page_pool *pool,
dma_addr_t dma;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
return;
goto skip_dma_unmap;
dma = page->dma_addr;
/* DMA unmap */
@ -188,12 +229,27 @@ static void __page_pool_clean_page(struct page_pool *pool,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
page->dma_addr = 0;
skip_dma_unmap:
atomic_inc(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page,
atomic_read(&pool->pages_state_release_cnt));
}
/* unmap the page and clean our state */
void page_pool_unmap_page(struct page_pool *pool, struct page *page)
{
/* When page is unmapped, this implies page will not be
* returned to page_pool.
*/
__page_pool_clean_page(pool, page);
}
EXPORT_SYMBOL(page_pool_unmap_page);
/* Return a page to the page allocator, cleaning up our state */
static void __page_pool_return_page(struct page_pool *pool, struct page *page)
{
__page_pool_clean_page(pool, page);
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
@ -285,21 +341,41 @@ static void __page_pool_empty_ring(struct page_pool *pool)
}
}
static void __page_pool_destroy_rcu(struct rcu_head *rcu)
static void __warn_in_flight(struct page_pool *pool)
{
struct page_pool *pool;
u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
s32 distance;
pool = container_of(rcu, struct page_pool, rcu);
distance = _distance(hold_cnt, release_cnt);
WARN(pool->alloc.count, "API usage violation");
__page_pool_empty_ring(pool);
ptr_ring_cleanup(&pool->ring, NULL);
kfree(pool);
/* Drivers should fix this, but only problematic when DMA is used */
WARN(1, "Still in-flight pages:%d hold:%u released:%u",
distance, hold_cnt, release_cnt);
}
/* Cleanup and release resources */
void page_pool_destroy(struct page_pool *pool)
void __page_pool_free(struct page_pool *pool)
{
WARN(pool->alloc.count, "API usage violation");
WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
/* Can happen due to forced shutdown */
if (!__page_pool_safe_to_destroy(pool))
__warn_in_flight(pool);
ptr_ring_cleanup(&pool->ring, NULL);
if (pool->p.flags & PP_FLAG_DMA_MAP)
put_device(pool->p.dev);
kfree(pool);
}
EXPORT_SYMBOL(__page_pool_free);
/* Request to shutdown: release pages cached by page_pool, and check
* for in-flight pages
*/
bool __page_pool_request_shutdown(struct page_pool *pool)
{
struct page *page;
@ -317,7 +393,6 @@ void page_pool_destroy(struct page_pool *pool)
*/
__page_pool_empty_ring(pool);
/* An xdp_mem_allocator can still ref page_pool pointer */
call_rcu(&pool->rcu, __page_pool_destroy_rcu);
return __page_pool_safe_to_destroy(pool);
}
EXPORT_SYMBOL(page_pool_destroy);
EXPORT_SYMBOL(__page_pool_request_shutdown);

View File

@ -14,6 +14,8 @@
#include <net/page_pool.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#define REG_STATE_NEW 0x0
#define REG_STATE_REGISTERED 0x1
@ -29,17 +31,6 @@ static int mem_id_next = MEM_ID_MIN;
static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;
struct xdp_mem_allocator {
struct xdp_mem_info mem;
union {
void *allocator;
struct page_pool *page_pool;
struct zero_copy_allocator *zc_alloc;
};
struct rhash_head node;
struct rcu_head rcu;
};
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
const u32 *k = data;
@ -79,13 +70,13 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
/* Allocator have indicated safe to remove before this is called */
if (xa->mem.type == MEM_TYPE_PAGE_POOL)
page_pool_free(xa->page_pool);
/* Allow this ID to be reused */
ida_simple_remove(&mem_id_pool, xa->mem.id);
/* Notice, driver is expected to free the *allocator,
* e.g. page_pool, and MUST also use RCU free.
*/
/* Poison memory */
xa->mem.id = 0xFFFF;
xa->mem.type = 0xF0F0;
@ -94,6 +85,64 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
kfree(xa);
}
bool __mem_id_disconnect(int id, bool force)
{
struct xdp_mem_allocator *xa;
bool safe_to_remove = true;
mutex_lock(&mem_id_lock);
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
if (!xa) {
mutex_unlock(&mem_id_lock);
WARN(1, "Request remove non-existing id(%d), driver bug?", id);
return true;
}
xa->disconnect_cnt++;
/* Detects in-flight packet-pages for page_pool */
if (xa->mem.type == MEM_TYPE_PAGE_POOL)
safe_to_remove = page_pool_request_shutdown(xa->page_pool);
trace_mem_disconnect(xa, safe_to_remove, force);
if ((safe_to_remove || force) &&
!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
mutex_unlock(&mem_id_lock);
return (safe_to_remove|force);
}
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (30 * HZ)
#define DEFER_MAX_RETRIES 120
static void mem_id_disconnect_defer_retry(struct work_struct *wq)
{
struct delayed_work *dwq = to_delayed_work(wq);
struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
bool force = false;
if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
force = true;
if (__mem_id_disconnect(xa->mem.id, force))
return;
/* Periodic warning */
if (time_after_eq(jiffies, xa->defer_warn)) {
int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
__func__, xa->mem.id, xa->disconnect_cnt, sec);
xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
}
/* Still not ready to be disconnected, retry later */
schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
@ -112,16 +161,30 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
if (__mem_id_disconnect(id, false))
return;
/* Could not disconnect, defer new disconnect attempt to later */
mutex_lock(&mem_id_lock);
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
if (!xa) {
mutex_unlock(&mem_id_lock);
return;
}
xa->defer_start = jiffies;
xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
mutex_unlock(&mem_id_lock);
schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
/* This unregister operation will also cleanup and destroy the
* allocator. The page_pool_free() operation is first called when it's
* safe to remove, possibly deferred to a workqueue.
*/
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
/* Simplify driver cleanup code paths, allow unreg "unused" */
@ -301,12 +364,15 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
xdp_rxq->mem.id = 0;
errno = PTR_ERR(ptr);
goto err;
}
mutex_unlock(&mem_id_lock);
trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
err:
mutex_unlock(&mem_id_lock);
@ -333,10 +399,13 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa) {
if (likely(xa)) {
napi_direct &= !xdp_return_frame_no_direct();
page_pool_put_page(xa->page_pool, page, napi_direct);
} else {
/* Hopefully stack show who to blame for late return */
WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
trace_mem_return_failed(mem, page);
put_page(page);
}
rcu_read_unlock();
@ -379,6 +448,21 @@ void xdp_return_buff(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
struct xdp_mem_allocator *xa;
struct page *page;
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
page_pool_release_page(xa->page_pool, page);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
int xdp_attachment_query(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{