Merge branch 'page_pool-allow-direct-bulk-recycling'
Alexander Lobakin says:

====================
page_pool: allow direct bulk recycling

Previously, there was no reliable way to check whether it's safe to use
the direct PP cache: the drivers were passing @allow_direct to the PP
recycling functions and that was it. Bulk recycling is used by
xdp_return_frame_bulk() on .ndo_xdp_xmit() frame completion, where the
page origin is unknown, so direct recycling has never been tried there.

Now that we have at least two ways of checking whether we're allowed to
perform direct recycling -- pool->p.napi (Jakub) and pool->cpuid
(Lorenzo) -- we can use them when doing bulk recycling as well. Just
move that logic from the skb core to the PP core and call it before
__page_pool_put_page() every time @allow_direct is false.

Under high .ndo_xdp_xmit() traffic load, the win is 2-3% Pps, assuming
the sending driver uses xdp_return_frame_bulk() on Tx completion.
====================

Link: https://lore.kernel.org/r/20240329165507.3240110-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in: commit eb05529a10
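For context, the path this series speeds up is the .ndo_xdp_xmit() Tx-completion
cleanup that returns frames via xdp_return_frame_bulk(). Below is a minimal,
illustrative sketch of such a handler; the ring and buffer types (my_tx_ring,
my_tx_buf) and the my_* helpers are hypothetical placeholders, while
xdp_frame_bulk_init(), xdp_return_frame_bulk() and xdp_flush_frame_bulk() are
the existing <net/xdp.h> helpers whose flush path ends up in
page_pool_put_page_bulk(). With this merge, that bulk call can recycle straight
into the pool's direct (lockless) cache whenever the completion runs in the
pool's NAPI/CPU context, instead of always bouncing through the ptr_ring.

/* Hypothetical driver Tx-completion handler, run from NAPI poll.
 * Frames that were sent via .ndo_xdp_xmit() are returned in a batch so
 * that page_pool_put_page_bulk() can recycle them in one go.
 */
#include <linux/rcupdate.h>
#include <net/xdp.h>

static void my_clean_xdp_tx_ring(struct my_tx_ring *ring, int budget)
{
        struct xdp_frame_bulk bq;

        xdp_frame_bulk_init(&bq);

        /* xdp_return_frame_bulk() looks up the memory model under RCU */
        rcu_read_lock();

        while (budget-- && my_tx_desc_done(ring)) {
                struct my_tx_buf *buf = my_tx_ring_next_buf(ring);

                /* Queue the frame in @bq; the batch is flushed to the
                 * page_pool automatically once it fills up.
                 */
                xdp_return_frame_bulk(buf->xdpf, &bq);
        }

        /* Return whatever is left in the partially filled batch. */
        xdp_flush_frame_bulk(&bq);

        rcu_read_unlock();
}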
@@ -3510,25 +3510,25 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
                     unsigned int headroom);
 int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
                          struct bpf_prog *prog);
-bool napi_pp_put_page(struct page *page, bool napi_safe);
+bool napi_pp_put_page(struct page *page);
 
 static inline void
-skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
+skb_page_unref(const struct sk_buff *skb, struct page *page)
 {
 #ifdef CONFIG_PAGE_POOL
-        if (skb->pp_recycle && napi_pp_put_page(page, napi_safe))
+        if (skb->pp_recycle && napi_pp_put_page(page))
                 return;
 #endif
         put_page(page);
 }
 
 static inline void
-napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
+napi_frag_unref(skb_frag_t *frag, bool recycle)
 {
         struct page *page = skb_frag_page(frag);
 
 #ifdef CONFIG_PAGE_POOL
-        if (recycle && napi_pp_put_page(page, napi_safe))
+        if (recycle && napi_pp_put_page(page))
                 return;
 #endif
         put_page(page);
@@ -3544,7 +3544,7 @@ napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
  */
 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-        napi_frag_unref(frag, recycle, false);
+        napi_frag_unref(frag, recycle);
 }
 
 /**
@@ -690,8 +690,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
                         page_pool_dma_sync_for_device(pool, page,
                                                       dma_sync_size);
 
-                if (allow_direct && in_softirq() &&
-                    page_pool_recycle_in_cache(page, pool))
+                if (allow_direct && page_pool_recycle_in_cache(page, pool))
                         return NULL;
 
                 /* Page found as candidate for recycling */
@@ -716,9 +715,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
         return NULL;
 }
 
+static bool page_pool_napi_local(const struct page_pool *pool)
+{
+        const struct napi_struct *napi;
+        u32 cpuid;
+
+        if (unlikely(!in_softirq()))
+                return false;
+
+        /* Allow direct recycle if we have reasons to believe that we are
+         * in the same context as the consumer would run, so there's
+         * no possible race.
+         * __page_pool_put_page() makes sure we're not in hardirq context
+         * and interrupts are enabled prior to accessing the cache.
+         */
+        cpuid = smp_processor_id();
+        if (READ_ONCE(pool->cpuid) == cpuid)
+                return true;
+
+        napi = READ_ONCE(pool->p.napi);
+
+        return napi && READ_ONCE(napi->list_owner) == cpuid;
+}
+
 void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
                                 unsigned int dma_sync_size, bool allow_direct)
 {
+        if (!allow_direct)
+                allow_direct = page_pool_napi_local(pool);
+
         page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
         if (page && !page_pool_recycle_in_ring(pool, page)) {
                 /* Cache full, fallback to free pages */
@@ -747,8 +772,11 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
                              int count)
 {
         int i, bulk_len = 0;
+        bool allow_direct;
         bool in_softirq;
 
+        allow_direct = page_pool_napi_local(pool);
+
         for (i = 0; i < count; i++) {
                 struct page *page = virt_to_head_page(data[i]);
 
@@ -756,13 +784,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
                 if (!page_pool_is_last_ref(page))
                         continue;
 
-                page = __page_pool_put_page(pool, page, -1, false);
+                page = __page_pool_put_page(pool, page, -1, allow_direct);
                 /* Approved for bulk recycling in ptr_ring cache */
                 if (page)
                         data[bulk_len++] = page;
         }
 
-        if (unlikely(!bulk_len))
+        if (!bulk_len)
                 return;
 
         /* Bulk producer into ptr_ring page_pool cache */
@@ -969,7 +997,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
 static void page_pool_disable_direct_recycling(struct page_pool *pool)
 {
         /* Disable direct recycling based on pool->cpuid.
-         * Paired with READ_ONCE() in napi_pp_put_page().
+         * Paired with READ_ONCE() in page_pool_napi_local().
          */
         WRITE_ONCE(pool->cpuid, -1);
 
@@ -1004,11 +1004,8 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
 EXPORT_SYMBOL(skb_cow_data_for_xdp);
 
 #if IS_ENABLED(CONFIG_PAGE_POOL)
-bool napi_pp_put_page(struct page *page, bool napi_safe)
+bool napi_pp_put_page(struct page *page)
 {
-        bool allow_direct = false;
-        struct page_pool *pp;
-
         page = compound_head(page);
 
         /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
@@ -1021,39 +1018,18 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
         if (unlikely(!is_pp_page(page)))
                 return false;
 
-        pp = page->pp;
-
-        /* Allow direct recycle if we have reasons to believe that we are
-         * in the same context as the consumer would run, so there's
-         * no possible race.
-         * __page_pool_put_page() makes sure we're not in hardirq context
-         * and interrupts are enabled prior to accessing the cache.
-         */
-        if (napi_safe || in_softirq()) {
-                const struct napi_struct *napi = READ_ONCE(pp->p.napi);
-                unsigned int cpuid = smp_processor_id();
-
-                allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
-                allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
-        }
-
         /* Driver set this to memory recycling info. Reset it on recycle.
          * This will *not* work for NIC using a split-page memory model.
          * The page will be returned to the pool here regardless of the
          * 'flipped' fragment being in use or not.
          */
-        page_pool_put_full_page(pp, page, allow_direct);
+        page_pool_put_full_page(page->pp, page, false);
 
         return true;
 }
 EXPORT_SYMBOL(napi_pp_put_page);
 #endif
 
-static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 {
         if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
                 return false;
-        return napi_pp_put_page(virt_to_page(data), napi_safe);
+        return napi_pp_put_page(virt_to_page(data));
 }
 
 /**
@@ -1095,12 +1071,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
                 kfree(head);
 }
 
-static void skb_free_head(struct sk_buff *skb, bool napi_safe)
+static void skb_free_head(struct sk_buff *skb)
 {
         unsigned char *head = skb->head;
 
         if (skb->head_frag) {
-                if (skb_pp_recycle(skb, head, napi_safe))
+                if (skb_pp_recycle(skb, head))
                         return;
                 skb_free_frag(head);
         } else {
@@ -1108,8 +1084,7 @@ static void skb_free_head(struct sk_buff *skb, bool napi_safe)
         }
 }
 
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
-                             bool napi_safe)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
         int i;
@@ -1126,13 +1101,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
         }
 
         for (i = 0; i < shinfo->nr_frags; i++)
-                napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
+                napi_frag_unref(&shinfo->frags[i], skb->pp_recycle);
 
 free_head:
         if (shinfo->frag_list)
                 kfree_skb_list_reason(shinfo->frag_list, reason);
 
-        skb_free_head(skb, napi_safe);
+        skb_free_head(skb);
 exit:
         /* When we clone an SKB we copy the reycling bit. The pp_recycle
          * bit is only set on the head though, so in order to avoid races
@@ -1193,12 +1168,11 @@ void skb_release_head_state(struct sk_buff *skb)
 }
 
 /* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
-                            bool napi_safe)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
 {
         skb_release_head_state(skb);
         if (likely(skb->head))
-                skb_release_data(skb, reason, napi_safe);
+                skb_release_data(skb, reason);
 }
 
 /**
@@ -1212,7 +1186,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
 
 void __kfree_skb(struct sk_buff *skb)
 {
-        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
+        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
         kfree_skbmem(skb);
 }
 EXPORT_SYMBOL(__kfree_skb);
@@ -1269,7 +1243,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
                 return;
         }
 
-        skb_release_all(skb, reason, false);
+        skb_release_all(skb, reason);
         sa->skb_array[sa->skb_count++] = skb;
 
         if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1443,7 +1417,7 @@ EXPORT_SYMBOL(consume_skb);
 void __consume_stateless_skb(struct sk_buff *skb)
 {
         trace_consume_skb(skb, __builtin_return_address(0));
-        skb_release_data(skb, SKB_CONSUMED, false);
+        skb_release_data(skb, SKB_CONSUMED);
         kfree_skbmem(skb);
 }
 
@@ -1470,7 +1444,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
 
 void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
 {
-        skb_release_all(skb, reason, true);
+        skb_release_all(skb, reason);
         napi_skb_cache_put(skb);
 }
 
@@ -1508,7 +1482,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
                 return;
         }
 
-        skb_release_all(skb, SKB_CONSUMED, !!budget);
+        skb_release_all(skb, SKB_CONSUMED);
         napi_skb_cache_put(skb);
 }
 EXPORT_SYMBOL(napi_consume_skb);
@@ -1639,7 +1613,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
  */
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 {
-        skb_release_all(dst, SKB_CONSUMED, false);
+        skb_release_all(dst, SKB_CONSUMED);
         return __skb_clone(dst, src);
 }
 EXPORT_SYMBOL_GPL(skb_morph);
@@ -2271,9 +2245,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);
 
-                skb_release_data(skb, SKB_CONSUMED, false);
+                skb_release_data(skb, SKB_CONSUMED);
         } else {
-                skb_free_head(skb, false);
+                skb_free_head(skb);
         }
         off = (data + nhead) - skb->head;
 
@@ -6574,12 +6548,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
                         skb_frag_ref(skb, i);
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);
-                skb_release_data(skb, SKB_CONSUMED, false);
+                skb_release_data(skb, SKB_CONSUMED);
         } else {
                 /* we can reuse existing recount- all we did was
                  * relocate values
                  */
-                skb_free_head(skb, false);
+                skb_free_head(skb);
         }
 
         skb->head = data;
@@ -6714,7 +6688,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
                 skb_kfree_head(data, size);
                 return -ENOMEM;
         }
-        skb_release_data(skb, SKB_CONSUMED, false);
+        skb_release_data(skb, SKB_CONSUMED);
 
         skb->head = data;
         skb->head_frag = 0;
@@ -114,7 +114,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
          */
         if (req->src != req->dst)
                 for (sg = sg_next(req->src); sg; sg = sg_next(sg))
-                        skb_page_unref(skb, sg_page(sg), false);
+                        skb_page_unref(skb, sg_page(sg));
 }
 
 #ifdef CONFIG_INET_ESPINTCP
@@ -131,7 +131,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
          */
         if (req->src != req->dst)
                 for (sg = sg_next(req->src); sg; sg = sg_next(sg))
-                        skb_page_unref(skb, sg_page(sg), false);
+                        skb_page_unref(skb, sg_page(sg));
 }
 
 #ifdef CONFIG_INET6_ESPINTCP