From c491eae8f9c0720520ebdeb4d335671f84b84b71 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sat, 16 Nov 2019 12:22:38 +0100 Subject: [PATCH 1/3] xdp: remove memory poison on free for struct xdp_mem_allocator When looking at the details I realised that the memory poison in __xdp_mem_allocator_rcu_free doesn't make sense. This is because the SLUB allocator uses the first 16 bytes (on 64 bit), for its freelist, which overlap with members in struct xdp_mem_allocator, that were updated. Thus, SLUB already does the "poisoning" for us. I still believe that poisoning memory make sense in other cases. Kernel have gained different use-after-free detection mechanism, but enabling those is associated with a huge overhead. Experience is that debugging facilities can change the timing so much, that that a race condition will not be provoked when enabled. Thus, I'm still in favour of poisoning memory where it makes sense. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/xdp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 8e405abaf05a..e334fad0a6b8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -73,11 +73,6 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) /* Allow this ID to be reused */ ida_simple_remove(&mem_id_pool, xa->mem.id); - /* Poison memory */ - xa->mem.id = 0xFFFF; - xa->mem.type = 0xF0F0; - xa->allocator = (void *)0xDEAD9001; - kfree(xa); } From 7c9e69428da39ed761c9d903c4850368fa4ef7bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sat, 16 Nov 2019 12:22:43 +0100 Subject: [PATCH 2/3] page_pool: add destroy attempts counter and rename tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Jonathan change the page_pool to become responsible to its own shutdown via deferred work queue, then the disconnect_cnt counter was removed from xdp memory model tracepoint. This patch change the page_pool_inflight tracepoint name to page_pool_release, because it reflects the new responsability better. And it reintroduces a counter that reflect the number of times page_pool_release have been tried. The counter is also used by the code, to only empty the alloc cache once. With a stuck work queue running every second and counter being 64-bit, it will overrun in approx 584 billion years. For comparison, Earth lifetime expectancy is 7.5 billion years, before the Sun will engulf, and destroy, the Earth. Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/net/page_pool.h | 2 ++ include/trace/events/page_pool.h | 9 ++++++--- net/core/page_pool.c | 13 +++++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 1121faa99c12..ace881c15dcb 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -112,6 +112,8 @@ struct page_pool { * refcnt serves purpose is to simplify drivers error handling. */ refcount_t user_cnt; + + u64 destroy_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 47b5ee880aa9..ee7f1aca7839 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -10,7 +10,7 @@ #include -TRACE_EVENT(page_pool_inflight, +TRACE_EVENT(page_pool_release, TP_PROTO(const struct page_pool *pool, s32 inflight, u32 hold, u32 release), @@ -22,6 +22,7 @@ TRACE_EVENT(page_pool_inflight, __field(s32, inflight) __field(u32, hold) __field(u32, release) + __field(u64, cnt) ), TP_fast_assign( @@ -29,10 +30,12 @@ TRACE_EVENT(page_pool_inflight, __entry->inflight = inflight; __entry->hold = hold; __entry->release = release; + __entry->cnt = pool->destroy_cnt; ), - TP_printk("page_pool=%p inflight=%d hold=%u release=%u", - __entry->pool, __entry->inflight, __entry->hold, __entry->release) + TP_printk("page_pool=%p inflight=%d hold=%u release=%u cnt=%llu", + __entry->pool, __entry->inflight, __entry->hold, + __entry->release, __entry->cnt) ); TRACE_EVENT(page_pool_state_release, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dfc2501c35d9..e28db2ef8e12 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -200,7 +200,7 @@ static s32 page_pool_inflight(struct page_pool *pool) inflight = _distance(hold_cnt, release_cnt); - trace_page_pool_inflight(pool, inflight, hold_cnt, release_cnt); + trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); return inflight; @@ -349,10 +349,13 @@ static void page_pool_free(struct page_pool *pool) kfree(pool); } -static void page_pool_scrub(struct page_pool *pool) +static void page_pool_empty_alloc_cache_once(struct page_pool *pool) { struct page *page; + if (pool->destroy_cnt) + return; + /* Empty alloc cache, assume caller made sure this is * no-longer in use, and page_pool_alloc_pages() cannot be * call concurrently. @@ -361,6 +364,12 @@ static void page_pool_scrub(struct page_pool *pool) page = pool->alloc.cache[--pool->alloc.count]; __page_pool_return_page(pool, page); } +} + +static void page_pool_scrub(struct page_pool *pool) +{ + page_pool_empty_alloc_cache_once(pool); + pool->destroy_cnt++; /* No more consumers should exist, but producers could still * be in-flight. From 832ccf6f80cda06ad2373cd1f40291b0183958b4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sat, 16 Nov 2019 12:22:48 +0100 Subject: [PATCH 3/3] page_pool: extend tracepoint to also include the page PFN The MM tracepoint for page free (called kmem:mm_page_free) doesn't provide the page pointer directly, instead it provides the PFN (Page Frame Number). This is annoying when writing a page_pool leak detector in BPF. This patch change page_pool tracepoints to also provide the PFN. The page pointer is still provided to allow other kinds of troubleshooting from BPF. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/page_pool.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index ee7f1aca7839..2f2a10e8eb56 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -8,6 +8,7 @@ #include #include +#include #include TRACE_EVENT(page_pool_release, @@ -49,16 +50,18 @@ TRACE_EVENT(page_pool_state_release, __field(const struct page_pool *, pool) __field(const struct page *, page) __field(u32, release) + __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->page = page; __entry->release = release; + __entry->pfn = page_to_pfn(page); ), - TP_printk("page_pool=%p page=%p release=%u", - __entry->pool, __entry->page, __entry->release) + TP_printk("page_pool=%p page=%p pfn=%lu release=%u", + __entry->pool, __entry->page, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, @@ -72,16 +75,18 @@ TRACE_EVENT(page_pool_state_hold, __field(const struct page_pool *, pool) __field(const struct page *, page) __field(u32, hold) + __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->page = page; __entry->hold = hold; + __entry->pfn = page_to_pfn(page); ), - TP_printk("page_pool=%p page=%p hold=%u", - __entry->pool, __entry->page, __entry->hold) + TP_printk("page_pool=%p page=%p pfn=%lu hold=%u", + __entry->pool, __entry->page, __entry->pfn, __entry->hold) ); #endif /* _TRACE_PAGE_POOL_H */