f9893fdac3
syzbot was able to trigger a crash [1] in page_pool_unlist() page_pool_list() only inserts a page pool into a netdev page pool list if a netdev was set in params. Even if the kzalloc() call in page_pool_create happens to initialize pool->user.list, I chose to be more explicit in page_pool_list() adding one INIT_HLIST_NODE(). We could test in page_pool_unlist() if netdev was set, but since netdev can be changed to lo, it seems more robust to check if pool->user.list is hashed before calling hlist_del(). [1] Illegal XDP return value 4294946546 on prog (id 2) dev N/A, expect packet loss! general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 5064 Comm: syz-executor391 Not tainted 6.7.0-rc2-syzkaller-00533-ga379972973a8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:__hlist_del include/linux/list.h:988 [inline] RIP: 0010:hlist_del include/linux/list.h:1002 [inline] RIP: 0010:page_pool_unlist+0xd1/0x170 net/core/page_pool_user.c:342 Code: df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 90 00 00 00 4c 8b a3 f0 06 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 75 68 48 85 ed 49 89 2c 24 74 24 e8 1b ca 07 f9 48 8d RSP: 0018:ffffc900039ff768 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88814ae02000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88814ae026f0 RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff1d57fdc R10: ffffffff8eabfee3 R11: ffffffff8aa0008b R12: 0000000000000000 R13: ffff88814ae02000 R14: dffffc0000000000 R15: 0000000000000001 FS: 000055555717a380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000002555398 CR3: 0000000025044000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 
00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> __page_pool_destroy net/core/page_pool.c:851 [inline] page_pool_release+0x507/0x6b0 net/core/page_pool.c:891 page_pool_destroy+0x1ac/0x4c0 net/core/page_pool.c:956 xdp_test_run_teardown net/bpf/test_run.c:216 [inline] bpf_test_run_xdp_live+0x1578/0x1af0 net/bpf/test_run.c:388 bpf_prog_test_run_xdp+0x827/0x1530 net/bpf/test_run.c:1254 bpf_prog_test_run kernel/bpf/syscall.c:4041 [inline] __sys_bpf+0x11bf/0x4920 kernel/bpf/syscall.c:5402 __do_sys_bpf kernel/bpf/syscall.c:5488 [inline] __se_sys_bpf kernel/bpf/syscall.c:5486 [inline] __x64_sys_bpf+0x78/0xc0 kernel/bpf/syscall.c:5486 Fixes: 083772c9f972 ("net: page_pool: record pools per netdev") Reported-and-tested-by: syzbot+f9f8efb58a4db2ca98d0@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet <edumazet@google.com> Tested-by: Andrew Lunn <andrew@lunn.ch> Link: https://lore.kernel.org/r/20231130092259.3797753-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
411 lines
10 KiB
C
411 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/mutex.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/xarray.h>
|
|
#include <net/net_debug.h>
|
|
#include <net/page_pool/types.h>
|
|
#include <net/page_pool/helpers.h>
|
|
#include <net/sock.h>
|
|
|
|
#include "page_pool_priv.h"
|
|
#include "netdev-genl-gen.h"
|
|
|
|
/* Registry of listed page pools, keyed by pool->user.id.
 * NOTE(review): XA_FLAGS_ALLOC1 presumably makes allocated IDs start at 1,
 * which would leave 0 free to mean "no pool" - confirm against xarray docs.
 */
static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);

/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user.
 * Ordering: inside rtnl_lock
 */
static DEFINE_MUTEX(page_pools_lock);
|
|
|
|
/* Page pools are only reachable from user space (via netlink) if they are
 * linked to a netdev at creation time. The following page pool "visibility"
 * states are possible:
 *  - normal
 *    - user.list: linked to real netdev, netdev: real netdev
 *  - orphaned - real netdev has disappeared
 *    - user.list: linked to lo, netdev: lo
 *  - invisible - either (a) created without netdev linking, (b) unlisted due
 *      to error, or (c) the entire namespace which owned this pool disappeared
 *    - user.list: unhashed, netdev: unknown
 */
|
|
|
|
/* Serializer callback shared by the "do" and "dump" request paths: writes one
 * page pool into @rsp and returns 0 or a negative errno.
 */
typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
			     const struct genl_info *info);
|
|
|
|
static int
|
|
netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
|
|
{
|
|
struct page_pool *pool;
|
|
struct sk_buff *rsp;
|
|
int err;
|
|
|
|
mutex_lock(&page_pools_lock);
|
|
pool = xa_load(&page_pools, id);
|
|
if (!pool || hlist_unhashed(&pool->user.list) ||
|
|
!net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) {
|
|
err = -ENOENT;
|
|
goto err_unlock;
|
|
}
|
|
|
|
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
if (!rsp) {
|
|
err = -ENOMEM;
|
|
goto err_unlock;
|
|
}
|
|
|
|
err = fill(rsp, pool, info);
|
|
if (err)
|
|
goto err_free_msg;
|
|
|
|
mutex_unlock(&page_pools_lock);
|
|
|
|
return genlmsg_reply(rsp, info);
|
|
|
|
err_free_msg:
|
|
nlmsg_free(rsp);
|
|
err_unlock:
|
|
mutex_unlock(&page_pools_lock);
|
|
return err;
|
|
}
|
|
|
|
struct page_pool_dump_cb {
|
|
unsigned long ifindex;
|
|
u32 pp_id;
|
|
};
|
|
|
|
static int
|
|
netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
|
pp_nl_fill_cb fill)
|
|
{
|
|
struct page_pool_dump_cb *state = (void *)cb->ctx;
|
|
const struct genl_info *info = genl_info_dump(cb);
|
|
struct net *net = sock_net(skb->sk);
|
|
struct net_device *netdev;
|
|
struct page_pool *pool;
|
|
int err = 0;
|
|
|
|
rtnl_lock();
|
|
mutex_lock(&page_pools_lock);
|
|
for_each_netdev_dump(net, netdev, state->ifindex) {
|
|
hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
|
|
if (state->pp_id && state->pp_id < pool->user.id)
|
|
continue;
|
|
|
|
state->pp_id = pool->user.id;
|
|
err = fill(skb, pool, info);
|
|
if (err)
|
|
break;
|
|
}
|
|
|
|
state->pp_id = 0;
|
|
}
|
|
mutex_unlock(&page_pools_lock);
|
|
rtnl_unlock();
|
|
|
|
if (skb->len && err == -EMSGSIZE)
|
|
return skb->len;
|
|
return err;
|
|
}
|
|
|
|
static int
|
|
page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
|
|
const struct genl_info *info)
|
|
{
|
|
#ifdef CONFIG_PAGE_POOL_STATS
|
|
struct page_pool_stats stats = {};
|
|
struct nlattr *nest;
|
|
void *hdr;
|
|
|
|
if (!page_pool_get_stats(pool, &stats))
|
|
return 0;
|
|
|
|
hdr = genlmsg_iput(rsp, info);
|
|
if (!hdr)
|
|
return -EMSGSIZE;
|
|
|
|
nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);
|
|
|
|
if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
|
|
(pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
|
|
nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
|
|
pool->slow.netdev->ifindex)))
|
|
goto err_cancel_nest;
|
|
|
|
nla_nest_end(rsp, nest);
|
|
|
|
if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
|
|
stats.alloc_stats.fast) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
|
|
stats.alloc_stats.slow) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
|
|
stats.alloc_stats.slow_high_order) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
|
|
stats.alloc_stats.empty) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
|
|
stats.alloc_stats.refill) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
|
|
stats.alloc_stats.waive) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
|
|
stats.recycle_stats.cached) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
|
|
stats.recycle_stats.cache_full) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
|
|
stats.recycle_stats.ring) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
|
|
stats.recycle_stats.ring_full) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
|
|
stats.recycle_stats.released_refcnt))
|
|
goto err_cancel_msg;
|
|
|
|
genlmsg_end(rsp, hdr);
|
|
|
|
return 0;
|
|
err_cancel_nest:
|
|
nla_nest_cancel(rsp, nest);
|
|
err_cancel_msg:
|
|
genlmsg_cancel(rsp, hdr);
|
|
return -EMSGSIZE;
|
|
#else
|
|
GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
|
|
return -EOPNOTSUPP;
|
|
#endif
|
|
}
|
|
|
|
int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
|
|
struct genl_info *info)
|
|
{
|
|
struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
|
|
struct nlattr *nest;
|
|
int err;
|
|
u32 id;
|
|
|
|
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
|
|
return -EINVAL;
|
|
|
|
nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
|
|
err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
|
|
netdev_page_pool_info_nl_policy,
|
|
info->extack);
|
|
if (err)
|
|
return err;
|
|
|
|
if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
|
|
return -EINVAL;
|
|
if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
|
|
NL_SET_ERR_MSG_ATTR(info->extack,
|
|
tb[NETDEV_A_PAGE_POOL_IFINDEX],
|
|
"selecting by ifindex not supported");
|
|
return -EINVAL;
|
|
}
|
|
|
|
id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);
|
|
|
|
return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
|
|
}
|
|
|
|
int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
|
|
struct netlink_callback *cb)
|
|
{
|
|
return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
|
|
}
|
|
|
|
static int
|
|
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
|
|
const struct genl_info *info)
|
|
{
|
|
size_t inflight, refsz;
|
|
void *hdr;
|
|
|
|
hdr = genlmsg_iput(rsp, info);
|
|
if (!hdr)
|
|
return -EMSGSIZE;
|
|
|
|
if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
|
|
goto err_cancel;
|
|
|
|
if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
|
|
nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
|
|
pool->slow.netdev->ifindex))
|
|
goto err_cancel;
|
|
if (pool->user.napi_id &&
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id))
|
|
goto err_cancel;
|
|
|
|
inflight = page_pool_inflight(pool, false);
|
|
refsz = PAGE_SIZE << pool->p.order;
|
|
if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
|
|
inflight * refsz))
|
|
goto err_cancel;
|
|
if (pool->user.detach_time &&
|
|
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
|
|
pool->user.detach_time))
|
|
goto err_cancel;
|
|
|
|
genlmsg_end(rsp, hdr);
|
|
|
|
return 0;
|
|
err_cancel:
|
|
genlmsg_cancel(rsp, hdr);
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
/* Multicast a page pool notification of type @cmd to the
 * NETDEV_NLGRP_PAGE_POOL group of the namespace owning @pool.
 *
 * Must be called with page_pools_lock held. Pools that are not on any netdev
 * list are skipped, as are namespaces without listeners. Allocation and fill
 * failures are silent: the notification is simply not sent.
 */
static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
{
	struct genl_info ntf_info;
	struct sk_buff *msg;
	struct net *ns;

	lockdep_assert_held(&page_pools_lock);

	/* 'invisible' page pools don't matter */
	if (hlist_unhashed(&pool->user.list))
		return;

	ns = dev_net(pool->slow.netdev);
	if (!genl_has_listeners(&netdev_nl_family, ns, NETDEV_NLGRP_PAGE_POOL))
		return;

	genl_info_init_ntf(&ntf_info, &netdev_nl_family, cmd);

	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return;

	if (page_pool_nl_fill(msg, pool, &ntf_info) == 0)
		genlmsg_multicast_netns(&netdev_nl_family, ns, msg,
					0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
	else
		nlmsg_free(msg);
}
|
|
|
|
int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
|
|
{
|
|
u32 id;
|
|
|
|
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
|
|
return -EINVAL;
|
|
|
|
id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);
|
|
|
|
return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
|
|
}
|
|
|
|
int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
|
|
struct netlink_callback *cb)
|
|
{
|
|
return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
|
|
}
|
|
|
|
int page_pool_list(struct page_pool *pool)
|
|
{
|
|
static u32 id_alloc_next;
|
|
int err;
|
|
|
|
mutex_lock(&page_pools_lock);
|
|
err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
|
|
&id_alloc_next, GFP_KERNEL);
|
|
if (err < 0)
|
|
goto err_unlock;
|
|
|
|
INIT_HLIST_NODE(&pool->user.list);
|
|
if (pool->slow.netdev) {
|
|
hlist_add_head(&pool->user.list,
|
|
&pool->slow.netdev->page_pools);
|
|
pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0;
|
|
|
|
netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
|
|
}
|
|
|
|
mutex_unlock(&page_pools_lock);
|
|
return 0;
|
|
|
|
err_unlock:
|
|
mutex_unlock(&page_pools_lock);
|
|
return err;
|
|
}
|
|
|
|
void page_pool_detached(struct page_pool *pool)
|
|
{
|
|
mutex_lock(&page_pools_lock);
|
|
pool->user.detach_time = ktime_get_boottime_seconds();
|
|
netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
|
|
mutex_unlock(&page_pools_lock);
|
|
}
|
|
|
|
void page_pool_unlist(struct page_pool *pool)
|
|
{
|
|
mutex_lock(&page_pools_lock);
|
|
netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
|
|
xa_erase(&page_pools, pool->user.id);
|
|
if (!hlist_unhashed(&pool->user.list))
|
|
hlist_del(&pool->user.list);
|
|
mutex_unlock(&page_pools_lock);
|
|
}
|
|
|
|
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
|
|
{
|
|
struct page_pool *pool;
|
|
struct hlist_node *n;
|
|
|
|
mutex_lock(&page_pools_lock);
|
|
hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
|
|
hlist_del_init(&pool->user.list);
|
|
pool->slow.netdev = NET_PTR_POISON;
|
|
}
|
|
mutex_unlock(&page_pools_lock);
|
|
}
|
|
|
|
static void page_pool_unreg_netdev(struct net_device *netdev)
|
|
{
|
|
struct page_pool *pool, *last;
|
|
struct net_device *lo;
|
|
|
|
lo = dev_net(netdev)->loopback_dev;
|
|
|
|
mutex_lock(&page_pools_lock);
|
|
last = NULL;
|
|
hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
|
|
pool->slow.netdev = lo;
|
|
netdev_nl_page_pool_event(pool,
|
|
NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
|
|
last = pool;
|
|
}
|
|
if (last)
|
|
hlist_splice_init(&netdev->page_pools, &last->user.list,
|
|
&lo->page_pools);
|
|
mutex_unlock(&page_pools_lock);
|
|
}
|
|
|
|
static int
|
|
page_pool_netdevice_event(struct notifier_block *nb,
|
|
unsigned long event, void *ptr)
|
|
{
|
|
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
|
|
|
|
if (event != NETDEV_UNREGISTER)
|
|
return NOTIFY_DONE;
|
|
|
|
if (hlist_empty(&netdev->page_pools))
|
|
return NOTIFY_OK;
|
|
|
|
if (netdev->ifindex != LOOPBACK_IFINDEX)
|
|
page_pool_unreg_netdev(netdev);
|
|
else
|
|
page_pool_unreg_netdev_wipe(netdev);
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
static struct notifier_block page_pool_netdevice_nb = {
|
|
.notifier_call = page_pool_netdevice_event,
|
|
};
|
|
|
|
/* Boot-time setup: register the netdevice notifier so page pools get
 * cleaned up when their device is unregistered.
 *
 * Return: result of register_netdevice_notifier().
 */
static int __init page_pool_user_init(void)
{
	int ret;

	ret = register_netdevice_notifier(&page_pool_netdevice_nb);
	return ret;
}

subsys_initcall(page_pool_user_init);
|