d636fc5dd6
syzbot reported a race around qdisc->qdisc_sleeping [1]
It is time we add proper annotations to reads and writes to/from
qdisc->qdisc_sleeping.
[1]
BUG: KCSAN: data-race in dev_graft_qdisc / qdisc_lookup_rcu
read to 0xffff8881286fc618 of 8 bytes by task 6928 on cpu 1:
qdisc_lookup_rcu+0x192/0x2c0 net/sched/sch_api.c:331
__tcf_qdisc_find+0x74/0x3c0 net/sched/cls_api.c:1174
tc_get_tfilter+0x18f/0x990 net/sched/cls_api.c:2547
rtnetlink_rcv_msg+0x7af/0x8c0 net/core/rtnetlink.c:6386
netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2546
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6413
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1913
sock_sendmsg_nosec net/socket.c:724 [inline]
sock_sendmsg net/socket.c:747 [inline]
____sys_sendmsg+0x375/0x4c0 net/socket.c:2503
___sys_sendmsg net/socket.c:2557 [inline]
__sys_sendmsg+0x1e3/0x270 net/socket.c:2586
__do_sys_sendmsg net/socket.c:2595 [inline]
__se_sys_sendmsg net/socket.c:2593 [inline]
__x64_sys_sendmsg+0x46/0x50 net/socket.c:2593
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
write to 0xffff8881286fc618 of 8 bytes by task 6912 on cpu 0:
dev_graft_qdisc+0x4f/0x80 net/sched/sch_generic.c:1115
qdisc_graft+0x7d0/0xb60 net/sched/sch_api.c:1103
tc_modify_qdisc+0x712/0xf10 net/sched/sch_api.c:1693
rtnetlink_rcv_msg+0x807/0x8c0 net/core/rtnetlink.c:6395
netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2546
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6413
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1913
sock_sendmsg_nosec net/socket.c:724 [inline]
sock_sendmsg net/socket.c:747 [inline]
____sys_sendmsg+0x375/0x4c0 net/socket.c:2503
___sys_sendmsg net/socket.c:2557 [inline]
__sys_sendmsg+0x1e3/0x270 net/socket.c:2586
__do_sys_sendmsg net/socket.c:2595 [inline]
__se_sys_sendmsg net/socket.c:2593 [inline]
__x64_sys_sendmsg+0x46/0x50 net/socket.c:2593
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 6912 Comm: syz-executor.5 Not tainted 6.4.0-rc3-syzkaller-00190-g0d85b27b0cc6 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/16/2023
Fixes: 3a7d0d07a3 ("net: sched: extend Qdisc with rcu")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Vlad Buslov <vladbu@nvidia.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
276 lines
6.7 KiB
C
276 lines
6.7 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* net/sched/sch_mq.c Classful multiqueue dummy scheduler
|
|
*
|
|
* Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/netlink.h>
|
|
#include <net/pkt_cls.h>
|
|
#include <net/pkt_sched.h>
|
|
#include <net/sch_generic.h>
|
|
|
|
/* Private data of an mq qdisc instance. */
struct mq_sched {
	/*
	 * Array of child qdisc pointers, one slot per device TX queue.
	 * Allocated and filled by mq_init(); freed by mq_attach() (which also
	 * resets the pointer to NULL) or by mq_destroy().
	 */
	struct Qdisc		**qdiscs;
};
|
|
|
|
static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct tc_mq_qopt_offload opt = {
|
|
.command = cmd,
|
|
.handle = sch->handle,
|
|
};
|
|
|
|
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
|
|
return -EOPNOTSUPP;
|
|
|
|
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
|
|
}
|
|
|
|
static int mq_offload_stats(struct Qdisc *sch)
|
|
{
|
|
struct tc_mq_qopt_offload opt = {
|
|
.command = TC_MQ_STATS,
|
|
.handle = sch->handle,
|
|
.stats = {
|
|
.bstats = &sch->bstats,
|
|
.qstats = &sch->qstats,
|
|
},
|
|
};
|
|
|
|
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt);
|
|
}
|
|
|
|
static void mq_destroy(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
unsigned int ntx;
|
|
|
|
mq_offload(sch, TC_MQ_DESTROY);
|
|
|
|
if (!priv->qdiscs)
|
|
return;
|
|
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
|
|
qdisc_put(priv->qdiscs[ntx]);
|
|
kfree(priv->qdiscs);
|
|
}
|
|
|
|
static int mq_init(struct Qdisc *sch, struct nlattr *opt,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct netdev_queue *dev_queue;
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
if (sch->parent != TC_H_ROOT)
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!netif_is_multiqueue(dev))
|
|
return -EOPNOTSUPP;
|
|
|
|
/* pre-allocate qdiscs, attachment can't fail */
|
|
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
|
|
GFP_KERNEL);
|
|
if (!priv->qdiscs)
|
|
return -ENOMEM;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
dev_queue = netdev_get_tx_queue(dev, ntx);
|
|
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
|
|
TC_H_MAKE(TC_H_MAJ(sch->handle),
|
|
TC_H_MIN(ntx + 1)),
|
|
extack);
|
|
if (!qdisc)
|
|
return -ENOMEM;
|
|
priv->qdiscs[ntx] = qdisc;
|
|
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
}
|
|
|
|
sch->flags |= TCQ_F_MQROOT;
|
|
|
|
mq_offload(sch, TC_MQ_CREATE);
|
|
return 0;
|
|
}
|
|
|
|
static void mq_attach(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct Qdisc *qdisc, *old;
|
|
unsigned int ntx;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = priv->qdiscs[ntx];
|
|
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
|
|
if (old)
|
|
qdisc_put(old);
|
|
#ifdef CONFIG_NET_SCHED
|
|
if (ntx < dev->real_num_tx_queues)
|
|
qdisc_hash_add(qdisc, false);
|
|
#endif
|
|
|
|
}
|
|
kfree(priv->qdiscs);
|
|
priv->qdiscs = NULL;
|
|
}
|
|
|
|
static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
sch->q.qlen = 0;
|
|
gnet_stats_basic_sync_init(&sch->bstats);
|
|
memset(&sch->qstats, 0, sizeof(sch->qstats));
|
|
|
|
/* MQ supports lockless qdiscs. However, statistics accounting needs
|
|
* to account for all, none, or a mix of locked and unlocked child
|
|
* qdiscs. Percpu stats are added to counters in-band and locking
|
|
* qdisc totals are added at end.
|
|
*/
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
|
|
spin_lock_bh(qdisc_lock(qdisc));
|
|
|
|
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
|
|
&qdisc->bstats, false);
|
|
gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
|
|
&qdisc->qstats);
|
|
sch->q.qlen += qdisc_qlen(qdisc);
|
|
|
|
spin_unlock_bh(qdisc_lock(qdisc));
|
|
}
|
|
|
|
return mq_offload_stats(sch);
|
|
}
|
|
|
|
static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned long ntx = cl - 1;
|
|
|
|
if (ntx >= dev->num_tx_queues)
|
|
return NULL;
|
|
return netdev_get_tx_queue(dev, ntx);
|
|
}
|
|
|
|
static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
|
|
struct tcmsg *tcm)
|
|
{
|
|
return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
|
|
}
|
|
|
|
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
|
|
struct Qdisc **old, struct netlink_ext_ack *extack)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
struct tc_mq_qopt_offload graft_offload;
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
|
|
if (dev->flags & IFF_UP)
|
|
dev_deactivate(dev);
|
|
|
|
*old = dev_graft_qdisc(dev_queue, new);
|
|
if (new)
|
|
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
if (dev->flags & IFF_UP)
|
|
dev_activate(dev);
|
|
|
|
graft_offload.handle = sch->handle;
|
|
graft_offload.graft_params.queue = cl - 1;
|
|
graft_offload.graft_params.child_handle = new ? new->handle : 0;
|
|
graft_offload.command = TC_MQ_GRAFT;
|
|
|
|
qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
|
|
TC_SETUP_QDISC_MQ, &graft_offload, extack);
|
|
return 0;
|
|
}
|
|
|
|
static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
return rtnl_dereference(dev_queue->qdisc_sleeping);
|
|
}
|
|
|
|
/*
 * Translate @classid into an mq class id.
 *
 * Returns the minor part of @classid when it names an existing TX queue
 * (as decided by mq_queue_get()), otherwise 0.
 */
static unsigned long mq_find(struct Qdisc *sch, u32 classid)
{
	unsigned int minor = TC_H_MIN(classid);

	return mq_queue_get(sch, minor) ? minor : 0;
}
|
|
|
|
static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
tcm->tcm_parent = TC_H_ROOT;
|
|
tcm->tcm_handle |= TC_H_MIN(cl);
|
|
tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
|
|
return 0;
|
|
}
|
|
|
|
static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
|
|
struct gnet_dump *d)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
sch = rtnl_dereference(dev_queue->qdisc_sleeping);
|
|
if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
|
|
qdisc_qstats_copy(d, sch) < 0)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned int ntx;
|
|
|
|
if (arg->stop)
|
|
return;
|
|
|
|
arg->count = arg->skip;
|
|
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
|
|
if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
|
|
break;
|
|
}
|
|
}
|
|
|
|
static const struct Qdisc_class_ops mq_class_ops = {
|
|
.select_queue = mq_select_queue,
|
|
.graft = mq_graft,
|
|
.leaf = mq_leaf,
|
|
.find = mq_find,
|
|
.walk = mq_walk,
|
|
.dump = mq_dump_class,
|
|
.dump_stats = mq_dump_class_stats,
|
|
};
|
|
|
|
/*
 * Qdisc operations of the "mq" scheduler.
 *
 * NOTE(review): mq_change_real_num_tx is not defined in this file;
 * presumably it is declared by one of the included headers - confirm.
 */
struct Qdisc_ops mq_qdisc_ops __read_mostly = {
	.id			= "mq",
	.owner			= THIS_MODULE,
	.priv_size		= sizeof(struct mq_sched),
	.cl_ops			= &mq_class_ops,
	.init			= mq_init,
	.attach			= mq_attach,
	.destroy		= mq_destroy,
	.change_real_num_tx	= mq_change_real_num_tx,
	.dump			= mq_dump,
};
|