af7b29b1de
taprio_attach() has this logic at the end, which should have been removed with the blamed patch (which is now being reverted): /* access to the child qdiscs is not needed in offload mode */ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { kfree(q->qdiscs); q->qdiscs = NULL; } because otherwise, we make use of q->qdiscs[] even after this array was deallocated, namely in taprio_leaf(). Therefore, whenever one would try to attach a valid child qdisc to a fully offloaded taprio root, one would immediately dereference a NULL pointer. $ tc qdisc replace dev eno0 handle 8001: parent root taprio \ num_tc 8 \ map 0 1 2 3 4 5 6 7 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ max-sdu 0 0 0 0 0 200 0 0 \ base-time 200 \ sched-entry S 80 20000 \ sched-entry S a0 20000 \ sched-entry S 5f 60000 \ flags 2 $ max_frame_size=1500 $ data_rate_kbps=20000 $ port_transmit_rate_kbps=1000000 $ idleslope=$data_rate_kbps $ sendslope=$(($idleslope - $port_transmit_rate_kbps)) $ locredit=$(($max_frame_size * $sendslope / $port_transmit_rate_kbps)) $ hicredit=$(($max_frame_size * $idleslope / $port_transmit_rate_kbps)) $ tc qdisc replace dev eno0 parent 8001:7 cbs \ idleslope $idleslope \ sendslope $sendslope \ hicredit $hicredit \ locredit $locredit \ offload 0 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 pc : taprio_leaf+0x28/0x40 lr : qdisc_leaf+0x3c/0x60 Call trace: taprio_leaf+0x28/0x40 tc_modify_qdisc+0xf0/0x72c rtnetlink_rcv_msg+0x12c/0x390 netlink_rcv_skb+0x5c/0x130 rtnetlink_rcv+0x1c/0x2c The solution is not as obvious as the problem. The code which deallocates q->qdiscs[] is in fact copied and pasted from mqprio, which also deallocates the array in mqprio_attach() and never uses it afterwards. Therefore, the identical cleanup logic of priv->qdiscs[] that mqprio_destroy() has is deceptive because it will never take place at qdisc_destroy() time, but just at raw ops->destroy() time (otherwise said, priv->qdiscs[] do not last for the entire lifetime of the mqprio root), but rather, this is just the twisted way in which the Qdisc API understands error path cleanup should be done (Qdisc_ops :: destroy() is called even when Qdisc_ops :: init() never succeeded). Side note, in fact this is also what the comment in mqprio_init() says: /* pre-allocate qdisc, attachment can't fail */ Or reworded, mqprio's priv->qdiscs[] scheme is only meant to serve as data passing between Qdisc_ops :: init() and Qdisc_ops :: attach(). [ this comment was also copied and pasted into the initial taprio commit, even though taprio_attach() came way later ] The problem is that taprio also makes extensive use of the q->qdiscs[] array in the software fast path (taprio_enqueue() and taprio_dequeue()), but it does not keep a reference of its own on q->qdiscs[i] (you'd think that since it creates these Qdiscs, it holds the reference, but nope, this is not completely true). To understand the difference between taprio_destroy() and mqprio_destroy() one must look before commit |
||
---|---|---|
.. | ||
act_api.c | ||
act_bpf.c | ||
act_connmark.c | ||
act_csum.c | ||
act_ct.c | ||
act_ctinfo.c | ||
act_gact.c | ||
act_gate.c | ||
act_ife.c | ||
act_ipt.c | ||
act_meta_mark.c | ||
act_meta_skbprio.c | ||
act_meta_skbtcindex.c | ||
act_mirred.c | ||
act_mpls.c | ||
act_nat.c | ||
act_pedit.c | ||
act_police.c | ||
act_sample.c | ||
act_simple.c | ||
act_skbedit.c | ||
act_skbmod.c | ||
act_tunnel_key.c | ||
act_vlan.c | ||
cls_api.c | ||
cls_basic.c | ||
cls_bpf.c | ||
cls_cgroup.c | ||
cls_flow.c | ||
cls_flower.c | ||
cls_fw.c | ||
cls_matchall.c | ||
cls_route.c | ||
cls_rsvp6.c | ||
cls_rsvp.c | ||
cls_rsvp.h | ||
cls_tcindex.c | ||
cls_u32.c | ||
em_canid.c | ||
em_cmp.c | ||
em_ipset.c | ||
em_ipt.c | ||
em_meta.c | ||
em_nbyte.c | ||
em_text.c | ||
em_u32.c | ||
ematch.c | ||
Kconfig | ||
Makefile | ||
sch_api.c | ||
sch_atm.c | ||
sch_blackhole.c | ||
sch_cake.c | ||
sch_cbq.c | ||
sch_cbs.c | ||
sch_choke.c | ||
sch_codel.c | ||
sch_drr.c | ||
sch_dsmark.c | ||
sch_etf.c | ||
sch_ets.c | ||
sch_fifo.c | ||
sch_fq_codel.c | ||
sch_fq_pie.c | ||
sch_fq.c | ||
sch_frag.c | ||
sch_generic.c | ||
sch_gred.c | ||
sch_hfsc.c | ||
sch_hhf.c | ||
sch_htb.c | ||
sch_ingress.c | ||
sch_mq.c | ||
sch_mqprio.c | ||
sch_multiq.c | ||
sch_netem.c | ||
sch_pie.c | ||
sch_plug.c | ||
sch_prio.c | ||
sch_qfq.c | ||
sch_red.c | ||
sch_sfb.c | ||
sch_sfq.c | ||
sch_skbprio.c | ||
sch_taprio.c | ||
sch_tbf.c | ||
sch_teql.c |