net: sched, fix OOO packets with pfifo_fast
After the qdisc lock was dropped in pfifo_fast we allow multiple
enqueue threads and dequeue threads to run in parallel. On the
enqueue side the skb bit ooo_okay is used to ensure all related
skbs are enqueued in-order. On the dequeue side, though, there is
no similar logic. What we observe is that with fewer queues than CPUs
it is possible to re-order packets when two instances of
__qdisc_run() are running in parallel. Each thread will dequeue
an skb, and whichever thread calls the ndo op first will have its
skb sent on the wire first. This doesn't typically happen because
qdisc_run() is usually triggered by the same core that did the
enqueue. However, drivers will trigger __netif_schedule()
when queues are transitioning from stopped to awake using the
netif_tx_wake_* APIs. When this happens, __netif_schedule() calls
qdisc_run() on the same CPU that did the netif_tx_wake_*, which
is usually done in the interrupt completion context. This CPU
is selected with the irq affinity which is unrelated to the
enqueue operations.
To resolve this we add a RUNNING bit (__QDISC_STATE_RUNNING) to the
qdisc state to ensure only a single dequeue per qdisc is running.
Enqueue and dequeue operations can still run in parallel, and on
multi-queue NICs we can still have one dequeue in-flight per qdisc,
which is typically per CPU.
Fixes: c5ad119fb6 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
743989254e
commit
eb82a99447
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
|
||||
enum qdisc_state_t {
|
||||
__QDISC_STATE_SCHED,
|
||||
__QDISC_STATE_DEACTIVATED,
|
||||
__QDISC_STATE_RUNNING,
|
||||
};
|
||||
|
||||
struct qdisc_size_table {
|
||||
|
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
|
||||
*/
|
||||
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
|
||||
{
|
||||
bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
|
||||
spinlock_t *root_lock = NULL;
|
||||
struct netdev_queue *txq;
|
||||
struct net_device *dev;
|
||||
struct sk_buff *skb;
|
||||
bool validate;
|
||||
|
||||
/* Dequeue packet */
|
||||
skb = dequeue_skb(q, &validate, packets);
|
||||
if (unlikely(!skb))
|
||||
if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
|
||||
return false;
|
||||
|
||||
if (!(q->flags & TCQ_F_NOLOCK))
|
||||
skb = dequeue_skb(q, &validate, packets);
|
||||
if (unlikely(!skb)) {
|
||||
if (nolock)
|
||||
clear_bit(__QDISC_STATE_RUNNING, &q->state);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nolock)
|
||||
root_lock = qdisc_lock(q);
|
||||
|
||||
dev = qdisc_dev(q);
|
||||
txq = skb_get_tx_queue(dev, skb);
|
||||
|
||||
return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
|
||||
more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
|
||||
if (nolock)
|
||||
clear_bit(__QDISC_STATE_RUNNING, &q->state);
|
||||
return more;
|
||||
}
|
||||
|
||||
void __qdisc_run(struct Qdisc *q)
|
||||
|
Loading…
Reference in New Issue
Block a user