Merge branch 'lockless-qdisc-opts'
Yunsheng Lin says:

====================
Some optimization for lockless qdisc

Patch 1: remove unnecessary seqcount operation.
Patch 2: implement TCQ_F_CAN_BYPASS.
Patch 3: remove qdisc->empty.

Performance data for pktgen in queue_xmit mode + dummy netdev
with pfifo_fast:

 threads    unpatched    patched     delta
    1       2.60Mpps     3.21Mpps    +23%
    2       3.84Mpps     5.56Mpps    +44%
    4       5.52Mpps     5.58Mpps    +1%
    8       2.77Mpps     2.76Mpps    -0.3%
   16       2.24Mpps     2.23Mpps    -0.4%

Performance for IP forward testing: 1.05Mpps increases to 1.16Mpps,
about 10% improvement.

V3: Add 'Acked-by' from Jakub and 'Tested-by' from Vladimir, and
    resend based on latest net-next.
V2: Adjust the comment and commit log according to discussion in V1.
V1: Drop RFC tag, add nolock_qdisc_is_empty() and do the qdisc empty
    checking without the protection of qdisc->seqlock to avoid doing
    unnecessary spin_trylock() for the contention case.
RFC v4: Use STATE_MISSED and STATE_DRAINING to indicate non-empty
    qdisc, and add patch 1 and 3.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit e940eb3c1b
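The core of patch 3 is visible in the first hunk below: the standalone qdisc->empty bool is replaced by a test of two existing state bits, so a single atomic load answers "is anything pending?". A minimal userspace sketch of that single-load check (illustrative C with stand-ins for the kernel's BIT() and state word, not the kernel code itself):

/* Single-load emptiness test, modelled on QDISC_STATE_NON_EMPTY;
 * illustrative userspace C.
 */
#include <stdio.h>

#define BIT(n)			(1UL << (n))
#define STATE_MISSED		BIT(0)
#define STATE_DRAINING		BIT(1)
#define STATE_NON_EMPTY		(STATE_MISSED | STATE_DRAINING)

/* One read of the state word covers both flags. */
static int queue_looks_empty(unsigned long state)
{
	return !(state & STATE_NON_EMPTY);
}

int main(void)
{
	printf("%d\n", queue_looks_empty(0));			/* 1 */
	printf("%d\n", queue_looks_empty(STATE_MISSED));	/* 0 */
	printf("%d\n", queue_looks_empty(STATE_DRAINING));	/* 0 */
	return 0;
}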
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -37,8 +37,15 @@ enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 	__QDISC_STATE_MISSED,
+	__QDISC_STATE_DRAINING,
 };
 
+#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
+#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
+
+#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
+					QDISC_STATE_DRAINING)
+
 struct qdisc_size_table {
 	struct rcu_head rcu;
 	struct list_head list;
@@ -110,8 +117,6 @@ struct Qdisc {
 	spinlock_t		busylock ____cacheline_aligned_in_smp;
 	spinlock_t		seqlock;
 
-	/* for NOLOCK qdisc, true if there are no enqueued skbs */
-	bool			empty;
 	struct rcu_head		rcu;
 
 	/* private data */
@@ -145,6 +150,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
 	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
+static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+{
+	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
+}
+
 static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 {
 	return q->flags & TCQ_F_CPUSTATS;
@@ -153,7 +163,7 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 {
 	if (qdisc_is_percpu_stats(qdisc))
-		return READ_ONCE(qdisc->empty);
+		return nolock_qdisc_is_empty(qdisc);
 	return !READ_ONCE(qdisc->q.qlen);
 }
 
@@ -161,7 +171,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		if (spin_trylock(&qdisc->seqlock))
-			goto nolock_empty;
+			return true;
 
 		/* If the MISSED flag is set, it means other thread has
 		 * set the MISSED flag before second spin_trylock(), so
@@ -183,11 +193,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		/* Retry again in case other CPU may not see the new flag
 		 * after it releases the lock at the end of qdisc_run_end().
 		 */
-		if (!spin_trylock(&qdisc->seqlock))
-			return false;
-
-nolock_empty:
-		WRITE_ONCE(qdisc->empty, false);
+		return spin_trylock(&qdisc->seqlock);
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
 	}
@@ -201,15 +207,14 @@ nolock_empty:
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	write_seqcount_end(&qdisc->running);
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
 
 		if (unlikely(test_bit(__QDISC_STATE_MISSED,
-				      &qdisc->state))) {
-			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+				      &qdisc->state)))
 			__netif_schedule(qdisc);
-		}
+	} else {
+		write_seqcount_end(&qdisc->running);
 	}
 }
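The qdisc_run_begin() change above keeps the trylock/MISSED/retry protocol while dropping the nolock_empty label and the qdisc->empty write. A rough userspace analogue of that protocol, using a pthread mutex in place of q->seqlock and an atomic word in place of the kernel bitops (illustrative only, not kernel code; all names here are hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define F_MISSED	1UL

struct fake_qdisc {
	pthread_mutex_t seqlock;
	atomic_ulong state;		/* bit 0 plays STATE_MISSED */
};

static bool fake_run_begin(struct fake_qdisc *q)
{
	/* Fast path: uncontended, take the lock and run the queue. */
	if (pthread_mutex_trylock(&q->seqlock) == 0)
		return true;

	/* MISSED already set: the current holder will reschedule us. */
	if (atomic_load(&q->state) & F_MISSED)
		return false;

	atomic_fetch_or(&q->state, F_MISSED);

	/* Retry once, in case the holder unlocked before seeing the flag. */
	return pthread_mutex_trylock(&q->seqlock) == 0;
}

static struct fake_qdisc q = { .seqlock = PTHREAD_MUTEX_INITIALIZER };

int main(void)
{
	return fake_run_begin(&q) ? 0 : 1;	/* uncontended: succeeds */
}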
diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
-		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-		if (likely(!netif_xmit_frozen_or_stopped(txq)))
-			qdisc_run(q);
+		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+		    qdisc_run_begin(q)) {
+			/* Retest nolock_qdisc_is_empty() within the protection
+			 * of q->seqlock to protect from racing with requeuing.
+			 */
+			if (unlikely(!nolock_qdisc_is_empty(q))) {
+				rc = q->enqueue(skb, q, &to_free) &
+					NET_XMIT_MASK;
+				__qdisc_run(q);
+				qdisc_run_end(q);
+
+				goto no_lock_out;
+			}
+
+			qdisc_bstats_cpu_update(q, skb);
+			if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+			    !nolock_qdisc_is_empty(q))
+				__qdisc_run(q);
+
+			qdisc_run_end(q);
+			return NET_XMIT_SUCCESS;
+		}
+
+		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+		qdisc_run(q);
 
+no_lock_out:
 		if (unlikely(to_free))
 			kfree_skb_list(to_free);
 		return rc;
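The bypass path above is a check/lock/recheck pattern: nolock_qdisc_is_empty() is cheap but racy outside the lock, so it is re-evaluated under q->seqlock before the skb is sent directly via sch_direct_xmit(). A self-contained userspace model of that shape (hypothetical names; a mutex and an atomic counter stand in for q->seqlock and the qdisc state):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t seqlock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int queued;		/* stands in for the NON_EMPTY bits */

static bool try_bypass(void)
{
	/* Racy pre-check: cheap, avoids the lock when clearly non-empty. */
	if (atomic_load(&queued) != 0)
		return false;

	if (pthread_mutex_trylock(&seqlock) != 0)
		return false;

	/* Authoritative re-check under the lock: an enqueue may have
	 * raced with the pre-check above.
	 */
	if (atomic_load(&queued) != 0) {
		pthread_mutex_unlock(&seqlock);
		return false;
	}

	/* Safe to "transmit" directly, skipping enqueue/dequeue. */
	pthread_mutex_unlock(&seqlock);
	return true;
}

int main(void)
{
	printf("bypass: %d\n", try_bypass());	/* 1 while nothing queued */
	return 0;
}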
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
 	 */
 	if (!netif_xmit_frozen_or_stopped(txq))
 		set_bit(__QDISC_STATE_MISSED, &q->state);
+	else
+		set_bit(__QDISC_STATE_DRAINING, &q->state);
 }
 
 /* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 		skb = next;
 	}
-	if (lock)
+
+	if (lock) {
 		spin_unlock(lock);
-	__netif_schedule(q);
+		set_bit(__QDISC_STATE_MISSED, &q->state);
+	} else {
+		__netif_schedule(q);
+	}
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
 	while (qdisc_restart(q, &packets)) {
 		quota -= packets;
 		if (quota <= 0) {
-			__netif_schedule(q);
+			if (q->flags & TCQ_F_NOLOCK)
+				set_bit(__QDISC_STATE_MISSED, &q->state);
+			else
+				__netif_schedule(q);
+
 			break;
 		}
 	}
@@ -698,13 +708,14 @@ retry:
 	if (likely(skb)) {
 		qdisc_update_stats_at_dequeue(qdisc, skb);
 	} else if (need_retry &&
-		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+		   READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
 		/* Delay clearing the STATE_MISSED here to reduce
 		 * the overhead of the second spin_trylock() in
 		 * qdisc_run_begin() and __netif_schedule() calling
 		 * in qdisc_run_end().
 		 */
 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
 
 		/* Make sure dequeuing happens after clearing
 		 * STATE_MISSED.
@@ -714,8 +725,6 @@ retry:
 		need_retry = false;
 
 		goto retry;
-	} else {
-		WRITE_ONCE(qdisc->empty, true);
 	}
 
 	return skb;
@@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	sch->empty = true;
 	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 
@@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
 		spin_unlock_bh(qdisc_lock(qdisc));
 	if (nolock) {
 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
 	}
 }
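In the dequeue retry path above, both flags are cleared before the retry, and the in-tree code follows the clears with a barrier (see the "Make sure dequeuing happens after clearing STATE_MISSED" comment; the diff context cuts off before the actual primitive). A loose C11 analogue of that clear-then-fence step, with atomic_thread_fence() standing in for the kernel barrier (an assumption, not taken from the diff):

#include <stdatomic.h>

#define MISSED		1UL
#define DRAINING	2UL

static atomic_ulong state;

/* Called when the queue looked non-empty but dequeue returned NULL. */
static void clear_flags_before_retry(void)
{
	atomic_fetch_and(&state, ~MISSED);
	atomic_fetch_and(&state, ~DRAINING);

	/* Full fence: the retried dequeue must not be reordered before
	 * the flag clears above.
	 */
	atomic_thread_fence(memory_order_seq_cst);
}

int main(void)
{
	atomic_fetch_or(&state, MISSED | DRAINING);
	clear_flags_before_retry();
	return atomic_load(&state) != 0;	/* exit 0: both cleared */
}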