net: thunderx: Fix transmit queue timeout issue

The transmit queue timeout issue is seen in two cases:
- Due to a race condition between setting stop_queue at xmit()
  and checking for stopped_queue in the NAPI poll routine, at times
  transmission from a SQ comes to a halt. This is fixed
  by using barriers and by adding a check for SQ free descriptors,
  in case the SQ is stopped and there are only CQE_RX, i.e. no CQE_TX.
- Contrary to an assumption, a HW erratum where HW doesn't stop transmission
  even though there are not enough CQEs available for a CQE_TX is
  not fixed in T88 pass 2.x. This results in a Qset error with
  'CQ_WR_FULL' stalling transmission. This is fixed by adjusting
  RXQ's RED levels for CQ level such that there is always enough
  space left for CQE_TXs.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Sunil Goutham 2016-12-01 18:24:28 +05:30 committed by David S. Miller
parent 9aac3c1879
commit bd3ad7d3a1
3 changed files with 54 additions and 37 deletions

View File

@ -644,6 +644,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
struct cmp_queue *cq = &qs->cq[cq_idx];
struct cqe_rx_t *cq_desc;
struct netdev_queue *txq;
struct snd_queue *sq;
unsigned int tx_pkts = 0, tx_bytes = 0;
spin_lock_bh(&cq->lock);
@ -709,16 +710,20 @@ loop:
done:
/* Wake up TXQ if it was stopped earlier due to SQ full */
if (tx_done) {
sq = &nic->qs->sq[cq_idx];
if (tx_done ||
(atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
netdev = nic->pnicvf->netdev;
txq = netdev_get_tx_queue(netdev,
nicvf_netdev_qidx(nic, cq_idx));
if (tx_pkts)
netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
nic = nic->pnicvf;
/* To read updated queue and carrier status */
smp_mb();
if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
netif_tx_start_queue(txq);
netif_tx_wake_queue(txq);
nic = nic->pnicvf;
this_cpu_inc(nic->drv_stats->txq_wake);
if (netif_msg_tx_err(nic))
netdev_warn(netdev,
@ -1054,6 +1059,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
struct nicvf *nic = netdev_priv(netdev);
int qid = skb_get_queue_mapping(skb);
struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
struct nicvf *snic;
struct snd_queue *sq;
int tmp;
/* Check for minimum packet length */
if (skb->len <= ETH_HLEN) {
@ -1061,13 +1069,39 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
snic = nic;
/* Get secondary Qset's SQ structure */
if (qid >= MAX_SND_QUEUES_PER_QS) {
tmp = qid / MAX_SND_QUEUES_PER_QS;
snic = (struct nicvf *)nic->snicvf[tmp - 1];
if (!snic) {
netdev_warn(nic->netdev,
"Secondary Qset#%d's ptr not initialized\n",
tmp - 1);
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
qid = qid % MAX_SND_QUEUES_PER_QS;
}
sq = &snic->qs->sq[qid];
if (!netif_tx_queue_stopped(txq) &&
!nicvf_sq_append_skb(snic, sq, skb, qid)) {
netif_tx_stop_queue(txq);
this_cpu_inc(nic->drv_stats->txq_stop);
if (netif_msg_tx_err(nic))
netdev_warn(netdev,
"%s: Transmit ring full, stopping SQ%d\n",
netdev->name, qid);
/* Barrier, so that stop_queue visible to other cpus */
smp_mb();
/* Check again, in case another cpu freed descriptors */
if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
netif_tx_wake_queue(txq);
} else {
this_cpu_inc(nic->drv_stats->txq_stop);
if (netif_msg_tx_err(nic))
netdev_warn(netdev,
"%s: Transmit ring full, stopping SQ%d\n",
netdev->name, qid);
}
return NETDEV_TX_BUSY;
}

View File

@ -1190,30 +1190,12 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
}
/* Append an skb to a SQ for packet transfer. */
int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
struct sk_buff *skb, u8 sq_num)
{
int i, size;
int subdesc_cnt, tso_sqe = 0;
int sq_num, qentry;
struct queue_set *qs;
struct snd_queue *sq;
sq_num = skb_get_queue_mapping(skb);
if (sq_num >= MAX_SND_QUEUES_PER_QS) {
/* Get secondary Qset's SQ structure */
i = sq_num / MAX_SND_QUEUES_PER_QS;
if (!nic->snicvf[i - 1]) {
netdev_warn(nic->netdev,
"Secondary Qset#%d's ptr not initialized\n",
i - 1);
return 1;
}
nic = (struct nicvf *)nic->snicvf[i - 1];
sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
}
qs = nic->qs;
sq = &qs->sq[sq_num];
int qentry;
subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
if (subdesc_cnt > atomic_read(&sq->free_cnt))

View File

@ -88,13 +88,13 @@
/* RED and Backpressure levels of CQ for pkt reception
* For CQ, level is a measure of emptiness i.e 0x0 means full
* eg: For CQ of size 4K, and for pass/drop levels of 128/96
* HW accepts pkt if unused CQE >= 2048
* RED accepts pkt if unused CQE < 2048 & >= 1536
* DROPs pkts if unused CQE < 1536
* eg: For CQ of size 4K, and for pass/drop levels of 160/144
* HW accepts pkt if unused CQE >= 2560
* RED accepts pkt if unused CQE < 2560 & >= 2304
* DROPs pkts if unused CQE < 2304
*/
#define RQ_PASS_CQ_LVL 128ULL
#define RQ_DROP_CQ_LVL 96ULL
#define RQ_PASS_CQ_LVL 160ULL
#define RQ_DROP_CQ_LVL 144ULL
/* RED and Backpressure levels of RBDR for pkt reception
* For RBDR, level is a measure of fullness i.e 0x0 means empty
@ -306,7 +306,8 @@ void nicvf_sq_disable(struct nicvf *nic, int qidx);
void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt);
void nicvf_sq_free_used_descs(struct net_device *netdev,
struct snd_queue *sq, int qidx);
int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb);
int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
struct sk_buff *skb, u8 sq_num);
struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
void nicvf_rbdr_task(unsigned long data);