From 5827fe2bc9c4bade6af994676c9823908e091877 Mon Sep 17 00:00:00 2001
From: Shinas Rasheed
Date: Tue, 14 Nov 2023 05:45:32 -0800
Subject: [PATCH 1/4] octeon_ep: add padding for small packets

Pad small packets to ETH_ZLEN before transmit, as the hardware cannot
pad them itself; software padding is required to ensure the minimum
Ethernet frame length.

Signed-off-by: Shinas Rasheed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 552970c7dec0..2c86b911a380 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -820,6 +820,9 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
         u16 nr_frags, si;
         u16 q_no, wi;
 
+        if (skb_put_padto(skb, ETH_ZLEN))
+                return NETDEV_TX_OK;
+
         q_no = skb_get_queue_mapping(skb);
         if (q_no >= oct->num_iqs) {
                 netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);

From 2fba5069959c50c0e5be95e8d67356c63909cbc2 Mon Sep 17 00:00:00 2001
From: Shinas Rasheed
Date: Tue, 14 Nov 2023 05:45:33 -0800
Subject: [PATCH 2/4] octeon_ep: remove dma sync in transmit path

Remove the DMA sync calls for the scatter-gather mappings: these are
coherent allocations and do not need an explicit sync.

Signed-off-by: Shinas Rasheed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 2c86b911a380..1c02304677c9 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -872,9 +872,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
                 if (dma_mapping_error(iq->dev, dma))
                         goto dma_map_err;
 
-                dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
-                                        OCTEP_SGLIST_SIZE_PER_PKT,
-                                        DMA_TO_DEVICE);
                 memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
                 sglist[0].len[3] = len;
                 sglist[0].dma_ptr[0] = dma;
@@ -894,10 +891,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
                         frag++;
                         si++;
                 }
-                dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
-                                           OCTEP_SGLIST_SIZE_PER_PKT,
-                                           DMA_TO_DEVICE);
-
                 hw_desc->dptr = tx_buffer->sglist_dma;
         }
 
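As an aside to the first patch above, the padding rule it relies on can be
illustrated outside the kernel. The sketch below is a minimal userspace
approximation of what skb_put_padto(skb, ETH_ZLEN) does for an skb: frames
shorter than ETH_ZLEN (60 bytes, excluding the FCS) are zero-extended before
transmit. The buffer handling and the frame_pad_to() helper are invented for
illustration and are not driver code.

/*
 * Userspace sketch: zero-pad a frame to the minimum Ethernet length,
 * roughly what skb_put_padto() does for a socket buffer.
 */
#include <stdio.h>
#include <string.h>

#define ETH_ZLEN 60     /* minimum Ethernet frame length, FCS excluded */

/* Zero-pad a frame held in buf[0..*len) up to min_len bytes.
 * Returns -1 if the buffer cannot hold the padded frame.
 */
static int frame_pad_to(unsigned char *buf, size_t *len,
                        size_t min_len, size_t buf_size)
{
        if (*len >= min_len)
                return 0;
        if (min_len > buf_size)
                return -1;
        memset(buf + *len, 0, min_len - *len);
        *len = min_len;
        return 0;
}

int main(void)
{
        unsigned char frame[128] = { 0xff, 0xff };      /* pretend 42-byte ARP */
        size_t len = 42;

        if (frame_pad_to(frame, &len, ETH_ZLEN, sizeof(frame)))
                return 1;
        printf("padded frame length: %zu\n", len);      /* prints 60 */
        return 0;
}

In the driver itself, skb_put_padto() frees the skb when padding fails, which
is why octep_start_xmit() returns NETDEV_TX_OK rather than an error in that
case.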
From 373d9a55ba746c579ca98222d2b4308d06978ca5 Mon Sep 17 00:00:00 2001
From: Shinas Rasheed
Date: Tue, 14 Nov 2023 05:45:34 -0800
Subject: [PATCH 3/4] octeon_ep: implement xmit_more in transmit

Add xmit_more handling in the Tx datapath of the octeon_ep PF driver.

Signed-off-by: Shinas Rasheed
Signed-off-by: David S. Miller
---
 .../ethernet/marvell/octeon_ep/octep_config.h |  2 +-
 .../ethernet/marvell/octeon_ep/octep_main.c   | 36 ++++++++++++++++++----------
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index 1622a6ebf036..ed8b1ace56b9 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -15,7 +15,7 @@
 /* Tx Queue: maximum descriptors per ring */
 #define OCTEP_IQ_MAX_DESCRIPTORS    1024
 /* Minimum input (Tx) requests to be enqueued to ring doorbell */
-#define OCTEP_DB_MIN                1
+#define OCTEP_DB_MIN                8
 /* Packet threshold for Tx queue interrupt */
 #define OCTEP_IQ_INTR_THRESHOLD     0x0
 
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 1c02304677c9..2d1bcdc589f3 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -784,6 +784,13 @@ static inline int octep_iq_full_check(struct octep_iq *iq)
         /* Stop the queue if unable to send */
         netif_stop_subqueue(iq->netdev, iq->q_no);
 
+        /* Allow for pending updates in write index
+         * from iq_process_completion in other cpus
+         * to reflect, in case queue gets free
+         * entries.
+         */
+        smp_mb();
+
         /* check again and restart the queue, in case NAPI has just freed
          * enough Tx ring entries.
          */
@@ -818,6 +825,7 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
         struct octep_iq *iq;
         skb_frag_t *frag;
         u16 nr_frags, si;
+        int xmit_more;
         u16 q_no, wi;
 
         if (skb_put_padto(skb, ETH_ZLEN))
@@ -830,10 +838,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
         }
 
         iq = oct->iq[q_no];
-        if (octep_iq_full_check(iq)) {
-                iq->stats.tx_busy++;
-                return NETDEV_TX_BUSY;
-        }
 
         shinfo = skb_shinfo(skb);
         nr_frags = shinfo->nr_frags;
@@ -894,19 +898,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
                 hw_desc->dptr = tx_buffer->sglist_dma;
         }
 
-        netdev_tx_sent_queue(iq->netdev_q, skb->len);
+        xmit_more = netdev_xmit_more();
+
+        __netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more);
+
         skb_tx_timestamp(skb);
         atomic_inc(&iq->instr_pending);
+        iq->fill_cnt++;
         wi++;
         if (wi == iq->max_count)
                 wi = 0;
         iq->host_write_index = wi;
+
+        /* octep_iq_full_check stops the queue and returns
+         * true if so, in case the queue has become full
+         * by inserting current packet. If so, we can
+         * go ahead and ring doorbell.
+         */
+        if (!octep_iq_full_check(iq) && xmit_more &&
+            iq->fill_cnt < iq->fill_threshold)
+                return NETDEV_TX_OK;
+
         /* Flush the hw descriptor before writing to doorbell */
         wmb();
-
-        /* Ring Doorbell to notify the NIC there is a new packet */
-        writel(1, iq->doorbell_reg);
-        iq->stats.instr_posted++;
+        /* Ring Doorbell to notify the NIC of new packets */
+        writel(iq->fill_cnt, iq->doorbell_reg);
+        iq->stats.instr_posted += iq->fill_cnt;
+        iq->fill_cnt = 0;
         return NETDEV_TX_OK;
 
 dma_map_sg_err:
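The doorbell batching introduced by the xmit_more patch follows a common
pattern: the MMIO doorbell write is deferred while the stack signals that more
packets are coming, and flushed once the batch reaches a threshold, the ring is
about to fill, or xmit_more is cleared. The program below is a self-contained
userspace model of that control flow only, not driver code; struct fake_iq,
doorbell_write() and FILL_THRESHOLD are invented stand-ins (FILL_THRESHOLD
mirrors the new OCTEP_DB_MIN value of 8), and the space check is a crude
substitute for octep_iq_full_check().

/* Userspace sketch of doorbell batching driven by an xmit_more hint. */
#include <stdbool.h>
#include <stdio.h>

#define FILL_THRESHOLD  8       /* mirrors OCTEP_DB_MIN after this patch */

struct fake_iq {
        unsigned int fill_cnt;  /* descriptors posted since the last doorbell */
        unsigned int space;     /* free ring entries */
};

static void doorbell_write(struct fake_iq *iq)
{
        /* Stand-in for the MMIO write of fill_cnt to the doorbell register. */
        printf("doorbell: %u new descriptors\n", iq->fill_cnt);
        iq->fill_cnt = 0;
}

static void xmit_one(struct fake_iq *iq, bool xmit_more)
{
        iq->fill_cnt++;
        iq->space--;

        /* Defer the doorbell while more packets are queued, the batch is
         * still small, and the ring is not about to fill up.
         */
        if (xmit_more && iq->fill_cnt < FILL_THRESHOLD && iq->space > 0)
                return;

        doorbell_write(iq);
}

int main(void)
{
        struct fake_iq iq = { .space = 1024 };
        int i;

        for (i = 0; i < 20; i++)
                xmit_one(&iq, i != 19); /* last packet clears xmit_more */
        return 0;                       /* doorbells of 8, 8 and 4 */
}

Batching amortizes the cost of the uncached doorbell write over several
descriptors, which is why the same patch raises OCTEP_DB_MIN from 1 to 8.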
From dc9c02b7faa0f37a1dd52752192a665319657d14 Mon Sep 17 00:00:00 2001
From: Shinas Rasheed
Date: Tue, 14 Nov 2023 05:45:35 -0800
Subject: [PATCH 4/4] octeon_ep: remove atomic variable usage in Tx data path

Replace the atomic variable "instr_pending", which tracks the number of
posted Tx instructions pending completion, with arithmetic on the
host_write_index and flush_index variables in the xmit and completion
processing paths respectively.

Signed-off-by: Shinas Rasheed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/octeon_ep/octep_config.h | 1 +
 drivers/net/ethernet/marvell/octeon_ep/octep_main.c   | 9 +++------
 drivers/net/ethernet/marvell/octeon_ep/octep_main.h   | 9 +++++++++
 drivers/net/ethernet/marvell/octeon_ep/octep_tx.c     | 5 +----
 drivers/net/ethernet/marvell/octeon_ep/octep_tx.h     | 3 ---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index ed8b1ace56b9..91cfa19c65b9 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -13,6 +13,7 @@
 #define OCTEP_64BYTE_INSTR  64
 
 /* Tx Queue: maximum descriptors per ring */
+/* This needs to be a power of 2 */
 #define OCTEP_IQ_MAX_DESCRIPTORS    1024
 /* Minimum input (Tx) requests to be enqueued to ring doorbell */
 #define OCTEP_DB_MIN                8
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 2d1bcdc589f3..974a34be9ffa 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -777,7 +777,7 @@ static int octep_stop(struct net_device *netdev)
  */
 static inline int octep_iq_full_check(struct octep_iq *iq)
 {
-        if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+        if (likely((IQ_INSTR_SPACE(iq)) >
                    OCTEP_WAKE_QUEUE_THRESHOLD))
                 return 0;
 
@@ -794,7 +794,7 @@ static inline int octep_iq_full_check(struct octep_iq *iq)
         /* check again and restart the queue, in case NAPI has just freed
          * enough Tx ring entries.
          */
-        if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+        if (unlikely(IQ_INSTR_SPACE(iq) >
                      OCTEP_WAKE_QUEUE_THRESHOLD)) {
                 netif_start_subqueue(iq->netdev, iq->q_no);
                 iq->stats.restart_cnt++;
@@ -903,12 +903,9 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
         __netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more);
 
         skb_tx_timestamp(skb);
-        atomic_inc(&iq->instr_pending);
         iq->fill_cnt++;
         wi++;
-        if (wi == iq->max_count)
-                wi = 0;
-        iq->host_write_index = wi;
+        iq->host_write_index = wi & iq->ring_size_mask;
 
         /* octep_iq_full_check stops the queue and returns
          * true if so, in case the queue has become full
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index 6df902ebb7f3..c33e046b69a4 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -40,6 +40,15 @@
 #define OCTEP_OQ_INTR_RESEND_BIT  59
 
 #define OCTEP_MMIO_REGIONS 3
+
+#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
+                               ((iq__)->host_write_index - (iq__)->flush_index) & \
+                               (iq__)->ring_size_mask; \
+                             })
+#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
+                             (iq_)->max_count - IQ_INSTR_PENDING(iq_); \
+                           })
+
 /* PCI address space mapping information.
  * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
  * Octeon gets mapped to different physical address spaces in
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
index d0adb82d65c3..06851b78aa28 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq)
         iq->flush_index = 0;
         iq->pkts_processed = 0;
         iq->pkt_in_done = 0;
-        atomic_set(&iq->instr_pending, 0);
 }
 
 /**
@@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
         }
 
         iq->pkts_processed += compl_pkts;
-        atomic_sub(compl_pkts, &iq->instr_pending);
         iq->stats.instr_completed += compl_pkts;
         iq->stats.bytes_sent += compl_bytes;
         iq->stats.sgentry_sent += compl_sg;
@@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
         netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);
 
         if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
-            ((iq->max_count - atomic_read(&iq->instr_pending)) >
+            (IQ_INSTR_SPACE(iq) >
              OCTEP_WAKE_QUEUE_THRESHOLD))
                 netif_wake_subqueue(iq->netdev, iq->q_no);
         return !budget;
@@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq)
                 dev_kfree_skb_any(skb);
         }
 
-        atomic_set(&iq->instr_pending, 0);
         iq->flush_index = fi;
         netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
 }
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
index 86c98b13fc44..1ba4ff65e54d 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
@@ -172,9 +172,6 @@ struct octep_iq {
         /* Statistics for this input queue. */
         struct octep_iq_stats stats;
 
-        /* This field keeps track of the instructions pending in this queue. */
-        atomic_t instr_pending;
-
         /* Pointer to the Virtual Base addr of the input ring. */
         struct octep_tx_desc_hw *desc_ring;
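The IQ_INSTR_PENDING()/IQ_INSTR_SPACE() macros added above rely on the ring
size being a power of two (hence the new comment on OCTEP_IQ_MAX_DESCRIPTORS):
with a power-of-two ring, occupancy is simply the difference of the write and
flush indices masked by ring_size_mask, and unsigned wraparound keeps the
result correct after the write index wraps. The standalone program below
sketches that arithmetic with made-up index values; RING_SIZE and the helper
names are illustrative, not part of the driver.

/* Userspace sketch of masked ring-index occupancy arithmetic. */
#include <stdio.h>

#define RING_SIZE       1024U                   /* must be a power of 2 */
#define RING_MASK       (RING_SIZE - 1)

static unsigned int instr_pending(unsigned int write_idx, unsigned int flush_idx)
{
        /* Entries posted but not yet completed, valid across wraparound. */
        return (write_idx - flush_idx) & RING_MASK;
}

static unsigned int instr_space(unsigned int write_idx, unsigned int flush_idx)
{
        return RING_SIZE - instr_pending(write_idx, flush_idx);
}

int main(void)
{
        /* No wrap: producer at 100, consumer at 40 -> 60 pending. */
        printf("pending=%u space=%u\n",
               instr_pending(100, 40), instr_space(100, 40));

        /* Wrapped: producer at 5, consumer at 1000 ->
         * (5 - 1000) & 1023 == 29 pending.
         */
        printf("pending=%u space=%u\n",
               instr_pending(5, 1000), instr_space(5, 1000));
        return 0;
}

Because each index is written by only one context (the xmit path advances
host_write_index, completion processing advances flush_index), occupancy can be
derived without a shared atomic counter; the smp_mb() added in patch 3 orders
the stop/re-check in octep_iq_full_check() against concurrent completions.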