From 0c8493d90b6bb0f5c4fe9217db8f7203f24c0f28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 24 May 2017 07:55:34 +0200 Subject: [PATCH 01/15] i40e: add XDP support for pass and drop actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds basic XDP support for i40e derived NICs. All XDP actions will end up in XDP_DROP. Signed-off-by: Björn Töpel Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 7 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 87 +++++++++++++ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 130 +++++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 + 4 files changed, 194 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 395ca94faf80..d3195b29d53c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -645,6 +645,8 @@ struct i40e_vsi { u16 max_frame; u16 rx_buf_len; + struct bpf_prog *xdp_prog; + /* List of q_vectors allocated to this VSI */ struct i40e_q_vector **q_vectors; int num_q_vectors; @@ -972,4 +974,9 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf); i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); + +static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) +{ + return !!vsi->xdp_prog; +} #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 98fb644a580e..89bbe32a5934 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -27,6 +27,7 @@ #include #include #include +#include /* Local includes */ #include "i40e.h" @@ -2395,6 +2396,18 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) } } +/** + * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @vsi: the vsi + **/ +static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi) +{ + if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + return I40E_RXBUFFER_2048; + else + return I40E_RXBUFFER_3072; +} + /** * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit * @netdev: network interface device structure @@ -2408,6 +2421,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + if (i40e_enabled_xdp_vsi(vsi)) { + int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + + if (frame_size > i40e_max_xdp_frame_size(vsi)) + return -EINVAL; + } + netdev_info(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); netdev->mtu = new_mtu; @@ -9311,6 +9331,72 @@ out_err: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } +/** + * i40e_xdp_setup - add/remove an XDP program + * @vsi: VSI to changed + * @prog: XDP program + **/ +static int i40e_xdp_setup(struct i40e_vsi *vsi, + struct bpf_prog *prog) +{ + int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct i40e_pf *pf = vsi->back; + struct bpf_prog *old_prog; + bool need_reset; + int i; + + /* Don't allow frames that span over multiple buffers */ + if (frame_size > vsi->rx_buf_len) + return -EINVAL; + + if (!i40e_enabled_xdp_vsi(vsi) && !prog) + return 0; + + /* When turning XDP on->off/off->on we reset and rebuild the rings. */ + need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); + + if (need_reset) + i40e_prep_for_reset(pf, true); + + old_prog = xchg(&vsi->xdp_prog, prog); + + if (need_reset) + i40e_reset_and_rebuild(pf, true, true); + + for (i = 0; i < vsi->num_queue_pairs; i++) + WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +/** + * i40e_xdp - implements ndo_xdp for i40e + * @dev: netdevice + * @xdp: XDP command + **/ +static int i40e_xdp(struct net_device *dev, + struct netdev_xdp *xdp) +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + struct i40e_vsi *vsi = np->vsi; + + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return i40e_xdp_setup(vsi, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = i40e_enabled_xdp_vsi(vsi); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops i40e_netdev_ops = { .ndo_open = i40e_open, .ndo_stop = i40e_close, @@ -9343,6 +9429,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, + .ndo_xdp = i40e_xdp, }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index af554f3cda19..f744f843bc72 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -26,6 +26,7 @@ #include #include +#include #include "i40e.h" #include "i40e_trace.h" #include "i40e_prototype.h" @@ -1195,6 +1196,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) void i40e_free_rx_resources(struct i40e_ring *rx_ring) { i40e_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL; @@ -1241,6 +1243,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; + return 0; err: kfree(rx_ring->rx_bi); @@ -1593,6 +1597,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed + * @rx_desc: pointer to the EOP Rx descriptor * * Also address the case where we are pulling data in on pages only * and as such no data is present in the skb header. @@ -1602,8 +1607,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, * * Returns true if an error was encountered and skb was freed. **/ -static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) +static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, + union i40e_rx_desc *rx_desc) + { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + + /* ERR_MASK will only have valid bits if EOP set, and + * what we are doing here is actually checking + * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in + * the error field + */ + if (unlikely(i40e_test_staterr(rx_desc, + BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { + dev_kfree_skb_any(skb); + return true; + } + /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1776,7 +1798,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, * i40e_construct_skb - Allocate skb and populate it * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: xdp_buff pointing to the data * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the @@ -1784,9 +1806,9 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, */ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else @@ -1796,9 +1818,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif /* allocate a skb to store the frags */ @@ -1811,10 +1833,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > I40E_RX_HDR_SIZE) - headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); + headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, + ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; @@ -1841,16 +1864,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, * i40e_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @rx_buffer: Rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: xdp_buff pointing to the data * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else @@ -1860,12 +1883,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif /* build an skb around the page buffer */ - skb = build_skb(va - I40E_SKB_PAD, truesize); + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; @@ -1944,6 +1967,46 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, return true; } +#define I40E_XDP_PASS 0 +#define I40E_XDP_CONSUMED 1 + +/** + * i40e_run_xdp - run an XDP program + * @rx_ring: Rx ring being processed + * @xdp: XDP buffer containing the frame + **/ +static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, + struct xdp_buff *xdp) +{ + int result = I40E_XDP_PASS; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_TX: + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping packet */ + case XDP_DROP: + result = I40E_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + /** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on @@ -1966,6 +2029,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) while (likely(total_rx_packets < budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + struct xdp_buff xdp; unsigned int size; u16 vlan_tag; u8 rx_ptype; @@ -2006,12 +2070,27 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_buffer = i40e_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_hard_start = xdp.data - + i40e_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; + + skb = i40e_run_xdp(rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + total_rx_bytes += size; + total_rx_packets++; + rx_buffer->pagecnt_bias++; + } else if (skb) { i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = i40e_build_skb(rx_ring, rx_buffer, size); - else - skb = i40e_construct_skb(rx_ring, rx_buffer, size); + } else if (ring_uses_build_skb(rx_ring)) { + skb = i40e_build_skb(rx_ring, rx_buffer, &xdp); + } else { + skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp); + } /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -2026,18 +2105,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (i40e_is_non_eop(rx_ring, rx_desc, skb)) continue; - /* ERR_MASK will only have valid bits if EOP set, and - * what we are doing here is actually checking - * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in - * the error field - */ - if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { - dev_kfree_skb_any(skb); - skb = NULL; - continue; - } - - if (i40e_cleanup_headers(rx_ring, skb)) { + if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) { skb = NULL; continue; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index f5de51124cae..31f0b162996f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -360,6 +360,7 @@ struct i40e_ring { void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ + struct bpf_prog *xdp_prog; union { struct i40e_tx_buffer *tx_bi; struct i40e_rx_buffer *rx_bi; From 74608d17fe29b2cddceea609033019b32e8a0650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 24 May 2017 07:55:35 +0200 Subject: [PATCH 02/15] i40e: add support for XDP_TX action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds proper XDP_TX action support. For each Tx ring, an additional XDP Tx ring is allocated and setup. This version does the DMA mapping in the fast-path, which will penalize performance for IOMMU enabled systems. Further, debugfs support is not wired up for the XDP Tx rings. Signed-off-by: Björn Töpel Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../net/ethernet/intel/i40e/i40e_ethtool.c | 42 ++- drivers/net/ethernet/intel/i40e/i40e_main.c | 299 +++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 118 ++++++- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 11 + 5 files changed, 384 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d3195b29d53c..4250ab55a9f1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -629,6 +629,7 @@ struct i40e_vsi { /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; struct i40e_ring **tx_rings; + struct i40e_ring **xdp_rings; /* XDP Tx rings */ u32 active_filters; u32 promisc_threshold; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3d58762efbc0..9d3233c2c9cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1299,6 +1299,17 @@ static void i40e_get_ringparam(struct net_device *netdev, ring->rx_jumbo_pending = 0; } +static bool i40e_active_tx_ring_index(struct i40e_vsi *vsi, u16 index) +{ + if (i40e_enabled_xdp_vsi(vsi)) { + return index < vsi->num_queue_pairs || + (index >= vsi->alloc_queue_pairs && + index < vsi->alloc_queue_pairs + vsi->num_queue_pairs); + } + + return index < vsi->num_queue_pairs; +} + static int i40e_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { @@ -1308,6 +1319,7 @@ static int i40e_set_ringparam(struct net_device *netdev, struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; u32 new_rx_count, new_tx_count; + u16 tx_alloc_queue_pairs; int timeout = 50; int i, err = 0; @@ -1345,6 +1357,8 @@ static int i40e_set_ringparam(struct net_device *netdev, for (i = 0; i < vsi->num_queue_pairs; i++) { vsi->tx_rings[i]->count = new_tx_count; vsi->rx_rings[i]->count = new_rx_count; + if (i40e_enabled_xdp_vsi(vsi)) + vsi->xdp_rings[i]->count = new_tx_count; } goto done; } @@ -1354,20 +1368,24 @@ static int i40e_set_ringparam(struct net_device *netdev, * to the Tx and Rx ring structs. */ - /* alloc updated Tx resources */ + /* alloc updated Tx and XDP Tx resources */ + tx_alloc_queue_pairs = vsi->alloc_queue_pairs * + (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); if (new_tx_count != vsi->tx_rings[0]->count) { netdev_info(netdev, "Changing Tx descriptor count from %d to %d.\n", vsi->tx_rings[0]->count, new_tx_count); - tx_rings = kcalloc(vsi->alloc_queue_pairs, + tx_rings = kcalloc(tx_alloc_queue_pairs, sizeof(struct i40e_ring), GFP_KERNEL); if (!tx_rings) { err = -ENOMEM; goto done; } - for (i = 0; i < vsi->num_queue_pairs; i++) { - /* clone ring and setup updated count */ + for (i = 0; i < tx_alloc_queue_pairs; i++) { + if (!i40e_active_tx_ring_index(vsi, i)) + continue; + tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_count; /* the desc and bi pointers will be reallocated in the @@ -1379,6 +1397,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (err) { while (i) { i--; + if (!i40e_active_tx_ring_index(vsi, i)) + continue; i40e_free_tx_resources(&tx_rings[i]); } kfree(tx_rings); @@ -1446,9 +1466,11 @@ rx_unwind: i40e_down(vsi); if (tx_rings) { - for (i = 0; i < vsi->num_queue_pairs; i++) { - i40e_free_tx_resources(vsi->tx_rings[i]); - *vsi->tx_rings[i] = tx_rings[i]; + for (i = 0; i < tx_alloc_queue_pairs; i++) { + if (i40e_active_tx_ring_index(vsi, i)) { + i40e_free_tx_resources(vsi->tx_rings[i]); + *vsi->tx_rings[i] = tx_rings[i]; + } } kfree(tx_rings); tx_rings = NULL; @@ -1479,8 +1501,10 @@ rx_unwind: free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { - for (i = 0; i < vsi->num_queue_pairs; i++) - i40e_free_tx_resources(&tx_rings[i]); + for (i = 0; i < tx_alloc_queue_pairs; i++) { + if (i40e_active_tx_ring_index(vsi, i)) + i40e_free_tx_resources(vsi->tx_rings[i]); + } kfree(tx_rings); tx_rings = NULL; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 89bbe32a5934..7e415bb5a7dc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -407,6 +407,27 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi) return &vsi->net_stats; } +/** + * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring + * @ring: Tx ring to get statistics from + * @stats: statistics entry to be updated + **/ +static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, + struct rtnl_link_stats64 *stats) +{ + u64 bytes, packets; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + + stats->tx_packets += packets; + stats->tx_bytes += bytes; +} + /** * i40e_get_netdev_stats_struct - Get statistics for netdev interface * @netdev: network interface device structure @@ -437,15 +458,8 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; + i40e_get_netdev_stats_struct_tx(tx_ring, stats); - do { - start = u64_stats_fetch_begin_irq(&tx_ring->syncp); - packets = tx_ring->stats.packets; - bytes = tx_ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); - - stats->tx_packets += packets; - stats->tx_bytes += bytes; rx_ring = &tx_ring[1]; do { @@ -456,6 +470,9 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, stats->rx_packets += packets; stats->rx_bytes += bytes; + + if (i40e_enabled_xdp_vsi(vsi)) + i40e_get_netdev_stats_struct_tx(&rx_ring[1], stats); } rcu_read_unlock(); @@ -2814,6 +2831,12 @@ static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs && !err; i++) err = i40e_setup_tx_descriptors(vsi->tx_rings[i]); + if (!i40e_enabled_xdp_vsi(vsi)) + return err; + + for (i = 0; i < vsi->num_queue_pairs && !err; i++) + err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]); + return err; } @@ -2827,12 +2850,17 @@ static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi) { int i; - if (!vsi->tx_rings) - return; + if (vsi->tx_rings) { + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + i40e_free_tx_resources(vsi->tx_rings[i]); + } - for (i = 0; i < vsi->num_queue_pairs; i++) - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) - i40e_free_tx_resources(vsi->tx_rings[i]); + if (vsi->xdp_rings) { + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + i40e_free_tx_resources(vsi->xdp_rings[i]); + } } /** @@ -3093,6 +3121,12 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) err = i40e_configure_tx_ring(vsi->tx_rings[i]); + if (!i40e_enabled_xdp_vsi(vsi)) + return err; + + for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) + err = i40e_configure_tx_ring(vsi->xdp_rings[i]); + return err; } @@ -3237,6 +3271,7 @@ static int i40e_vsi_configure(struct i40e_vsi *vsi) **/ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) { + bool has_xdp = i40e_enabled_xdp_vsi(vsi); struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u16 vector; @@ -3267,28 +3302,40 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) /* Linked list for the queuepairs assigned to this vector */ wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp); for (q = 0; q < q_vector->num_ringpairs; q++) { + u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp; u32 val; val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | - (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | - (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | - (qp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)| - (I40E_QUEUE_TYPE_TX - << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); + (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | + (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_TX << + I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(qp), val); + if (has_xdp) { + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | + (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | + (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | + (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_TX << + I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); + + wr32(hw, I40E_QINT_TQCTL(nextqp), val); + } + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | - (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | - (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | - ((qp+1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT)| - (I40E_QUEUE_TYPE_RX - << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); + (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | + (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | + ((qp + 1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_RX << + I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); /* Terminate the linked list */ if (q == (q_vector->num_ringpairs - 1)) - val |= (I40E_QUEUE_END_OF_LIST - << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); + val |= (I40E_QUEUE_END_OF_LIST << + I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(qp), val); qp++; @@ -3342,6 +3389,7 @@ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) **/ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) { + u32 nextqp = i40e_enabled_xdp_vsi(vsi) ? vsi->alloc_queue_pairs : 0; struct i40e_q_vector *q_vector = vsi->q_vectors[0]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; @@ -3362,12 +3410,22 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the queue int */ - val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | - (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | + (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)| (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), val); + if (i40e_enabled_xdp_vsi(vsi)) { + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | + (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT)| + (I40E_QUEUE_TYPE_TX + << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); + + wr32(hw, I40E_QINT_TQCTL(nextqp), val); + } + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_END_OF_LIST << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); @@ -3534,6 +3592,9 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), 0); wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), 0); + if (!i40e_enabled_xdp_vsi(vsi)) + continue; + wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0); } if (pf->flags & I40E_FLAG_MSIX_ENABLED) { @@ -3836,6 +3897,16 @@ static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; + /* Place XDP Tx ring in the same q_vector ring list as regular Tx */ + if (i40e_enabled_xdp_vsi(vsi)) { + struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx]; + + xdp_ring->q_vector = q_vector; + xdp_ring->next = q_vector->tx.ring; + q_vector->tx.ring = xdp_ring; + q_vector->tx.count++; + } + rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; q_vector->rx.ring = rx_ring; @@ -4014,6 +4085,33 @@ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable) wr32(hw, I40E_QTX_ENA(pf_q), tx_reg); } +/** + * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion + * @seid: VSI SEID + * @pf: the PF structure + * @pf_q: the PF queue to configure + * @is_xdp: true if the queue is used for XDP + * @enable: start or stop the queue + **/ +static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, + bool is_xdp, bool enable) +{ + int ret; + + i40e_control_tx_q(pf, pf_q, enable); + + /* wait for the change to finish */ + ret = i40e_pf_txq_wait(pf, pf_q, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d %sTx ring %d %sable timeout\n", + seid, (is_xdp ? "XDP " : ""), pf_q, + (enable ? "en" : "dis")); + } + + return ret; +} + /** * i40e_vsi_control_tx - Start or stop a VSI's rings * @vsi: the VSI being configured @@ -4026,16 +4124,20 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - i40e_control_tx_q(pf, pf_q, enable); - - /* wait for the change to finish */ - ret = i40e_pf_txq_wait(pf, pf_q, enable); - if (ret) { - dev_info(&pf->pdev->dev, - "VSI seid %d Tx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + ret = i40e_control_wait_tx_q(vsi->seid, pf, + pf_q, + false /*is xdp*/, enable); + if (ret) + break; + + if (!i40e_enabled_xdp_vsi(vsi)) + continue; + + ret = i40e_control_wait_tx_q(vsi->seid, pf, + pf_q + vsi->alloc_queue_pairs, + true /*is xdp*/, enable); + if (ret) break; - } } return ret; @@ -4547,7 +4649,21 @@ int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) vsi->seid, pf_q); return ret; } - /* Check and wait for the Tx queue */ + + if (!i40e_enabled_xdp_vsi(vsi)) + goto wait_rx; + + /* Check and wait for the XDP Tx queue */ + ret = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs, + false); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d XDP Tx ring %d disable timeout\n", + vsi->seid, pf_q); + return ret; + } +wait_rx: + /* Check and wait for the Rx queue */ ret = i40e_pf_rxq_wait(pf, pf_q, false); if (ret) { dev_info(&pf->pdev->dev, @@ -5466,6 +5582,8 @@ void i40e_down(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); + if (i40e_enabled_xdp_vsi(vsi)) + i40e_clean_tx_ring(vsi->xdp_rings[i]); i40e_clean_rx_ring(vsi->rx_rings[i]); } @@ -7535,15 +7653,22 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) **/ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors) { + struct i40e_ring **next_rings; int size; int ret = 0; - /* allocate memory for both Tx and Rx ring pointers */ - size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs * 2; + /* allocate memory for both Tx, XDP Tx and Rx ring pointers */ + size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs * + (i40e_enabled_xdp_vsi(vsi) ? 3 : 2); vsi->tx_rings = kzalloc(size, GFP_KERNEL); if (!vsi->tx_rings) return -ENOMEM; - vsi->rx_rings = &vsi->tx_rings[vsi->alloc_queue_pairs]; + next_rings = vsi->tx_rings + vsi->alloc_queue_pairs; + if (i40e_enabled_xdp_vsi(vsi)) { + vsi->xdp_rings = next_rings; + next_rings += vsi->alloc_queue_pairs; + } + vsi->rx_rings = next_rings; if (alloc_qvectors) { /* allocate memory for q_vector pointers */ @@ -7663,6 +7788,7 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) kfree(vsi->tx_rings); vsi->tx_rings = NULL; vsi->rx_rings = NULL; + vsi->xdp_rings = NULL; } /** @@ -7746,6 +7872,8 @@ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) kfree_rcu(vsi->tx_rings[i], rcu); vsi->tx_rings[i] = NULL; vsi->rx_rings[i] = NULL; + if (vsi->xdp_rings) + vsi->xdp_rings[i] = NULL; } } } @@ -7756,43 +7884,61 @@ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) **/ static int i40e_alloc_rings(struct i40e_vsi *vsi) { - struct i40e_ring *tx_ring, *rx_ring; + int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2; struct i40e_pf *pf = vsi->back; - int i; + struct i40e_ring *ring; /* Set basic values in the rings to be used later during open() */ for (i = 0; i < vsi->alloc_queue_pairs; i++) { /* allocate space for both Tx and Rx in one shot */ - tx_ring = kzalloc(sizeof(struct i40e_ring) * 2, GFP_KERNEL); - if (!tx_ring) + ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL); + if (!ring) goto err_out; - tx_ring->queue_index = i; - tx_ring->reg_idx = vsi->base_queue + i; - tx_ring->ring_active = false; - tx_ring->vsi = vsi; - tx_ring->netdev = vsi->netdev; - tx_ring->dev = &pf->pdev->dev; - tx_ring->count = vsi->num_desc; - tx_ring->size = 0; - tx_ring->dcb_tc = 0; + ring->queue_index = i; + ring->reg_idx = vsi->base_queue + i; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + ring->size = 0; + ring->dcb_tc = 0; if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) - tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - tx_ring->tx_itr_setting = pf->tx_itr_default; - vsi->tx_rings[i] = tx_ring; + ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; + ring->tx_itr_setting = pf->tx_itr_default; + vsi->tx_rings[i] = ring++; - rx_ring = &tx_ring[1]; - rx_ring->queue_index = i; - rx_ring->reg_idx = vsi->base_queue + i; - rx_ring->ring_active = false; - rx_ring->vsi = vsi; - rx_ring->netdev = vsi->netdev; - rx_ring->dev = &pf->pdev->dev; - rx_ring->count = vsi->num_desc; - rx_ring->size = 0; - rx_ring->dcb_tc = 0; - rx_ring->rx_itr_setting = pf->rx_itr_default; - vsi->rx_rings[i] = rx_ring; + if (!i40e_enabled_xdp_vsi(vsi)) + goto setup_rx; + + ring->queue_index = vsi->alloc_queue_pairs + i; + ring->reg_idx = vsi->base_queue + ring->queue_index; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = NULL; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + ring->size = 0; + ring->dcb_tc = 0; + if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) + ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; + set_ring_xdp(ring); + ring->tx_itr_setting = pf->tx_itr_default; + vsi->xdp_rings[i] = ring++; + +setup_rx: + ring->queue_index = i; + ring->reg_idx = vsi->base_queue + i; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + ring->size = 0; + ring->dcb_tc = 0; + ring->rx_itr_setting = pf->rx_itr_default; + vsi->rx_rings[i] = ring; } return 0; @@ -9998,6 +10144,7 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { + u16 alloc_queue_pairs; struct i40e_pf *pf; u8 enabled_tc; int ret; @@ -10016,11 +10163,14 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) if (ret) goto err_vsi; - ret = i40e_get_lump(pf, pf->qp_pile, vsi->alloc_queue_pairs, vsi->idx); + alloc_queue_pairs = vsi->alloc_queue_pairs * + (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); + + ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err %d\n", - vsi->alloc_queue_pairs, vsi->seid, ret); + alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; @@ -10076,6 +10226,7 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, { struct i40e_vsi *vsi = NULL; struct i40e_veb *veb = NULL; + u16 alloc_queue_pairs; int ret, i; int v_idx; @@ -10163,12 +10314,14 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, else if (type == I40E_VSI_SRIOV) vsi->vf_id = param1; /* assign it some queues */ - ret = i40e_get_lump(pf, pf->qp_pile, vsi->alloc_queue_pairs, - vsi->idx); + alloc_queue_pairs = vsi->alloc_queue_pairs * + (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); + + ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err=%d\n", - vsi->alloc_queue_pairs, vsi->seid, ret); + alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f744f843bc72..b936febc315a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -630,6 +630,8 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, if (tx_buffer->skb) { if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) kfree(tx_buffer->raw_buf); + else if (ring_is_xdp(ring)) + page_frag_free(tx_buffer->raw_buf); else dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) @@ -771,8 +773,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, total_bytes += tx_buf->bytecount; total_packets += tx_buf->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + /* free the skb/XDP data */ + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buf->raw_buf); + else + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -848,6 +853,9 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } + if (ring_is_xdp(tx_ring)) + return !!budget; + /* notify netdev of completed buffers */ netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -1969,6 +1977,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define I40E_XDP_PASS 0 #define I40E_XDP_CONSUMED 1 +#define I40E_XDP_TX 2 + +static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring); /** * i40e_run_xdp - run an XDP program @@ -1979,6 +1991,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { int result = I40E_XDP_PASS; + struct i40e_ring *xdp_ring; struct bpf_prog *xdp_prog; u32 act; @@ -1992,9 +2005,12 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, switch (act) { case XDP_PASS: break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + result = i40e_xmit_xdp_ring(xdp, xdp_ring); + break; default: bpf_warn_invalid_xdp_action(act); - case XDP_TX: case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); /* fallthrough -- handle aborts by dropping packet */ @@ -2007,6 +2023,27 @@ xdp_out: return ERR_PTR(-result); } +/** + * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region + * @rx_ring: Rx ring + * @rx_buffer: Rx buffer to adjust + * @size: Size of adjustment + **/ +static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; + + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size); + + rx_buffer->page_offset += truesize; +#endif +} + /** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on @@ -2024,7 +2061,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - bool failure = false; + bool failure = false, xdp_xmit = false; while (likely(total_rx_packets < budget)) { struct i40e_rx_buffer *rx_buffer; @@ -2081,9 +2118,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) } if (IS_ERR(skb)) { + if (PTR_ERR(skb) == -I40E_XDP_TX) { + xdp_xmit = true; + i40e_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; + } total_rx_bytes += size; total_rx_packets++; - rx_buffer->pagecnt_bias++; } else if (skb) { i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); } else if (ring_uses_build_skb(rx_ring)) { @@ -2131,6 +2173,19 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) total_rx_packets++; } + if (xdp_xmit) { + struct i40e_ring *xdp_ring; + + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + + writel(xdp_ring->next_to_use, xdp_ring->tail); + } + rx_ring->skb = skb; u64_stats_update_begin(&rx_ring->syncp); @@ -3187,6 +3242,59 @@ dma_error: return -1; } +/** + * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring + * @xdp: data to transmit + * @xdp_ring: XDP Tx ring + **/ +static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring) +{ + u32 size = xdp->data_end - xdp->data; + u16 i = xdp_ring->next_to_use; + struct i40e_tx_buffer *tx_bi; + struct i40e_tx_desc *tx_desc; + dma_addr_t dma; + + if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + return I40E_XDP_CONSUMED; + } + + dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return I40E_XDP_CONSUMED; + + tx_bi = &xdp_ring->tx_bi[i]; + tx_bi->bytecount = size; + tx_bi->gso_segs = 1; + tx_bi->raw_buf = xdp->data; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + tx_desc = I40E_TX_DESC(xdp_ring, i); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC + | I40E_TXD_CMD, + 0, size, 0); + + /* Make certain all of the status bits have been updated + * before next_to_watch is written. + */ + smp_wmb(); + + i++; + if (i == xdp_ring->count) + i = 0; + + tx_bi->next_to_watch = tx_desc; + xdp_ring->next_to_use = i; + + return I40E_XDP_TX; +} + /** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 31f0b162996f..b288d58313a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -396,6 +396,7 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) #define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) +#define I40E_TXR_FLAGS_XDP BIT(2) /* stats structs */ struct i40e_queue_stats stats; @@ -438,6 +439,16 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; } +static inline bool ring_is_xdp(struct i40e_ring *ring) +{ + return !!(ring->flags & I40E_TXR_FLAGS_XDP); +} + +static inline void set_ring_xdp(struct i40e_ring *ring) +{ + ring->flags |= I40E_TXR_FLAGS_XDP; +} + enum i40e_latency_range { I40E_LOWEST_LATENCY = 0, I40E_LOW_LATENCY = 1, From 65c7006f234c9ede887d468f595f259a5c5cc552 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 7 Jun 2017 05:43:01 -0400 Subject: [PATCH 03/15] i40evf: assign num_active_queues inside i40evf_alloc_queues The variable num_active_queues represents the number of active queues we have for the device. We assign this pretty early in i40evf_init_subtask. Several code locations are written with loops over the tx_rings and rx_rings structures, which don't get allocated until i40evf_alloc_queues, and which get freed by i40evf_free_queues. These call sites were written under the assumption that tx_rings and rx_rings would always be allocated at least when num_active_queues is non-zero. Lets fix this by moving the assignment into the function where we allocate queues. We'll use a temporary variable for storage so that we don't assign the value in the adapter structure until after the rings have been set up. Finally, when we free the queues, we'll clear the value to ensure that we do not loop over the rings memory that no longer exists. This resolves a possible NULL pointer dereference in i40evf_get_ethtool_stats which could occur if the VF fails to recover from a reset, and then a user requests statistics. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40evf/i40evf_main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 3a3ca965b242..7c213a347909 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1198,6 +1198,7 @@ static void i40evf_free_queues(struct i40evf_adapter *adapter) { if (!adapter->vsi_res) return; + adapter->num_active_queues = 0; kfree(adapter->tx_rings); adapter->tx_rings = NULL; kfree(adapter->rx_rings); @@ -1214,18 +1215,22 @@ static void i40evf_free_queues(struct i40evf_adapter *adapter) **/ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { - int i; + int i, num_active_queues; - adapter->tx_rings = kcalloc(adapter->num_active_queues, + num_active_queues = min_t(int, + adapter->vsi_res->num_queue_pairs, + (int)(num_online_cpus())); + + adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct i40e_ring), GFP_KERNEL); if (!adapter->tx_rings) goto err_out; - adapter->rx_rings = kcalloc(adapter->num_active_queues, + adapter->rx_rings = kcalloc(num_active_queues, sizeof(struct i40e_ring), GFP_KERNEL); if (!adapter->rx_rings) goto err_out; - for (i = 0; i < adapter->num_active_queues; i++) { + for (i = 0; i < num_active_queues; i++) { struct i40e_ring *tx_ring; struct i40e_ring *rx_ring; @@ -1247,6 +1252,8 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); } + adapter->num_active_queues = num_active_queues; + return 0; err_out: @@ -2636,9 +2643,6 @@ static void i40evf_init_task(struct work_struct *work) adapter->watchdog_timer.data = (unsigned long)adapter; mod_timer(&adapter->watchdog_timer, jiffies + 1); - adapter->num_active_queues = min_t(int, - adapter->vsi_res->num_queue_pairs, - (int)(num_online_cpus())); adapter->tx_desc_count = I40EVF_DEFAULT_TXD; adapter->rx_desc_count = I40EVF_DEFAULT_RXD; err = i40evf_init_interrupt_scheme(adapter); From 7c32b1e650752408a8dcc7a85f1776c2e24ea1da Mon Sep 17 00:00:00 2001 From: Alice Michael Date: Wed, 7 Jun 2017 05:43:02 -0400 Subject: [PATCH 04/15] i40e/i40evf: update WOL and I40E_AQC_ADDR_VALID_MASK flags Update a few flags related to FW interactions. Copyright updated to 2017. Signed-off-by: Alice Michael Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 4 ++-- drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5eb04114e13f..5d5f422cbae5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -531,7 +531,7 @@ struct i40e_aqc_mac_address_read { #define I40E_AQC_PORT_ADDR_VALID 0x40 #define I40E_AQC_WOL_ADDR_VALID 0x80 #define I40E_AQC_MC_MAG_EN_VALID 0x100 -#define I40E_AQC_ADDR_VALID_MASK 0x1F0 +#define I40E_AQC_ADDR_VALID_MASK 0x3F0 u8 reserved[6]; __le32 addr_high; __le32 addr_low; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 91d8786d386d..83e63e55c4b4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -528,7 +528,7 @@ struct i40e_aqc_mac_address_read { #define I40E_AQC_PORT_ADDR_VALID 0x40 #define I40E_AQC_WOL_ADDR_VALID 0x80 #define I40E_AQC_MC_MAG_EN_VALID 0x100 -#define I40E_AQC_ADDR_VALID_MASK 0x1F0 +#define I40E_AQC_ADDR_VALID_MASK 0x3F0 u8 reserved[6]; __le32 addr_high; __le32 addr_low; @@ -586,6 +586,7 @@ struct i40e_aqc_set_wol_filter { __le16 cmd_flags; #define I40E_AQC_SET_WOL_FILTER 0x8000 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL 0x4000 +#define I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR 0x2000 #define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR 0 #define I40E_AQC_SET_WOL_FILTER_ACTION_SET 1 __le16 valid_flags; From 59e331e36ef934791947a616cc578bf3c62a019c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 7 Jun 2017 05:43:03 -0400 Subject: [PATCH 05/15] i40e: use dev_dbg instead of dev_info when warning about missing routine When searching for the vf_capability client routine, dev_info() was used, instead of the normal dev_dbg(). This causes the message to be displayed at standard log levels which can cause administrators to worry. Avoid this by using dev_dbg instead. Copyright updated to 2017. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 36f694ccdc09..1b1e2acbd07f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -273,8 +273,8 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id) if (!cdev || !cdev->client) goto out; if (!cdev->client->ops || !cdev->client->ops->vf_capable) { - dev_info(&pf->pdev->dev, - "Cannot locate client instance VF capability routine\n"); + dev_dbg(&pf->pdev->dev, + "Cannot locate client instance VF capability routine\n"); goto out; } if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) From 15d23b4c361f1449d44249bea127d2bdb981aa01 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 7 Jun 2017 05:43:04 -0400 Subject: [PATCH 06/15] i40e: comment that udp_port must be in host byte order The firmware expects the port number passed when setting up the UDP tunnel configuration to be in Little Endian format. The i40e_aq_add_udp_tunnel command byte swaps the value from host order to Little Endian. Since commit fe0b0cd97b4f ("i40e: send correct port number to AdminQ when enabling UDP tunnels") we've correctly sent the value in host order. Let's also add a comment to the function explaining that it must be in host order, as the port numbers are commonly stored as Big Endian values. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index cbad4eba7ae7..8e082a946411 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -3614,11 +3614,15 @@ i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw, /** * i40e_aq_add_udp_tunnel * @hw: pointer to the hw struct - * @udp_port: the UDP port to add + * @udp_port: the UDP port to add in Host byte order * @header_len: length of the tunneling header length in DWords * @protocol_index: protocol index type * @filter_index: pointer to filter index * @cmd_details: pointer to command details structure or NULL + * + * Note: Firmware expects the udp_port value to be in Little Endian format, + * and this function will call cpu_to_le16 to convert from Host byte order to + * Little Endian order. **/ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw, u16 udp_port, u8 protocol_index, From 1e99854715c79b3e2ebe09d80006aaff0f5c2335 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Wed, 7 Jun 2017 05:43:05 -0400 Subject: [PATCH 07/15] i40e: Fix potential out of bound array access This is a fix for the static code analysis issue where dcbcfg->numapps could be greater than size of array (i.e dcbcfg->app[I40E_DCBX_MAX_APPS]). The fix makes sure that the array is not accessed past the size of of the array (i.e. I40E_DCBX_MAX_APPS). Copyright updated to 2017. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_dcb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 0fab3a9b51d9..bf1d67e184f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -390,6 +390,8 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, if (!dcbcfg->numapps) return; + if (dcbcfg->numapps > I40E_DCBX_MAX_APPS) + dcbcfg->numapps = I40E_DCBX_MAX_APPS; for (i = 0; i < dcbcfg->numapps; i++) { u8 up, selector; From 68fb13a7677475e5470ef6aba585da5c609ea2cb Mon Sep 17 00:00:00 2001 From: Greg Bowers Date: Wed, 7 Jun 2017 05:43:06 -0400 Subject: [PATCH 08/15] i40e: Support firmware CEE DCB UP to TC map re-definition Changes parsing of FW 4.33 AQ command Get CEE DCBX OPER CFG (0x0A07). Change is required because FW now creates the oper_prio_tc nibbles reversed from those in the CEE Priority Group sub-TLV. This change will only apply to FW 4.33 as future FW versions will use a different function to parse the CEE data. Signed-off-by: Greg Bowers Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_dcb.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index bf1d67e184f7..55079fe3ed63 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -620,14 +620,17 @@ static void i40e_cee_to_dcb_v1_config( /* CEE PG data to ETS config */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; + /* Note that the FW creates the oper_prio_tc nibbles reversed + * from those in the CEE Priority Group sub-TLV. + */ for (i = 0; i < 4; i++) { - tc = (u8)((cee_cfg->oper_prio_tc[i] & - I40E_CEE_PGID_PRIO_1_MASK) >> - I40E_CEE_PGID_PRIO_1_SHIFT); - dcbcfg->etscfg.prioritytable[i*2] = tc; tc = (u8)((cee_cfg->oper_prio_tc[i] & I40E_CEE_PGID_PRIO_0_MASK) >> I40E_CEE_PGID_PRIO_0_SHIFT); + dcbcfg->etscfg.prioritytable[i * 2] = tc; + tc = (u8)((cee_cfg->oper_prio_tc[i] & + I40E_CEE_PGID_PRIO_1_MASK) >> + I40E_CEE_PGID_PRIO_1_SHIFT); dcbcfg->etscfg.prioritytable[i*2 + 1] = tc; } From 83d14c595e011f96c47e5fb09ddb51902e8367aa Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 7 Jun 2017 05:43:07 -0400 Subject: [PATCH 09/15] i40e: Add message for unsupported MFP mode This patch adds a check and message if the device is in MFP mode as changing RSS input set is not supported in MFP mode. Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9d3233c2c9cd..9692a5294fa3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2711,6 +2711,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) u8 flow_pctype = 0; u64 i_set, i_setc; + if (pf->flags & I40E_FLAG_MFP_ENABLED) { + dev_err(&pf->pdev->dev, + "Change of RSS hash input set is not supported when MFP mode is enabled\n"); + return -EOPNOTSUPP; + } + /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports */ From 4fc8c67639575e38fff41bb4bd01c601aba930ff Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 7 Jun 2017 05:43:08 -0400 Subject: [PATCH 10/15] i40e: genericize the partition bandwidth control Partition bandwidth control is not in just one form of MFP (multi-function partitioning), so make the code more generic and be sure to nudge the Tx scheduler for all MFP. Copyright updated to 2017. Signed-off-by: Shannon Nelson Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 13 +++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 41 ++++++++++----------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4250ab55a9f1..76395e695007 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -516,9 +516,8 @@ struct i40e_pf { bool ptp_tx; bool ptp_rx; u16 rss_table_size; /* HW RSS table size */ - /* These are only valid in NPAR modes */ - u32 npar_max_bw; - u32 npar_min_bw; + u32 max_bw; + u32 min_bw; u32 ioremap_len; u32 fd_inv; @@ -971,9 +970,9 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); -i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf); -i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf); -i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf); +i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); +i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); +i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7e415bb5a7dc..8d7bd85933bb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -8740,10 +8740,10 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) } /** - * i40e_get_npar_bw_setting - Retrieve BW settings for this PF partition + * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition * @pf: board private structure **/ -i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf) +i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf) { i40e_status status; bool min_valid, max_valid; @@ -8754,27 +8754,27 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf) if (!status) { if (min_valid) - pf->npar_min_bw = min_bw; + pf->min_bw = min_bw; if (max_valid) - pf->npar_max_bw = max_bw; + pf->max_bw = max_bw; } return status; } /** - * i40e_set_npar_bw_setting - Set BW settings for this PF partition + * i40e_set_partition_bw_setting - Set BW settings for this PF partition * @pf: board private structure **/ -i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf) +i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf) { struct i40e_aqc_configure_partition_bw_data bw_data; i40e_status status; /* Set the valid bit for this PF */ bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id)); - bw_data.max_bw[pf->hw.pf_id] = pf->npar_max_bw & I40E_ALT_BW_VALUE_MASK; - bw_data.min_bw[pf->hw.pf_id] = pf->npar_min_bw & I40E_ALT_BW_VALUE_MASK; + bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK; + bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK; /* Set the new bandwidths */ status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL); @@ -8783,10 +8783,10 @@ i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf) } /** - * i40e_commit_npar_bw_setting - Commit BW settings for this PF partition + * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition * @pf: board private structure **/ -i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf) +i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf) { /* Commit temporary BW setting to permanent NVM image */ enum i40e_admin_queue_err last_aq_status; @@ -8905,16 +8905,19 @@ static int i40e_sw_init(struct i40e_pf *pf) if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) { pf->flags |= I40E_FLAG_MFP_ENABLED; dev_info(&pf->pdev->dev, "MFP mode Enabled\n"); - if (i40e_get_npar_bw_setting(pf)) + if (i40e_get_partition_bw_setting(pf)) { dev_warn(&pf->pdev->dev, - "Could not get NPAR bw settings\n"); - else + "Could not get partition bw settings\n"); + } else { dev_info(&pf->pdev->dev, - "Min BW = %8.8x, Max BW = %8.8x\n", - pf->npar_min_bw, pf->npar_max_bw); + "Partition BW Min = %8.8x, Max = %8.8x\n", + pf->min_bw, pf->max_bw); + + /* nudge the Tx scheduler */ + i40e_set_partition_bw_setting(pf); + } } - /* FW/NVM is not yet fixed in this regard */ if ((pf->hw.func_caps.fd_filters_guaranteed > 0) || (pf->hw.func_caps.fd_filters_best_effort > 0)) { pf->flags |= I40E_FLAG_FD_ATR_ENABLED; @@ -9017,10 +9020,6 @@ static int i40e_sw_init(struct i40e_pf *pf) mutex_init(&pf->switch_mutex); - /* If NPAR is enabled nudge the Tx scheduler */ - if (pf->hw.func_caps.npar_enable && (!i40e_get_npar_bw_setting(pf))) - i40e_set_npar_bw_setting(pf); - sw_init_done: return err; } From 5bbb2e2045449706a6daf092e5727998e4984c0b Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Wed, 7 Jun 2017 05:43:09 -0400 Subject: [PATCH 11/15] i40e: Add support for OEM firmware version This patch adds support for OEM firmware version. If OEM specific adapter is detected ethtool reports OEM product version in firmware version string instead of etrack id. Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 46 ++++++++++++++------ drivers/net/ethernet/intel/i40e/i40e_main.c | 47 +++++++++++++++++++++ 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 76395e695007..d616f698e155 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -103,6 +103,12 @@ (I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \ I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW) +#define I40E_OEM_EETRACK_ID 0xffffffff +#define I40E_OEM_GEN_SHIFT 24 +#define I40E_OEM_SNAP_MASK 0x00ff0000 +#define I40E_OEM_SNAP_SHIFT 16 +#define I40E_OEM_RELEASE_MASK 0x0000ffff + /* The values in here are decimal coded as hex as is the case in the NVM map*/ #define I40E_CURRENT_NVM_VERSION_HI 0x2 #define I40E_CURRENT_NVM_VERSION_LO 0x40 @@ -734,22 +740,36 @@ static inline char *i40e_nvm_version_str(struct i40e_hw *hw) { static char buf[32]; u32 full_ver; - u8 ver, patch; - u16 build; full_ver = hw->nvm.oem_ver; - ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); - build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & - I40E_OEM_VER_BUILD_MASK); - patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); - snprintf(buf, sizeof(buf), - "%x.%02x 0x%x %d.%d.%d", - (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >> - I40E_NVM_VERSION_HI_SHIFT, - (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >> - I40E_NVM_VERSION_LO_SHIFT, - hw->nvm.eetrack, ver, build, patch); + if (hw->nvm.eetrack == I40E_OEM_EETRACK_ID) { + u8 gen, snap; + u16 release; + + gen = (u8)(full_ver >> I40E_OEM_GEN_SHIFT); + snap = (u8)((full_ver & I40E_OEM_SNAP_MASK) >> + I40E_OEM_SNAP_SHIFT); + release = (u16)(full_ver & I40E_OEM_RELEASE_MASK); + + snprintf(buf, sizeof(buf), "%x.%x.%x", gen, snap, release); + } else { + u8 ver, patch; + u16 build; + + ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); + build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & + I40E_OEM_VER_BUILD_MASK); + patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); + + snprintf(buf, sizeof(buf), + "%x.%02x 0x%x %d.%d.%d", + (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >> + I40E_NVM_VERSION_HI_SHIFT, + (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >> + I40E_NVM_VERSION_LO_SHIFT, + hw->nvm.eetrack, ver, build, patch); + } return buf; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8d7bd85933bb..8af6420826d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7107,6 +7107,51 @@ static void i40e_send_version(struct i40e_pf *pf) i40e_aq_send_driver_version(&pf->hw, &dv, NULL); } +/** + * i40e_get_oem_version - get OEM specific version information + * @hw: pointer to the hardware structure + **/ +static void i40e_get_oem_version(struct i40e_hw *hw) +{ + u16 block_offset = 0xffff; + u16 block_length = 0; + u16 capabilities = 0; + u16 gen_snap = 0; + u16 release = 0; + +#define I40E_SR_NVM_OEM_VERSION_PTR 0x1B +#define I40E_NVM_OEM_LENGTH_OFFSET 0x00 +#define I40E_NVM_OEM_CAPABILITIES_OFFSET 0x01 +#define I40E_NVM_OEM_GEN_OFFSET 0x02 +#define I40E_NVM_OEM_RELEASE_OFFSET 0x03 +#define I40E_NVM_OEM_CAPABILITIES_MASK 0x000F +#define I40E_NVM_OEM_LENGTH 3 + + /* Check if pointer to OEM version block is valid. */ + i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset); + if (block_offset == 0xffff) + return; + + /* Check if OEM version block has correct length. */ + i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET, + &block_length); + if (block_length < I40E_NVM_OEM_LENGTH) + return; + + /* Check if OEM version format is as expected. */ + i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET, + &capabilities); + if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0) + return; + + i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_GEN_OFFSET, + &gen_snap); + i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET, + &release); + hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release; + hw->nvm.eetrack = I40E_OEM_EETRACK_ID; +} + /** * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen * @pf: board private structure @@ -7154,6 +7199,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto clear_recovery; } + i40e_get_oem_version(&pf->hw); /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) @@ -11338,6 +11384,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_pf_reset; } + i40e_get_oem_version(hw); /* provide nvm, fw, api versions */ dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n", From e588723986845457942e8a1acb1e31cf18e8eb08 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 7 Jun 2017 05:43:10 -0400 Subject: [PATCH 12/15] i40e: fix disabling overflow promiscuous mode There exists a bug in which the driver does not correctly exit overflow promiscuous mode. This can occur if "too many" mac filters are added, putting the driver into overflow promiscuous mode, and the filters are then removed. When the failed filters are removed, the driver reports exiting overflow promiscuous mode which is correct, however traffic continues to be received as if in promiscuous mode still. The bug occurs because the conditional for toggling promiscuous mode was set to only execute when promiscuous mode was enabled and not when it was disabled as well. This patch fixes the conditional to correctly execute when promiscuous mode is toggled and not just enabled. Without this patch, the driver is unable to correctly exit overflow promiscuous mode. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8af6420826d1..b743eca879d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2281,9 +2281,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) i40e_aq_str(hw, hw->aq.asq_last_status)); } } - if ((changed_flags & IFF_PROMISC) || - (promisc_changed && - test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))) { + + if ((changed_flags & IFF_PROMISC) || promisc_changed) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || From 2e5c26ea0d0843074a1b8c868aae5c828c155569 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 7 Jun 2017 05:43:11 -0400 Subject: [PATCH 13/15] i40e: clear only cause_ena bit When disabling interrupts, we should only be clearing the CAUSE_ENA bit, not clearing the whole register. Clearing the whole register sets the NEXTQ_IDX field to 0 instead of 0x7ff which can confuse the Firmware in some reset sequences. Signed-off-by: Shannon Nelson Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b743eca879d5..5d82ff54c7b0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3588,14 +3588,24 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) int base = vsi->base_vector; int i; + /* disable interrupt causation from each queue */ for (i = 0; i < vsi->num_queue_pairs; i++) { - wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), 0); - wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), 0); + u32 val; + + val = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); + val &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; + wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); + + val = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx)); + val &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; + wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), val); + if (!i40e_enabled_xdp_vsi(vsi)) continue; wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0); } + /* disable each interrupt */ if (pf->flags & I40E_FLAG_MSIX_ENABLED) { for (i = vsi->base_vector; i < (vsi->num_q_vectors + vsi->base_vector); i++) From 7642984b08760b8d0ff7f4cfbe524bb53eb4cec2 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Wed, 7 Jun 2017 05:43:12 -0400 Subject: [PATCH 14/15] i40e: Handle PE_CRITERR properly with IWARP enabled When IWARP is enabled, we weren't clearing the PE_CRITERR, just logging it and removing it from the mask. We need to do a corer to reset the PE_CRITERR register, so set the bit for that as we handle the interrupt. We should also be checking for the error against the PFINT_ICR0 register, and only need to clear it in the value getting written to PFINT_ICR0_ENA. Signed-off-by: Catherine Sullivan Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5d82ff54c7b0..c4328b4bec95 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3684,10 +3684,10 @@ static irqreturn_t i40e_intr(int irq, void *data) pf->sw_int_count++; if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && - (ena_mask & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { + (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; - icr0 &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n"); + set_bit(__I40E_CORE_RESET_REQUESTED, pf->state); } /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */ From dfc4ff644674a133878aded9a86ab36c358f3138 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 7 Jun 2017 05:43:13 -0400 Subject: [PATCH 15/15] i40e: don't hold RTNL lock for the entire reset We recently refactored i40e_do_reset() and its friends to be able to hold the RTNL lock only for the portions that actually need to be protected. However, a separate refactoring added several new callers of these functions during the PCIe error recovery and suspend/resume cycles. When merging the changes together, it was not noticed that we could reduce the RTNL scope by letting the reset function handle the lock itself, as previously it was not possible. Fix this by replacing these call sites to indicate that the reset function should handle its own lock. This enables multiple PFs to reset or resume simultaneously without serializing the resets via the RTNL lock. The end result is that on systems with lots of PFs and VFs the resets don't stall waiting for each other to finish. It is probable that we can also do the same for i40e_do_reset_safe, but this author did not research that change carefully enough to be confident. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 27 ++++++--------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c4328b4bec95..2db93d3f6d23 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6566,9 +6566,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf) if (reset_flags && !test_bit(__I40E_DOWN, pf->state) && !test_bit(__I40E_CONFIG_BUSY, pf->state)) { - rtnl_lock(); - i40e_do_reset(pf, reset_flags, true); - rtnl_unlock(); + i40e_do_reset(pf, reset_flags, false); } } @@ -11906,11 +11904,8 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, } /* shutdown all operations */ - if (!test_bit(__I40E_SUSPENDED, pf->state)) { - rtnl_lock(); - i40e_prep_for_reset(pf, true); - rtnl_unlock(); - } + if (!test_bit(__I40E_SUSPENDED, pf->state)) + i40e_prep_for_reset(pf, false); /* Request a slot reset */ return PCI_ERS_RESULT_NEED_RESET; @@ -11976,9 +11971,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev) if (test_bit(__I40E_SUSPENDED, pf->state)) return; - rtnl_lock(); - i40e_handle_reset_warning(pf, true); - rtnl_unlock(); + i40e_handle_reset_warning(pf, false); } /** @@ -12058,9 +12051,7 @@ static void i40e_shutdown(struct pci_dev *pdev) if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); - rtnl_lock(); - i40e_prep_for_reset(pf, true); - rtnl_unlock(); + i40e_prep_for_reset(pf, false); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); @@ -12092,9 +12083,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); - rtnl_lock(); - i40e_prep_for_reset(pf, true); - rtnl_unlock(); + i40e_prep_for_reset(pf, false); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); @@ -12140,9 +12129,7 @@ static int i40e_resume(struct pci_dev *pdev) /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { clear_bit(__I40E_DOWN, pf->state); - rtnl_lock(); - i40e_reset_and_rebuild(pf, false, true); - rtnl_unlock(); + i40e_reset_and_rebuild(pf, false, false); } return 0;