diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 67f304289fd9..2b311382167a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -154,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed); +#define IXGBE_FAILED_READ_RETRIES 5 #define IXGBE_FAILED_READ_REG 0xffffffffU #define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU #define IXGBE_FAILED_READ_CFG_WORD 0xffffU diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index f2254528dcfc..68af127987bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -774,11 +774,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC; - itd->flags = 0; if (xs->id.proto == IPPROTO_ESP) { - struct sk_buff *skb = first->skb; - int ret, authlen, trailerlen; - u8 padlen; itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | IXGBE_ADVTXD_TUCMD_L4T_TCP; @@ -790,19 +786,28 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, * padlen bytes of padding. This ends up not the same * as the static value found in xs->props.trailer_len (21). * - * The "correct" way to get the auth length would be to use - * authlen = crypto_aead_authsize(xs->data); - * but since we know we only have one size to worry about - * we can let the compiler use the constant and save us a - * few CPU cycles. + * ... but if we're doing GSO, don't bother as the stack + * doesn't add a trailer for those. */ - authlen = IXGBE_IPSEC_AUTH_BITS / 8; + if (!skb_is_gso(first->skb)) { + /* The "correct" way to get the auth length would be + * to use + * authlen = crypto_aead_authsize(xs->data); + * but since we know we only have one size to worry + * about * we can let the compiler use the constant + * and save us a few CPU cycles. + */ + const int authlen = IXGBE_IPSEC_AUTH_BITS / 8; + struct sk_buff *skb = first->skb; + u8 padlen; + int ret; - ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1); - if (unlikely(ret)) - return 0; - trailerlen = authlen + 2 + padlen; - itd->trailer_len = trailerlen; + ret = skb_copy_bits(skb, skb->len - (authlen + 2), + &padlen, 1); + if (unlikely(ret)) + return 0; + itd->trailer_len = authlen + 2 + padlen; + } } if (tsa->encrypt) itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN; @@ -924,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) ixgbe_ipsec_clear_hw_tables(adapter); adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops; - adapter->netdev->features |= NETIF_F_HW_ESP; - adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP; + +#define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \ + NETIF_F_HW_ESP_TX_CSUM | \ + NETIF_F_GSO_ESP) + + adapter->netdev->features |= IXGBE_ESP_FEATURES; + adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES; return; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 85369423452d..afadba99f7b8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw) ixgbe_service_event_schedule(adapter); } -static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) +static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) { + u8 __iomem *reg_addr; u32 value; + int i; - /* The following check not only optimizes a bit by not - * performing a read on the status register when the - * register just read was a status register read that - * returned IXGBE_FAILED_READ_REG. It also blocks any - * potential recursion. + reg_addr = READ_ONCE(hw->hw_addr); + if (ixgbe_removed(reg_addr)) + return IXGBE_FAILED_READ_REG; + + /* Register read of 0xFFFFFFF can indicate the adapter has been removed, + * so perform several status register reads to determine if the adapter + * has been removed. */ - if (reg == IXGBE_STATUS) { - ixgbe_remove_adapter(hw); - return; + for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) { + value = readl(reg_addr + IXGBE_STATUS); + if (value != IXGBE_FAILED_READ_REG) + break; + mdelay(3); } - value = ixgbe_read_reg(hw, IXGBE_STATUS); + if (value == IXGBE_FAILED_READ_REG) ixgbe_remove_adapter(hw); + else + value = readl(reg_addr + reg); + return value; } /** @@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) writes_completed: value = readl(reg_addr + reg); if (unlikely(value == IXGBE_FAILED_READ_REG)) - ixgbe_check_remove(hw, reg); + value = ixgbe_check_remove(hw, reg); return value; } @@ -1620,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = ixgbe_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; rx_ring->rx_stats.alloc_rx_page++; return true; @@ -2030,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -7721,7 +7731,8 @@ static void ixgbe_service_task(struct work_struct *work) static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, - u8 *hdr_len) + u8 *hdr_len, + struct ixgbe_ipsec_tx_data *itd) { u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; @@ -7735,6 +7746,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, unsigned char *hdr; } l4; u32 paylen, l4_offset; + u32 fceof_saidx = 0; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -7760,13 +7772,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, if (ip.v4->version == 4) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + int len = csum_start - trans_start; /* IP header will have to cancel out any data that - * is not a part of the outer IP header + * is not a part of the outer IP header, so set to + * a reverse csum if needed, else init check to 0. */ - ip.v4->check = csum_fold(csum_partial(trans_start, - csum_start - trans_start, - 0)); + ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ? + csum_fold(csum_partial(trans_start, + len, 0)) : 0; type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; ip.v4->tot_len = 0; @@ -7797,12 +7811,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; + fceof_saidx |= itd->sa_idx; + type_tucmd |= itd->flags | itd->trailer_len; + /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ vlan_macip_lens = l4.hdr - ip.hdr; vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, mss_l4len_idx); return 1; @@ -7864,10 +7881,8 @@ no_csum: vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) { - fceof_saidx |= itd->sa_idx; - type_tucmd |= itd->flags | itd->trailer_len; - } + fceof_saidx |= itd->sa_idx; + type_tucmd |= itd->flags | itd->trailer_len; ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0); } @@ -8495,7 +8510,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif - tso = ixgbe_tso(tx_ring, first, &hdr_len); + tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx); if (tso < 0) goto out_drop; else if (!tso) @@ -9904,15 +9919,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, /* We can only support IPV4 TSO in tunnels if we can mangle the * inner IP ID field, so strip TSO if MANGLEID is not supported. + * IPsec offoad sets skb->encapsulation but still can handle + * the TSO, so it's the exception. */ - if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) - features &= ~NETIF_F_TSO; - -#ifdef CONFIG_XFRM_OFFLOAD - /* IPsec offload doesn't get along well with others *yet* */ - if (skb->sp) - features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM); + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) { +#ifdef CONFIG_XFRM + if (!skb->sp) #endif + features &= ~NETIF_F_TSO; + } return features; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index f470d0204771..3123267dfba9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, (IXGBE_CS4227_EDC_MODE_SR << 1)); if (setup_linear) - reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; else - reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; ret_val = hw->phy.ops.write_reg(hw, reg_slice, IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index e7623fed42da..8e7d6c6f5c92 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = { #define IXGBEVF_QUEUE_STATS_LEN ( \ (((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \ + ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \ ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \ (sizeof(struct ixgbevf_stats) / sizeof(u64))) #define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats) @@ -269,7 +270,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL; u32 new_rx_count, new_tx_count; - int i, err = 0; + int i, j, err = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -293,15 +294,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, if (!netif_running(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_xdp_queues; i++) + adapter->xdp_ring[i]->count = new_tx_count; for (i = 0; i < adapter->num_rx_queues; i++) adapter->rx_ring[i]->count = new_rx_count; adapter->tx_ring_count = new_tx_count; + adapter->xdp_ring_count = new_tx_count; adapter->rx_ring_count = new_rx_count; goto clear_reset; } if (new_tx_count != adapter->tx_ring_count) { - tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring)); + tx_ring = vmalloc((adapter->num_tx_queues + + adapter->num_xdp_queues) * sizeof(*tx_ring)); if (!tx_ring) { err = -ENOMEM; goto clear_reset; @@ -324,6 +329,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, goto clear_reset; } } + + for (j = 0; j < adapter->num_xdp_queues; i++, j++) { + /* clone ring and setup updated count */ + tx_ring[i] = *adapter->xdp_ring[j]; + tx_ring[i].count = new_tx_count; + err = ixgbevf_setup_tx_resources(&tx_ring[i]); + if (err) { + while (i) { + i--; + ixgbevf_free_tx_resources(&tx_ring[i]); + } + + vfree(tx_ring); + tx_ring = NULL; + + goto clear_reset; + } + } } if (new_rx_count != adapter->rx_ring_count) { @@ -336,8 +359,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) { /* clone ring and setup updated count */ rx_ring[i] = *adapter->rx_ring[i]; + + /* Clear copied XDP RX-queue info */ + memset(&rx_ring[i].xdp_rxq, 0, + sizeof(rx_ring[i].xdp_rxq)); + rx_ring[i].count = new_rx_count; - err = ixgbevf_setup_rx_resources(&rx_ring[i]); + err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]); if (err) { while (i) { i--; @@ -363,6 +391,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, } adapter->tx_ring_count = new_tx_count; + for (j = 0; j < adapter->num_xdp_queues; i++, j++) { + ixgbevf_free_tx_resources(adapter->xdp_ring[j]); + *adapter->xdp_ring[j] = tx_ring[i]; + } + adapter->xdp_ring_count = new_tx_count; + vfree(tx_ring); tx_ring = NULL; } @@ -385,7 +419,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, clear_reset: /* free Tx resources if Rx error is encountered */ if (tx_ring) { - for (i = 0; i < adapter->num_tx_queues; i++) + for (i = 0; + i < adapter->num_tx_queues + adapter->num_xdp_queues; i++) ixgbevf_free_tx_resources(&tx_ring[i]); vfree(tx_ring); } @@ -457,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, i += 2; } + /* populate XDP queue data */ + for (j = 0; j < adapter->num_xdp_queues; j++) { + ring = adapter->xdp_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; + continue; + } + + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + i += 2; + } + /* populate Rx queue data */ for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; @@ -500,6 +552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, sprintf(p, "tx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; } + for (i = 0; i < adapter->num_xdp_queues; i++) { + sprintf(p, "xdp_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "xdp_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } for (i = 0; i < adapter->num_rx_queues; i++) { sprintf(p, "rx_queue_%u_packets", i); p += ETH_GSTRING_LEN; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index c06ea4dc49a0..447ce1d5e0e3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -2,7 +2,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -35,6 +35,7 @@ #include #include #include +#include #include "vf.h" @@ -51,7 +52,11 @@ struct ixgbevf_tx_buffer { union ixgbe_adv_tx_desc *next_to_watch; unsigned long time_stamp; - struct sk_buff *skb; + union { + struct sk_buff *skb; + /* XDP uses address ptr on irq_clean */ + void *data; + }; unsigned int bytecount; unsigned short gso_segs; __be16 protocol; @@ -94,12 +99,21 @@ enum ixgbevf_ring_state_t { __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, + __IXGBEVF_TX_XDP_RING, }; +#define ring_is_xdp(ring) \ + test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define set_ring_xdp(ring) \ + set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define clear_ring_xdp(ring) \ + clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; + struct bpf_prog *xdp_prog; struct device *dev; void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. address of descriptor ring */ @@ -120,7 +134,7 @@ struct ixgbevf_ring { struct ixgbevf_tx_queue_stats tx_stats; struct ixgbevf_rx_queue_stats rx_stats; }; - + struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; struct sk_buff *skb; @@ -137,6 +151,7 @@ struct ixgbevf_ring { #define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES #define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES +#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES #define IXGBEVF_MAX_RSS_QUEUES 2 #define IXGBEVF_82599_RETA_SIZE 128 /* 128 entries */ #define IXGBEVF_X550_VFRETA_SIZE 64 /* 64 entries */ @@ -337,6 +352,10 @@ struct ixgbevf_adapter { u32 eims_enable_mask; u32 eims_other; + /* XDP */ + int num_xdp_queues; + struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES]; + /* TX */ int num_tx_queues; struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */ @@ -357,6 +376,7 @@ struct ixgbevf_adapter { /* OS defined structs */ struct net_device *netdev; + struct bpf_prog *xdp_prog; struct pci_dev *pdev; /* structs defined in ixgbe_vf.h */ @@ -370,6 +390,7 @@ struct ixgbevf_adapter { unsigned long state; u64 tx_busy; unsigned int tx_ring_count; + unsigned int xdp_ring_count; unsigned int rx_ring_count; u8 __iomem *io_addr; /* Mainly for iounmap use */ @@ -443,7 +464,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); -int ixgbevf_setup_rx_resources(struct ixgbevf_ring *); +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4da449e0a4ba..3d9033f26eff 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. + Copyright(c) 1999 - 2018 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -50,6 +50,9 @@ #include #include #include +#include +#include +#include #include "ixgbevf.h" @@ -321,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -385,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc = tx_ring->tx_buffer_info[i].next_to_watch; - pr_err("Detected Tx Unit Hang\n" + pr_err("Detected Tx Unit Hang%s\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" " next_to_use <%x>\n" @@ -395,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, " eop_desc->wb.status <%x>\n" " time_stamp <%lx>\n" " jiffies <%lx>\n", + ring_is_xdp(tx_ring) ? " XDP" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)), @@ -402,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (!ring_is_xdp(tx_ring)) + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -410,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } + if (ring_is_xdp(tx_ring)) + return !!budget; + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -552,19 +564,21 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, } static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer) + struct ixgbevf_rx_buffer *rx_buffer, + struct sk_buff *skb) { if (ixgbevf_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ ixgbevf_reuse_rx_page(rx_ring, rx_buffer); } else { - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); + if (IS_ERR(skb)) + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); __page_frag_cache_drain(rx_buffer->page, rx_buffer->pagecnt_bias); } @@ -737,6 +751,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + /* verify that the packet does not have any known errors */ if (unlikely(ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { @@ -853,23 +871,38 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, static struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbevf_build_skb(). + * + * For ixgbevf_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * changed in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); @@ -879,16 +912,18 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); + headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, + ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; if (size) { skb_add_rx_frag(skb, 0, rx_buffer->page, - (va + headlen) - page_address(rx_buffer->page), + (xdp->data + headlen) - + page_address(rx_buffer->page), size, truesize); #if (PAGE_SIZE < 8192) rx_buffer->page_offset ^= truesize; @@ -912,32 +947,39 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(va); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points to xdp->data, otherwise, we likely + * have a consumer accessing first few bytes of meta data, + * and then actual data. + */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif - /* build an skb to around the page buffer */ - skb = build_skb(va - IXGBEVF_SKB_PAD, truesize); + /* build an skb around the page buffer */ + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, IXGBEVF_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* update buffer offset */ #if (PAGE_SIZE < 8192) @@ -948,17 +990,138 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, return skb; } + +#define IXGBEVF_XDP_PASS 0 +#define IXGBEVF_XDP_CONSUMED 1 +#define IXGBEVF_XDP_TX 2 + +static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, + struct xdp_buff *xdp) +{ + struct ixgbevf_tx_buffer *tx_buffer; + union ixgbe_adv_tx_desc *tx_desc; + u32 len, cmd_type; + dma_addr_t dma; + u16 i; + + len = xdp->data_end - xdp->data; + + if (unlikely(!ixgbevf_desc_unused(ring))) + return IXGBEVF_XDP_CONSUMED; + + dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) + return IXGBEVF_XDP_CONSUMED; + + /* record the location of the first descriptor for this packet */ + tx_buffer = &ring->tx_buffer_info[ring->next_to_use]; + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + i = ring->next_to_use; + tx_desc = IXGBEVF_TX_DESC(ring, i); + + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + tx_buffer->data = xdp->data; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + /* put descriptor type bits */ + cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS; + cmd_type |= len | IXGBE_TXD_CMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.olinfo_status = + cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + /* Avoid any potential race with cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + i++; + if (i == ring->count) + i = 0; + + tx_buffer->next_to_watch = tx_desc; + ring->next_to_use = i; + + return IXGBEVF_XDP_TX; +} + +static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring, + struct xdp_buff *xdp) +{ + int result = IXGBEVF_XDP_PASS; + struct ixgbevf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; + result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping packet */ + case XDP_DROP: + result = IXGBEVF_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + +static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; + + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : + SKB_DATA_ALIGN(size); + + rx_buffer->page_offset += truesize; +#endif +} + static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; + bool xdp_xmit = false; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { - union ixgbe_adv_rx_desc *rx_desc; struct ixgbevf_rx_buffer *rx_buffer; + union ixgbe_adv_rx_desc *rx_desc; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -981,14 +1144,36 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - + ixgbevf_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; + + skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) { + xdp_xmit = true; + ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, + size); + } else { + rx_buffer->pagecnt_bias++; + } + total_rx_packets++; + total_rx_bytes += size; + } else if (skb) { ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) + } else if (ring_uses_build_skb(rx_ring)) { skb = ixgbevf_build_skb(rx_ring, rx_buffer, - rx_desc, size); - else + &xdp, rx_desc); + } else { skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + &xdp, rx_desc); + } /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -997,7 +1182,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); + ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1039,6 +1224,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; + if (xdp_xmit) { + struct ixgbevf_ring *xdp_ring = + adapter->xdp_ring[rx_ring->queue_index]; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); + } + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1540,6 +1736,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) /* Setup the HW Tx Head and Tail descriptor pointers */ for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]); } #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 @@ -2171,7 +2369,10 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2239,6 +2440,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); } void ixgbevf_down(struct ixgbevf_adapter *adapter) @@ -2277,6 +2480,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) IXGBE_TXDCTL_SWFLSH); } + for (i = 0; i < adapter->num_xdp_queues; i++) { + u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); + } + if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -2374,6 +2584,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) /* Start with base case */ adapter->num_rx_queues = 1; adapter->num_tx_queues = 1; + adapter->num_xdp_queues = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2395,8 +2606,13 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 2 : 1; + adapter->num_rx_queues = rss; adapter->num_tx_queues = rss; + adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0; default: break; } @@ -2453,6 +2669,8 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring, * @v_idx: index of vector in adapter struct * @txr_count: number of Tx rings for q vector * @txr_idx: index of first Tx ring to assign + * @xdp_count: total number of XDP rings to allocate + * @xdp_idx: index of first XDP ring to allocate * @rxr_count: number of Rx rings for q vector * @rxr_idx: index of first Rx ring to assign * @@ -2460,13 +2678,15 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring, **/ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, int txr_count, int txr_idx, + int xdp_count, int xdp_idx, int rxr_count, int rxr_idx) { struct ixgbevf_q_vector *q_vector; + int reg_idx = txr_idx + xdp_idx; struct ixgbevf_ring *ring; int ring_count, size; - ring_count = txr_count + rxr_count; + ring_count = txr_count + xdp_count + rxr_count; size = sizeof(*q_vector) + (sizeof(*ring) * ring_count); /* allocate q_vector and rings */ @@ -2499,7 +2719,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; - ring->reg_idx = txr_idx; + ring->reg_idx = reg_idx; /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -2507,6 +2727,36 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* update count and index */ txr_count--; txr_idx++; + reg_idx++; + + /* push pointer to next ring */ + ring++; + } + + while (xdp_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = xdp_idx; + ring->reg_idx = reg_idx; + set_ring_xdp(ring); + + /* assign ring to adapter */ + adapter->xdp_ring[xdp_idx] = ring; + + /* update count and index */ + xdp_count--; + xdp_idx++; + reg_idx++; /* push pointer to next ring */ ring++; @@ -2556,8 +2806,12 @@ static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx) struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx]; struct ixgbevf_ring *ring; - ixgbevf_for_each_ring(ring, q_vector->tx) - adapter->tx_ring[ring->queue_index] = NULL; + ixgbevf_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + adapter->xdp_ring[ring->queue_index] = NULL; + else + adapter->tx_ring[ring->queue_index] = NULL; + } ixgbevf_for_each_ring(ring, q_vector->rx) adapter->rx_ring[ring->queue_index] = NULL; @@ -2583,15 +2837,16 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; int rxr_remaining = adapter->num_rx_queues; int txr_remaining = adapter->num_tx_queues; - int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int xdp_remaining = adapter->num_xdp_queues; + int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0; int err; - if (q_vectors >= (rxr_remaining + txr_remaining)) { + if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) { for (; rxr_remaining; v_idx++, q_vectors--) { int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); err = ixgbevf_alloc_q_vector(adapter, v_idx, - 0, 0, rqpv, rxr_idx); + 0, 0, 0, 0, rqpv, rxr_idx); if (err) goto err_out; @@ -2604,9 +2859,11 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) for (; q_vectors; v_idx++, q_vectors--) { int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors); + int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors); err = ixgbevf_alloc_q_vector(adapter, v_idx, tqpv, txr_idx, + xqpv, xdp_idx, rqpv, rxr_idx); if (err) @@ -2617,6 +2874,8 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) rxr_idx += rqpv; txr_remaining -= tqpv; txr_idx += tqpv; + xdp_remaining -= xqpv; + xdp_idx += xqpv; } return 0; @@ -2688,9 +2947,10 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) goto err_alloc_q_vectors; } - hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", - (adapter->num_rx_queues > 1) ? "Enabled" : - "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); + hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n", + (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", + adapter->num_rx_queues, adapter->num_tx_queues, + adapter->num_xdp_queues); set_bit(__IXGBEVF_DOWN, &adapter->state); @@ -2711,6 +2971,7 @@ err_set_interrupt: static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { adapter->num_tx_queues = 0; + adapter->num_xdp_queues = 0; adapter->num_rx_queues = 0; ixgbevf_free_q_vectors(adapter); @@ -2918,6 +3179,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) if (netif_carrier_ok(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + set_check_for_tx_hang(adapter->xdp_ring[i]); } /* get one bit for every active Tx/Rx interrupt vector */ @@ -3089,6 +3352,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) if (adapter->tx_ring[i]->desc) ixgbevf_free_tx_resources(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + if (adapter->xdp_ring[i]->desc) + ixgbevf_free_tx_resources(adapter->xdp_ring[i]); } /** @@ -3139,7 +3405,7 @@ err: **/ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) { - int i, err = 0; + int i, j = 0, err = 0; for (i = 0; i < adapter->num_tx_queues; i++) { err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]); @@ -3149,21 +3415,34 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) goto err_setup_tx; } + for (j = 0; j < adapter->num_xdp_queues; j++) { + err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]); + if (!err) + continue; + hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j); + break; + } + return 0; err_setup_tx: /* rewind the index freeing the rings as we go */ + while (j--) + ixgbevf_free_tx_resources(adapter->xdp_ring[j]); while (i--) ixgbevf_free_tx_resources(adapter->tx_ring[i]); + return err; } /** * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * @adapter: board private structure * @rx_ring: Rx descriptor ring (for a specific queue) to setup * * Returns 0 on success, negative on failure **/ -int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) { int size; @@ -3184,6 +3463,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) if (!rx_ring->desc) goto err; + /* XDP RX-queue info */ + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index) < 0) + goto err; + + rx_ring->xdp_prog = adapter->xdp_prog; + return 0; err: vfree(rx_ring->rx_buffer_info); @@ -3207,7 +3493,7 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_rx_queues; i++) { - err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]); + err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]); if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i); @@ -3232,6 +3518,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { ixgbevf_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -3918,6 +4206,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int ret; + /* prevent MTU being changed to a size unsupported by XDP */ + if (adapter->xdp_prog) { + dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n"); + return -EPERM; + } + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); @@ -4029,6 +4323,23 @@ static void ixgbevf_shutdown(struct pci_dev *pdev) ixgbevf_suspend(pdev, PMSG_SUSPEND); } +static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, + const struct ixgbevf_ring *ring) +{ + u64 bytes, packets; + unsigned int start; + + if (ring) { + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + bytes = ring->stats.bytes; + packets = ring->stats.packets; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + stats->tx_bytes += bytes; + stats->tx_packets += packets; + } +} + static void ixgbevf_get_stats(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -4056,13 +4367,12 @@ static void ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_tx_queues; i++) { ring = adapter->tx_ring[i]; - do { - start = u64_stats_fetch_begin_irq(&ring->syncp); - bytes = ring->stats.bytes; - packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); - stats->tx_bytes += bytes; - stats->tx_packets += packets; + ixgbevf_get_tx_ring_stats(stats, ring); + } + + for (i = 0; i < adapter->num_xdp_queues; i++) { + ring = adapter->xdp_ring[i]; + ixgbevf_get_tx_ring_stats(stats, ring); } rcu_read_unlock(); } @@ -4101,6 +4411,64 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +{ + int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct bpf_prog *old_prog; + + /* verify ixgbevf ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *ring = adapter->rx_ring[i]; + + if (frame_size > ixgbevf_rx_bufsz(ring)) + return -EINVAL; + } + + old_prog = xchg(&adapter->xdp_prog, prog); + + /* If transitioning XDP modes reconfigure rings */ + if (!!prog != !!old_prog) { + /* Hardware has to reinitialize queues and interrupts to + * match packet buffer alignment. Unfortunately, the + * hardware is not flexible enough to do this dynamically. + */ + if (netif_running(dev)) + ixgbevf_close(dev); + + ixgbevf_clear_interrupt_scheme(adapter); + ixgbevf_init_interrupt_scheme(adapter); + + if (netif_running(dev)) + ixgbevf_open(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return ixgbevf_xdp_setup(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!(adapter->xdp_prog); + xdp->prog_id = adapter->xdp_prog ? + adapter->xdp_prog->aux->id : 0; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, @@ -4117,6 +4485,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_poll_controller = ixgbevf_netpoll, #endif .ndo_features_check = ixgbevf_features_check, + .ndo_bpf = ixgbevf_xdp, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev)