From 29e2d9eb82647654abff150ff02fa1e07362214f Mon Sep 17 00:00:00 2001 From: Henry Tieman Date: Fri, 20 Nov 2020 16:38:30 -0800 Subject: [PATCH 1/7] ice: fix FDir IPv6 flexbyte The packet classifier would occasionally misrecognize an IPv6 training packet when the next protocol field was 0. The correct value for unspecified protocol is IPPROTO_NONE. Fixes: 165d80d6adab ("ice: Support IPv6 Flow Director filters") Signed-off-by: Henry Tieman Reviewed-by: Paul Menzel Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 2d27f66ac853..192729546bbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1576,7 +1576,13 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, sizeof(struct in6_addr)); input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes; input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass; - input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + + /* if no protocol requested, use IPPROTO_NONE */ + if (!fsp->m_u.usr_ip6_spec.l4_proto) + input->ip.v6.proto = IPPROTO_NONE; + else + input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, sizeof(struct in6_addr)); memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src, From 1b0b0b581b945ee27beb70e8199270a22dd5a2f6 Mon Sep 17 00:00:00 2001 From: Nick Nunley Date: Fri, 20 Nov 2020 16:38:31 -0800 Subject: [PATCH 2/7] ice: Implement flow for IPv6 next header (extension header) This patch is based on a similar change to i40e by Slawomir Laba: "i40e: Implement flow for IPv6 next header (extension header)". 
When a packet contains an IPv6 header with next header which is an extension header and not a protocol one, the kernel function skb_transport_header called with such sk_buff will return a pointer to the extension header and not to the TCP one. The above explained call caused a problem with packet processing for skb with encapsulation for tunnel with ICE_TX_CTX_EIPT_IPV6. The extension header was not skipped at all. The ipv6_skip_exthdr function does check if next header of the IPV6 header is an extension header and doesn't modify the l4_proto pointer if it points to a protocol header value so it's safe to omit the comparison of exthdr and l4.hdr pointers. The ipv6_skip_exthdr can return value -1. This means that the skipping process failed and there is something wrong with the packet so it will be dropped. Fixes: a4e82a81f573 ("ice: Add support for tunnel offloads") Signed-off-by: Nick Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index a2d0aad8cfdd..b6fa83c619dd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1923,12 +1923,15 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) ICE_TX_CTX_EIPT_IPV4_NO_CSUM; l4_proto = ip.v4->protocol; } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { + int ret; + tunnel |= ICE_TX_CTX_EIPT_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + if (ret < 0) + return -1; } /* define outer transport */ From 13ed5e8a9b9ccd140a79e80283f69d724c9bb2be Mon Sep 17 00:00:00 2001 From: Nick Nunley Date: Fri, 20 Nov 2020 16:38:33 -0800 Subject: [PATCH 3/7] ice: 
update dev_addr in ice_set_mac_address even if HW filter exists Fix the driver to copy the MAC address configured in ndo_set_mac_address into dev_addr, even if the MAC filter already exists in HW. In some situations (e.g. bonding) the netdev's dev_addr could have been modified outside of the driver, with no change to the HW filter, so the driver cannot assume that they match. Fixes: 757976ab16be ("ice: Fix check for removing/adding mac filters") Signed-off-by: Nick Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c52b9bb0e3ab..fb81aa5979e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4884,9 +4884,15 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) goto err_update_filters; } - /* Add filter for new MAC. If filter exists, just return success */ + /* Add filter for new MAC. If filter exists, return success */ status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { + /* Although this MAC filter is already present in hardware it's + * possible in some cases (e.g. bonding) that dev_addr was + * modified outside of the driver and needs to be restored back + * to this value. + */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); return 0; } From 943b881e35829403da638fcb34a959125deafef3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 21 Jan 2021 10:38:05 -0800 Subject: [PATCH 4/7] ice: Don't allow more channels than LAN MSI-X available Currently users could create more channels than LAN MSI-X available. 
This is happening because there is no check against pf->num_lan_msix when checking the max allowed channels and will cause performance issues if multiple Tx and Rx queues are tied to a single MSI-X. Fix this by not allowing more channels than LAN MSI-X available in pf->num_lan_msix. Fixes: 87324e747fde ("ice: Implement ethtool ops for channels") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9e8e9531cd87..69c113a4de7e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3258,8 +3258,8 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, */ static int ice_get_max_txq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_txq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_txq); } /** @@ -3268,8 +3268,8 @@ static int ice_get_max_txq(struct ice_pf *pf) */ static int ice_get_max_rxq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_rxq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_rxq); } /** From f3fe97f64384fa4073d9dc0278c4b351c92e295c Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 21 Jan 2021 10:38:06 -0800 Subject: [PATCH 5/7] ice: Fix MSI-X vector fallback logic The current MSI-X enablement logic tries to enable best-case MSI-X vectors and if that fails we only support a bare-minimum set. This includes a single MSI-X for 1 Tx and 1 Rx queue and a single MSI-X for the OICR interrupt. Unfortunately, the driver fails to load when we don't get as many MSI-X as requested for a couple reasons. 
First, the code to allocate MSI-X in the driver tries to allocate num_online_cpus() MSI-X for LAN traffic without caring about the number of MSI-X actually enabled/requested from the kernel for LAN traffic. So, when calling ice_get_res() for the PF VSI, it returns failure because the number of available vectors is less than requested. Fix this by not allowing the PF VSI to allocate more than pf->num_lan_msix MSI-X vectors and pf->num_lan_msix Rx/Tx queues. Limiting the number of queues is done because we don't want more than 1 Tx/Rx queue per interrupt due to performance concerns. Second, the driver assigns pf->num_lan_msix = 2, to account for LAN traffic and the OICR. However, pf->num_lan_msix is only meant for LAN MSI-X. This is causing a failure when the PF VSI tries to allocate/reserve the minimum pf->num_lan_msix because the OICR MSI-X has already been reserved, so there may not be enough MSI-X vectors left. Fix this by setting pf->num_lan_msix = 1 for the failure case. Then the ICE_MIN_MSIX accounts for the LAN MSI-X and the OICR MSI-X needed for the failure case. Update the related defines used in ice_ena_msix_range() to align with the above behavior and remove the unused RDMA defines because RDMA is currently not supported. Also, remove the now incorrect comment. 
Fixes: 152b978a1f90 ("ice: Rework ice_ena_msix_range") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 4 +++- drivers/net/ethernet/intel/ice/ice_lib.c | 14 +++++++++----- drivers/net/ethernet/intel/ice/ice_main.c | 8 ++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 56725356a17b..fa1e128c24ec 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -68,7 +68,9 @@ #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_AQ_LEN 64 #define ICE_MBXSQ_LEN 64 -#define ICE_MIN_MSIX 2 +#define ICE_MIN_LAN_TXRX_MSIX 1 +#define ICE_MIN_LAN_OICR_MSIX 1 +#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 3df67486d42d..ad9c22a1b97a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -161,8 +161,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf), - num_online_cpus()); + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; @@ -174,8 +175,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf), - num_online_cpus()); + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; @@ -184,7 +186,9 @@ static void 
ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) pf->num_lan_rx = vsi->alloc_rxq; - vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq); + vsi->num_q_vectors = min_t(int, pf->num_lan_msix, + max_t(int, vsi->alloc_rxq, + vsi->alloc_txq)); break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fb81aa5979e3..e10ca8929f85 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3430,18 +3430,14 @@ static int ice_ena_msix_range(struct ice_pf *pf) if (v_actual < v_budget) { dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); -/* 2 vectors each for LAN and RDMA (traffic + OICR), one for flow director */ -#define ICE_MIN_LAN_VECS 2 -#define ICE_MIN_RDMA_VECS 2 -#define ICE_MIN_VECS (ICE_MIN_LAN_VECS + ICE_MIN_RDMA_VECS + 1) - if (v_actual < ICE_MIN_LAN_VECS) { + if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ pci_disable_msix(pf->pdev); err = -ERANGE; goto msix_err; } else { - pf->num_lan_msix = ICE_MIN_LAN_VECS; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; } } From 67a3c6b3cc40bb217c3ff947a55053151a00fea0 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 30 Nov 2020 14:12:57 +0100 Subject: [PATCH 6/7] i40e: acquire VSI pointer only after VF is initialized This change simplifies the VF initialization check and also minimizes the delay between acquiring the VSI pointer and using it. As known by the commit being fixed, there is a risk of the VSI pointer getting changed. Therefore minimize the delay between getting and using the pointer. 
Fixes: 9889707b06ac ("i40e: Fix crash caused by stress setting of VF MAC addresses") Signed-off-by: Stefan Assmann Reviewed-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 21ee56420c3a..7efc61aacb0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4046,20 +4046,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; vf = &pf->vf[vf_id]; - vsi = pf->vsi[vf->lan_vsi_idx]; /* When the VF is resetting wait until it is done. * It can take up to 200 milliseconds, * but wait for up to 300 milliseconds to be safe. - * If the VF is indeed in reset, the vsi pointer has - * to show on the newly loaded vsi under pf->vsi[id]. + * Acquire the VSI pointer only after the VF has been + * properly initialized. */ for (i = 0; i < 15; i++) { - if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { - if (i > 0) - vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) break; - } msleep(20); } if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { @@ -4068,6 +4064,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ret = -EAGAIN; goto error_param; } + vsi = pf->vsi[vf->lan_vsi_idx]; if (is_multicast_ether_addr(mac)) { dev_err(&pf->pdev->dev, From 329a3678ec69962aa67c91397efbd46d36635f91 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 17 Nov 2020 20:50:40 +0100 Subject: [PATCH 7/7] igc: fix link speed advertising Link speed advertising in igc has two problems: - When setting the advertisement via ethtool, the link speed is converted to the legacy 32 bit representation for the intel PHY code. 
This inadvertently drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT (being beyond bit 31). As a result, any call to `ethtool -s ...' drops the 2500Mbit/s link speed from the PHY settings. Only reloading the driver alleviates that problem. Fix this by converting the ETHTOOL_LINK_MODE_2500baseT_Full_BIT to the Intel PHY ADVERTISE_2500_FULL bit explicitly. - Rather than checking the actual PHY setting, the .get_link_ksettings function always fills link_modes.advertising with all link speeds the device is capable of. Fix this by checking the PHY autoneg_advertised settings and report only the actually advertised speeds up to ethtool. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Corinna Vinschen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 24 +++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 61d331ce38cd..831f2f09de5f 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1675,12 +1675,18 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = hw->phy.addr; /* advertising link modes */ - ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); + if (hw->phy.autoneg_advertised & 
ADVERTISE_100_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); /* set autoneg settings */ if (hw->mac.autoneg == 1) { @@ -1792,6 +1798,12 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, ethtool_convert_link_mode_to_legacy_u32(&advertising, cmd->link_modes.advertising); + /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. + * We have to check this and convert it to ADVERTISE_2500_FULL + * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. + */ + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) + advertising |= ADVERTISE_2500_FULL; if (cmd->base.autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1;