diff --git a/Documentation/networking/vxlan.rst b/Documentation/networking/vxlan.rst index ce239fa01848..2759dc1cc525 100644 --- a/Documentation/networking/vxlan.rst +++ b/Documentation/networking/vxlan.rst @@ -58,3 +58,31 @@ forwarding table using the new bridge command. 3. Show forwarding table:: # bridge fdb show dev vxlan0 + +The following NIC features may indicate support for UDP tunnel-related +offloads (most commonly VXLAN features, but support for a particular +encapsulation protocol is NIC specific): + + - `tx-udp_tnl-segmentation` + - `tx-udp_tnl-csum-segmentation` + ability to perform TCP segmentation offload of UDP encapsulated frames + + - `rx-udp_tunnel-port-offload` + receive side parsing of UDP encapsulated frames which allows NICs to + perform protocol-aware offloads, like checksum validation offload of + inner frames (only needed by NICs without protocol-agnostic offloads) + +For devices supporting `rx-udp_tunnel-port-offload` the list of currently +offloaded ports can be interrogated with `ethtool`:: + + $ ethtool --show-tunnels eth0 + Tunnel information for eth0: + UDP port table 0: + Size: 4 + Types: vxlan + No entries + UDP port table 1: + Size: 4 + Types: geneve, vxlan-gpe + Entries (1): + port 1230, vxlan-gpe diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ada0e93c38f0..537300e762f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "i40e_type.h" #include "i40e_prototype.h" @@ -133,7 +134,6 @@ enum i40e_state_t { __I40E_PORT_SUSPENDED, __I40E_VF_DISABLE, __I40E_MACVLAN_SYNC_PENDING, - __I40E_UDP_FILTER_SYNC_PENDING, __I40E_TEMP_LINK_POLLING, __I40E_CLIENT_SERVICE_REQUESTED, __I40E_CLIENT_L2_CHANGE, @@ -478,8 +478,8 @@ struct i40e_pf { struct list_head l3_flex_pit_list; struct list_head l4_flex_pit_list; - struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; - u16 pending_udp_bitmap; + struct udp_tunnel_nic_shared udp_tunnel_shared; + struct udp_tunnel_nic_info udp_tunnel_nic; struct hlist_head cloud_filter_list; u16 num_cloud_filters; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index cb366daa1ad7..70bd3f47e43a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10386,106 +10386,6 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -static const char *i40e_tunnel_name(u8 type) -{ - switch (type) { - case UDP_TUNNEL_TYPE_VXLAN: - return "vxlan"; - case UDP_TUNNEL_TYPE_GENEVE: - return "geneve"; - default: - return "unknown"; - } -} - -/** - * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters - * @pf: board private structure - **/ -static void i40e_sync_udp_filters(struct i40e_pf *pf) -{ - int i; - - /* loop through and set pending bit for all active UDP filters */ - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].port) - pf->pending_udp_bitmap |= BIT_ULL(i); - } - - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); -} - -/** - * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW - * @pf: board private structure - **/ -static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - u8 filter_index, type; - u16 port; - int i; - - if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state)) - return; - - /* acquire RTNL to maintain state of flags and port requests */ - rtnl_lock(); - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_udp_bitmap & BIT_ULL(i)) { - struct i40e_udp_port_config *udp_port; - i40e_status ret = 0; - - udp_port = &pf->udp_ports[i]; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - - port = READ_ONCE(udp_port->port); - type = READ_ONCE(udp_port->type); - filter_index = READ_ONCE(udp_port->filter_index); - - /* release RTNL while we wait on AQ command */ - rtnl_unlock(); - - if (port) - ret = i40e_aq_add_udp_tunnel(hw, port, - type, - &filter_index, - NULL); - else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED) - ret = i40e_aq_del_udp_tunnel(hw, filter_index, - NULL); - - /* reacquire RTNL so we can update filter_index */ - rtnl_lock(); - - if (ret) { - dev_info(&pf->pdev->dev, - "%s %s port %d, index %d failed, err %s aq_err %s\n", - i40e_tunnel_name(type), - port ? "add" : "delete", - port, - filter_index, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); - if (port) { - /* failed to add, just reset port, - * drop pending bit for any deletion - */ - udp_port->port = 0; - pf->pending_udp_bitmap &= ~BIT_ULL(i); - } - } else if (port) { - /* record filter index on success */ - udp_port->filter_index = filter_index; - } - } - } - - rtnl_unlock(); -} - /** * i40e_service_task - Run the driver's async subtasks * @work: pointer to work_struct containing our data @@ -10525,7 +10425,6 @@ static void i40e_service_task(struct work_struct *work) pf->vsi[pf->lan_vsi]); } i40e_sync_filters_subtask(pf); - i40e_sync_udp_filters_subtask(pf); } else { i40e_reset_subtask(pf); } @@ -12225,131 +12124,48 @@ static int i40e_set_features(struct net_device *netdev, return 0; } -/** - * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port - * @pf: board private structure - * @port: The UDP port to look up - * - * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found - **/ -static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) -{ - u8 i; - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - /* Do not report ports with pending deletions as - * being available. - */ - if (!port && (pf->pending_udp_bitmap & BIT_ULL(i))) - continue; - if (pf->udp_ports[i].port == port) - return i; - } - - return i; -} - -/** - * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_add(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 next_idx; - u8 idx; + struct i40e_hw *hw = &np->vsi->back->hw; + u8 type, filter_index; + i40e_status ret; - idx = i40e_get_udp_port_idx(pf, port); + type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN : + I40E_AQC_TUNNEL_TYPE_NGE; - /* Check if port already exists */ - if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "port %d already offloaded\n", port); - return; + ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, + NULL); + if (ret) { + netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* Now check if there is space to add the new port */ - next_idx = i40e_get_udp_port_idx(pf, 0); - - if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n", - port); - return; - } - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE)) - return; - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; - break; - default: - return; - } - - /* New port: add it and mark its index in the bitmap */ - pf->udp_ports[next_idx].port = port; - pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED; - pf->pending_udp_bitmap |= BIT_ULL(next_idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); + udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); + return 0; } -/** - * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_del(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_unset_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 idx; + struct i40e_hw *hw = &np->vsi->back->hw; + i40e_status ret; - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS) - goto not_found; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN) - goto not_found; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE) - goto not_found; - break; - default: - goto not_found; + ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); + if (ret) { + netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return -EIO; } - /* if port exists, set it to 0 (mark for deletion) - * and make it pending - */ - pf->udp_ports[idx].port = 0; - - /* Toggle pending bit instead of setting it. This way if we are - * deleting a port that has yet to be added we just clear the pending - * bit and don't have to worry about it. - */ - pf->pending_udp_bitmap ^= BIT_ULL(idx); - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); - - return; -not_found: - netdev_warn(netdev, "UDP port %d was not found, not deleting\n", - port); + return 0; } static int i40e_get_phys_port_id(struct net_device *netdev, @@ -12955,8 +12771,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, .ndo_set_vf_trust = i40e_ndo_set_vf_trust, - .ndo_udp_tunnel_add = i40e_udp_tunnel_add, - .ndo_udp_tunnel_del = i40e_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, @@ -13020,6 +12836,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= hw_enc_features; @@ -14420,7 +14238,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_ptp_init(pf); /* repopulate tunnel port filters */ - i40e_sync_udp_filters(pf); + udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); return ret; } @@ -15149,6 +14967,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_switch_setup; + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; + pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; + pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; + pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; + pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | + UDP_TUNNEL_TYPE_GENEVE; + /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with @@ -15158,6 +14984,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index d7430ce6af26..2d27f66ac853 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1268,8 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool is_tun = tun == ICE_FD_HW_SEG_TUN; int err; - if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL, - &port_num)) + if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num)) continue; err = ice_fdir_write_fltr(pf, input, add, is_tun); if (err) @@ -1647,8 +1646,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) } /* return error if not an update and no available filters */ - fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ? - 2 : 1; + fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1; if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 6834df14332f..59c0c6a0f8c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -556,7 +556,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); loc = pkt; } else { - if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port)) + if (!ice_get_open_tunnel_port(hw, &tnl_port)) return ICE_ERR_DOES_NOT_EXIST; if (!ice_fdir_pkt[idx].tun_pkt) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index b17ae3e20157..ac448d223e9a 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -489,8 +489,6 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) if ((label_name[len] - '0') == hw->pf_id) { hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; hw->tnl.tbl[hw->tnl.count].valid = false; - hw->tnl.tbl[hw->tnl.count].in_use = false; - hw->tnl.tbl[hw->tnl.count].marked = false; hw->tnl.tbl[hw->tnl.count].boost_addr = val; hw->tnl.tbl[hw->tnl.count].port = 0; hw->tnl.count++; @@ -505,8 +503,11 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) for (i = 0; i < hw->tnl.count; i++) { ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr, &hw->tnl.tbl[i].boost_entry); - if (hw->tnl.tbl[i].boost_entry) + if (hw->tnl.tbl[i].boost_entry) { hw->tnl.tbl[i].valid = true; + if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT) + hw->tnl.valid_count[hw->tnl.tbl[i].type]++; + } } } @@ -1625,83 +1626,13 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) return &bld->buf; } -/** - * ice_tunnel_port_in_use_hlpr - helper function to determine tunnel usage - * @hw: pointer to the HW structure - * @port: port to search for - * @index: optionally returns index - * - * Returns whether a port is already in use as a tunnel, and optionally its - * index - */ -static bool ice_tunnel_port_in_use_hlpr(struct ice_hw *hw, u16 port, u16 *index) -{ - u16 i; - - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].in_use && hw->tnl.tbl[i].port == port) { - if (index) - *index = i; - return true; - } - - return false; -} - -/** - * ice_tunnel_port_in_use - * @hw: pointer to the HW structure - * @port: port to search for - * @index: optionally returns index - * - * Returns whether a port is already in use as a tunnel, and optionally its - * index - */ -bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index) -{ - bool res; - - mutex_lock(&hw->tnl_lock); - res = ice_tunnel_port_in_use_hlpr(hw, port, index); - mutex_unlock(&hw->tnl_lock); - - return res; -} - -/** - * ice_find_free_tunnel_entry - * @hw: pointer to the HW structure - * @type: tunnel type - * @index: optionally returns index - * - * Returns whether there is a free tunnel entry, and optionally its index - */ -static bool -ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *index) -{ - u16 i; - - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && !hw->tnl.tbl[i].in_use && - hw->tnl.tbl[i].type == type) { - if (index) - *index = i; - return true; - } - - return false; -} - /** * ice_get_open_tunnel_port - retrieve an open tunnel port * @hw: pointer to the HW structure - * @type: tunnel type (TNL_ALL will return any open port) * @port: returns open port */ bool -ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *port) +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) { bool res = false; u16 i; @@ -1709,8 +1640,7 @@ ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, mutex_lock(&hw->tnl_lock); for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (type == TNL_ALL || hw->tnl.tbl[i].type == type)) { + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) { *port = hw->tnl.tbl[i].port; res = true; break; @@ -1721,9 +1651,35 @@ ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, return res; } +/** + * ice_tunnel_idx_to_entry - convert linear index to the sparse one + * @hw: pointer to the HW structure + * @type: type of tunnel + * @idx: linear index + * + * Stack assumes we have 2 linear tables with indexes [0, count_valid), + * but really the port table may be sprase, and types are mixed, so convert + * the stack index into the device index. + */ +static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type, + u16 idx) +{ + u16 i; + + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].valid && + hw->tnl.tbl[i].type == type && + idx--) + return i; + + WARN_ON_ONCE(1); + return 0; +} + /** * ice_create_tunnel * @hw: pointer to the HW structure + * @index: device table entry * @type: type of tunnel * @port: port of tunnel to create * @@ -1731,27 +1687,16 @@ ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, * creating a package buffer with the tunnel info and issuing an update package * command. */ -enum ice_status -ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) +static enum ice_status +ice_create_tunnel(struct ice_hw *hw, u16 index, + enum ice_tunnel_type type, u16 port) { struct ice_boost_tcam_section *sect_rx, *sect_tx; enum ice_status status = ICE_ERR_MAX_LIMIT; struct ice_buf_build *bld; - u16 index; mutex_lock(&hw->tnl_lock); - if (ice_tunnel_port_in_use_hlpr(hw, port, &index)) { - hw->tnl.tbl[index].ref++; - status = 0; - goto ice_create_tunnel_end; - } - - if (!ice_find_free_tunnel_entry(hw, type, &index)) { - status = ICE_ERR_OUT_OF_RANGE; - goto ice_create_tunnel_end; - } - bld = ice_pkg_buf_alloc(hw); if (!bld) { status = ICE_ERR_NO_MEMORY; @@ -1790,11 +1735,8 @@ ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam)); status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); - if (!status) { + if (!status) hw->tnl.tbl[index].port = port; - hw->tnl.tbl[index].in_use = true; - hw->tnl.tbl[index].ref = 1; - } ice_create_tunnel_err: ice_pkg_buf_free(hw, bld); @@ -1808,46 +1750,31 @@ ice_create_tunnel_end: /** * ice_destroy_tunnel * @hw: pointer to the HW structure + * @index: device table entry + * @type: type of tunnel * @port: port of tunnel to destroy (ignored if the all parameter is true) - * @all: flag that states to destroy all tunnels * * Destroys a tunnel or all tunnels by creating an update package buffer * targeting the specific updates requested and then performing an update * package. */ -enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) +static enum ice_status +ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type, + u16 port) { struct ice_boost_tcam_section *sect_rx, *sect_tx; enum ice_status status = ICE_ERR_MAX_LIMIT; struct ice_buf_build *bld; - u16 count = 0; - u16 index; - u16 size; - u16 i; mutex_lock(&hw->tnl_lock); - if (!all && ice_tunnel_port_in_use_hlpr(hw, port, &index)) - if (hw->tnl.tbl[index].ref > 1) { - hw->tnl.tbl[index].ref--; - status = 0; - goto ice_destroy_tunnel_end; - } - - /* determine count */ - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (all || hw->tnl.tbl[i].port == port)) - count++; - - if (!count) { - status = ICE_ERR_PARAM; + if (WARN_ON(!hw->tnl.tbl[index].valid || + hw->tnl.tbl[index].type != type || + hw->tnl.tbl[index].port != port)) { + status = ICE_ERR_OUT_OF_RANGE; goto ice_destroy_tunnel_end; } - /* size of section - there is at least one entry */ - size = struct_size(sect_rx, tcam, count); - bld = ice_pkg_buf_alloc(hw); if (!bld) { status = ICE_ERR_NO_MEMORY; @@ -1859,13 +1786,13 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) goto ice_destroy_tunnel_err; sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, - size); + struct_size(sect_rx, tcam, 1)); if (!sect_rx) goto ice_destroy_tunnel_err; sect_rx->count = cpu_to_le16(1); sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, - size); + struct_size(sect_tx, tcam, 1)); if (!sect_tx) goto ice_destroy_tunnel_err; sect_tx->count = cpu_to_le16(1); @@ -1873,26 +1800,14 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) /* copy original boost entry to update package buffer, one copy to Rx * section, another copy to the Tx section */ - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && - (all || hw->tnl.tbl[i].port == port)) { - memcpy(sect_rx->tcam + i, hw->tnl.tbl[i].boost_entry, - sizeof(*sect_rx->tcam)); - memcpy(sect_tx->tcam + i, hw->tnl.tbl[i].boost_entry, - sizeof(*sect_tx->tcam)); - hw->tnl.tbl[i].marked = true; - } + memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry, + sizeof(*sect_rx->tcam)); + memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry, + sizeof(*sect_tx->tcam)); status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); if (!status) - for (i = 0; i < hw->tnl.count && - i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].marked) { - hw->tnl.tbl[i].ref = 0; - hw->tnl.tbl[i].port = 0; - hw->tnl.tbl[i].in_use = false; - hw->tnl.tbl[i].marked = false; - } + hw->tnl.tbl[index].port = 0; ice_destroy_tunnel_err: ice_pkg_buf_free(hw, bld); @@ -1903,6 +1818,52 @@ ice_destroy_tunnel_end: return status; } +int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + enum ice_tunnel_type tnl_type; + enum ice_status status; + u16 index; + + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; + index = ice_tunnel_idx_to_entry(&pf->hw, idx, tnl_type); + + status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port)); + if (status) { + netdev_err(netdev, "Error adding UDP tunnel - %s\n", + ice_stat_str(status)); + return -EIO; + } + + udp_tunnel_nic_set_port_priv(netdev, table, idx, index); + return 0; +} + +int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + enum ice_tunnel_type tnl_type; + enum ice_status status; + + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; + + status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type, + ntohs(ti->port)); + if (status) { + netdev_err(netdev, "Error removing UDP tunnel - %s\n", + ice_stat_str(status)); + return -EIO; + } + + return 0; +} + /* PTG Management */ /** diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 568ea519af51..20deddb807c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -19,12 +19,11 @@ #define ICE_PKG_CNT 4 bool -ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, - u16 *port); -enum ice_status -ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port); -enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all); -bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index); +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); +int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); +int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); enum ice_status ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index c1c99a267a98..24063c1351b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -298,6 +298,7 @@ struct ice_pkg_enum { enum ice_tunnel_type { TNL_VXLAN = 0, TNL_GENEVE, + __TNL_TYPE_CNT, TNL_LAST = 0xFF, TNL_ALL = 0xFF, }; @@ -311,11 +312,8 @@ struct ice_tunnel_entry { enum ice_tunnel_type type; u16 boost_addr; u16 port; - u16 ref; struct ice_boost_tcam_entry *boost_entry; u8 valid; - u8 in_use; - u8 marked; }; #define ICE_TUNNEL_MAX_ENTRIES 16 @@ -323,6 +321,7 @@ struct ice_tunnel_entry { struct ice_tunnel_table { struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES]; u16 count; + u16 valid_count[__TNL_TYPE_CNT]; }; struct ice_pkg_es { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2297ee7dba26..ffef4c901ea2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2873,6 +2873,7 @@ static void ice_set_ops(struct net_device *netdev) } netdev->netdev_ops = &ice_netdev_ops; + netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; ice_set_ethtool_ops(netdev); } @@ -3978,7 +3979,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct device *dev = &pdev->dev; struct ice_pf *pf; struct ice_hw *hw; - int err; + int i, err; /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst @@ -4073,11 +4074,37 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_devlink_init_regions(pf); + pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; + pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; + pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; + i = 0; + if (pf->hw.tnl.valid_count[TNL_VXLAN]) { + pf->hw.udp_tunnel_nic.tables[i].n_entries = + pf->hw.tnl.valid_count[TNL_VXLAN]; + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = + UDP_TUNNEL_TYPE_VXLAN; + i++; + } + if (pf->hw.tnl.valid_count[TNL_GENEVE]) { + pf->hw.udp_tunnel_nic.tables[i].n_entries = + pf->hw.tnl.valid_count[TNL_GENEVE]; + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = + UDP_TUNNEL_TYPE_GENEVE; + i++; + } + pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; if (!pf->num_alloc_vsi) { err = -EIO; goto err_init_pf_unroll; } + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), GFP_KERNEL); @@ -6574,70 +6601,6 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) pf->tx_timeout_recovery_level++; } -/** - * ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - */ -static void -ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - enum ice_tunnel_type tnl_type; - u16 port = ntohs(ti->port); - enum ice_status status; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - tnl_type = TNL_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - tnl_type = TNL_GENEVE; - break; - default: - netdev_err(netdev, "Unknown tunnel type\n"); - return; - } - - status = ice_create_tunnel(&pf->hw, tnl_type, port); - if (status == ICE_ERR_OUT_OF_RANGE) - netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n", - port); - else if (status) - netdev_err(netdev, "Error adding UDP tunnel - %s\n", - ice_stat_str(status)); -} - -/** - * ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - */ -static void -ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - enum ice_status status; - bool retval; - - retval = ice_tunnel_port_in_use(&pf->hw, port, NULL); - if (!retval) { - netdev_info(netdev, "port %d not found in UDP tunnels list\n", - port); - return; - } - - status = ice_destroy_tunnel(&pf->hw, port, false); - if (status) - netdev_err(netdev, "error deleting port %d from UDP tunnels list\n", - port); -} - /** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure @@ -6830,6 +6793,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, - .ndo_udp_tunnel_add = ice_udp_tunnel_add, - .ndo_udp_tunnel_del = ice_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, }; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 4cdccfadf274..2226a291a394 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -676,6 +676,9 @@ struct ice_hw { struct mutex tnl_lock; struct ice_tunnel_table tnl; + struct udp_tunnel_nic_shared udp_tunnel_shared; + struct udp_tunnel_nic_info udp_tunnel_nic; + /* HW block tables */ struct ice_blk_info blk[ICE_BLK_COUNT]; struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 0a1b28aea894..827fc80f50a0 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #define DRV_NAME "netdevsim" @@ -84,7 +85,8 @@ struct netdevsim { struct { u32 inject_error; u32 sleep; - u32 ports[2][NSIM_UDP_TUNNEL_N_PORTS]; + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; + u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; struct debugfs_u32_array dfs_ports[2]; } udp_ports; @@ -209,9 +211,13 @@ struct nsim_dev { bool fail_trap_policer_set; bool fail_trap_policer_counter_get; struct { + struct udp_tunnel_nic_shared utn_shared; + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; bool sync_all; bool open_only; bool ipv4_only; + bool shared; + bool static_iana_vxlan; u32 sleep; } udp_ports; }; diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 22c06a76033c..6ab023acefd6 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -22,11 +22,13 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, msleep(ns->udp_ports.sleep); if (!ret) { - if (ns->udp_ports.ports[table][entry]) + if (ns->udp_ports.ports[table][entry]) { + WARN(1, "entry already in use\n"); ret = -EBUSY; - else + } else { ns->udp_ports.ports[table][entry] = be16_to_cpu(ti->port) << 16 | ti->type; + } } netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", @@ -50,10 +52,13 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; - if (val == ns->udp_ports.ports[table][entry]) + if (val == ns->udp_ports.ports[table][entry]) { ns->udp_ports.ports[table][entry] = 0; - else + } else { + WARN(1, "entry not installed %x vs %x\n", + val, ns->udp_ports.ports[table][entry]); ret = -ENOENT; + } } netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", @@ -107,7 +112,7 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); - memset(&ns->udp_ports.ports, 0, sizeof(ns->udp_ports.ports)); + memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); rtnl_lock(); udp_tunnel_nic_reset_ntf(dev); rtnl_unlock(); @@ -127,6 +132,17 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_nic_info *info; + if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { + dev_err(&nsim_dev->nsim_bus_dev->dev, + "shared can't be used in conjunction with open_only\n"); + return -EINVAL; + } + + if (!nsim_dev->udp_ports.shared) + ns->udp_ports.ports = ns->udp_ports.__ports; + else + ns->udp_ports.ports = nsim_dev->udp_ports.__ports; + debugfs_create_u32("udp_ports_inject_error", 0600, ns->nsim_dev_port->ddir, &ns->udp_ports.inject_error); @@ -168,6 +184,10 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; + if (nsim_dev->udp_ports.shared) + info->shared = &nsim_dev->udp_ports.utn_shared; + if (nsim_dev->udp_ports.static_iana_vxlan) + info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; dev->udp_tunnel_nic_info = info; return 0; @@ -187,6 +207,10 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) &nsim_dev->udp_ports.open_only); debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.ipv4_only); + debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.shared); + debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.static_iana_vxlan); debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sleep); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 94bb7a882250..2ea453dac876 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags { UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; +struct udp_tunnel_nic; + +#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) + +struct udp_tunnel_nic_shared { + struct udp_tunnel_nic *udp_tunnel_nic_info; + + struct list_head devices; +}; + +struct udp_tunnel_nic_shared_node { + struct net_device *dev; + struct list_head list; +}; + /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once + * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table @@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags { * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * + * Devices which (misguidedly) share the UDP tunnel port table across multiple + * netdevs should allocate an instance of struct udp_tunnel_nic_shared and + * point @shared at it. + * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices + * sharing a table. + * * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, @@ -234,6 +256,8 @@ struct udp_tunnel_nic_info { /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); + struct udp_tunnel_nic_shared *shared; + unsigned int flags; struct udp_tunnel_nic_table_info { diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 69962165c0e8..0d122edc368d 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -19,8 +19,9 @@ enum udp_tunnel_nic_table_entry_flags { struct udp_tunnel_nic_table_entry { __be16 port; u8 type; - u8 use_cnt; u8 flags; + u16 use_cnt; +#define UDP_TUNNEL_NIC_USE_CNT_MAX U16_MAX u8 hw_priv; }; @@ -370,6 +371,8 @@ udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; unsigned int from, to; + WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX); + /* If not going from used to unused or vice versa - all done. * For dodgy entries make sure we try to sync again (queue the entry). */ @@ -675,6 +678,7 @@ static void udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic_shared_node *node; unsigned int i, j; /* Freeze all the ports we are already tracking so that the replay @@ -686,7 +690,12 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) utn->missed = 0; utn->need_replay = 0; - udp_tunnel_get_rx_info(dev); + if (!info->shared) { + udp_tunnel_get_rx_info(dev); + } else { + list_for_each_entry(node, &info->shared->devices, list) + udp_tunnel_get_rx_info(node->dev); + } for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) @@ -742,20 +751,39 @@ err_free_utn: return NULL; } +static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) +{ + unsigned int i; + + for (i = 0; i < utn->n_tables; i++) + kfree(utn->entries[i]); + kfree(utn->entries); + kfree(utn); +} + static int udp_tunnel_nic_register(struct net_device *dev) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic_shared_node *node = NULL; struct udp_tunnel_nic *utn; unsigned int n_tables, i; BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < UDP_TUNNEL_NIC_MAX_TABLES); + /* Expect use count of at most 2 (IPv4, IPv6) per device */ + BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX < + UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2); + /* Check that the driver info is sane */ if (WARN_ON(!info->set_port != !info->unset_port) || WARN_ON(!info->set_port == !info->sync_table) || WARN_ON(!info->tables[0].n_entries)) return -EINVAL; + if (WARN_ON(info->shared && + info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + return -EINVAL; + n_tables = 1; for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { if (!info->tables[i].n_entries) @@ -766,9 +794,33 @@ static int udp_tunnel_nic_register(struct net_device *dev) return -EINVAL; } - utn = udp_tunnel_nic_alloc(info, n_tables); - if (!utn) - return -ENOMEM; + /* Create UDP tunnel state structures */ + if (info->shared) { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->dev = dev; + } + + if (info->shared && info->shared->udp_tunnel_nic_info) { + utn = info->shared->udp_tunnel_nic_info; + } else { + utn = udp_tunnel_nic_alloc(info, n_tables); + if (!utn) { + kfree(node); + return -ENOMEM; + } + } + + if (info->shared) { + if (!info->shared->udp_tunnel_nic_info) { + INIT_LIST_HEAD(&info->shared->devices); + info->shared->udp_tunnel_nic_info = utn; + } + + list_add_tail(&node->list, &info->shared->devices); + } utn->dev = dev; dev_hold(dev); @@ -783,7 +835,33 @@ static int udp_tunnel_nic_register(struct net_device *dev) static void udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) { - unsigned int i; + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + + /* For a shared table remove this dev from the list of sharing devices + * and if there are other devices just detach. + */ + if (info->shared) { + struct udp_tunnel_nic_shared_node *node, *first; + + list_for_each_entry(node, &info->shared->devices, list) + if (node->dev == dev) + break; + if (node->dev != dev) + return; + + list_del(&node->list); + kfree(node); + + first = list_first_entry_or_null(&info->shared->devices, + typeof(*first), list); + if (first) { + udp_tunnel_drop_rx_info(dev); + utn->dev = first->dev; + goto release_dev; + } + + info->shared->udp_tunnel_nic_info = NULL; + } /* Flush before we check work, so we don't waste time adding entries * from the work which we will boot immediately. @@ -796,10 +874,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) if (utn->work_pending) return; - for (i = 0; i < utn->n_tables; i++) - kfree(utn->entries[i]); - kfree(utn->entries); - kfree(utn); + udp_tunnel_nic_free(utn); +release_dev: dev->udp_tunnel_nic = NULL; dev_put(dev); } diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh old mode 100644 new mode 100755 index ba1d53b9f815..1b08e042cf94 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID NSIM_NETDEV= HAS_ETHTOOL= +STATIC_ENTRIES= EXIT_STATUS=0 num_cases=0 num_errors=0 @@ -193,6 +194,21 @@ function check_tables { sleep 0.02 ((retries--)) done + + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then + fail=0 + for i in "${!STATIC_ENTRIES[@]}"; do + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") + if [ $cnt -ne 1 ]; then + err_cnt "ethtool static entry: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + fail=1 + fi + done + [ $fail == 0 ] && pass_cnt + fi } function print_table { @@ -775,6 +791,157 @@ for port in 0 1; do exp1=( 0 0 0 0 ) done +cleanup_nsim + +# shared port tables +pfx="table sharing" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 0 > $NSIM_DEV_DFS/udp_ports_open_only +echo 1 > $NSIM_DEV_DFS/udp_ports_sleep +echo 1 > $NSIM_DEV_DFS/udp_ports_shared + +old_netdevs=$(ls /sys/class/net) +echo 1 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +old_netdevs=$(ls /sys/class/net) +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV2=`get_netdev_name old_netdevs` + +msg="VxLAN v4 devices" +exp0=( `mke 4789 1` 0 0 0 ) +exp1=( 0 0 0 0 ) +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV2 + +msg="VxLAN v4 devices go down" +exp0=( 0 0 0 0 ) +ifconfig vxlan1 down +ifconfig vxlan0 down +check_tables + +for ifc in vxlan0 vxlan1; do + ifconfig $ifc up +done + +msg="VxLAN v6 device" +exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) +new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + +msg="Geneve device" +exp1=( `mke 6081 2` 0 0 0 ) +new_geneve gnv0 6081 + +msg="NIC device goes down" +ifconfig $NSIM_NETDEV down +check_tables + +msg="NIC device goes up again" +ifconfig $NSIM_NETDEV up +check_tables + +for i in `seq 2`; do + msg="turn feature off - 1, rep $i" + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature off - 2, rep $i" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature on - 1, rep $i" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="turn feature on - 2, rep $i" + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on + check_tables +done + +msg="tunnels destroyed 1" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +overflow_table0 "overflow NIC table" + +msg="re-add a port" + +echo 2 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/new_port +check_tables + +msg="replace VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) +del_dev vxlan1 + +msg="vacate VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) +del_dev vxlan2 + +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +check_tables + +msg="tunnels destroyed 2" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +echo 1 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/del_port + +cleanup_nsim + +# Static IANA port +pfx="static IANA vxlan" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan +STATIC_ENTRIES=( `mke 4789 1` ) + +port=1 +old_netdevs=$(ls /sys/class/net) +echo $port > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="check empty" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="add on static port" +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV + +msg="add on different port" +exp0=( `mke 4790 1` 0 0 0 ) +new_vxlan vxlan2 4790 $NSIM_NETDEV + +cleanup_tuns + +msg="tunnels destroyed" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="different type" +new_geneve gnv0 4789 + +cleanup_tuns +cleanup_nsim + +# END + modprobe -r netdevsim if [ $num_errors -eq 0 ]; then