From 37beabe9a891b92174cd1aafbfa881fe9e05aa87 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 8 Feb 2023 14:25:54 +0200 Subject: [PATCH 01/14] net/mlx5e: Fix macsec ASO context alignment Currently mlx5e_macsec_umr struct does not satisfy hardware memory alignment requirement. Hence the result of querying advanced steering operation (ASO) is not copied to the memory region as expected. Fix by satisfying hardware memory alignment requirement and move context to be first field in struct for better readability. Fixes: 1f53da676439 ("net/mlx5e: Create advanced steering operation (ASO) object for MACsec") Signed-off-by: Emeel Hakim Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 08d0929e8260..8af53178e40d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -89,8 +89,8 @@ struct mlx5e_macsec_rx_sc { }; struct mlx5e_macsec_umr { + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; dma_addr_t dma_addr; - u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; u32 mkey; }; From 9a92fe1db9e57ea94388a1d768e8ee42af858377 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 12 Mar 2021 07:21:29 -0600 Subject: [PATCH 02/14] net/mlx5e: Don't cache tunnel offloads capability When mlx5e attaches again after device health recovery, the device capabilities might have changed by the eswitch manager. For example in one flow when ECPF changes the eswitch mode between legacy and switchdev, it updates the flow table tunnel capability. The cached value is only used in one place, so just check the capability there instead. Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized") Signed-off-by: Parav Pandit Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 1 - 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4276c6eb6820..4a19ef4a9811 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -313,7 +313,6 @@ struct mlx5e_params { } channel; } mqprio; bool rx_cqe_compress_def; - bool tunneled_offload_en; struct dim_cq_moder rx_cq_moderation; struct dim_cq_moder tx_cq_moderation; struct mlx5e_packet_merge_param packet_merge; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 51b5f3cca504..56fc2aebb9ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4979,8 +4979,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev); - /* AF_XDP */ params->xsk = xsk; @@ -5285,7 +5283,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) } features = MLX5E_RX_RES_FEATURE_PTP; - if (priv->channels.params.tunneled_offload_en) + if (mlx5_tunnel_inner_ft_supported(mdev)) features |= MLX5E_RX_RES_FEATURE_INNER_FT; err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, priv->max_nch, priv->drop_rq.rqn, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 43fd12fb87b8..8ff654b4e9e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -755,7 +755,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->mqprio.num_tc = 1; - params->tunneled_offload_en = false; if (rep->vport != MLX5_VPORT_UPLINK) params->vlan_strip_disable = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index c2a4f86bc890..baa7ef812313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -70,7 +70,6 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; - params->tunneled_offload_en = false; /* CQE compression is not supported for IPoIB */ params->rx_cqe_compress_def = false; From ba5d8f72b82cc197355c9340ef89dab813815865 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 24 Jun 2021 18:22:57 +0300 Subject: [PATCH 03/14] net/mlx5: Fix setting ec_function bit in MANAGE_PAGES When ECPF is a page supplier, reclaim pages missed to honor the ec_function bit provided by the firmware. It always used the ec_function to true during driver unload flow for ECPF. This is incorrect. Honor the ec_function bit provided by device during page allocation request event. Fixes: d6945242f45d ("net/mlx5: Hold pages RB tree per VF") Signed-off-by: Parav Pandit Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 64d4e7125e9b..95dc67fb3001 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -82,6 +82,16 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; } +static u32 mlx5_get_ec_function(u32 function) +{ + return function >> 16; +} + +static u32 mlx5_get_func_id(u32 function) +{ + return function & 0xffff; +} + static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; @@ -665,20 +675,22 @@ static int optimal_reclaimed_pages(void) } static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, - struct rb_root *root, u16 func_id) + struct rb_root *root, u32 function) { u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); unsigned long end = jiffies + recl_pages_to_jiffies; while (!RB_EMPTY_ROOT(root)) { + u32 ec_function = mlx5_get_ec_function(function); + u32 function_id = mlx5_get_func_id(function); int nclaimed; int err; - err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(), - &nclaimed, false, mlx5_core_is_ecpf(dev)); + err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(), + &nclaimed, false, ec_function); if (err) { - mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n", - err, func_id); + mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n", + err, function_id, ec_function); return err; } From 7ba930fc25def6fd736abcdfa224272948a65cf7 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 20 Oct 2022 00:13:50 +0300 Subject: [PATCH 04/14] net/mlx5: Disable eswitch before waiting for VF pages The offending commit changed the ordering of moving to legacy mode and waiting for the VF pages. Moving to legacy mode is important in bluefield, because it sends the host driver into error state, and frees its pages. Without this transition we end up waiting 2 minutes for pages that aren't coming before carrying on with the unload process. Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 540840e80493..f36a3aa4b5c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1364,8 +1364,8 @@ static void mlx5_unload(struct mlx5_core_dev *dev) { mlx5_devlink_traps_unregister(priv_to_devlink(dev)); mlx5_sf_dev_table_destroy(dev); - mlx5_sriov_detach(dev); mlx5_eswitch_disable(dev->priv.eswitch); + mlx5_sriov_detach(dev); mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); From 1313d78ac0c1cfcff7bdece8da54b080e71487c4 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 7 Feb 2023 15:07:00 +0200 Subject: [PATCH 05/14] net/mlx5: E-switch, Fix wrong usage of source port rewrite in split rules In few cases, rules with mirror use case are split to two FTEs, one which do the mirror action and forward to second FTE which do the rest of the rule actions and the second redirect action. In case of mirror rules which do split and forward to ovs internal port or VF stack devices, source port rewrite should be used in the second FTE but it is wrongly also set in the first FTE which break the offload. Fix this issue by removing the wrong check if source port rewrite is needed to be used on the first FTE of the split and instead return EOPNOTSUPP which will block offload of rules which mirror to ovs internal port or VF stack devices which isn't supported. Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading") Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d766a64b1823..22075943bb58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -723,11 +723,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < esw_attr->split_count; i++) { - if (esw_is_indir_table(esw, attr)) - err = esw_setup_indir_table(dest, &flow_act, esw, attr, false, &i); - else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) - err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr, - &i); + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + /* Source port rewrite (forward to ovs internal port or statck device) isn't + * supported in the rule of split action. + */ + err = -EOPNOTSUPP; else esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); From 28d3815a629cbdee660dd1c9de28d77cb3d77917 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 8 Feb 2023 11:37:41 +0200 Subject: [PATCH 06/14] net/mlx5: E-switch, Fix missing set of split_count when forward to ovs internal port Rules with mirror actions are split to two FTEs when the actions after the mirror action contains pedit, vlan push/pop or ct. Forward to ovs internal port adds implicit header rewrite (pedit) but missing trigger to do split. Fix by setting split_count when forwarding to ovs internal port which will trigger split in mirror rules. Fixes: 27484f7170ed ("net/mlx5e: Offload tc rules that redirect to ovs internal port") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 70b8d2dfa751..90944bf271ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4304,6 +4304,7 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, esw_attr->dest_int_port = dest_int_port; esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + esw_attr->split_count = out_index; /* Forward to root fdb for matching against the new source vport */ attr->dest_chain = 0; From c9668f0b1d28570327dbba189f2c61f6f9e43ae7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 12 Feb 2023 11:01:43 +0200 Subject: [PATCH 07/14] net/mlx5e: Fix cleanup null-ptr deref on encap lock During module is unloaded while a peer tc flow is still offloaded, first the peer uplink rep profile is changed to a nic profile, and so neigh encap lock is destroyed. Next during unload, the VF reps netdevs are unregistered which causes the original non-peer tc flow to be deleted, which deletes the peer flow. The peer flow deletion detaches the encap entry and try to take the already destroyed encap lock, causing the below trace. Fix this by clearing peer flows during tc eswitch cleanup (mlx5e_tc_esw_cleanup()). Relevant trace: [ 4316.837128] BUG: kernel NULL pointer dereference, address: 00000000000001d8 [ 4316.842239] RIP: 0010:__mutex_lock+0xb5/0xc40 [ 4316.851897] Call Trace: [ 4316.852481] [ 4316.857214] mlx5e_rep_neigh_entry_release+0x93/0x790 [mlx5_core] [ 4316.858258] mlx5e_rep_encap_entry_detach+0xa7/0xf0 [mlx5_core] [ 4316.859134] mlx5e_encap_dealloc+0xa3/0xf0 [mlx5_core] [ 4316.859867] clean_encap_dests.part.0+0x5c/0xe0 [mlx5_core] [ 4316.860605] mlx5e_tc_del_fdb_flow+0x32a/0x810 [mlx5_core] [ 4316.862609] __mlx5e_tc_del_fdb_peer_flow+0x1a2/0x250 [mlx5_core] [ 4316.863394] mlx5e_tc_del_flow+0x(/0x630 [mlx5_core] [ 4316.864090] mlx5e_flow_put+0x5f/0x100 [mlx5_core] [ 4316.864771] mlx5e_delete_flower+0x4de/0xa40 [mlx5_core] [ 4316.865486] tc_setup_cb_reoffload+0x20/0x80 [ 4316.865905] fl_reoffload+0x47c/0x510 [cls_flower] [ 4316.869181] tcf_block_playback_offloads+0x91/0x1d0 [ 4316.869649] tcf_block_unbind+0xe7/0x1b0 [ 4316.870049] tcf_block_offload_cmd.isra.0+0x1ee/0x270 [ 4316.879266] tcf_block_offload_unbind+0x61/0xa0 [ 4316.879711] __tcf_block_put+0xa4/0x310 Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows") Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG") Signed-off-by: Paul Blakey Reviewed-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 90944bf271ce..cc35cbc9934d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5464,6 +5464,16 @@ err_tun_mapping: void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) { + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + mlx5e_tc_clean_fdb_peer_flows(esw); + mlx5e_tc_tun_cleanup(uplink_priv->encap); mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); From dd64572490c3d7aab04083db8791fab157a941ed Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 24 Jan 2023 17:34:32 +0200 Subject: [PATCH 08/14] net/mlx5e: kTLS, Fix missing error unwind on unsupported cipher type Do proper error unwinding when adding an unsupported TX/RX cipher type. Move the switch case prior to key creation so there's less to unwind, and change the goto label name to describe the action performed instead of what failed. Fixes: 4960c414db35 ("net/mlx5e: Support 256 bit keys with kTLS device offload") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 24 ++++++++++--------- .../mellanox/mlx5/core/en_accel/ktls_tx.c | 22 +++++++++-------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 4be770443b0c..9b597cb24598 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -621,15 +621,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (unlikely(!priv_rx)) return -ENOMEM; - dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); - if (IS_ERR(dek)) { - err = PTR_ERR(dek); - goto err_create_key; - } - priv_rx->dek = dek; - - INIT_LIST_HEAD(&priv_rx->list); - spin_lock_init(&priv_rx->lock); switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: priv_rx->crypto_info.crypto_info_128 = @@ -642,9 +633,20 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, default: WARN_ONCE(1, "Unsupported cipher type %u\n", crypto_info->cipher_type); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_cipher_type; } + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_cipher_type; + } + priv_rx->dek = dek; + + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); + rxq = mlx5e_ktls_sk_get_rxq(sk); priv_rx->rxq = rxq; priv_rx->sk = sk; @@ -677,7 +679,7 @@ err_post_wqes: mlx5e_tir_destroy(&priv_rx->tir); err_create_tir: mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); -err_create_key: +err_cipher_type: kfree(priv_rx); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 60b3e08a1028..0e4c0a093293 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -469,14 +469,6 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, if (IS_ERR(priv_tx)) return PTR_ERR(priv_tx); - dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); - if (IS_ERR(dek)) { - err = PTR_ERR(dek); - goto err_create_key; - } - priv_tx->dek = dek; - - priv_tx->expected_seq = start_offload_tcp_sn; switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: priv_tx->crypto_info.crypto_info_128 = @@ -489,8 +481,18 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, default: WARN_ONCE(1, "Unsupported cipher type %u\n", crypto_info->cipher_type); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_pool_push; } + + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_pool_push; + } + + priv_tx->dek = dek; + priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx); mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); @@ -500,7 +502,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, return 0; -err_create_key: +err_pool_push: pool_push(pool, priv_tx); return err; } From 031a163f2c476adcb2c01e27a7d323e66174ac11 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 28 Feb 2023 10:36:19 +0200 Subject: [PATCH 09/14] net/mlx5: Set BREAK_FW_WAIT flag first when removing driver Currently, BREAK_FW_WAIT flag is set after syncing with fw_reset. However, fw_reset can call mlx5_load_one() which is waiting for fw init bit and BREAK_FW_WAIT flag is intended to stop. e.g.: the driver might wait on a loop it should exit. Fix it by setting the flag before syncing with fw_reset. Fixes: 8324a02c342a ("net/mlx5: Add exit route when waiting for FW") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f36a3aa4b5c8..f1de152a6113 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1789,11 +1789,11 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain * fw_reset before unregistering the devlink. */ mlx5_drain_fw_reset(dev); - set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_crdump_disable(dev); From 78dee7befd56987283c13877b834c0aa97ad51b9 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 23 Jan 2023 10:09:01 +0200 Subject: [PATCH 10/14] net/mlx5e: Lower maximum allowed MTU in XSK to match XDP prerequisites XSK redirecting XDP programs require linearity, hence applies restrictions on the MTU. For PAGE_SIZE=4K, MTU shouldn't exceed 3498. Features that contradict with XDP such HW-LRO and HW-GRO are enforced by the driver in advance, during XSK params validation, except for MTU, which was not enforced before this patch. This has been spotted during test scenario described below: Attaching xdpsock program (PAGE_SIZE=4K), with MTU < 3498, detaching XDP program, changing the MTU to arbitrary value in the range [3499, 3754], attaching XDP program again, which ended up with failure since MTU is > 3498. This commit lowers the XSK MTU limitation to be aligned with XDP MTU limitation, since XSK socket is meaningless without XDP program. Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 56fc2aebb9ee..a7f2ab22cc40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4169,13 +4169,17 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, struct xsk_buff_pool *xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); struct mlx5e_xsk_param xsk; + int max_xdp_mtu; if (!xsk_pool) continue; mlx5e_build_xsk_param(xsk_pool, &xsk); + max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); - if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + /* Validate XSK params and XDP MTU in advance */ + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || + new_params->sw_mtu > max_xdp_mtu) { u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); int max_mtu_frame, max_mtu_page, max_mtu; @@ -4185,9 +4189,9 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, */ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); - max_mtu = min(max_mtu_frame, max_mtu_page); + max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); - netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", new_params->sw_mtu, ix, max_mtu); return false; } From d1a0075ad6b693c2bd41e7aedb8a4f3b74b6999c Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 16 Feb 2023 12:34:21 +0000 Subject: [PATCH 11/14] net/sched: TC, fix raw counter initialization Freed counters may be reused by fs core. As such, raw counters may not be initialized to zero. Cache the counter values when the action stats object is initialized to have a proper base value for calculating the difference from the previous query. Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c index 626cb7470fa5..07c1895a2b23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -64,6 +64,7 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, { struct mlx5e_tc_act_stats *act_stats, *old_act_stats; struct rhashtable *ht = &handle->ht; + u64 lastused; int err = 0; act_stats = kvzalloc(sizeof(*act_stats), GFP_KERNEL); @@ -73,6 +74,10 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, act_stats->tc_act_cookie = act_cookie; act_stats->counter = counter; + mlx5_fc_query_cached_raw(counter, + &act_stats->lastbytes, + &act_stats->lastpackets, &lastused); + rcu_read_lock(); old_act_stats = rhashtable_lookup_get_insert_fast(ht, &act_stats->hash, From 1166add424dae14ccdb64a6eefbf26766c9d0ef2 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 21 Feb 2023 14:46:56 +0000 Subject: [PATCH 12/14] net/mlx5e: TC, fix missing error code Missing error code when mlx5e_tc_act_stats_create fails Fixes: d13674b1d14c ("net/mlx5e: TC, map tc action cookie to a hw counter") Reported-by: Dan Carpenter Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cc35cbc9934d..d2e191ee0704 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5305,8 +5305,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); tc->action_stats_handle = mlx5e_tc_act_stats_create(); - if (IS_ERR(tc->action_stats_handle)) + if (IS_ERR(tc->action_stats_handle)) { + err = PTR_ERR(tc->action_stats_handle); goto err_act_stats; + } return 0; @@ -5441,8 +5443,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) } uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); - if (IS_ERR(uplink_priv->action_stats_handle)) + if (IS_ERR(uplink_priv->action_stats_handle)) { + err = PTR_ERR(uplink_priv->action_stats_handle); goto err_action_counter; + } return 0; From b23bf10cca59b2955fa5c2b5e4f753962b4f88ca Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 21 Feb 2023 15:24:39 +0000 Subject: [PATCH 13/14] net/mlx5e: TC, fix cloned flow attribute Currently the cloned flow attr resets the original tc action cookies count. Fix that by resetting the cloned flow attribute. Fixes: cca7eac13856 ("net/mlx5e: TC, store tc action cookies per attr") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d2e191ee0704..6bfed633343a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3752,7 +3752,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, parse_attr->filter_dev = attr->parse_attr->filter_dev; attr2->action = 0; attr2->counter = NULL; - attr->tc_act_cookies_count = 0; + attr2->tc_act_cookies_count = 0; attr2->flags = 0; attr2->parse_attr = parse_attr; attr2->dest_chain = 0; From c7b7c64ab5821352db0b3fbaa92773e5a60bfaa7 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 22 Feb 2023 10:03:36 +0000 Subject: [PATCH 14/14] net/mlx5e: TC, Remove error message log print The cited commit attempts to update the hw stats when dumping tc actions. However, the driver may be called to update the stats of a police action that may not be in hardware. In such cases the driver will fail to lookup the police action object and will output an error message both to extack and dmesg. The dmesg error is confusing as it may not indicate an actual error. Remove the dmesg error. Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index c4378afdec09..1bd1c94fb977 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -178,7 +178,6 @@ tc_act_police_stats(struct mlx5e_priv *priv, meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); if (IS_ERR(meter)) { NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); - mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); return PTR_ERR(meter); }