From 075056005d8cceecbbb8e054d8e3cd2b7519d9c6 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 1 Jan 2023 00:20:53 +0200 Subject: [PATCH 01/15] net/mlx5: DR, Fix dumping of legacy modify_hdr in debug dump The steering dump parser expects to see 0 as rewrite num of actions in case pattern/args aren't supported - parsing of legacy modify header is based on this assumption. Fix this to align to parser's expectation. Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index 1ff8bde90e1e..ea9f27db4c74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -153,13 +153,15 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, rule_id, action->rewrite->index, action->rewrite->single_action_opt, - action->rewrite->num_of_actions, + ptrn_arg ? action->rewrite->num_of_actions : 0, ptrn_arg ? ptrn->index : 0, ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0); - for (i = 0; i < action->rewrite->num_of_actions; i++) { - seq_printf(file, ",0x%016llx", - be64_to_cpu(((__be64 *)rewrite_data)[i])); + if (ptrn_arg) { + for (i = 0; i < action->rewrite->num_of_actions; i++) { + seq_printf(file, ",0x%016llx", + be64_to_cpu(((__be64 *)rewrite_data)[i])); + } } seq_puts(file, "\n"); From 72b2cff68405e91ee5e772385b68eb4442bcbf43 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 8 Nov 2022 17:45:03 +0200 Subject: [PATCH 02/15] net/mlx5: DR, Calculate sync threshold of each pool according to its type When certain ICM chunk is no longer needed, it needs to be freed. Fully freeing ICM memory involves issuing FW SYNC_STEERING command. 
This is very time consuming, and it is impractical to do it for every freed chunk. Instead, we manage these 'freed' chunks in hot list (list of chunks that are not required by SW any more, but HW might still access them). When size of the hot list reaches certain threshold, we purge it and issue SYNC_STEERING FW command. There is one threshold for all the different ICM types, which is not optimal, as different ICM types require different approach: STEs pool is very large, and it is very 'dynamic' in its nature, so letting hot list to become too large will result in a significant perf hiccup when purging the hot list. Modify action is much smaller and less dynamic, so we can let the hot list to grow to almost the size of the whole pool. This patch fixes this problem: instead of having same hot memory threshold for all the pools, sync operation will be triggered in accordance with the ICM type. Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_icm_pool.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 04fc170a6c16..19e9b4d78454 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,7 +4,9 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_POOL_HOT_MEMORY_FRACTION 4 +#define DR_ICM_POOL_STE_HOT_MEM_PERCENT 25 +#define DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT 50 +#define DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT 90 struct mlx5dr_icm_hot_chunk { struct mlx5dr_icm_buddy_mem *buddy_mem; @@ -29,6 +31,8 @@ struct mlx5dr_icm_pool { struct mlx5dr_icm_hot_chunk *hot_chunks_arr; u32 hot_chunks_num; u64 hot_memory_size; + /* hot memory size threshold for triggering sync */ + u64 th; }; struct mlx5dr_icm_dm { @@ -330,15 +334,7 @@ 
dr_icm_chunk_init(struct mlx5dr_icm_chunk *chunk, static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - int allow_hot_size; - - /* sync when hot memory reaches a certain fraction of the pool size */ - allow_hot_size = - mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type) / - DR_ICM_POOL_HOT_MEMORY_FRACTION; - - return pool->hot_memory_size > allow_hot_size; + return pool->hot_memory_size > pool->th; } static void dr_icm_pool_clear_hot_chunks_arr(struct mlx5dr_icm_pool *pool) @@ -503,8 +499,9 @@ void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_h struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, enum mlx5dr_icm_type icm_type) { - u32 num_of_chunks, entry_size, max_hot_size; + u32 num_of_chunks, entry_size; struct mlx5dr_icm_pool *pool; + u32 max_hot_size = 0; pool = kvzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) @@ -520,12 +517,21 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, switch (icm_type) { case DR_ICM_TYPE_STE: pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_STE_HOT_MEM_PERCENT / 100; break; case DR_ICM_TYPE_MODIFY_ACTION: pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT / 100; break; case DR_ICM_TYPE_MODIFY_HDR_PTRN: pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT / 100; break; default: WARN_ON(icm_type); @@ -533,11 +539,8 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type); - max_hot_size = 
mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type) / - DR_ICM_POOL_HOT_MEMORY_FRACTION; - num_of_chunks = DIV_ROUND_UP(max_hot_size, entry_size) + 1; + pool->th = max_hot_size; pool->hot_chunks_arr = kvcalloc(num_of_chunks, sizeof(struct mlx5dr_icm_hot_chunk), From cedb6665bc331054a3477ac29ccac5e399c972da Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 9 Nov 2022 01:35:24 +0200 Subject: [PATCH 03/15] net/mlx5: DR, Add more info in domain dbg dump Add additional items to domain info dump: Linux version and device name. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index ea9f27db4c74..552c7857ca1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "dr_types.h" #define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) @@ -632,9 +633,15 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) u64 domain_id = DR_DBG_PTR_TO_ID(dmn); int ret; - seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN, + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d\n", + DR_DUMP_REC_TYPE_DOMAIN, domain_id, dmn->type, dmn->info.caps.gvmi, - dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev)); + dmn->info.supp_sw_steering, + /* package version */ + LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL, + pci_name(dmn->mdev->pdev), + 0); /* domain flags */ ret = dr_dump_domain_info(file, &dmn->info, domain_id); if (ret < 0) From 57295e069cd8f7ac79f03d732ac17afb9f790ba5 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 9 Nov 2022 01:39:32 +0200 Subject: 
[PATCH 04/15] net/mlx5: DR, Add memory statistics for domain object Add counters for number of buddies that are currently in use per domain per buddy type (STE, MODIFY-HEADER, MODIFY-PATTERN). Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 7 +++++-- .../ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c | 8 +++++++- .../net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 3 +++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index 552c7857ca1f..7e36e1062139 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -633,7 +633,7 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) u64 domain_id = DR_DBG_PTR_TO_ID(dmn); int ret; - seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d\n", + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n", DR_DUMP_REC_TYPE_DOMAIN, domain_id, dmn->type, dmn->info.caps.gvmi, dmn->info.supp_sw_steering, @@ -641,7 +641,10 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL, pci_name(dmn->mdev->pdev), - 0); /* domain flags */ + 0, /* domain flags */ + dmn->num_buddies[DR_ICM_TYPE_STE], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]); ret = dr_dump_domain_info(file, &dmn->info, domain_id); if (ret < 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 19e9b4d78454..0b5af9f3f605 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -288,6 +288,8 @@ 
static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) /* add it to the -start- of the list in order to search in it first */ list_add(&buddy->list_node, &pool->buddy_mem_list); + pool->dmn->num_buddies[pool->icm_type]++; + return 0; err_cleanup_buddy: @@ -301,13 +303,17 @@ free_mr: static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) { + enum mlx5dr_icm_type icm_type = buddy->pool->icm_type; + dr_icm_pool_mr_destroy(buddy->icm_mr); mlx5dr_buddy_cleanup(buddy); - if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + if (icm_type == DR_ICM_TYPE_STE) dr_icm_buddy_cleanup_ste_cache(buddy); + buddy->pool->dmn->num_buddies[icm_type]--; + kvfree(buddy); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 37b7b1a79f93..678a993ab053 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -72,6 +72,7 @@ enum mlx5dr_icm_type { DR_ICM_TYPE_STE, DR_ICM_TYPE_MODIFY_ACTION, DR_ICM_TYPE_MODIFY_HDR_PTRN, + DR_ICM_TYPE_MAX, }; static inline enum mlx5dr_icm_chunk_size @@ -955,6 +956,8 @@ struct mlx5dr_domain { struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; struct xarray definers_xa; + /* memory management statistics */ + u32 num_buddies[DR_ICM_TYPE_MAX]; }; struct mlx5dr_table_rx_tx { From e267b8a52ca5d5e8434929a5e9f5574aed141024 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 20 Mar 2023 19:43:27 +0200 Subject: [PATCH 05/15] Revert "net/mlx5: Expose steering dropped packets counter" This reverts commit 4fe1b3a5f8fe2fdcedcaba9561e5b0ae5cb1d15b, which exposes the steering dropped packets counter via debugfs. The upcoming series will expose the counter via devlink health reporter instead of debugfs. 
Signed-off-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/debugfs.c | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c index 3d0bbcca1cb9..2db13c71e88c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c @@ -12,11 +12,10 @@ enum vnic_diag_counter { MLX5_VNIC_DIAG_CQ_OVERRUN, MLX5_VNIC_DIAG_INVALID_COMMAND, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, - MLX5_VNIC_DIAG_RX_STEERING_DISCARD, }; static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, - u64 *val) + u32 *val) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; @@ -58,10 +57,6 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command); break; - case MLX5_VNIC_DIAG_RX_STEERING_DISCARD: - *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out, - nic_receive_steering_discard); - break; } return 0; @@ -70,14 +65,14 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, enum vnic_diag_counter type) { - u64 val = 0; + u32 val = 0; int ret; ret = mlx5_esw_query_vnic_diag(vport, type, &val); if (ret) return ret; - seq_printf(file, "%llu\n", val); + seq_printf(file, "%d\n", val); return 0; } @@ -117,11 +112,6 @@ static int quota_exceeded_command_show(struct seq_file *file, void *priv) return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); } -static int rx_steering_discard_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, 
MLX5_VNIC_DIAG_RX_STEERING_DISCARD); -} - DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); @@ -129,7 +119,6 @@ DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); DEFINE_SHOW_ATTRIBUTE(cq_overrun); DEFINE_SHOW_ATTRIBUTE(invalid_command); DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); -DEFINE_SHOW_ATTRIBUTE(rx_steering_discard); void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) { @@ -190,9 +179,4 @@ void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, &quota_exceeded_command_fops); - - if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard)) - debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport, - &rx_steering_discard_fops); - } From 0a431418f685e100c45ff150efaf4a5afa6f1982 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 20 Mar 2023 19:43:47 +0200 Subject: [PATCH 06/15] Revert "net/mlx5: Expose vnic diagnostic counters for eswitch managed vports" This reverts commit 606e6a72e29dff9e3341c4cc9b554420e4793f401 which exposes the vnic diagnostic counters via debugfs. Instead, the upcoming series will expose the same counters through devlink health reporter. 
Signed-off-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/esw/debugfs.c | 182 ------------------ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 6 - .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 - .../mellanox/mlx5/core/eswitch_offloads.c | 3 - 5 files changed, 1 insertion(+), 197 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index ca3c66cd47ec..68f6a4544f7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ ecpf.o rdma.o esw/legacy.o \ - esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o + esw/devlink_port.o esw/vporttbl.o esw/qos.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c deleted file mode 100644 index 2db13c71e88c..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c +++ /dev/null @@ -1,182 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ - -#include -#include "eswitch.h" - -enum vnic_diag_counter { - MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE, - MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW, - MLX5_VNIC_DIAG_COMP_EQ_OVERRUN, - MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN, - MLX5_VNIC_DIAG_CQ_OVERRUN, - MLX5_VNIC_DIAG_INVALID_COMMAND, - MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, -}; - -static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, - u32 *val) -{ - u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; - struct mlx5_core_dev *dev = vport->dev; - u16 vport_num = vport->vport; - void *vnic_diag_out; - int err; - - MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); - MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); - if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num)) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env); - switch (counter) { - case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues); - break; - case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, - send_queue_priority_update_flow); - break; - case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun); - break; - case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun); - break; - case MLX5_VNIC_DIAG_CQ_OVERRUN: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun); - break; - case MLX5_VNIC_DIAG_INVALID_COMMAND: - *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command); - break; - case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: - *val = MLX5_GET(vnic_diagnostic_statistics, 
vnic_diag_out, quota_exceeded_command); - break; - } - - return 0; -} - -static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, - enum vnic_diag_counter type) -{ - u32 val = 0; - int ret; - - ret = mlx5_esw_query_vnic_diag(vport, type, &val); - if (ret) - return ret; - - seq_printf(file, "%d\n", val); - return 0; -} - -static int total_q_under_processor_handle_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE); -} - -static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, - MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW); -} - -static int comp_eq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN); -} - -static int async_eq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN); -} - -static int cq_overrun_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN); -} - -static int invalid_command_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND); -} - -static int quota_exceeded_command_show(struct seq_file *file, void *priv) -{ - return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); -} - -DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); -DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); -DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); -DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); -DEFINE_SHOW_ATTRIBUTE(cq_overrun); -DEFINE_SHOW_ATTRIBUTE(invalid_command); -DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); - -void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) -{ - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - - 
debugfs_remove_recursive(vport->dbgfs); - vport->dbgfs = NULL; -} - -/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */ -#define VNIC_DIAG_DIR_NAME_MAX_LEN 8 - -void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num) -{ - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - struct dentry *vnic_diag; - char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN]; - int err; - - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) - return; - - if (vport_num == MLX5_VPORT_PF) { - strcpy(dir_name, "pf"); - } else if (vport_num == MLX5_VPORT_ECPF) { - strcpy(dir_name, "ecpf"); - } else { - err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf", - is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF); - if (WARN_ON(err < 0)) - return; - } - - vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs); - vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs); - - if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) { - debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport, - &total_q_under_processor_handle_fops); - debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport, - &send_queue_priority_update_flow_fops); - } - - if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) { - debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport, - &comp_eq_overrun_fops); - debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport, - &async_eq_overrun_fops); - } - - if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun)) - debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops); - - if (MLX5_CAP_GEN(esw->dev, invalid_command_count)) - debugfs_create_file("invalid_command", 0444, vnic_diag, vport, - &invalid_command_fops); - - if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) - debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, - &quota_exceeded_command_fops); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8bdf28762f41..8d63f5df7646 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "esw/qos.h" @@ -1056,7 +1055,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, if (err) return err; - mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0); err = esw_offloads_load_rep(esw, vport_num); if (err) goto err_rep; @@ -1064,7 +1062,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, return err; err_rep: - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); return err; } @@ -1072,7 +1069,6 @@ err_rep: void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) { esw_offloads_unload_rep(esw, vport_num); - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } @@ -1672,7 +1668,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); - esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev)); esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", esw->total_vports, @@ -1696,7 +1691,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); - debugfs_remove_recursive(esw->dbgfs); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index e9d68fdf68f5..f8e25ddc066a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -195,7 +195,6 @@ struct mlx5_vport { enum mlx5_eswitch_vport_event enabled_events; int index; struct devlink_port *dl_port; - struct dentry 
*dbgfs; }; struct mlx5_esw_indir_table; @@ -343,7 +342,6 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - struct dentry *dbgfs; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -704,9 +702,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num); -void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num); - int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 controller, u32 sfnum); void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b6e2709c1371..93ece46a0041 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3828,14 +3828,12 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p if (err) goto devlink_err; - mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum); err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) goto rep_err; return 0; rep_err: - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); devlink_err: mlx5_esw_vport_disable(esw, vport_num); @@ -3845,7 +3843,6 @@ devlink_err: void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) { mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } From 
b0bc615df488abd0e95107e4a9ecefb9bf8c250a Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 21 Mar 2023 00:10:16 +0200 Subject: [PATCH 07/15] net/mlx5: Add vnic devlink health reporter to PFs/VFs Create a vnic devlink health reporter for PFs/VFs interfaces. The reporter's diagnose callback displays the values of vNIC/vport transport debug counters of PFs/VFs, as follows: $ devlink health diagnose pci/0000:08:00.0 reporter vnic vNIC env counters: total_error_queues: 0 send_queue_priority_update_flow: 0 comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0 invalid_command: 0 quota_exceeded_command: 0 nic_receive_steering_discard: 0 Moreover, add documentation on the reporter functionality and the counters description. While at it, expose the vNIC counters diagnose function to be used by the downstream patch, which will reveal the counters for representor interfaces. Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/devlink.rst | 30 +++++ .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../mellanox/mlx5/core/diag/reporter_vnic.c | 125 ++++++++++++++++++ .../mellanox/mlx5/core/diag/reporter_vnic.h | 16 +++ .../net/ethernet/mellanox/mlx5/core/health.c | 4 + include/linux/mlx5/driver.h | 1 + 6 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 0995e4e5acd7..ceab18e46456 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -257,3 +257,33 @@ User commands examples: $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal NOTE: This command can run only on 
PF. + +vnic reporter +------------- +The vnic reporter implements only the `diagnose` callback. +It is responsible for querying the vnic diagnostic counters from fw and displaying +them in realtime. + +Description of the vnic counters: +total_error_queues: number of queues in an error state due to +an async error or errored command. +send_queue_priority_update_flow: number of QP/SQ priority/SL update +events. +cq_overrun: number of times CQ entered an error state due to an +overflow. +async_eq_overrun: number of times an EQ mapped to async events was +overrun. +comp_eq_overrun: number of times an EQ mapped to completion events was +overrun. +quota_exceeded_command: number of commands issued and failed due to quota +exceeded. +invalid_command: number of commands issued and failed due to any reason +other than quota exceeded. +nic_receive_steering_discard: number of packets that completed RX flow +steering but were discarded due to a mismatch in flow table. + +User commands examples: +- Diagnose PF/VF vnic counters + $ devlink health diagnose pci/0000:82:00.1 reporter vnic + +NOTE: This command can run only on PF/VF ports. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 68f6a4544f7e..ddf1e352f51d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ fw_reset.o qos.o lib/tout.o lib/aso.o # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c new file mode 100644 index 000000000000..9114661cd967 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#include "reporter_vnic.h" +#include "devlink.h" + +#define VNIC_ENV_GET64(vnic_env_stats, c) \ + MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5_vnic_diag_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_vnic_diag_stats vnic; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport); + + err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters"); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues", + VNIC_ENV_GET64(&vnic, total_error_queues)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow", + VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun", + VNIC_ENV_GET64(&vnic, comp_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun", + VNIC_ENV_GET64(&vnic, async_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun", + VNIC_ENV_GET64(&vnic, cq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command", + VNIC_ENV_GET64(&vnic, invalid_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command", + VNIC_ENV_GET64(&vnic, quota_exceeded_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, 
"nic_receive_steering_discard", + VNIC_ENV_GET64(&vnic, nic_receive_steering_discard)); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false); +} + +static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = { + .name = "vnic", + .diagnose = mlx5_reporter_vnic_diagnose, +}; + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->vnic_reporter = + devlink_health_reporter_create(devlink, + &mlx5_reporter_vnic_ops, + 0, dev); + if (IS_ERR(health->vnic_reporter)) + mlx5_core_warn(dev, + "Failed to create vnic reporter, err = %ld\n", + PTR_ERR(health->vnic_reporter)); +} + +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->vnic_reporter)) + devlink_health_reporter_destroy(health->vnic_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h new file mode 100644 index 000000000000..eba87a39e9b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
+ */ +#ifndef __MLX5_REPORTER_VNIC_H +#define __MLX5_REPORTER_VNIC_H + +#include "mlx5_core.h" + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev); +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev); + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport); + +#endif /* __MLX5_REPORTER_VNIC_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 016c5f99c470..871c32dda66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -42,6 +42,7 @@ #include "lib/pci_vsc.h" #include "lib/tout.h" #include "diag/fw_tracer.h" +#include "diag/reporter_vnic.h" enum { MAX_MISSES = 3, @@ -898,6 +899,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) cancel_delayed_work_sync(&health->update_fw_log_ts_work); destroy_workqueue(health->wq); + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); } @@ -907,6 +909,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) char *name; mlx5_fw_reporters_create(dev); + mlx5_reporter_vnic_create(dev); health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); @@ -926,6 +929,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return 0; out_err: + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); return -ENOMEM; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 135a3c8d8237..5d25c4c73046 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -439,6 +439,7 @@ struct mlx5_core_health { struct work_struct report_work; struct devlink_health_reporter *fw_reporter; struct devlink_health_reporter *fw_fatal_reporter; + struct devlink_health_reporter *vnic_reporter; struct delayed_work update_fw_log_ts_work; }; From cf14af140a5ad0937d385ce693100f33f02e9c54 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 21 Mar 2023 13:33:00 +0200 Subject: 
[PATCH 08/15] net/mlx5e: Add vnic devlink health reporter to representors Create a new devlink health reporter for representor interface, which reports the values of representor vnic diagnostic counters when diagnosed. This patch will allow admins to monitor VF diagnostic counters through the representor-interface vnic reporter. Example of usage: $ devlink health diagnose pci/0000:08:00.0/65537 reporter vnic vNIC env counters: total_error_queues: 0 send_queue_priority_update_flow: 0 comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0 invalid_command: 0 quota_exceeded_command: 0 nic_receive_steering_discard: 0 Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/devlink.rst | 5 +- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 52 ++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 1 + 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index ceab18e46456..3a7a714cc08f 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -285,5 +285,8 @@ steering but were discarded due to a mismatch in flow table. User commands examples: - Diagnose PF/VF vnic counters $ devlink health diagnose pci/0000:82:00.1 reporter vnic +- Diagnose representor vnic counters (performed by supplying devlink port of the + representor, which can be obtained via devlink port command) + $ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic -NOTE: This command can run only on PF/VF ports. +NOTE: This command can run over all interfaces such as PF/VF and representor ports. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8ff654b4e9e1..2d87068f63fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -53,6 +53,7 @@ #include "lib/vxlan.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" +#include "diag/reporter_vnic.h" #include "en_accel/ipsec.h" #include "en/tc/int_port.h" #include "en/ptp.h" @@ -1294,6 +1295,50 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); } +static int +mlx5e_rep_vnic_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = devlink_health_reporter_priv(reporter); + struct mlx5_eswitch_rep *rep = rpriv->rep; + + return mlx5_reporter_vnic_diagnose_counters(rep->esw->dev, fmsg, + rep->vport, true); +} + +static const struct devlink_health_reporter_ops mlx5_rep_vnic_reporter_ops = { + .name = "vnic", + .diagnose = mlx5e_rep_vnic_reporter_diagnose, +}; + +static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv, + struct devlink_port *dl_port) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct devlink_health_reporter *reporter; + + reporter = devl_port_health_reporter_create(dl_port, + &mlx5_rep_vnic_reporter_ops, + 0, rpriv); + if (IS_ERR(reporter)) { + mlx5_core_err(priv->mdev, + "Failed to create representor vnic reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + + rpriv->rep_vnic_reporter = reporter; +} + +static void mlx5e_rep_vnic_reporter_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!IS_ERR_OR_NULL(rpriv->rep_vnic_reporter)) + devl_health_reporter_destroy(rpriv->rep_vnic_reporter); +} + static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, @@ -1394,8 +1439,10 @@ 
mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); - if (dl_port) + if (dl_port) { SET_NETDEV_DEVLINK_PORT(netdev, dl_port); + mlx5e_rep_vnic_reporter_create(priv, dl_port); + } err = register_netdev(netdev); if (err) { @@ -1408,8 +1455,8 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; err_detach_netdev: + mlx5e_rep_vnic_reporter_destroy(priv); mlx5e_detach_netdev(netdev_priv(netdev)); - err_cleanup_profile: priv->profile->cleanup(priv); @@ -1458,6 +1505,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) } unregister_netdev(netdev); + mlx5e_rep_vnic_reporter_destroy(priv); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index dcfad0bf0f45..80b7f5079a5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -118,6 +118,7 @@ struct mlx5e_rep_priv { struct rtnl_link_stats64 prev_vf_vport_stats; struct mlx5_flow_handle *send_to_vport_meta_rule; struct rhashtable tc_ht; + struct devlink_health_reporter *rep_vnic_reporter; }; static inline From c8e9090233a70b8bbd8a73521a7a81856695732f Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 30 Mar 2023 19:37:00 +0300 Subject: [PATCH 09/15] net/mlx5e: RX, Fix releasing page_pool pages twice for striding RQ mlx5e_free_rx_descs is responsible for calling the dealloc_wqe op which returns pages to the page_pool. This can happen during flush or close. For XSK, the regular RQ is flushed (when replaced by the XSK RQ) and also closed later. This is normally not a problem as the wqe list is empty on a second call to mlx5e_free_rx_descs. 
However, for striding RQ, the previously released wqes from the list will appear as missing and will be released a second time by mlx5e_free_rx_missing_descs. This patch sets the no release bits on the striding RQ wqes in the dealloc_wqe op to prevent releasing the pages a second time. Please note that the bits are set only in the control path during close and not in the data path. Fixes: 4c2a13236807 ("net/mlx5e: RX, Defer page release in striding rq for better recycling") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a8c2ae389d6c..5dc907541094 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -861,6 +861,11 @@ static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); /* This function is called on rq/netdev close. */ mlx5e_free_rx_mpwqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called also + * for missing wqes on an already flushed RQ. + */ + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); } INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) From 40afb3b14496afb01d5b3d028444e09d29b95559 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 3 Apr 2023 20:03:11 +0300 Subject: [PATCH 10/15] net/mlx5e: RX, Fix XDP_TX page release for legacy rq nonlinear case When the XDP handler marks the data for transmission (XDP_TX), it is incorrect to release the page fragment. Instead, the fragments should be marked as MLX5E_WQE_FRAG_SKIP_RELEASE because XDP will release the page directly to the page_pool (page_pool_put_defragged_page) after TX completion. The linear case already does this. This patch fixes the nonlinear part as well. 
Also, the looping over the fragments was incorrect: When handling pages after XDP_TX in the legacy rq nonlinear case, the loop was skipping the first wqe fragment. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5dc907541094..69634829558e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1746,10 +1746,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - int i; + struct mlx5e_wqe_frag_info *pwi; - for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) - mlx5e_put_rx_frag(rq, &head_wi[i]); + for (pwi = head_wi; pwi < wi; pwi++) + pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return NULL; /* page/packet was consumed by XDP */ } From a880f814739c84fa11e17ac87c91fb711c185610 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 13 Apr 2023 17:14:05 +0300 Subject: [PATCH 11/15] net/mlx5e: RX, Hook NAPIs to page pools Link the NAPI to the rq page_pool to improve page_pool cache usage during skb recycling. Here are the observed improvements for an iperf single stream test case: - For 1500 MTU and legacy rq, seeing a 20% improvement of cache usage. - For 9K MTU, seeing 33-40% page_pool cache usage improvements for both striding and legacy rq (depending on whether the application is running on the same core as the rq or not). 
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7eb1eeb115ca..f5504b699fcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -857,6 +857,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pp_params.pool_size = pool_size; pp_params.nid = node; pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; pp_params.dma_dir = rq->buff.map_dir; pp_params.max_len = PAGE_SIZE; From 45e261b7b821139556d039b69c9101207c152a50 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 2 Apr 2023 15:17:09 +0300 Subject: [PATCH 12/15] net/mlx5: Include linux/pci.h for pci_msix_can_alloc_dyn() Add include directive to assure pci_msix_can_alloc_dyn() prototype. Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303291328.sNmTyyWF-lkp@intel.com/ Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index e12e528c09f5..2245d3b2f393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies. 
*/ +#include <linux/pci.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/mlx5/driver.h> From 8ca52ada6267fb67d31a5bf7d568a54d793c759e Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 21 Mar 2023 10:32:03 +0200 Subject: [PATCH 13/15] net/mlx5: E-Switch, Remove redundant dev arg from mlx5_esw_vport_alloc() The passed esw->dev is redundant, as esw is already being passed and esw->dev is used inside. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8d63f5df7646..3f25fa2893fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1506,7 +1506,7 @@ out_free: return err; } -static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev, +static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num) { struct mlx5_vport *vport; @@ -1560,7 +1560,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) xa_init(&esw->vports); - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); if (err) goto err; if (esw->first_host_vport == MLX5_VPORT_PF) @@ -1568,7 +1568,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; for (i = 0; i < mlx5_core_max_vfs(dev); i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, idx); + err = mlx5_esw_vport_alloc(esw, idx, idx); if (err) goto err; xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); @@ -1577,7 +1577,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) } base_sf_num = mlx5_sf_start_function_id(dev); for (i = 0; i < mlx5_sf_max_functions(dev); i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); if (err) goto err; xa_set_mark(&esw->vports, base_sf_num + i, 
MLX5_ESW_VPT_SF); @@ -1588,7 +1588,7 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) if (err) goto err; for (i = 0; i < max_host_pf_sfs; i++) { - err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); if (err) goto err; xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); @@ -1596,12 +1596,12 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) } if (mlx5_ecpf_vport_exists(dev)) { - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); if (err) goto err; idx++; } - err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK); + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK); if (err) goto err; return 0; From 38d9a740f68d8bbe92029cdd9eb6259e3974e52a Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 21 Mar 2023 15:40:02 +0200 Subject: [PATCH 14/15] net/mlx5: E-Switch, Remove unused mlx5_esw_offloads_vport_metadata_set() Remove unused function which also seems a duplicate of esw_port_metadata_set(). 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 - .../mellanox/mlx5/core/eswitch_offloads.c | 22 ------------------- 2 files changed, 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f8e25ddc066a..62f01d4600fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -354,7 +354,6 @@ mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule); bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 93ece46a0041..12c07a44aa4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2939,28 +2939,6 @@ metadata_err: return err; } -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) -{ - int err = 0; - - down_write(&esw->mode_lock); - if (mlx5_esw_is_fdb_created(esw)) { - err = -EBUSY; - goto done; - } - if (!mlx5_esw_vport_match_metadata_supported(esw)) { - err = -EOPNOTSUPP; - goto done; - } - if (enable) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - else - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; -done: - up_write(&esw->mode_lock); - return err; -} - int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) From f9c895a72a390656f9582e048fdcc3d2cec1dd7c Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 
22 Mar 2023 10:21:46 +0200 Subject: [PATCH 15/15] net/mlx5: Update op_mode to op_mod for port selection To be consistent with the other enum keys use OP_MOD instead of OP_MODE. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a95d1218def9..89a65779494e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -717,7 +717,7 @@ static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, MLX5_ST_SZ_BYTES(port_selection_cap)); MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); - err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION); + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION); return err; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 20d00e09b168..b42696d74c9f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -69,7 +69,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, - MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, + MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION = 0x25, }; enum {