From a08a61934cfad0506f8ed39d605ee7cd77c2381f Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 8 Jun 2021 15:44:07 +0300 Subject: [PATCH 1/8] mlxsw: spectrum_router: Remove abort mechanism The abort mechanism was introduced in commit 8e05fd7166c6 ("fib: hook IPv4 fib for hardware offload") with the purpose of falling back to software-based routing in case of a route programming error in hardware. The process is irreversible and requires users to reload the offloading driver or reboot the machine. While this approach might make sense in theory, it makes very little sense in practice. In the case of high-speed ASICs such as the Spectrum ASIC, the abort mechanism effectively kills the machine upon a non-fatal error such as a route programming error. Such an extreme policy does not belong in the kernel, especially when user space can simply try to reprogram the route following the RTM_NEWROUTE failure notification. Therefore, remove the abort mechanism. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S.
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 129 +----------------- .../ethernet/mellanox/mlxsw/spectrum_router.h | 1 - 2 files changed, 5 insertions(+), 125 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6decc5a43f98..bc47ed766878 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4312,9 +4312,6 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - if (mlxsw_sp->router->aborted) - return; - key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); if (!nh) @@ -6422,9 +6419,6 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node; int err; - if (mlxsw_sp->router->aborted) - return 0; - if (fen_info->fi->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id)) return 0; @@ -6485,9 +6479,6 @@ static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node; int err; - if (mlxsw_sp->router->aborted) - return 0; - fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); if (!fib4_entry) return 0; @@ -7070,9 +7061,6 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, struct fib6_info *rt = rt_arr[0]; int err; - if (mlxsw_sp->router->aborted) - return 0; - if (rt->fib6_src.plen) return -EINVAL; @@ -7136,9 +7124,6 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, struct fib6_info *rt = rt_arr[0]; int err; - if (mlxsw_sp->router->aborted) - return 0; - if (rt->fib6_src.plen) return -EINVAL; @@ -7180,9 +7165,6 @@ static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, struct fib6_info *rt = rt_arr[0]; int err; - if (mlxsw_sp->router->aborted) - return 0; - if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; @@ -7211,55 +7193,6 @@ static int mlxsw_sp_router_fib6_del(struct 
mlxsw_sp *mlxsw_sp, return err; } -static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_l3proto proto, - u8 tree_id) -{ - const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; - enum mlxsw_reg_ralxx_protocol ralxx_proto = - (enum mlxsw_reg_ralxx_protocol) proto; - struct mlxsw_sp_fib_entry_priv *priv; - char xralta_pl[MLXSW_REG_XRALTA_LEN]; - char xralst_pl[MLXSW_REG_XRALST_LEN]; - int i, err; - - mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id); - err = ll_ops->ralta_write(mlxsw_sp, xralta_pl); - if (err) - return err; - - mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id); - err = ll_ops->ralst_write(mlxsw_sp, xralst_pl); - if (err) - return err; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; - struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; - char xraltb_pl[MLXSW_REG_XRALTB_LEN]; - - mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); - mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id); - err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl); - if (err) - return err; - - priv = mlxsw_sp_fib_entry_priv_create(ll_ops); - if (IS_ERR(priv)) - return PTR_ERR(priv); - - ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE, - vr->id, 0, NULL, priv); - ll_ops->fib_entry_act_ip2me_pack(op_ctx); - err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL); - mlxsw_sp_fib_entry_priv_put(priv); - if (err) - return err; - } - - return 0; -} - static struct mlxsw_sp_mr_table * mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family) { @@ -7276,9 +7209,6 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -7293,9 +7223,6 @@ static void mlxsw_sp_router_fibmr_del(struct 
mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return; - vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); if (WARN_ON(!vr)) return; @@ -7313,9 +7240,6 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -7334,9 +7258,6 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return; - vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); if (WARN_ON(!vr)) return; @@ -7346,25 +7267,6 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) -{ - enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4; - int err; - - err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, - MLXSW_SP_LPM_TREE_MIN); - if (err) - return err; - - /* The multicast router code does not need an abort trap as by default, - * packets that don't match any routes are trapped to the CPU. - */ - - proto = MLXSW_SP_L3_PROTO_IPV6; - return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, - MLXSW_SP_LPM_TREE_MIN + 1); -} - static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -7451,20 +7353,6 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) mlxsw_sp->router->adj_discard_index_valid = false; } -static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - if (mlxsw_sp->router->aborted) - return; - dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. 
Note that FIB entries are no longer being offloaded to this device.\n"); - mlxsw_sp_router_fib_flush(mlxsw_sp); - mlxsw_sp->router->aborted = true; - err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); - if (err) - dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); -} - struct mlxsw_sp_fib6_event { struct fib6_info **rt_arr; unsigned int nrt6; @@ -7546,7 +7434,7 @@ static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info); if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp, &fib_event->fen_info); } @@ -7581,7 +7469,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, fib_event->fib6_event.nrt6); if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, fib6_event->rt_arr, fib6_event->nrt6); @@ -7593,7 +7481,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, fib_event->fib6_event.nrt6); if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n"); mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, fib6_event->rt_arr, fib6_event->nrt6); @@ -7625,7 +7513,7 @@ static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace); if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n"); mr_cache_put(fib_event->men_info.mfc); break; case FIB_EVENT_ENTRY_DEL: @@ -7636,7 +7524,7 @@ static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp, err = 
mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, &fib_event->ven_info); if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n"); dev_put(fib_event->ven_info.dev); break; case FIB_EVENT_VIF_DEL: @@ -7800,9 +7688,6 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, if (event == FIB_EVENT_RULE_DEL) return 0; - if (mlxsw_sp->router->aborted) - return 0; - fr_info = container_of(info, struct fib_rule_notifier_info, info); rule = fr_info->rule; @@ -7860,10 +7745,6 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: - if (router->aborted) { - NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); - return notifier_from_errno(-EINVAL); - } if (info->family == AF_INET) { struct fib_entry_notifier_info *fen_info = ptr; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index be7708a375e1..c5d7007f9173 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -58,7 +58,6 @@ struct mlxsw_sp_router { #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_neighs_list; struct list_head ipip_list; - bool aborted; struct notifier_block nexthop_nb; struct notifier_block fib_nb; struct notifier_block netevent_nb; From 00190c2b19eb33969befb68bbbc6d00edc11bda5 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 8 Jun 2021 15:44:08 +0300 Subject: [PATCH 2/8] selftests: router_scale: Do not count failed routes To check how many routes are installed in hardware, the test runs "ip route" and greps for "offload", which includes routes with state "offload_failed". Till now, this wrong check was not found because after one failure in route insertion, the driver moved to "abort" mode, which means that user cannot try to add more routes. 
The previous patch removed the abort mechanism, so failed routes are now counted as offloaded. Fix this by not counting routes with the "offload_failed" flag as offloaded. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/router_scale.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh index e93878d42596..683759d29199 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -68,7 +68,7 @@ wait_for_routes() local t0=$1; shift local route_count=$1; shift - local t1=$(ip route | grep -o 'offload' | wc -l) + local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l) local delta=$((t1 - t0)) echo $delta [[ $delta -ge $route_count ]] From e67dfb8d15deb33c425d0b0ee22f2e5eef54c162 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 8 Jun 2021 15:44:09 +0300 Subject: [PATCH 3/8] selftests: Clean forgotten resources as part of cleanup() Several tests do not set some ports down as part of their cleanup(), resulting in IPv6 link-local addresses and associated routes not being deleted. These leaks were found using a BPF tool that monitors ASIC resources. Solve this by setting the ports down at the end of the tests. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S.
Miller --- .../selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh | 3 +++ .../selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh | 3 +++ tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh | 2 ++ tools/testing/selftests/net/forwarding/pedit_dsfield.sh | 2 ++ tools/testing/selftests/net/forwarding/pedit_l4port.sh | 2 ++ tools/testing/selftests/net/forwarding/skbedit_priority.sh | 2 ++ 6 files changed, 14 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 4029833f7e27..160891dcb4bc 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -109,6 +109,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 42d44e27802c..190c1b6b5365 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -111,6 +111,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 5cbff8038f84..28a570006d4d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -93,7 +93,9 @@ switch_destroy() lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null lldpad_app_wait_del + ip link set dev $swp2 down ip link set dev $swp2 
nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index 55eeacf59241..64fbd211d907 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -75,7 +75,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh index 5f20d289ee43..10e594c55117 100755 --- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -71,7 +71,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index e3bd8a6bb8b4..bde11dc27873 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -72,7 +72,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } From 0521a262f043ea521790ed2976141086c75d2f74 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 8 Jun 2021 15:44:10 +0300 Subject: [PATCH 4/8] selftests: devlink_lib: Fix bouncing of netdevsim DEVLINK_DEV In the commit referenced below, a check was added to devlink_lib that asserts the existence of a devlink 
device referenced by $DEVLINK_DEV. Unfortunately, several netdevsim tests point DEVLINK_DEV at a device that does not exist at the time that devlink_lib is sourced. Thus these tests spuriously fail. Fix this by introducing an override. By setting DEVLINK_DEV to an empty string, the user declares their intention to handle DEVLINK_DEV management on their own. In all netdevsim tests that use devlink_lib and set DEVLINK_DEV, set instead an empty DEVLINK_DEV just before sourcing devlink_lib, and set it to the correct value right afterwards. Fixes: 557c4d2f780c ("selftests: devlink_lib: add check for devlink device existence") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/netdevsim/devlink_trap.sh | 4 +++- tools/testing/selftests/drivers/net/netdevsim/fib.sh | 6 ++++-- tools/testing/selftests/drivers/net/netdevsim/nexthop.sh | 4 +++- tools/testing/selftests/drivers/net/netdevsim/psample.sh | 4 +++- tools/testing/selftests/net/forwarding/devlink_lib.sh | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index da49ad2761b5..6165901a1cf3 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -24,13 +24,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ SLEEP_TIME=1 NETDEV="" NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} require_command udevadm diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index 251f228ce63e..fc794cd30389 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ 
b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -33,13 +33,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ NUM_NETIFS=0 source $lib_dir/lib.sh -source $lib_dir/devlink_lib.sh source $lib_dir/fib_offload_lib.sh +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + ipv4_identical_routes() { fib_ipv4_identical_routes_test "testns1" diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index ba75c81cda91..e8e0dc088d6a 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -44,12 +44,14 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} nexthop_check() { diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh index ee10b1a8933c..e689ff7a0b12 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -14,13 +14,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ CAPTURE_FILE=$(mktemp) NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} # Available at https://github.com/Mellanox/libpsample require_command psample diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh 
b/tools/testing/selftests/net/forwarding/devlink_lib.sh index c19e001f138b..39fb9b8e7b58 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -18,7 +18,7 @@ if [[ ! -v DEVLINK_DEV ]]; then DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ -n | cut -d" " -f3) -else +elif [[ ! -z "$DEVLINK_DEV" ]]; then devlink dev show $DEVLINK_DEV &> /dev/null if [ $? -ne 0 ]; then echo "SKIP: devlink device \"$DEVLINK_DEV\" not found" From 314dbb19f95b67456cb042e4a7a36b777a029bea Mon Sep 17 00:00:00 2001 From: Mykola Kostenok Date: Tue, 8 Jun 2021 15:44:11 +0300 Subject: [PATCH 5/8] mlxsw: reg: Extend MTMP register with new threshold field Extend Management Temperature (MTMP) register with new field specifying the maximum temperature threshold. Extend mlxsw_reg_mtmp_unpack() function with two extra arguments, providing high and maximum temperature thresholds. For modules, these thresholds correspond to critical and emergency thresholds that are read from the module's EEPROM. Signed-off-by: Mykola Kostenok Acked-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/core_env.c | 2 +- .../net/ethernet/mellanox/mlxsw/core_hwmon.c | 6 +++--- .../ethernet/mellanox/mlxsw/core_thermal.c | 6 +++--- drivers/net/ethernet/mellanox/mlxsw/reg.h | 20 ++++++++++++++++++- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index dd26865bd587..bcad1327d861 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -142,7 +142,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) return err; - mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL, NULL, NULL); if (!module_temp) { *temp = 0; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 2196c946698a..d41afdfbd085 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -72,7 +72,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); return sprintf(buf, "%d\n", temp); } @@ -95,7 +95,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL, NULL, NULL); return sprintf(buf, "%d\n", temp_max); } @@ -239,7 +239,7 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev, dev_err(dev, "Failed to query module temperature\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL); + 
mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index dfea14399607..cb1b68b6bf47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -281,7 +281,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, dev_err(dev, "Failed to query temp sensor\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, temp); @@ -442,7 +442,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, *p_temp = (int) temp; return 0; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); *p_temp = temp; if (!temp) @@ -560,7 +560,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, if (err) return err; - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index f9419cc53480..5304309ecb9d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -9463,6 +9463,14 @@ MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); ((s16)((GENMASK(15, 0) + (v_) + 1) \ * 125)); }) +/* reg_mtmp_max_operational_temperature + * The highest temperature in the nominal operational range. Reading is in + * 0.125 Celsius degrees units. + * In case of module this is SFF critical temperature threshold. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_operational_temperature, 0x04, 16, 16); + /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius * degrees units. @@ -9541,7 +9549,9 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, } static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, - int *p_max_temp, char *sensor_name) + int *p_max_temp, int *p_temp_hi, + int *p_max_oper_temp, + char *sensor_name) { s16 temp; @@ -9553,6 +9563,14 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, temp = mlxsw_reg_mtmp_max_temperature_get(payload); *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); } + if (p_temp_hi) { + temp = mlxsw_reg_mtmp_temperature_threshold_hi_get(payload); + *p_temp_hi = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_oper_temp) { + temp = mlxsw_reg_mtmp_max_operational_temperature_get(payload); + *p_max_oper_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } if (sensor_name) mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } From befc2048088aefbcd88b18225ba33231887137dc Mon Sep 17 00:00:00 2001 From: Mykola Kostenok Date: Tue, 8 Jun 2021 15:44:12 +0300 Subject: [PATCH 6/8] mlxsw: core_env: Read module temperature thresholds using MTMP register Currently, module temperature thresholds are obtained from Management Cable Info Access (MCIA) register by specifying the thresholds offsets within module EEPROM layout. This data does not pass validation and in some cases can be unreliable. For example, due to some problem with the module. Add support for a new feature provided by Management Temperature (MTMP) register for sanitization of temperature thresholds values. Extend mlxsw_env_module_temp_thresholds_get() to get temperature thresholds through MTMP field 'max_operational_temperature' - if it is not zero, feature is supported. Otherwise fallback to old method and get the thresholds through MCIA. 
Signed-off-by: Mykola Kostenok Acked-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_env.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index bcad1327d861..b3ca5bd33a7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -125,6 +125,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, int off, int *temp) { + unsigned int module_temp, module_crit, module_emerg; char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; union { u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE]; @@ -132,7 +133,6 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, } temp_thresh; char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int module_temp; bool qsfp, cmis; int page; int err; @@ -142,12 +142,21 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) return err; - mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, &module_crit, + &module_emerg, NULL); if (!module_temp) { *temp = 0; return 0; } + /* Validate if threshold reading is available through MTMP register, + * otherwise fallback to read through MCIA. + */ + if (module_emerg) { + *temp = off == SFP_TEMP_HIGH_WARN ? module_crit : module_emerg; + return 0; + } + /* Read Free Side Device Temperature Thresholds from page 03h * (MSB at lower byte address). 
* Bytes: From e57977b34ab5d52d73bc0b8b2ff941ac21d7166f Mon Sep 17 00:00:00 2001 From: Mykola Kostenok Date: Tue, 8 Jun 2021 15:44:13 +0300 Subject: [PATCH 7/8] mlxsw: thermal: Add function for reading module temperature and thresholds Provide new function mlxsw_thermal_module_temp_and_thresholds_get() for reading temperature and temperature thresholds by a single operation. The motivation is to reduce the number of transactions with the device which is important when operating over a slow bus such as I2C. Currently, the sole caller of the function is only using it to read the module's temperature. The next patch will also use it to query the module's temperature thresholds. Signed-off-by: Mykola Kostenok Acked-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/core_thermal.c | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index cb1b68b6bf47..0983e4d4f888 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -420,29 +420,49 @@ static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, return err; } +static void +mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, + u16 sensor_index, int *p_temp, + int *p_crit_temp, + int *p_emerg_temp) +{ + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int err; + + /* Read module temperature and thresholds. */ + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + /* Set temperature and thresholds to zero to avoid passing + * uninitialized data back to the caller. 
+ */ + *p_temp = 0; + *p_crit_temp = 0; + *p_emerg_temp = 0; + + return; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp, + NULL); +} + static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, int *p_temp) { struct mlxsw_thermal_module *tz = tzdev->devdata; struct mlxsw_thermal *thermal = tz->parent; - struct device *dev = thermal->bus_info->dev; - char mtmp_pl[MLXSW_REG_MTMP_LEN]; + struct device *dev; + u16 sensor_index; int temp; int err; - /* Read module temperature. */ - mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + - tz->module, false, false); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); - if (err) { - /* Do not return error - in case of broken module's sensor - * it will cause error message flooding. - */ - temp = 0; - *p_temp = (int) temp; - return 0; - } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); + dev = thermal->bus_info->dev; + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; + + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, + sensor_index, &temp, NULL, + NULL); *p_temp = temp; if (!temp) From 72a64c2fe9d8a08c9c57fc22adc1b44d13f97cac Mon Sep 17 00:00:00 2001 From: Mykola Kostenok Date: Tue, 8 Jun 2021 15:44:14 +0300 Subject: [PATCH 8/8] mlxsw: thermal: Read module temperature thresholds using MTMP register mlxsw_thermal_module_trips_update() is used to update the trip points of the module's thermal zone. Currently, this is done by querying the thresholds from the module's EEPROM via MCIA register. This data does not pass validation and in some cases can be unreliable. For example, due to some problem with transceiver module. Previous patch made it possible to read module's temperature and thresholds via MTMP register. Therefore, extend mlxsw_thermal_module_trips_update() to use the thresholds queried from MTMP, if valid. 
This is both more reliable and more efficient than the current method, as temperature and thresholds are queried in one transaction instead of three. This is significant when working over a slow bus such as I2C. Signed-off-by: Mykola Kostenok Acked-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/core_thermal.c | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 0983e4d4f888..b96fb88aac0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -149,22 +149,27 @@ mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) static int mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal_module *tz) + struct mlxsw_thermal_module *tz, + int crit_temp, int emerg_temp) { - int crit_temp, emerg_temp; int err; - err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_WARN, - &crit_temp); - if (err) - return err; + /* Do not try to query temperature thresholds directly from the module's + * EEPROM if we got valid thresholds from MTMP. 
+ */ + if (!emerg_temp || !crit_temp) { + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err; - err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_ALARM, - &emerg_temp); - if (err) - return err; + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + } if (crit_temp > emerg_temp) { dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", @@ -451,9 +456,9 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, { struct mlxsw_thermal_module *tz = tzdev->devdata; struct mlxsw_thermal *thermal = tz->parent; + int temp, crit_temp, emerg_temp; struct device *dev; u16 sensor_index; - int temp; int err; dev = thermal->bus_info->dev; @@ -461,15 +466,16 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, /* Read module temperature and thresholds. */ mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, - sensor_index, &temp, NULL, - NULL); + sensor_index, &temp, + &crit_temp, &emerg_temp); *p_temp = temp; if (!temp) return 0; /* Update trip points. */ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz, + crit_temp, emerg_temp); if (!err && temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); @@ -736,7 +742,10 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; + int crit_temp, emerg_temp; + u16 sensor_index; + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module; module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). 
*/ if (module_tz->parent) @@ -747,8 +756,12 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, sizeof(thermal->trips)); /* Initialize all trip point. */ mlxsw_thermal_module_trips_reset(module_tz); + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, NULL, + &crit_temp, &emerg_temp); /* Update trip point according to the module data. */ - return mlxsw_thermal_module_trips_update(dev, core, module_tz); + return mlxsw_thermal_module_trips_update(dev, core, module_tz, + crit_temp, emerg_temp); } static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)